def generate(self, parsers, database, is_test):
    """Generate training/test instances from talks and persist them.

    For every talk produced by every parser, the pitch and energy feature
    files are loaded, the talk is normalized, and the windows returned by
    ``SlidingWindow.list_windows`` are written both to a plain-text file
    and to the level db created from ``database``.

    Args:
        parsers: Iterable of parser objects. Each must provide ``parse()``
            (yielding talks), ``progress()`` (multiplied by 100 here to
            print a percentage) and ``get_file_name()``.
        database: Path string handed to ``LevelDBCreator``. The plain-text
            dump is written to ``database + "/../<kind>_instances.txt"``.
        is_test: If truthy the dump is named ``test_instances.txt``,
            otherwise ``train_instances.txt``.

    Returns:
        None. Output is produced via the file and level db side effects.
    """
    level_db = LevelDBCreator(database)
    window_slider = SlidingWindow()
    # Running tally of written instances; not read in the visible code.
    nr_instances = 0

    if is_test:
        instances_path = database + "/../test_instances.txt"
    else:
        instances_path = database + "/../train_instances.txt"

    # The context manager guarantees the dump is flushed and closed even
    # if parsing, feature loading or the level db write raises.
    with open(instances_path, "w") as plain_text_instances_file:
        for talk_parser in parsers:
            talks = talk_parser.parse()
            prev_progress = 0
            print("")
            print("Processing file %s ..." % talk_parser.get_file_name())

            for talk in talks:
                # Only print when the integer percentage has advanced.
                progress = int(talk_parser.progress() * 100)
                if progress > prev_progress:
                    sys.stdout.write(str(progress) + "% ")
                    sys.stdout.flush()
                    prev_progress = progress

                talk.build_interval_tree()

                # Feature files live next to the parsed file and share the
                # "<group_name>_talkid<talk_id>" stem.
                base_dir = os.path.dirname(talk_parser.get_file_name())
                feature_prefix = (base_dir + "/" + talk.group_name
                                  + "_talkid" + str(talk.talk_id))

                # get pitch feature values
                talk.parse_pitch_feature(feature_prefix + ".pitch")

                # get energy feature values
                talk.parse_energy_feature(feature_prefix + ".energy")

                # normalize features
                talk.normalize()

                # get the training instances
                training_instances = window_slider.list_windows(talk)

                for training_instance in training_instances:
                    nr_instances += 1

                    # write instance to file, followed by one blank line
                    # NOTE(review): `unicode` is a Python 2 builtin; this
                    # write path is intentionally left Python 2 compatible.
                    s = unicode(training_instance) + "\n\n"
                    plain_text_instances_file.write(s.encode('utf8'))

                    # write to level db
                    level_db.write_training_instance(training_instance)