def _split_to_tasks(self):
    """Split the scp file into pieces and queue one HERestTask per piece.

    The pieces are written into a fresh global temporary directory, which is
    remembered in ``self.scp_tmp_dir``. Tasks are numbered starting at 1.
    """
    self.scp_tmp_dir = System.get_global_temp_dir()

    # The splitter receives -1 when no speaker prefix length is configured.
    speaker_chars = self.num_speaker_chars
    if speaker_chars is None:
        speaker_chars = -1

    pieces = SCPFile(self.scp_file).split(
        self.max_num_tasks, self.scp_tmp_dir, speaker_chars)
    for task_id, piece in enumerate(pieces, 1):
        self.tasks.append(HERestTask(self, task_id, piece))
def flat_start(self):
    """Create the initial model file from the prototype HMM.

    Writes ``htk_file_strings.PROTO`` to a temporary ``proto`` file, runs
    HCompV on it with the training scp, and then writes
    ``<model name>.mmf`` consisting of:

    * everything in the processed prototype that precedes its ``~h`` line,
    * the contents of the ``vFloors`` file found in the temporary directory,
    * one copy of the prototype HMM definition per entry of
      ``<model name>.hmmlist``, with ``proto`` replaced by the entry.

    The temporary directory is removed afterwards.
    """
    tmp_dir = System.get_global_temp_dir()
    proto_file = os.path.join(tmp_dir, 'proto')
    v_floors = os.path.join(tmp_dir, 'vFloors')

    with open(proto_file, 'w') as proto_desc:
        print(htk_file_strings.PROTO, file=proto_desc)

    HCompV(self.htk_config, self.training_scp, proto_file).run()

    model_name = self._get_model_name_id()
    out_model = model_name + ".mmf"

    # Split the prototype at the first '~h' line: lines before it form the
    # shared header, the remainder is the per-phone HMM definition.
    header_lines = []
    hmm_lines = []
    in_model_def = True
    with open(proto_file) as proto_desc:
        for line in proto_desc:
            if line.startswith('~h'):
                in_model_def = False
            if in_model_def:
                header_lines.append(line)
            else:
                hmm_lines.append(line)
    model_def = "".join(header_lines)
    phone_def = "".join(hmm_lines)

    # Write model file
    with open(out_model, 'w') as model_desc:
        print(model_def, file=model_desc)
        with open(v_floors) as floors_desc:
            for line in floors_desc:
                print(line.strip(), file=model_desc)

        # Write the hmmdefs (replacing for each monophone, proto with the
        # monophone)
        with open(model_name + '.hmmlist') as list_desc:
            for line in list_desc:
                print(phone_def.replace('proto', line.rstrip()),
                      file=model_desc)

    shutil.rmtree(tmp_dir)
def transform_to_triphone(self):
    """Convert the current model and phone transcription to triphones.

    Steps, in order: write the ``MKTRI`` edit script, advance ``self.id``,
    run HLEd to produce ``tri.mlf`` and the new ``.hmmlist``, post-process
    both with ``_remove_triphone_sil``, generate ``tri.hed`` and run HHEd
    from the previous model to the new one. ``self.training_phone_mlf`` is
    switched to ``tri.mlf``. The temporary directory is removed at the end.
    """
    tmp_dir = System.get_global_temp_dir()

    mktri = os.path.join(tmp_dir, 'mktri.led')
    with open(mktri, 'w') as mktri_desc:
        print(htk_file_strings.MKTRI, file=mktri_desc)

    source_mlf = self.training_phone_mlf
    self.training_phone_mlf = os.path.join(self.train_files_dir, 'tri.mlf')
    self.id += 1

    # NOTE(review): assumes _get_model_name_id only builds a name from
    # self.id, so it is safe to call once per offset - confirm.
    current = self._get_model_name_id()
    previous = self._get_model_name_id(1)
    current_list = current + '.hmmlist'
    previous_list = previous + '.hmmlist'

    led_job = HLEd(self.htk_config, source_mlf, mktri, current_list,
                   self.training_dict, self.training_phone_mlf)
    led_job.run()

    self._remove_triphone_sil(current_list, True)
    self._remove_triphone_sil(self.training_phone_mlf)

    tri_hed = os.path.join(tmp_dir, 'tri.hed')
    self._make_tri_hed(current_list, previous_list, tri_hed)

    hed_job = HHEd(self.htk_config, previous + '.mmf', current + '.mmf',
                   previous_list, script=tri_hed)
    hed_job.run()

    shutil.rmtree(tmp_dir)
def _split_to_tasks(self):
    """Split the scp file into pieces and queue one HERestTask per piece.

    A new global temporary directory (stored in ``self.scp_tmp_dir``)
    receives the pieces. Task numbers start at 1.
    """
    self.scp_tmp_dir = System.get_global_temp_dir()

    # -1 is handed to the splitter when no speaker prefix length is set.
    if self.num_speaker_chars is None:
        speaker_chars = -1
    else:
        speaker_chars = self.num_speaker_chars

    splitter = SCPFile(self.scp_file)
    pieces = splitter.split(self.max_num_tasks, self.scp_tmp_dir,
                            speaker_chars)
    for number, piece in enumerate(pieces, 1):
        self.tasks.append(HERestTask(self, number, piece))
def _split_to_tasks(self):
    """Split the scp file into pieces and queue one HViteTask per piece.

    Pieces are written to a new global temporary directory kept in
    ``self.tmp_dir``. Each task writes to ``<piece>.mlf``; tasks are
    numbered starting at 1.
    """
    self.tmp_dir = System.get_global_temp_dir()

    splitter = SCPFile(self.scp_file)
    scp_files = splitter.split(self.max_num_tasks, self.tmp_dir, -1)
    mlf_files = [piece + '.mlf' for piece in scp_files]

    numbered = enumerate(izip(scp_files, mlf_files), 1)
    for task_id, (piece, output_mlf) in numbered:
        self.tasks.append(HViteTask(self, task_id, piece, output_mlf))
def _split_to_tasks(self):
    """Split the scp file into pieces and queue one HViteTask per piece.

    The pieces land in a fresh global temporary directory (``self.tmp_dir``).
    The output MLF of every task is the piece name plus ``.mlf``, and task
    numbers start at 1.
    """
    self.tmp_dir = System.get_global_temp_dir()

    scp_files = SCPFile(self.scp_file).split(
        self.max_num_tasks, self.tmp_dir, -1)
    mlf_files = [name + '.mlf' for name in scp_files]

    pairs = izip(scp_files, mlf_files)
    for number, (scp_piece, mlf_piece) in enumerate(pairs, 1):
        task = HViteTask(self, number, scp_piece, mlf_piece)
        self.tasks.append(task)
def introduce_short_pause_model(self):
    """Add an ``sp`` model built from state 3 of ``sil`` and re-expand labels.

    Two model generations are produced (``self.id`` is incremented twice):

    1. The previous ``.hmmlist`` plus ``sp`` (sorted, de-duplicated) and a
       copy of the previous ``.mmf`` with a three-state ``sp`` HMM appended.
       The ``sp`` emitting state is the body of ``<STATE> 3`` of ``sil`` and
       its transitions come from ``htk_file_strings.TRANSP3``.
    2. The result of running HHEd with ``htk_file_strings.SIL_HED`` on
       generation 1.

    Finally the word transcription is expanded again with
    ``expand_word_transcription(True)``.
    """
    self.id += 1

    with open(self._get_model_name_id(1) + '.hmmlist') as phone_in:
        phones = set(p.strip() for p in phone_in)
    phones.add('sp')

    with open(self._get_model_name_id() + '.hmmlist', 'w') as phone_out:
        for p in sorted(phones):
            print(p, file=phone_out)

    # copy sil state 3 to sp
    in_sil = False
    in_state3 = False
    state_lines = []

    with open(self._get_model_name_id() + '.mmf', 'w') as model_desc:
        with open(self._get_model_name_id(1) + '.mmf') as model_in:
            for line in model_in:
                # Every input line is copied through unchanged.
                print(line, file=model_desc)

                if line.startswith('~h'):
                    in_sil = line.startswith('~h "sil"')
                elif line.startswith('<STATE>'):
                    in_state3 = line.startswith('<STATE> 3')
                elif in_sil and in_state3:
                    # Body line of sil's state 3 (the <STATE> line itself
                    # is not collected).
                    state_lines.append(line)

        print("~h \"sp\" <BEGINHMM> <NUMSTATES> 3", file=model_desc)
        print("<STATE> 2", file=model_desc)
        print("".join(state_lines), file=model_desc)
        print(htk_file_strings.TRANSP3, file=model_desc)

    self.id += 1
    shutil.copyfile(self._get_model_name_id(1) + '.hmmlist',
                    self._get_model_name_id() + '.hmmlist')

    tmp_dir = System.get_global_temp_dir()
    sil_hed = os.path.join(tmp_dir, 'sil.hed')
    with open(sil_hed, 'w') as sil_desc:
        print(htk_file_strings.SIL_HED, file=sil_desc)

    HHEd(self.htk_config,
         self._get_model_name_id(1) + '.mmf',
         self._get_model_name_id() + '.mmf',
         self._get_model_name_id() + '.hmmlist',
         script=sil_hed).run()

    shutil.rmtree(tmp_dir)

    self.expand_word_transcription(True)
def recognize(self, lm_scale, sub_name=None):
    """Run HDecode and write ``<name>.<sub_name>.mlf``.

    Args:
        lm_scale: forwarded to HDecode as ``lm_scale``.
        sub_name: label used in the output file names; defaults to
            ``str(self.id)``.

    When ``self.scp`` is None, one HDecode job is created per entry of
    ``self.split_scp_models``; the jobs run as a CollectionJob and their
    per-speaker MLF files are combined into one. Otherwise a single HDecode
    job is run on ``self.scp`` with ``self.model``. If adaptations exist,
    the latest one and the classes directory are passed as ``adapt_dirs``.
    """
    tmp_dir = System.get_global_temp_dir()

    if len(self.adaptations) > 0:
        in_transform = [self.adaptations[-1], (self.classes_dir, None)]
    else:
        in_transform = None

    if sub_name is None:
        sub_name = str(self.id)
    output_base = self.name + '.' + sub_name

    def make_decoder(scp, model, output_mlf):
        # All decoders share the dictionary, language model and adaptation
        # settings; only the scp, the model and the output file differ.
        return HDecode(self.htk_config, scp, model + '.mmf', self.dict,
                       model + '.hmmlist', self.language_model, output_mlf,
                       lm_scale=lm_scale,
                       adapt_dirs=in_transform,
                       adapt_speaker_chars=self.adap_num_speaker_chars)

    if self.scp is not None:
        make_decoder(self.scp, self.model, output_base + '.mlf').run()
    else:
        jobs = [
            make_decoder(scp, model, output_base + '.' + speaker + '.mlf')
            for speaker, scp, model in self.split_scp_models
        ]
        CollectionJob(jobs).run()
        HTK_recognizer._combine_output_files(output_base + '.*.mlf',
                                             output_base + '.mlf')

    # Conversion of the MLF to a .trn file is currently disabled:
    # trans = HTK_transcription()
    # trans.read_mlf(self.name+'.'+sub_name+'.mlf',target=HTK_transcription.WORD)
    # trans.write_trn(self.name+'.'+sub_name+'.trn')

    shutil.rmtree(tmp_dir, ignore_errors=True)
def split_mixtures_variably(self, power, num_iterations):
    """Create the next model generation by running HHEd with LS/PS commands.

    Args:
        power: written as the ``%f`` argument of the ``PS 16`` command.
        num_iterations: written as the ``%d`` argument of the ``PS 16``
            command.

    The edit script loads the previous generation's ``.stats`` file. The
    previous ``.hmmlist`` is copied to the new generation before HHEd runs,
    and the temporary directory is removed afterwards.
    """
    self.id += 1

    tmp_dir = System.get_global_temp_dir()
    hed_file = os.path.join(tmp_dir, 'mix.hed')
    with open(hed_file, 'w') as hed:
        stats_file = self._get_model_name_id(1) + '.stats'
        print('LS "%s"' % (stats_file), file=hed)
        print("PS 16 %f %d" % (power, num_iterations), file=hed)

    source_list = self._get_model_name_id(1) + '.hmmlist'
    target_list = self._get_model_name_id() + '.hmmlist'
    shutil.copyfile(source_list, target_list)

    source_model = self._get_model_name_id(1) + '.mmf'
    target_model = self._get_model_name_id(0) + '.mmf'
    editor = HHEd(self.htk_config, source_model, target_model,
                  self._get_model_name_id() + '.hmmlist', script=hed_file)
    editor.run()

    shutil.rmtree(tmp_dir)
def transfer_files_local(self):
    """Copy the training files to a local temporary directory.

    The first call remembers the current ``self.training_scp`` in
    ``self.training_scp_orig``. A new scp file ``training_scp_local.scp``
    is written in the local temporary directory; it lists, for each entry
    of the original scp, a file of the same base name inside that
    directory. ``self.training_scp`` is pointed at the new scp. The files
    themselves are handed to ``Copier(tmp_dir)`` through a process pool
    (presumably one copy per file - see Copier).
    """
    if not hasattr(self, 'training_scp_orig'):
        self.training_scp_orig = self.training_scp

    tmp_dir = System.get_local_temp_dir()
    self.training_scp = os.path.join(tmp_dir, 'training_scp_local.scp')

    with open(self.training_scp_orig) as orig_desc:
        files = [entry.strip() for entry in orig_desc]

    with open(self.training_scp, 'w') as scp_desc:
        for path in files:
            print(os.path.join(tmp_dir, os.path.basename(path)),
                  file=scp_desc)

    pool = Pool()
    try:
        pool.map(Copier(tmp_dir), files)
    finally:
        # Shut the workers down even when a copy fails.
        pool.close()
        pool.join()
def expand_word_transcription(self, use_sp=False):
    """Run HLEd to turn the word MLF into a phone-level MLF.

    Args:
        use_sp: when true the output is ``phone1.mlf``; otherwise the
            output is ``phone0.mlf`` and a ``DE sp`` command is appended to
            the edit script.

    ``self.training_phone_mlf`` is set to the chosen output file. The
    temporary directory holding the edit script is removed afterwards.
    """
    tmp_dir = System.get_global_temp_dir()
    mkmono = os.path.join(tmp_dir, 'mkmono.led')

    with open(mkmono, 'w') as mkmono_desc:
        print(htk_file_strings.MKMONO, file=mkmono_desc)
        if not use_sp:
            print("DE sp", file=mkmono_desc)

    mlf_name = 'phone1.mlf' if use_sp else 'phone0.mlf'
    self.training_phone_mlf = os.path.join(self.train_files_dir, mlf_name)

    job = HLEd(self.htk_config, self.training_word_mlf, mkmono,
               self._get_model_name_id() + '.hmmlist', self.training_dict,
               self.training_phone_mlf)
    job.run()

    shutil.rmtree(tmp_dir)
def split_mixtures(self, num_mixes):
    """Create the next model generation by running HHEd with MU commands.

    Args:
        num_mixes: value written into the ``MU`` command for all models;
            the ``sil`` command gets twice this value.

    The previous ``.hmmlist`` is copied to the new generation before HHEd
    runs, and the temporary directory is removed afterwards.
    """
    self.id += 1

    tmp_dir = System.get_global_temp_dir()
    hed_file = os.path.join(tmp_dir, 'mix.hed')
    with open(hed_file, 'w') as hed:
        all_models = "MU {0:d} {{*.state[2-4].stream[1].mix}}".format(
            num_mixes)
        print(all_models, file=hed)
        silence = "MU {0:d} {{sil.state[2-4].stream[1].mix}}".format(
            2 * num_mixes)
        print(silence, file=hed)

    source_list = self._get_model_name_id(1) + '.hmmlist'
    target_list = self._get_model_name_id() + '.hmmlist'
    shutil.copyfile(source_list, target_list)

    source_model = self._get_model_name_id(1) + '.mmf'
    target_model = self._get_model_name_id(0) + '.mmf'
    editor = HHEd(self.htk_config, source_model, target_model,
                  self._get_model_name_id() + '.hmmlist', script=hed_file)
    editor.run()

    shutil.rmtree(tmp_dir)
def align_transcription(self):
    """Run HVite with the current model to write a new aligned phone MLF.

    The output is the first ``phone_aligned_<i>.mlf`` (i = 1, 2, ...) that
    does not yet exist in the training files directory, and
    ``self.training_phone_mlf`` is set to it. HVite gets a temporary config
    file containing ``htk_file_strings.HVITE_CONFIG``; the temporary
    directory is removed afterwards.
    """
    def aligned_mlf(index):
        # Path of the index-th aligned MLF in the training files directory.
        return os.path.join(self.train_files_dir,
                            'phone_aligned_{0:d}.mlf'.format(index))

    index = 1
    while os.path.exists(aligned_mlf(index)):
        index += 1

    tmp_dir = System.get_global_temp_dir()
    tmp_config = os.path.join(tmp_dir, 'hvite_config')
    with open(tmp_config, 'w') as tmp_desc:
        print(htk_file_strings.HVITE_CONFIG, file=tmp_desc)

    self.training_phone_mlf = aligned_mlf(index)

    aligner = HVite(self.htk_config, self.training_scp,
                    self._get_model_name_id() + '.mmf', self.training_dict,
                    self._get_model_name_id() + '.hmmlist',
                    self.training_phone_mlf, self.training_word_mlf,
                    config_file=tmp_config)
    aligner.run()

    shutil.rmtree(tmp_dir)
def tie_triphones(self):
    """Create the next model generation by running HHEd with a tree script.

    A full model list and a ``tree.hed`` script are generated in a
    temporary directory from the ``.hmmlist`` five generations back, the
    tying settings in ``self.htk_config`` and the previous generation's
    ``.stats`` file. HHEd then edits the previous model into the new one.
    The temporary directory is removed afterwards.
    """
    self.id += 1
    tmp_dir = System.get_global_temp_dir()

    full_list = os.path.join(tmp_dir, 'full_list')
    self._make_full_list(self._get_model_name_id(5) + '.hmmlist', full_list)

    tree_hed = os.path.join(tmp_dir, 'tree.hed')
    config = self.htk_config
    self._make_tree_hed(
        config.tying_rules,
        self._get_model_name_id(5) + '.hmmlist',
        tree_hed,
        config.tying_threshold,
        config.required_occupation,
        self._get_model_name_id(1) + '.stats',
        full_list,
        self._get_model_name_id() + '.hmmlist',
        os.path.join(tmp_dir, 'trees'))

    source_model = self._get_model_name_id(1) + '.mmf'
    target_model = self._get_model_name_id(0) + '.mmf'
    source_list = self._get_model_name_id(1) + '.hmmlist'
    editor = HHEd(self.htk_config, source_model, target_model, source_list,
                  script=tree_hed)
    editor.run()

    shutil.rmtree(tmp_dir)
def recognize(self, lm_scale, sub_name=None):
    """Run HDecode and write ``<name>.<sub_name>.mlf``.

    Args:
        lm_scale: forwarded to HDecode as ``lm_scale``.
        sub_name: label used in the output file names; defaults to
            ``str(self.id)``.

    With ``self.scp`` set, a single HDecode job decodes it using
    ``self.model``. With ``self.scp`` equal to None, every entry of
    ``self.split_scp_models`` gets its own job and output file; the jobs
    run as a CollectionJob and the outputs are then combined. When
    adaptations exist, the latest one plus the classes directory are passed
    as ``adapt_dirs``.
    """
    tmp_dir = System.get_global_temp_dir()

    if len(self.adaptations) > 0:
        in_transform = [self.adaptations[-1], (self.classes_dir, None)]
    else:
        in_transform = None

    if sub_name is None:
        sub_name = str(self.id)
    result_base = self.name + '.' + sub_name

    def build_job(scp, model, output_mlf):
        # Only the scp, the model and the output file vary between jobs.
        return HDecode(self.htk_config, scp, model + '.mmf', self.dict,
                       model + '.hmmlist', self.language_model, output_mlf,
                       lm_scale=lm_scale,
                       adapt_dirs=in_transform,
                       adapt_speaker_chars=self.adap_num_speaker_chars)

    if self.scp is not None:
        build_job(self.scp, self.model, result_base + '.mlf').run()
    else:
        speaker_jobs = []
        for speaker, scp, model in self.split_scp_models:
            speaker_mlf = result_base + '.' + speaker + '.mlf'
            speaker_jobs.append(build_job(scp, model, speaker_mlf))
        CollectionJob(speaker_jobs).run()
        HTK_recognizer._combine_output_files(result_base + '.*.mlf',
                                             result_base + '.mlf')

    # Conversion of the MLF to a .trn file is currently disabled:
    # trans = HTK_transcription()
    # trans.read_mlf(self.name+'.'+sub_name+'.mlf',target=HTK_transcription.WORD)
    # trans.write_trn(self.name+'.'+sub_name+'.trn')

    shutil.rmtree(tmp_dir, ignore_errors=True)
def __init__(self, htk_config, scp_file, hmm_model, hmm_list, input_mlf,
             config_file=None, input_adaptation=None, parent_adaptation=None,
             output_adaptation=None, output_hmm_model=None, pruning=None,
             prune_threshold=None, num_speaker_chars=None, min_examples=None,
             mix_weight_floor=None, max_adap_sentences=None, stats=None):
    """Assemble the HERest base command line and store the job settings.

    Args:
        htk_config: configuration object supplying ``get_flags``,
            ``turn_to_config`` and default values.
        scp_file: scp file, stored for later use.
        hmm_model: input model file (``-H``).
        hmm_list: model list, appended as the positional argument.
        input_mlf: label file (``-I``).
        config_file: passed to ``htk_config.get_flags``.
        input_adaptation: iterable of ``(directory, extension)`` pairs
            added as ``-J`` flags; ``extension`` may be None.
        parent_adaptation: iterable of ``(directory, extension)`` pairs
            added as ``-E`` flags, followed by a single ``-a``.
        output_adaptation: ``(directory, extension)`` pair; adds ``-u a``
            and ``-K``.
        output_hmm_model: when given, ``-M`` is pointed at the
            accumulator temporary directory.
        pruning: a float or an iterable of floats for ``-t``; defaults to
            ``htk_config.pruning``.
        prune_threshold: ``-c`` value (default from ``htk_config``).
        num_speaker_chars: speaker prefix length for the ``-h`` mask;
            defaults to ``htk_config.num_speaker_chars``.
        min_examples: ``-m`` value (default from ``htk_config``).
        mix_weight_floor: ``-w`` value (default from ``htk_config``).
        max_adap_sentences: ``-l`` value.
        stats: stored for later use.

    Raises:
        TypeError: if ``pruning`` is neither a float nor an iterable of
            floats.
    """
    super(HERest, self).__init__()

    def transform_flags(flag, transform_dirs):
        # Expand (directory, extension) pairs into "flag directory [ext]".
        flags = []
        for source_dir, extension in transform_dirs:
            flags.extend([flag, source_dir])
            if extension is not None:
                flags.append(extension)
        return flags

    base_command = ["HERest"]
    base_command.extend(htk_config.get_flags(config_file))

    # task specific flags
    #base_command.extend(htk_config.turn_to_config('-S','{scp_list}'))

    self.acc_tmp_dir = System.get_global_temp_dir()

    # Output dependent flags
    if output_hmm_model is not None:
        base_command.extend(
            htk_config.turn_to_config('-M', self.acc_tmp_dir))
        #base_command.extend(htk_config.turn_to_config('-M', os.path.dirname(output_hmm_model)))

    if output_adaptation is not None:
        base_command.extend(['-u', 'a'])
        target_dir, extension = output_adaptation
        base_command.extend(['-K', target_dir])
        if extension is not None:
            base_command.append(extension)

    if num_speaker_chars is None:
        num_speaker_chars = htk_config.num_speaker_chars

    # Adaptation flags
    if input_adaptation is not None:
        base_command.extend(transform_flags('-J', input_adaptation))

    if parent_adaptation is not None:
        base_command.extend(transform_flags('-E', parent_adaptation))
        base_command.append('-a')

    if num_speaker_chars > 0:
        pattern = "*/" + ('%' * num_speaker_chars) + "*.*"
        base_command.extend(['-h', pattern])

    # pruning flag
    if pruning is None:
        pruning = htk_config.pruning

    if isinstance(pruning, float):
        # Stringified like the multi-value case so the command list holds
        # only strings.
        base_command.extend(['-t', str(pruning)])
    elif all(isinstance(p, float) for p in pruning):
        base_command.extend(['-t'] + [str(p) for p in pruning])
    else:
        raise TypeError("pruning must be a float or an iterable of floats")

    # other flags
    base_command.extend(htk_config.turn_to_config('-I', input_mlf))
    base_command.extend(htk_config.turn_to_config('-H', hmm_model))
    base_command.extend(
        htk_config.turn_to_config('-w', mix_weight_floor, type=float,
                                  default=htk_config.mix_weight_floor))
    base_command.extend(
        htk_config.turn_to_config('-c', prune_threshold, type=float,
                                  default=htk_config.prune_threshold))
    base_command.extend(
        htk_config.turn_to_config('-m', min_examples, type=int,
                                  default=htk_config.min_examples))
    base_command.extend(
        htk_config.turn_to_config('-l', max_adap_sentences, type=int))

    # positional arguments
    base_command.append(hmm_list)

    # store instance variables
    self.base_command = base_command
    self.hmm_model = hmm_model
    self.output_hmm_model = output_hmm_model
    self.output_adaptation = output_adaptation
    self.scp_file = scp_file
    self.num_speaker_chars = num_speaker_chars
    self.stats = stats
def __init__(self, htk_config, name, model, scp, dictionary, language_model):
    """Create the recogniser working directory and its input files.

    Args:
        htk_config: configuration object; a negative
            ``num_speaker_chars`` on it is replaced by 3.
        name: working directory; made absolute relative to the current
            directory when it does not start with ``/``. An existing
            directory of that name is removed first.
        model: model name (without extension). When ``scp`` contains
            ``?`` placeholders, the same run of ``?`` in ``model`` is
            replaced by each speaker id.
        scp: scp file, or a glob pattern containing a run of ``?`` that
            marks the speaker id; one scp/model pair is then set up per
            matching file and ``self.scp`` is None.
        dictionary: dictionary file, rewritten to ``dict.hdecode``.
        language_model: language model path, stored as is.

    Every scp entry is rewritten with the directory of ``scp`` joined in
    front of it. ``htk_config.adap_align_dict`` is rewritten to
    ``dict.hvite``. The log directory is set to the base name of ``name``.
    """
    if not name.startswith('/'):
        name = os.path.join(os.getcwd(), name)

    if htk_config.num_speaker_chars < 0:
        htk_config.num_speaker_chars = 3

    self.name = name
    if os.path.exists(name):
        shutil.rmtree(name, ignore_errors=True)
    os.mkdir(name)

    self.a_id = 0
    self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
    os.mkdir(self.xforms_dir)
    self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
    os.mkdir(self.classes_dir)

    self.model = model
    self.split_scp_models = []

    scp_dir = os.path.dirname(scp)

    def write_resolved_scp(source_scp, target_scp):
        # Copy source_scp to target_scp with scp_dir joined before each
        # entry; both files are closed when done.
        with open(target_scp, 'w') as target_desc:
            with open(source_scp) as source_desc:
                for line in source_desc:
                    print(os.path.join(scp_dir, line.strip()),
                          file=target_desc)

    if '?' in scp:
        self.scp = None

        # Length of the placeholder run: grow it while a longer run of
        # '?' still occurs in the pattern.
        num_scp_speaker_chars = 1
        while '?' * (num_scp_speaker_chars + 1) in scp:
            num_scp_speaker_chars += 1
        placeholder = '?' * num_scp_speaker_chars

        s_index = scp.find(placeholder)
        speakers = [
            s[s_index:s_index + num_scp_speaker_chars]
            for s in glob.iglob(scp)
        ]

        for s in speakers:
            real_scp = os.path.join(name, '%s_list.scp' % s)
            write_resolved_scp(scp.replace(placeholder, s), real_scp)
            self.split_scp_models.append(
                (s, real_scp, model.replace(placeholder, s)))
    else:
        self.scp = os.path.join(name, 'list.scp')
        write_resolved_scp(scp, self.scp)

    d = HTK_dictionary()
    d.read_dict(dictionary)
    self.dict = os.path.join(name, 'dict.hdecode')
    d.write_dict(self.dict, False)

    d = HTK_dictionary()
    d.read_dict(htk_config.adap_align_dict)
    self.adap_align_dict = os.path.join(name, 'dict.hvite')
    d.write_dict(self.adap_align_dict, True)

    self.language_model = language_model
    self.htk_config = htk_config

    self.adaptations = []
    self.adap_num_speaker_chars = None

    self.id = 0

    System.set_log_dir(os.path.basename(name))
def add_adaptation(self, scp_file, mlf_file, num_nodes=1,
                   num_speaker_chars=None, files_per_speaker=None,
                   split_threshold=1000):
    """Estimate a new set of adaptation transforms and register it.

    For every model (one, or one per speaker when ``self.scp`` is None)
    this prepares an HVite alignment job, an optional HHEd regression-tree
    job and an HERest job. The jobs are then run per stage, and
    ``(self.xforms_dir, 'mllr<N>')`` is appended to ``self.adaptations``.

    Args:
        scp_file: adaptation scp file; when ``self.scp`` is None, a run of
            ``?`` of speaker-id length is replaced by each speaker id.
        mlf_file: transcription passed to HVite.
        num_nodes: 1 selects a global base class; any other value builds a
            regression tree with that many nodes.
        num_speaker_chars: speaker prefix length used to group scp entries
            and passed to HERest; stored in ``self.adap_num_speaker_chars``
            afterwards.
        files_per_speaker: passed to HERest as ``max_adap_sentences``.
        split_threshold: not used by this method; the threshold is read
            from ``self.htk_config.split_threshold``.
    """
    def run_tasks(tasks):
        # A single task runs directly; several run as one CollectionJob.
        if len(tasks) == 1:
            tasks[0].run()
        elif len(tasks) > 1:
            CollectionJob(tasks).run()

    def write_speaker_mask(config_desc):
        # Add the transform masks once a previous adaptation has fixed the
        # speaker prefix length.
        if self.adap_num_speaker_chars is not None:
            mask = "*/" + ('%' * self.adap_num_speaker_chars) + "*.*"
            print("PAXFORMMASK = {mask:>s}\nINXFORMMASK = {mask:>s}".format(
                mask=mask), file=config_desc)

    new_extension = 'mllr{0:d}'.format(len(self.adaptations))

    tmp_dirs = []
    hvite_tasks = []
    hed_tasks = []
    herest_tasks = []

    real_scp_files = [scp_file]
    speakers = [""]
    models = [self.model]

    if self.scp is None:
        speaker_len = len(self.split_scp_models[0][0])
        real_scp_files = [
            scp_file.replace('?' * speaker_len, sp[0])
            for sp in self.split_scp_models
        ]
        speakers = [sp[0] for sp in self.split_scp_models]
        models = [sp[2] for sp in self.split_scp_models]

    for speaker_scp, speaker, model in izip(real_scp_files, speakers,
                                            models):
        tmp_dir = System.get_global_temp_dir()
        tmp_dirs.append(tmp_dir)

        phone_mlf = os.path.join(tmp_dir, 'phone.mlf')
        tmp_scp_file = os.path.join(tmp_dir, 'adap.scp')

        with open(tmp_scp_file, 'w') as tmp_desc:
            # Group the entries by speaker prefix, then write every group
            # in random order.
            smap = {}
            with open(speaker_scp) as scp_desc:
                for line in scp_desc:
                    key = basename(line)[:num_speaker_chars]
                    smap.setdefault(key, []).append(line.strip())

            for entries in smap.itervalues():
                shuffle(entries)
                for line in entries:
                    if not line.startswith('/'):
                        # Relative entries are resolved against the scp
                        # file's directory.
                        print(os.path.join(os.path.dirname(speaker_scp),
                                           line.strip()),
                              file=tmp_desc)
                    else:
                        print(line.strip(), file=tmp_desc)

        tmp_config = os.path.join(tmp_dir, 'hvite_config')
        with open(tmp_config, 'w') as tmp_desc:
            print(htk_file_strings.HVITE_CONFIG, file=tmp_desc)

        hvite_tasks.append(
            HVite(self.htk_config, tmp_scp_file, model + '.mmf',
                  self.adap_align_dict, model + '.hmmlist', phone_mlf,
                  mlf_file, config_file=tmp_config))

        in_transform = []
        parent_transform = None
        if len(self.adaptations) > 0:
            in_transform = [self.adaptations[-1]]
        in_transform.append((self.classes_dir, None))
        if len(in_transform) > 1:
            parent_transform = in_transform

        adap_config = os.path.join(tmp_dir, 'adap_config')

        if num_nodes == 1:
            # global adaptation
            global_name = 'global{0:d}'.format(len(self.adaptations))
            global_file = os.path.join(self.classes_dir, global_name)
            with open(global_file, 'w') as global_desc:
                print(htk_file_strings.GLOBAL.format(
                    global_name=global_name), file=global_desc)

            with open(adap_config, 'w') as adap_desc:
                print(htk_file_strings.BASE_ADAP_CONFIG.format(
                    base_class=global_name), file=adap_desc)
                write_speaker_mask(adap_desc)
        else:
            # tree adaptation
            regtree_name = '{0:>s}regtree{1:d}'.format(
                speaker, len(self.adaptations))
            regtree_hed = os.path.join(tmp_dir, 'regtree.hed')
            with open(regtree_hed, 'w') as regtree_desc:
                print(htk_file_strings.REGTREE_HED.format(
                    stats_file=model + '.stats',
                    num_nodes=num_nodes,
                    regtree=regtree_name), file=regtree_desc)

            hed_tasks.append(
                HHEd(self.htk_config, model + '.mmf', self.classes_dir,
                     model + '.hmmlist', regtree_hed))

            with open(adap_config, 'w') as adap_desc:
                print(htk_file_strings.TREE_ADAP_CONFIG.format(
                    regtree=os.path.join(self.classes_dir, regtree_name) +
                    '.tree'), file=adap_desc)
                write_speaker_mask(adap_desc)
                # Compare by value: identity comparison against an int
                # literal does not test equality.
                if self.htk_config.split_threshold != 1000:
                    print("HADAPT:SPLITTHRESH = {0:.1f}".format(
                        float(self.htk_config.split_threshold)),
                        file=adap_desc)

        herest_tasks.append(
            HERest(self.htk_config, tmp_scp_file, model + '.mmf',
                   model + '.hmmlist', phone_mlf,
                   config_file=adap_config,
                   num_speaker_chars=num_speaker_chars,
                   max_adap_sentences=files_per_speaker,
                   input_adaptation=in_transform,
                   parent_adaptation=parent_transform,
                   output_adaptation=(self.xforms_dir, new_extension)))

    # Stages run in order: alignment, regression trees, estimation.
    run_tasks(hvite_tasks)
    run_tasks(hed_tasks)
    run_tasks(herest_tasks)

    self.adaptations.append((self.xforms_dir, new_extension))
    self.adap_num_speaker_chars = num_speaker_chars

    for tmp_dir in tmp_dirs:
        shutil.rmtree(tmp_dir, ignore_errors=True)
# Load configuration values from the file named in the options, then from
# the options object itself.
htk_config.load_config_vals(options.config)
htk_config.load_object_vals(options)

# The first eight positional arguments, in this order.
name, model, scp, dict, lm, t_scp, t_mlf, neighbourlist = args[:8]

recognizer = HTK_recognizer(htk_config, name, model, scp, dict, lm)

# Baseline pass: no LM scale is given and 'baseline' is the sub-name.
recognizer.recognize(None, 'baseline')

#recognizer.add_adaptation(scp,recognizer.name+'.baseline.mlf',num_speaker_chars=options.eval_speaker_chars)
#recognizer.add_adaptation(scp,recognizer.name+'.baseline.mlf',num_speaker_chars=options.eval_speaker_chars,num_nodes=64)
#
#recognizer.recognize(None,'adapted')
#recognizer.clear_adaptations()

# Scratch locations for the neighbour transform scp/mlf and a 'files'
# subdirectory, all inside a new global temporary directory.
tmp_dir = System.get_global_temp_dir()
transform_scp = join(tmp_dir, 'neighbour_transform.scp')
transform_mlf = join(tmp_dir, 'neighbour_transform.mlf')
file_dir = join(tmp_dir, 'files')
mkdir(file_dir)

neighbour_dict = {}
# Each line is whitespace separated: the first field is taken as the
# speaker and the fields from the third onward as its neighbours.
# NOTE(review): the second field is skipped and neighbour_dict is not
# filled in the code visible here - confirm against the rest of the script.
for line in open(neighbourlist):
    parts = line.strip().split()
    sp = parts[0]
    neighbors = [n for n in parts[2:]]
    # A speaker is not its own neighbour (only the first occurrence is
    # removed).
    if sp in neighbors:
        neighbors.remove(sp)
# Load configuration values from the options object.
htk_config.load_object_vals(options)

# The first eight positional arguments, in this order.
name,model,scp,dict,lm,t_scp,t_mlf,neighbourlist = args[:8]

recognizer = HTK_recognizer(htk_config,name,model,scp,dict,lm)

# Baseline pass: no LM scale is given and 'baseline' is the sub-name.
recognizer.recognize(None,'baseline')

#recognizer.add_adaptation(scp,recognizer.name+'.baseline.mlf',num_speaker_chars=options.eval_speaker_chars)
#recognizer.add_adaptation(scp,recognizer.name+'.baseline.mlf',num_speaker_chars=options.eval_speaker_chars,num_nodes=64)
#
#recognizer.recognize(None,'adapted')
#recognizer.clear_adaptations()

# Scratch locations for the neighbour transform scp/mlf and a 'files'
# subdirectory, all inside a new global temporary directory.
tmp_dir = System.get_global_temp_dir()
transform_scp = join(tmp_dir, 'neighbour_transform.scp')
transform_mlf = join(tmp_dir, 'neighbour_transform.mlf')
file_dir = join(tmp_dir, 'files')
mkdir(file_dir)

neighbour_dict = {}
# Each line is whitespace separated: the first field is taken as the
# speaker and the fields from the third onward as its neighbours.
# NOTE(review): the second field is skipped and neighbour_dict is not
# filled in the code visible here - confirm against the rest of the script.
for line in open(neighbourlist):
    parts = line.strip().split()
    sp = parts[0]
    neighbors = [n for n in parts[2:]]
    # A speaker is not its own neighbour (only the first occurrence is
    # removed).
    if sp in neighbors:
        neighbors.remove(sp)
def __init__(self, htk_config, name, model, scp, dictionary, language_model):
    """Set up the recogniser working directory, scp lists and dictionaries.

    Args:
        htk_config: configuration object; a negative
            ``num_speaker_chars`` on it is replaced by 3.
        name: working directory; joined to the current directory when it
            does not start with ``/``. An existing directory of that name
            is removed before it is created again.
        model: model name (without extension). With a ``?`` pattern in
            ``scp``, the same run of ``?`` in ``model`` is replaced by each
            speaker id.
        scp: scp file, or a glob pattern whose run of ``?`` marks the
            speaker id. In the pattern case ``self.scp`` is None and
            ``self.split_scp_models`` holds one
            ``(speaker, scp, model)`` tuple per matching file.
        dictionary: dictionary file, rewritten to ``dict.hdecode``.
        language_model: language model path, stored as is.

    Each scp entry is written out with the directory of ``scp`` joined in
    front. ``htk_config.adap_align_dict`` is rewritten to ``dict.hvite``
    and the log directory is set to the base name of ``name``.
    """
    if not name.startswith('/'):
        name = os.path.join(os.getcwd(), name)

    if htk_config.num_speaker_chars < 0:
        htk_config.num_speaker_chars = 3

    self.name = name
    if os.path.exists(name):
        shutil.rmtree(name, ignore_errors=True)
    os.mkdir(name)

    self.a_id = 0
    self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
    os.mkdir(self.xforms_dir)
    self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
    os.mkdir(self.classes_dir)

    self.model = model
    self.split_scp_models = []

    base_dir = os.path.dirname(scp)

    def copy_scp(source_scp, target_scp):
        # Write every entry of source_scp to target_scp with base_dir
        # joined in front; both handles are closed on exit.
        with open(target_scp, 'w') as scp_desc:
            with open(source_scp) as source_desc:
                for line in source_desc:
                    print(os.path.join(base_dir, line.strip()),
                          file=scp_desc)

    if '?' in scp:
        self.scp = None

        # Determine how long the run of '?' placeholders is.
        num_scp_speaker_chars = 1
        while '?' * (num_scp_speaker_chars + 1) in scp:
            num_scp_speaker_chars += 1
        wildcard = '?' * num_scp_speaker_chars

        s_index = scp.find(wildcard)
        speakers = [
            s[s_index:s_index + num_scp_speaker_chars]
            for s in glob.iglob(scp)
        ]

        for s in speakers:
            real_scp = os.path.join(name, '%s_list.scp' % s)
            copy_scp(scp.replace(wildcard, s), real_scp)
            self.split_scp_models.append(
                (s, real_scp, model.replace(wildcard, s)))
    else:
        self.scp = os.path.join(name, 'list.scp')
        copy_scp(scp, self.scp)

    d = HTK_dictionary()
    d.read_dict(dictionary)
    self.dict = os.path.join(name, 'dict.hdecode')
    d.write_dict(self.dict, False)

    d = HTK_dictionary()
    d.read_dict(htk_config.adap_align_dict)
    self.adap_align_dict = os.path.join(name, 'dict.hvite')
    d.write_dict(self.adap_align_dict, True)

    self.language_model = language_model
    self.htk_config = htk_config

    self.adaptations = []
    self.adap_num_speaker_chars = None

    self.id = 0

    System.set_log_dir(os.path.basename(name))
def __init__(self,
             htk_config,
             scp_file,
             hmm_model,
             hmm_list,
             input_mlf,
             config_file=None,
             input_adaptation=None,
             parent_adaptation=None,
             output_adaptation=None,
             output_hmm_model=None,
             pruning=None,
             prune_threshold=None,
             num_speaker_chars=None,
             min_examples=None,
             mix_weight_floor=None,
             max_adap_sentences=None,
             stats=None):
    """Build the HERest base command line and remember the job settings.

    Args:
        htk_config: configuration object supplying ``get_flags``,
            ``turn_to_config`` and default values.
        scp_file: scp file, stored for later use.
        hmm_model: input model file (``-H``).
        hmm_list: model list, appended as the positional argument.
        input_mlf: label file (``-I``).
        config_file: passed to ``htk_config.get_flags``.
        input_adaptation: iterable of ``(directory, extension)`` pairs
            added as ``-J`` flags; ``extension`` may be None.
        parent_adaptation: iterable of ``(directory, extension)`` pairs
            added as ``-E`` flags, followed by a single ``-a``.
        output_adaptation: ``(directory, extension)`` pair; adds ``-u a``
            and ``-K``.
        output_hmm_model: when given, ``-M`` is pointed at the
            accumulator temporary directory.
        pruning: a float or an iterable of floats for ``-t``; defaults to
            ``htk_config.pruning``.
        prune_threshold: ``-c`` value (default from ``htk_config``).
        num_speaker_chars: speaker prefix length for the ``-h`` mask;
            defaults to ``htk_config.num_speaker_chars``.
        min_examples: ``-m`` value (default from ``htk_config``).
        mix_weight_floor: ``-w`` value (default from ``htk_config``).
        max_adap_sentences: ``-l`` value.
        stats: stored for later use.

    Raises:
        TypeError: if ``pruning`` is neither a float nor an iterable of
            floats.
    """
    super(HERest, self).__init__()

    def dir_flags(flag, transform_dirs):
        # Turn (directory, extension) pairs into "flag directory [ext]".
        expanded = []
        for source_dir, extension in transform_dirs:
            expanded.extend([flag, source_dir])
            if extension is not None:
                expanded.append(extension)
        return expanded

    base_command = ["HERest"]
    base_command.extend(htk_config.get_flags(config_file))

    # task specific flags
    #base_command.extend(htk_config.turn_to_config('-S','{scp_list}'))

    self.acc_tmp_dir = System.get_global_temp_dir()

    # Output dependent flags
    if output_hmm_model is not None:
        base_command.extend(
            htk_config.turn_to_config('-M', self.acc_tmp_dir))
        #base_command.extend(htk_config.turn_to_config('-M', os.path.dirname(output_hmm_model)))

    if output_adaptation is not None:
        base_command.extend(['-u', 'a'])
        target_dir, extension = output_adaptation
        base_command.extend(['-K', target_dir])
        if extension is not None:
            base_command.append(extension)

    if num_speaker_chars is None:
        num_speaker_chars = htk_config.num_speaker_chars

    # Adaptation flags
    if input_adaptation is not None:
        base_command.extend(dir_flags('-J', input_adaptation))

    if parent_adaptation is not None:
        base_command.extend(dir_flags('-E', parent_adaptation))
        base_command.append('-a')

    if num_speaker_chars > 0:
        pattern = "*/" + ('%' * num_speaker_chars) + "*.*"
        base_command.extend(['-h', pattern])

    # pruning flag
    if pruning is None:
        pruning = htk_config.pruning

    if isinstance(pruning, float):
        # Converted to text like the multi-value case, so that every
        # element of the command list is a string.
        base_command.extend(['-t', str(pruning)])
    elif all(isinstance(p, float) for p in pruning):
        base_command.extend(['-t'] + [str(p) for p in pruning])
    else:
        raise TypeError("pruning must be a float or an iterable of floats")

    # other flags
    base_command.extend(htk_config.turn_to_config('-I', input_mlf))
    base_command.extend(htk_config.turn_to_config('-H', hmm_model))
    base_command.extend(
        htk_config.turn_to_config('-w', mix_weight_floor, type=float,
                                  default=htk_config.mix_weight_floor))
    base_command.extend(
        htk_config.turn_to_config('-c', prune_threshold, type=float,
                                  default=htk_config.prune_threshold))
    base_command.extend(
        htk_config.turn_to_config('-m', min_examples, type=int,
                                  default=htk_config.min_examples))
    base_command.extend(
        htk_config.turn_to_config('-l', max_adap_sentences, type=int))

    # positional arguments
    base_command.append(hmm_list)

    # store instance variables
    self.base_command = base_command
    self.hmm_model = hmm_model
    self.output_hmm_model = output_hmm_model
    self.output_adaptation = output_adaptation
    self.scp_file = scp_file
    self.num_speaker_chars = num_speaker_chars
    self.stats = stats
def initialize_new(self, scp_list, word_mlf, dict, remove_previous=False):
    """Prepare the training files for a new model.

    Writes the training dictionary, the initial ``.hmmlist`` (``self.id``
    is set to 1), the training scp and the training word MLF, then calls
    ``expand_word_transcription()``.

    Args:
        scp_list: comma-separated string of scp file paths. Entries that
            do not start with ``/`` are resolved against the directory of
            their scp file.
        word_mlf: comma-separated string of word MLF paths.
        dict: a dictionary path, or an iterable of dictionary paths.
        remove_previous: when true, existing log files, the training files
            directory and model files of this name are deleted first.

    Raises:
        ExistingFilesException: if ``remove_previous`` is false and the
            training files directory or model files of this name exist.
        TypeError: if ``dict`` is neither a string nor an iterable of
            strings.

    Utterances whose transcription contains a word missing from the
    dictionary are reported on stdout and left out of the training scp and
    the training word MLF.
    """
    System.set_log_dir(self.name)

    model_pattern = self.model_dir + '/' + self.name + '.*'

    if remove_previous:
        for f in glob.iglob(System.get_log_dir() + '/*'):
            os.remove(f)
    elif (os.path.exists(self.train_files_dir) or
          len(glob.glob(model_pattern)) > 0):
        raise ExistingFilesException

    if os.path.exists(self.train_files_dir):
        shutil.rmtree(self.train_files_dir)
    for f in glob.iglob(model_pattern):
        os.remove(f)

    os.mkdir(self.train_files_dir)

    # handle dictionary
    dic = HTK_dictionary()
    if isinstance(dict, basestring):
        dic.read_dict(dict)
    elif all(isinstance(d, basestring) for d in dict):
        for d in dict:
            dic.read_dict(d)
    else:
        raise TypeError(
            "dict must be a path or an iterable of paths")
    dic.write_dict(self.training_dict)

    self.phones = dic.get_phones()

    # handle transcription
    trans = HTK_transcription()
    for w in word_mlf.strip().split(','):
        trans.read_mlf(w, HTK_transcription.WORD)

    self.id = 1

    phones_list = self._get_model_name_id() + '.hmmlist'
    with open(phones_list, 'w') as phones_desc:
        for p in self.phones:
            print(p, file=phones_desc)

    # handle scp files
    real_trans = HTK_transcription()
    real_trans.transcriptions[real_trans.WORD] = {}

    with open(self.training_scp, 'w') as scp_desc:
        for scp in scp_list.strip().split(','):
            with open(scp) as scp_in:
                for entry in scp_in:
                    # The utterance id is the file name without directory
                    # and extension.
                    utt_id = os.path.splitext(
                        os.path.basename(entry.strip()))[0]
                    if not entry.startswith('/'):
                        entry = os.path.join(os.path.dirname(scp),
                                             entry.strip())
                    path = entry.strip()

                    words = trans.transcriptions[
                        HTK_transcription.WORD][utt_id]
                    for word in words:
                        if not dic.word_in_dict(word):
                            print("%s skipped, because has missing word %s"
                                  % (path, word))
                            break
                    else:
                        # All words are known: keep the utterance.
                        print(path, file=scp_desc)
                        real_trans.transcriptions[real_trans.WORD][
                            utt_id] = trans.transcriptions[
                                real_trans.WORD][utt_id]

    real_trans.write_mlf(self.training_word_mlf,
                         target=HTK_transcription.WORD)
    self.expand_word_transcription()
def add_adaptation(self, scp_file, mlf_file, num_nodes=1,
                   num_speaker_chars=None, files_per_speaker=None,
                   split_threshold=1000):
    """Estimate a new set of adaptation transforms and register it.

    One HVite alignment job, an optional HHEd regression-tree job and one
    HERest job are prepared per model (a single model, or one per speaker
    when ``self.scp`` is None). The jobs are run stage by stage, after
    which ``(self.xforms_dir, 'mllr<N>')`` is appended to
    ``self.adaptations``.

    Args:
        scp_file: adaptation scp file; when ``self.scp`` is None, a run of
            ``?`` of speaker-id length is replaced by each speaker id.
        mlf_file: transcription passed to HVite.
        num_nodes: 1 selects a global base class; any other value builds a
            regression tree with that many nodes.
        num_speaker_chars: speaker prefix length used to group scp entries
            and passed to HERest; stored in ``self.adap_num_speaker_chars``
            afterwards.
        files_per_speaker: passed to HERest as ``max_adap_sentences``.
        split_threshold: not used by this method; the threshold is read
            from ``self.htk_config.split_threshold``.
    """
    def run_stage(tasks):
        # A single task runs directly; several run as one CollectionJob.
        if len(tasks) == 1:
            tasks[0].run()
        elif len(tasks) > 1:
            CollectionJob(tasks).run()

    def print_speaker_mask(config_desc):
        # Add the transform masks once a previous adaptation has fixed the
        # speaker prefix length.
        if self.adap_num_speaker_chars is not None:
            mask = "*/" + ('%' * self.adap_num_speaker_chars) + "*.*"
            print("PAXFORMMASK = {mask:>s}\nINXFORMMASK = {mask:>s}".format(
                mask=mask), file=config_desc)

    new_extension = 'mllr{0:d}'.format(len(self.adaptations))

    tmp_dirs = []
    hvite_tasks = []
    hed_tasks = []
    herest_tasks = []

    real_scp_files = [scp_file]
    speakers = [""]
    models = [self.model]

    if self.scp is None:
        id_length = len(self.split_scp_models[0][0])
        real_scp_files = [
            scp_file.replace('?' * id_length, sp[0])
            for sp in self.split_scp_models
        ]
        speakers = [sp[0] for sp in self.split_scp_models]
        models = [sp[2] for sp in self.split_scp_models]

    for current_scp, speaker, model in izip(real_scp_files, speakers,
                                            models):
        tmp_dir = System.get_global_temp_dir()
        tmp_dirs.append(tmp_dir)

        phone_mlf = os.path.join(tmp_dir, 'phone.mlf')
        tmp_scp_file = os.path.join(tmp_dir, 'adap.scp')

        with open(tmp_scp_file, 'w') as tmp_desc:
            # Group the entries by speaker prefix, then write every group
            # in random order.
            smap = {}
            with open(current_scp) as scp_desc:
                for line in scp_desc:
                    prefix = basename(line)[:num_speaker_chars]
                    smap.setdefault(prefix, []).append(line.strip())

            for group in smap.itervalues():
                shuffle(group)
                for line in group:
                    if not line.startswith('/'):
                        # Relative entries are resolved against the scp
                        # file's directory.
                        print(os.path.join(os.path.dirname(current_scp),
                                           line.strip()),
                              file=tmp_desc)
                    else:
                        print(line.strip(), file=tmp_desc)

        tmp_config = os.path.join(tmp_dir, 'hvite_config')
        with open(tmp_config, 'w') as tmp_desc:
            print(htk_file_strings.HVITE_CONFIG, file=tmp_desc)

        hvite_tasks.append(
            HVite(self.htk_config, tmp_scp_file, model + '.mmf',
                  self.adap_align_dict, model + '.hmmlist', phone_mlf,
                  mlf_file, config_file=tmp_config))

        in_transform = []
        parent_transform = None
        if len(self.adaptations) > 0:
            in_transform = [self.adaptations[-1]]
        in_transform.append((self.classes_dir, None))
        if len(in_transform) > 1:
            parent_transform = in_transform

        adap_config = os.path.join(tmp_dir, 'adap_config')

        if num_nodes == 1:
            # global adaptation
            global_name = 'global{0:d}'.format(len(self.adaptations))
            global_file = os.path.join(self.classes_dir, global_name)
            with open(global_file, 'w') as global_desc:
                print(htk_file_strings.GLOBAL.format(
                    global_name=global_name), file=global_desc)

            with open(adap_config, 'w') as adap_desc:
                print(htk_file_strings.BASE_ADAP_CONFIG.format(
                    base_class=global_name), file=adap_desc)
                print_speaker_mask(adap_desc)
        else:
            # tree adaptation
            regtree_name = '{0:>s}regtree{1:d}'.format(
                speaker, len(self.adaptations))
            regtree_hed = os.path.join(tmp_dir, 'regtree.hed')
            with open(regtree_hed, 'w') as regtree_desc:
                print(htk_file_strings.REGTREE_HED.format(
                    stats_file=model + '.stats',
                    num_nodes=num_nodes,
                    regtree=regtree_name), file=regtree_desc)

            hed_tasks.append(
                HHEd(self.htk_config, model + '.mmf', self.classes_dir,
                     model + '.hmmlist', regtree_hed))

            with open(adap_config, 'w') as adap_desc:
                print(htk_file_strings.TREE_ADAP_CONFIG.format(
                    regtree=os.path.join(self.classes_dir, regtree_name) +
                    '.tree'), file=adap_desc)
                print_speaker_mask(adap_desc)
                # Compare by value: identity comparison against an int
                # literal does not test equality.
                if self.htk_config.split_threshold != 1000:
                    print("HADAPT:SPLITTHRESH = {0:.1f}".format(
                        float(self.htk_config.split_threshold)),
                        file=adap_desc)

        herest_tasks.append(
            HERest(self.htk_config, tmp_scp_file, model + '.mmf',
                   model + '.hmmlist', phone_mlf,
                   config_file=adap_config,
                   num_speaker_chars=num_speaker_chars,
                   max_adap_sentences=files_per_speaker,
                   input_adaptation=in_transform,
                   parent_adaptation=parent_transform,
                   output_adaptation=(self.xforms_dir, new_extension)))

    # Stages run in order: alignment, regression trees, estimation.
    run_stage(hvite_tasks)
    run_stage(hed_tasks)
    run_stage(herest_tasks)

    self.adaptations.append((self.xforms_dir, new_extension))
    self.adap_num_speaker_chars = num_speaker_chars

    for tmp_dir in tmp_dirs:
        shutil.rmtree(tmp_dir, ignore_errors=True)