# Fragment of the step-numbered baseline pipeline. The statements below are
# kept exactly as found, flattened onto one line.
#
# What is visible here, in order:
#   * current_step is incremented and compared with options.step, then the
#     "Rescoring lattices with lattice-tool" step is logged; any existing
#     baseline_lat_dir_rescored directory is removed and htk.lattice_rescore
#     is called with baseline_lat_dir, baseline_lat_dir_rescored and the
#     language model path lm_rescore + '.gz'.
#   * current_step is incremented and compared again, the "Decoding lattices
#     with lattice-tool" step is logged and htk.lattice_decode is called with
#     baseline_lat_dir_rescored and baseline_pass2_mlf.
#   * data_manipulation.mlf_to_trn is called for the pass-1 and pass-2 MLF
#     files (targets pass1_trn and pass2_trn).
#   * Path names for the 'unsupsi' experiment are built under 'unsup_si'.
#
# NOTE(review): the `current_step >= options.step` checks presumably let a run
# resume from a later step, but which statements belong to each `if` body
# cannot be told from this flattened text -- confirm against the original
# layout before editing.
current_step += 1 if current_step >= options.step: logger.info("Start step: %d (%s)" % (current_step, 'Rescoring lattices with lattice-tool')) if os.path.exists(baseline_lat_dir_rescored): shutil.rmtree(baseline_lat_dir_rescored) htk.lattice_rescore(current_step, baseline_lat_dir, baseline_lat_dir_rescored, lm_rescore + '.gz', lm_scale) current_step += 1 if current_step >= options.step: logger.info("Start step: %d (%s)" % (current_step, 'Decoding lattices with lattice-tool')) htk.lattice_decode(current_step, baseline_lat_dir_rescored, baseline_pass2_mlf, lm_scale) data_manipulation.mlf_to_trn(baseline_pass1_mlf, pass1_trn, speaker_name_width) data_manipulation.mlf_to_trn(baseline_pass2_mlf, pass2_trn, speaker_name_width) unsupsi_dir = 'unsup_si' unsupsi_lat_dir = unsupsi_dir + '/lattices.htk' unsupsi_lat_dir_rescored = unsupsi_dir + '/lattices.rescored' if 'unsupsi' in experiments: adapt_mlf = unsupsi_dir + '/adapt.mlf' pass1_mlf = unsupsi_dir + '/pass1.mlf' pass2_mlf = unsupsi_dir + '/pass2.mlf'
def run(self):
    """Run recognition for this experiment and write its transcript files.

    The work directory is ``self.name``.  The method:

    1. Copies the recognition SCP list to ``<work_dir>/recog.scp``.  The
       model's ``recognize_scp`` is used unless the experiment's own
       ``recognize_scp`` setting does not start with ``'_'``, in which case
       the experiment's value wins.
    2. Creates the work sub-directories that do not exist yet.
    3. Calls ``make_adaptation`` on every entry of ``self.adaptations`` in
       order, handing each one the result of the previous call.
    4. Calls ``htk.HDecode`` to generate lattices and ``hdecode.mlf``.
    5. If the model configures ``lm_rescore``, rescores and decodes the
       lattices with ``htk.lattice_rescore`` / ``htk.lattice_decode``;
       otherwise derives ``rescore.mlf`` from ``hdecode.mlf`` directly.
    6. Converts the resulting MLF to ``recog.trn`` with
       ``data_manipulation.mlf_to_trn``.

    ``self.done`` is set to True on success and to False when ``Experiment``
    is caught.  Returns None.
    """
    work_dir = self.name
    xforms_dir = work_dir + '/xforms'
    classes_dir = work_dir + '/classes'
    files_dir = work_dir + '/files'

    model = (self.model.configuration['model_dir'] + '/' +
             self.configuration['model_name'])

    # Pick the SCP list: an experiment-level value starting with '_' means
    # "fall back to the model's list".
    recog_scp = work_dir + "/recog.scp"
    rscp = self.model.configuration['recognize_scp']
    if not self.configuration['recognize_scp'].startswith('_'):
        rscp = self.configuration['recognize_scp']
    data_manipulation.copy_scp_file(rscp, recog_scp)

    dict_hdecode = self.model.configuration['dict_hdecode']
    tiedlist = self.model.configuration['tiedlist']
    lm = self.model.configuration['lm']
    lm_rescore = self.model.configuration['lm_rescore']

    lm_scale = self.configuration['lm_scale']
    beam = self.configuration['beam']
    end_beam = self.configuration['end_beam']
    max_pruning = self.configuration['max_pruning']
    num_tokens = self.configuration['num_tokens']

    hdecode_mlf = work_dir + '/hdecode.mlf'
    rescore_mlf = work_dir + '/rescore.mlf'
    recog_trn = work_dir + '/recog.trn'

    configs = [self.model.configuration['config']]

    try:
        htk_lat_dir = os.path.join(work_dir, 'lattices.htk')
        rescore_lat_dir = os.path.join(work_dir, 'lattices.rescore')
        log_dir = os.path.join(work_dir, 'log')

        # work_dir comes first so the sub-directories can be created in it.
        for directory in (work_dir, htk_lat_dir, rescore_lat_dir, log_dir,
                          xforms_dir, classes_dir, files_dir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        # Each adaptation receives the transform produced by the previous
        # one; only the last result is used below.
        current_parent_transform = None
        for i, adaptation in enumerate(self.adaptations):
            current_parent_transform = adaptation.make_adaptation(
                i, current_parent_transform, self)

        adap_dirs = None
        speaker_name_width = self.model.configuration['speaker_name_width']
        if current_parent_transform is not None:
            # The transform is indexed as (extension, config, name width).
            configs.append(current_parent_transform[1])
            extension = current_parent_transform[0]
            adap_dirs = [(xforms_dir, extension), (classes_dir, None)]
            speaker_name_width = current_parent_transform[2]

        # Single-argument print(...) emits the same text under the Python 2
        # print statement and the Python 3 print function.
        print("Start step: %d (%s)" % (0, 'Generating lattices with HDecode'))
        htk.HDecode(log_dir, recog_scp, model, dict_hdecode, tiedlist, lm,
                    htk_lat_dir, num_tokens, hdecode_mlf, configs, lm_scale,
                    beam, end_beam, max_pruning, adap_dirs,
                    speaker_name_width)

        if lm_rescore is not None:
            print("Start step: %d (%s)"
                  % (0, 'Rescoring lattices with lattice-tool'))
            htk.lattice_rescore(log_dir, htk_lat_dir, rescore_lat_dir,
                                lm_rescore + '.gz', lm_scale)

            print("Start step: %d (%s)"
                  % (0, 'Decoding lattices with lattice-tool'))
            htk.lattice_decode(log_dir, rescore_lat_dir, rescore_mlf,
                               lm_scale)

            data_manipulation.mlf_to_trn(
                rescore_mlf, recog_trn,
                self.model.configuration['speaker_name_width'])
        else:
            # No rescoring LM: build rescore.mlf from the HDecode output.
            # A "</s>" line is inserted before every "." line that is not
            # already preceded by one, and the lattice directory prefix is
            # replaced with "*".  Both files are closed deterministically.
            with open(rescore_mlf, 'w') as out_file:
                with open(hdecode_mlf) as in_file:
                    prev = ""
                    for line in in_file:
                        stripped = line.rstrip()
                        if stripped == '.' and prev != '</s>':
                            out_file.write("</s>\n")
                        out_file.write(
                            stripped.replace(htk_lat_dir, '*') + "\n")
                        prev = stripped

            # The transcript is produced from the unmodified HDecode MLF.
            data_manipulation.mlf_to_trn(
                hdecode_mlf, recog_trn,
                self.model.configuration['speaker_name_width'])

        self.done = True
    except Experiment:
        # NOTE(review): `Experiment` is not defined in the visible code; if
        # it is not an exception class this handler never matches -- confirm
        # which failure type was meant to mark the run as not done.
        self.done = False
# Fragment of the step-numbered baseline pipeline. The statements below are
# kept exactly as found, flattened onto one line; the fragment starts inside
# a body whose header is not visible here.
#
# What is visible here, in order:
#   * The "Generating lattices with HDecode" step is logged, baseline_lat_dir
#     is removed if it exists and created again, and htk.HDecode is called
#     with that directory, baseline_pass1_mlf and the single config
#     [config_hdecode].
#   * current_step is incremented and compared with options.step, then the
#     "Rescoring lattices with lattice-tool" step is logged; any existing
#     baseline_lat_dir_rescored directory is removed and htk.lattice_rescore
#     is called with the language model path lm_rescore + '.gz'.
#   * current_step is incremented and compared again, the "Decoding lattices
#     with lattice-tool" step is logged and htk.lattice_decode is called with
#     baseline_lat_dir_rescored and baseline_pass2_mlf.
#   * data_manipulation.mlf_to_trn is called for the pass-1 and pass-2 MLF
#     files (targets pass1_trn and pass2_trn).
#   * Path names for the 'unsupsi' experiment are built under 'unsup_si'.
#
# NOTE(review): the `current_step >= options.step` checks presumably let a run
# resume from a later step, but which statements belong to each `if` body
# cannot be told from this flattened text -- confirm against the original
# layout before editing.
logger.info("Start step: %d (%s)" % (current_step, 'Generating lattices with HDecode')) if os.path.exists(baseline_lat_dir): shutil.rmtree(baseline_lat_dir) os.mkdir(baseline_lat_dir) htk.HDecode(current_step, scp_file, si_model, dict_hdecode, phones_list, lm, baseline_lat_dir, num_tokens, baseline_pass1_mlf, [config_hdecode], lm_scale, beam, end_beam, max_pruning) current_step += 1 if current_step >= options.step: logger.info("Start step: %d (%s)" % (current_step, 'Rescoring lattices with lattice-tool')) if os.path.exists(baseline_lat_dir_rescored): shutil.rmtree(baseline_lat_dir_rescored) htk.lattice_rescore(current_step, baseline_lat_dir, baseline_lat_dir_rescored, lm_rescore + '.gz', lm_scale) current_step += 1 if current_step >= options.step: logger.info("Start step: %d (%s)" % (current_step, 'Decoding lattices with lattice-tool')) htk.lattice_decode(current_step,baseline_lat_dir_rescored, baseline_pass2_mlf, lm_scale) data_manipulation.mlf_to_trn(baseline_pass1_mlf, pass1_trn, speaker_name_width) data_manipulation.mlf_to_trn(baseline_pass2_mlf, pass2_trn, speaker_name_width) unsupsi_dir = 'unsup_si' unsupsi_lat_dir = unsupsi_dir + '/lattices.htk' unsupsi_lat_dir_rescored = unsupsi_dir + '/lattices.rescored' if 'unsupsi' in experiments: adapt_mlf = unsupsi_dir + '/adapt.mlf' pass1_mlf = unsupsi_dir + '/pass1.mlf' pass2_mlf = unsupsi_dir + '/pass2.mlf' pass1_trn = unsupsi_dir + '/pass1.trn'
def run(self):
    """Run recognition for this experiment and write its transcript files.

    The work directory is ``self.name``.  The method:

    1. Copies the recognition SCP list to ``<work_dir>/recog.scp``.  The
       model's ``recognize_scp`` is used unless the experiment's own
       ``recognize_scp`` setting does not start with ``'_'``, in which case
       the experiment's value wins.
    2. Creates the work sub-directories that do not exist yet.
    3. Calls ``make_adaptation`` on every entry of ``self.adaptations`` in
       order, handing each one the result of the previous call.
    4. Calls ``htk.HDecode`` to generate lattices and ``hdecode.mlf``.
    5. If the model configures ``lm_rescore``, rescores and decodes the
       lattices with ``htk.lattice_rescore`` / ``htk.lattice_decode``;
       otherwise derives ``rescore.mlf`` from ``hdecode.mlf`` directly.
    6. Converts the resulting MLF to ``recog.trn`` with
       ``data_manipulation.mlf_to_trn``.

    ``self.done`` is set to True on success and to False when ``Experiment``
    is caught.  Returns None.
    """
    work_dir = self.name
    xforms_dir = work_dir + '/xforms'
    classes_dir = work_dir + '/classes'
    files_dir = work_dir + '/files'

    model = (self.model.configuration['model_dir'] + '/' +
             self.configuration['model_name'])

    # Pick the SCP list: an experiment-level value starting with '_' means
    # "fall back to the model's list".
    recog_scp = work_dir + "/recog.scp"
    rscp = self.model.configuration['recognize_scp']
    if not self.configuration['recognize_scp'].startswith('_'):
        rscp = self.configuration['recognize_scp']
    data_manipulation.copy_scp_file(rscp, recog_scp)

    dict_hdecode = self.model.configuration['dict_hdecode']
    tiedlist = self.model.configuration['tiedlist']
    lm = self.model.configuration['lm']
    lm_rescore = self.model.configuration['lm_rescore']

    lm_scale = self.configuration['lm_scale']
    beam = self.configuration['beam']
    end_beam = self.configuration['end_beam']
    max_pruning = self.configuration['max_pruning']
    num_tokens = self.configuration['num_tokens']

    hdecode_mlf = work_dir + '/hdecode.mlf'
    rescore_mlf = work_dir + '/rescore.mlf'
    recog_trn = work_dir + '/recog.trn'

    configs = [self.model.configuration['config']]

    try:
        htk_lat_dir = os.path.join(work_dir, 'lattices.htk')
        rescore_lat_dir = os.path.join(work_dir, 'lattices.rescore')
        log_dir = os.path.join(work_dir, 'log')

        # work_dir comes first so the sub-directories can be created in it.
        for directory in (work_dir, htk_lat_dir, rescore_lat_dir, log_dir,
                          xforms_dir, classes_dir, files_dir):
            if not os.path.exists(directory):
                os.mkdir(directory)

        # Each adaptation receives the transform produced by the previous
        # one; only the last result is used below.
        current_parent_transform = None
        for i, adaptation in enumerate(self.adaptations):
            current_parent_transform = adaptation.make_adaptation(
                i, current_parent_transform, self)

        adap_dirs = None
        speaker_name_width = self.model.configuration['speaker_name_width']
        if current_parent_transform is not None:
            # The transform is indexed as (extension, config, name width).
            configs.append(current_parent_transform[1])
            extension = current_parent_transform[0]
            adap_dirs = [(xforms_dir, extension), (classes_dir, None)]
            speaker_name_width = current_parent_transform[2]

        # Single-argument print(...) emits the same text under the Python 2
        # print statement and the Python 3 print function.
        print("Start step: %d (%s)" % (0, 'Generating lattices with HDecode'))
        htk.HDecode(log_dir, recog_scp, model, dict_hdecode, tiedlist, lm,
                    htk_lat_dir, num_tokens, hdecode_mlf, configs, lm_scale,
                    beam, end_beam, max_pruning, adap_dirs,
                    speaker_name_width)

        if lm_rescore is not None:
            print("Start step: %d (%s)"
                  % (0, 'Rescoring lattices with lattice-tool'))
            htk.lattice_rescore(log_dir, htk_lat_dir, rescore_lat_dir,
                                lm_rescore + '.gz', lm_scale)

            print("Start step: %d (%s)"
                  % (0, 'Decoding lattices with lattice-tool'))
            htk.lattice_decode(log_dir, rescore_lat_dir, rescore_mlf,
                               lm_scale)

            data_manipulation.mlf_to_trn(
                rescore_mlf, recog_trn,
                self.model.configuration['speaker_name_width'])
        else:
            # No rescoring LM: build rescore.mlf from the HDecode output.
            # A "</s>" line is inserted before every "." line that is not
            # already preceded by one, and the lattice directory prefix is
            # replaced with "*".  Both files are closed deterministically.
            with open(rescore_mlf, 'w') as out_file:
                with open(hdecode_mlf) as in_file:
                    prev = ""
                    for line in in_file:
                        stripped = line.rstrip()
                        if stripped == '.' and prev != '</s>':
                            out_file.write("</s>\n")
                        out_file.write(
                            stripped.replace(htk_lat_dir, '*') + "\n")
                        prev = stripped

            # The transcript is produced from the unmodified HDecode MLF.
            data_manipulation.mlf_to_trn(
                hdecode_mlf, recog_trn,
                self.model.configuration['speaker_name_width'])

        self.done = True
    except Experiment:
        # NOTE(review): `Experiment` is not defined in the visible code; if
        # it is not an exception class this handler never matches -- confirm
        # which failure type was meant to mark the run as not done.
        self.done = False