def initialize_new(self, scp_list, word_mlf, dict, remove_previous=False):
    """Set up a fresh training run: dictionary, phone list, scp and word MLF.

    Args:
        scp_list: Comma-separated string of scp file paths. Each scp file
            lists one feature file per line; entries that do not start
            with '/' are resolved relative to the scp file's directory.
        word_mlf: Comma-separated string of word-level MLF file paths.
        dict: Path of a dictionary file, or an iterable of such paths
            (all are read into a single dictionary).
        remove_previous: When true, files in the log directory, the
            training-files directory and existing ``<name>.*`` model files
            are deleted instead of causing an error.

    Raises:
        ExistingFilesException: ``remove_previous`` is false and the
            training directory or matching model files already exist.
        TypeError: ``dict`` is neither a string nor an iterable of strings.

    NOTE(review): an scp entry whose id has no word transcription still
    fails on the transcription lookup (presumably a KeyError) -- confirm
    whether such entries should be skipped instead.
    """
    System.set_log_dir(self.name)
    model_glob = self.model_dir + '/' + self.name + '.*'

    if remove_previous:
        for log_file in glob.iglob(System.get_log_dir() + '/*'):
            os.remove(log_file)
    elif os.path.exists(self.train_files_dir) or glob.glob(model_glob):
        raise ExistingFilesException

    # Start from a clean slate (only reachable with stale files when
    # remove_previous is true).
    if os.path.exists(self.train_files_dir):
        shutil.rmtree(self.train_files_dir)
    for model_file in glob.iglob(model_glob):
        os.remove(model_file)
    os.mkdir(self.train_files_dir)

    # handle dictionary
    dic = HTK_dictionary()
    if isinstance(dict, basestring):
        dic.read_dict(dict)
    elif all(isinstance(d, basestring) for d in dict):
        for d in dict:
            dic.read_dict(d)
    else:
        raise TypeError
    dic.write_dict(self.training_dict)
    self.phones = dic.get_phones()

    # handle transcription
    trans = HTK_transcription()
    for mlf_path in word_mlf.strip().split(','):
        trans.read_mlf(mlf_path, HTK_transcription.WORD)

    # self.id is assigned before _get_model_name_id() is called, as in the
    # original ordering (presumably the generated name depends on it).
    self.id = 1
    phones_list = self._get_model_name_id() + '.hmmlist'
    with open(phones_list, 'w') as phones_desc:
        for p in self.phones:
            print(p, file=phones_desc)

    # handle scp files
    word_trans = trans.transcriptions[HTK_transcription.WORD]
    real_trans = HTK_transcription()
    real_trans.transcriptions[real_trans.WORD] = {}
    kept_trans = real_trans.transcriptions[real_trans.WORD]

    with open(self.training_scp, 'w') as scp_desc:
        for scp in scp_list.strip().split(','):
            scp_dir = os.path.dirname(scp)
            # Context manager so the scp file is closed deterministically.
            with open(scp) as scp_in:
                for line in scp_in:
                    entry = line.strip()
                    if not entry:
                        # A blank line has no utterance id to look up.
                        continue
                    utt_id = os.path.splitext(os.path.basename(entry))[0]
                    if entry.startswith('/'):
                        path = entry
                    else:
                        path = os.path.join(scp_dir, entry)

                    words = word_trans[utt_id]
                    for word in words:
                        if not dic.word_in_dict(word):
                            print("%s skipped, because has missing word %s"
                                  % (path, word))
                            break
                    else:
                        # Every word is covered by the dictionary: keep it.
                        print(path, file=scp_desc)
                        kept_trans[utt_id] = words

    real_trans.write_mlf(self.training_word_mlf,
                         target=HTK_transcription.WORD)
    self.expand_word_transcription()
def __init__(self, htk_config, name, model, scp, dictionary, language_model):
    """Create a clean working directory and stage scp lists and dictionaries.

    Args:
        htk_config: Configuration object. Its ``num_speaker_chars`` is set
            to 3 in place when negative; ``adap_align_dict`` is read as the
            path of the alignment dictionary.
        name: Working directory. A path not starting with '/' is taken
            relative to the current working directory. An existing
            directory of that name is removed first.
        model: Model path. When ``scp`` contains a '?' run, the same run in
            ``model`` is replaced by each speaker id.
        scp: Path of an scp file, or a glob pattern whose longest run of
            '?' characters marks the speaker id. Entries are rewritten
            relative to the directory of the scp file they came from.
        dictionary: Path of the dictionary copied to ``dict.hdecode``.
        language_model: Stored unchanged on the instance.
    """

    def copy_scp(src, dst):
        # Copy src to dst, prefixing each entry with src's own directory.
        base = os.path.dirname(src)
        with open(dst, 'w') as scp_out, open(src) as scp_in:
            for line in scp_in:
                print(os.path.join(base, line.strip()), file=scp_out)

    if not name.startswith('/'):
        name = os.path.join(os.getcwd(), name)
    if htk_config.num_speaker_chars < 0:
        htk_config.num_speaker_chars = 3

    self.name = name
    if os.path.exists(name):
        shutil.rmtree(name, ignore_errors=True)
    os.mkdir(name)

    self.a_id = 0
    self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
    os.mkdir(self.xforms_dir)
    self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
    os.mkdir(self.classes_dir)

    self.model = model
    self.split_scp_models = []
    if '?' in scp:
        # One scp/model pair per speaker; no single combined list.
        self.scp = None

        # Length of the longest '?' run in the pattern.
        num_scp_speaker_chars = 1
        while '?' * (num_scp_speaker_chars + 1) in scp:
            num_scp_speaker_chars += 1
        placeholder = '?' * num_scp_speaker_chars
        s_index = scp.find(placeholder)

        speakers = [
            match[s_index:s_index + num_scp_speaker_chars]
            for match in glob.iglob(scp)
        ]
        for s in speakers:
            real_scp = os.path.join(name, '%s_list.scp' % s)
            # Resolve entries against the expanded path, so a '?' run in a
            # directory component does not end up literally in the output.
            copy_scp(scp.replace(placeholder, s), real_scp)
            self.split_scp_models.append(
                (s, real_scp, model.replace(placeholder, s)))
    else:
        self.scp = os.path.join(name, 'list.scp')
        copy_scp(scp, self.scp)

    # Re-write both dictionaries into the working directory. The boolean
    # passed to write_dict is kept as in the original call sites.
    d = HTK_dictionary()
    d.read_dict(dictionary)
    self.dict = os.path.join(name, 'dict.hdecode')
    d.write_dict(self.dict, False)

    d = HTK_dictionary()
    d.read_dict(htk_config.adap_align_dict)
    self.adap_align_dict = os.path.join(name, 'dict.hvite')
    d.write_dict(self.adap_align_dict, True)

    self.language_model = language_model
    self.htk_config = htk_config

    self.adaptations = []
    self.adap_num_speaker_chars = None
    self.id = 0

    System.set_log_dir(os.path.basename(name))
def __init__(self, htk_config, name, model, scp, dictionary, language_model):
    """Create a clean working directory and stage scp lists and dictionaries.

    Args:
        htk_config: Configuration object. Its ``num_speaker_chars`` is set
            to 3 in place when negative; ``adap_align_dict`` is read as the
            path of the alignment dictionary.
        name: Working directory. A path not starting with '/' is taken
            relative to the current working directory. An existing
            directory of that name is removed first.
        model: Model path. When ``scp`` contains a '?' run, the same run in
            ``model`` is replaced by each speaker id.
        scp: Path of an scp file, or a glob pattern whose longest run of
            '?' characters marks the speaker id. Entries are rewritten
            relative to the directory of the scp file they came from.
        dictionary: Path of the dictionary copied to ``dict.hdecode``.
        language_model: Stored unchanged on the instance.
    """

    def copy_scp(src, dst):
        # Copy src to dst, prefixing each entry with src's own directory.
        base = os.path.dirname(src)
        with open(dst, 'w') as scp_out, open(src) as scp_in:
            for line in scp_in:
                print(os.path.join(base, line.strip()), file=scp_out)

    if not name.startswith('/'):
        name = os.path.join(os.getcwd(), name)
    if htk_config.num_speaker_chars < 0:
        htk_config.num_speaker_chars = 3

    self.name = name
    if os.path.exists(name):
        shutil.rmtree(name, ignore_errors=True)
    os.mkdir(name)

    self.a_id = 0
    self.xforms_dir = os.path.join(name, 'xforms%d' % self.a_id)
    os.mkdir(self.xforms_dir)
    self.classes_dir = os.path.join(name, 'classes%d' % self.a_id)
    os.mkdir(self.classes_dir)

    self.model = model
    self.split_scp_models = []
    if '?' in scp:
        # One scp/model pair per speaker; no single combined list.
        self.scp = None

        # Length of the longest '?' run in the pattern.
        num_scp_speaker_chars = 1
        while '?' * (num_scp_speaker_chars + 1) in scp:
            num_scp_speaker_chars += 1
        placeholder = '?' * num_scp_speaker_chars
        s_index = scp.find(placeholder)

        speakers = [
            match[s_index:s_index + num_scp_speaker_chars]
            for match in glob.iglob(scp)
        ]
        for s in speakers:
            real_scp = os.path.join(name, '%s_list.scp' % s)
            # Resolve entries against the expanded path, so a '?' run in a
            # directory component does not end up literally in the output.
            copy_scp(scp.replace(placeholder, s), real_scp)
            self.split_scp_models.append(
                (s, real_scp, model.replace(placeholder, s)))
    else:
        self.scp = os.path.join(name, 'list.scp')
        copy_scp(scp, self.scp)

    # Re-write both dictionaries into the working directory. The boolean
    # passed to write_dict is kept as in the original call sites.
    d = HTK_dictionary()
    d.read_dict(dictionary)
    self.dict = os.path.join(name, 'dict.hdecode')
    d.write_dict(self.dict, False)

    d = HTK_dictionary()
    d.read_dict(htk_config.adap_align_dict)
    self.adap_align_dict = os.path.join(name, 'dict.hvite')
    d.write_dict(self.adap_align_dict, True)

    self.language_model = language_model
    self.htk_config = htk_config

    self.adaptations = []
    self.adap_num_speaker_chars = None
    self.id = 0

    System.set_log_dir(os.path.basename(name))