def train_gmm_set(model_list, training_list, lab_dir, model_dir, orig_model_file,
                  n_train_iterations=10, min_var_macro=None, update_transitions=False):
    '''
    Train GMM files for different classes.

    Input:
    model_list         : file with names of the models to be trained (one per line)
                         the model names should match the labels in the annotations
    training_list      : list of training feature files (absolute paths)
    lab_dir            : directory holding the label (annotation) files
    model_dir          : directory where the output models will be stored
    orig_model_file    : initial model file (HTK mmf-formatted file is required)
    n_train_iterations : number of Baum-Welch iterations
    min_var_macro      : file with the minimum variance macros (vFloors)
    update_transitions : currently unused; kept for interface compatibility
    '''
    def _herest_args(model_file):
        # Build the HERest argument list for one Baum-Welch pass; the
        # variance-floor macro file is included only when it exists.
        args = ['-u', 'mvwt', '-S', training_list, '-L', lab_dir,
                '-H', model_file]
        if min_var_macro is not None and os.path.exists(min_var_macro):
            args += ['-H', min_var_macro]
        args += ['-M', model_dir, '-m', str(1), model_list]
        return args

    # First Baum-Welch iteration, starting from the original model file
    herest.herest(_herest_args(orig_model_file))

    # HERest writes its re-estimated models to 'newMacros'; fall back to
    # 'hmmdef' when that name is used instead.
    model_file = os.path.join(model_dir, 'newMacros')
    if not os.path.exists(model_file):
        model_file = os.path.join(model_dir, 'hmmdef')
    assert os.path.exists(model_file)

    # Remaining iterations re-estimate in place from the trained model.
    # Ideally a convergence criterion should be applied instead.
    args = _herest_args(model_file)
    for _ in range(1, n_train_iterations):
        herest.herest(args)
def train_hlda(model_list, n_nuisance_dims, training_list, lab_dir, model_dir,
               min_var_macro=None, n_states=1, max_iterations=10):
    '''
    Estimate HLDA transformation by following the guidelines in the HTK
    tutorial (HTKBook, p.49 'Semi-Tied and HLDA transforms').
    Found that HLDA estimation in HTK v3.4-1 only works
    [NOTE(review): original docstring ends mid-sentence -- confirm intent]

    Input:
    model_list      : file with names of the models to be trained
    n_nuisance_dims : number of nuisance dimensions removed by the HLDA transform
    training_list   : list of training feature files (absolute paths)
    lab_dir         : directory holding the label (annotation) files
    model_dir       : directory holding the trained 'newMacros' model file;
                      also receives the outputs
    min_var_macro   : file with the minimum variance macros (vFloors)
    n_states        : number of HMM states per model
    max_iterations  : iteration count passed to the HLDA HTK configuration
                      (new keyword parameter; the original code referenced an
                      undefined 'max_iterations' name, a NameError at runtime)
    '''
    # Find the number of GMM components in the trained model
    model_file = os.path.join(model_dir, 'newMacros')
    assert os.path.exists(model_file)
    n_comps = 0
    with open(model_file, 'r') as m_file:
        for ln in m_file:
            m = re.match(r'<NUMMIXES>\s+(\d+)', ln)
            if m:
                # Kept as a string, as in the original code; the downstream
                # helper presumably formats it -- TODO confirm
                n_comps = m.group(1)
                break
    if n_comps == 0:
        raise Exception('No <NUMMIXES> entry found in {0}'.format(model_file))

    # Write global class file (ties all components into one HLDA class)
    class_filename = os.path.join(model_dir, 'global')
    with open(class_filename, 'w') as class_file:
        class_file.write(get_global_class_string(n_comps, n_states))

    # Write the HTK configuration file driving the HLDA estimation
    config_filename = os.path.join(model_dir, 'config.hlda')
    hlda_conf_map = config_hlda(n_nuisance_dims, max_iterations)
    write_htk_config(hlda_conf_map, config_filename)

    # Call HERest to estimate the HLDA transform
    args = ['-H', os.path.join(model_dir, 'newMacros'), '-u', 's',
            '-C', config_filename, '-L', lab_dir, '-K', model_dir,
            '-J', model_dir, '-S', training_list, '-M', model_dir, model_list]
    if min_var_macro is not None and os.path.exists(min_var_macro):
        args.insert(0, '-H')
        args.insert(1, min_var_macro)
    # NOTE(review): the original called herest(args) here while the rest of
    # the file calls herest.herest(args); unified on the latter -- confirm
    # against the module's import style
    herest.herest(args)

    # Reiterate after the HLDA estimation, using the freshly written vFloors
    min_var_macro = os.path.join(model_dir, 'vFloors')
    args = ['-H', os.path.join(model_dir, 'newMacros'), '-C', config_filename,
            '-L', lab_dir, '-S', training_list]
    if os.path.exists(min_var_macro):
        args += ['-H', min_var_macro]
    args += ['-M', model_dir, model_list]
    for _ in range(1, 3):
        herest.herest(args)
def train_gmm_set(
    model_list,
    training_list,
    lab_dir,
    model_dir,
    orig_model_file,
    n_train_iterations=10,
    min_var_macro=None,
    update_transitions=False,
):
    """
    Train GMM files for different classes.

    NOTE(review): this is a reformatted duplicate of an earlier
    train_gmm_set definition in this file; being the later definition,
    it is the one the module actually uses. Consider removing one copy.

    Input:
    model_list         : file with names of the models to be trained (one per line)
                         the model names should match the labels in the annotations
    training_list      : list of training feature files (absolute paths)
    lab_dir            : directory holding the label (annotation) files
    model_dir          : directory where the output models will be stored
    orig_model_file    : initial model file (HTK mmf-formatted file is required)
    n_train_iterations : number of Baum-Welch iterations
    min_var_macro      : file with the minimum variance macros (vFloors)
    update_transitions : currently unused; kept for interface compatibility
    """

    def _build_args(model_file):
        # Assemble one HERest invocation; the variance-floor macro file is
        # only passed along when it actually exists on disk.
        args = ["-u", "mvwt", "-S", training_list, "-L", lab_dir,
                "-H", model_file]
        if min_var_macro is not None and os.path.exists(min_var_macro):
            args += ["-H", min_var_macro]
        args += ["-M", model_dir, "-m", str(1), model_list]
        return args

    # First Baum-Welch iteration from the original model file
    herest.herest(_build_args(orig_model_file))

    # HERest writes 'newMacros' (or 'hmmdef' in older setups)
    model_file = os.path.join(model_dir, "newMacros")
    if not os.path.exists(model_file):
        model_file = os.path.join(model_dir, "hmmdef")
    assert os.path.exists(model_file)

    # Remaining iterations re-estimate from the trained model.
    # Ideally a convergence criterion should be applied instead.
    args = _build_args(model_file)
    for _ in range(1, n_train_iterations):
        herest.herest(args)