def train_hlda(model_list, n_nuisance_dims, training_list, lab_dir, model_dir,
               min_var_macro=None, n_states=1, max_iterations=10):
    '''
    Estimate an HLDA transformation following the guidelines in the HTK
    tutorial (HTKBook, p.49 'Semi-Tied and HLDA transforms'). Found that
    HLDA estimation in HTK v3.4-1 only works

    Input:
    model_list : file with the names of the models to be trained
    n_nuisance_dims : number of nuisance dimensions removed by the HLDA transform
    training_list : list of training feature files (absolute paths)
    lab_dir : directory with the label files
    model_dir : directory where the models and HLDA artifacts are written
    min_var_macro : optional file with the minimum variance macros (vFloors)
    n_states : number of HMM states per model
    max_iterations : iteration count passed to the HLDA HTK configuration
                     (BUG FIX: this name was previously referenced without
                     ever being defined, raising NameError at runtime)
    '''
    # Find the number of GMM components from the trained macro file.
    model_file = os.path.join(model_dir, 'newMacros')
    assert os.path.exists(model_file)
    n_comps = 0
    with open(model_file, 'r') as m_file:
        for ln in m_file:
            m = re.match(r'<NUMMIXES>\s+(\d+)', ln)
            if m:
                # BUG FIX: the original stored the matched *string*, so the
                # n_comps == 0 sanity check below could never trigger after a
                # match and downstream code received a str instead of an int.
                n_comps = int(m.group(1))
                break
    if n_comps == 0:
        raise Exception('No <NUMMIXES> entry found in {0}'.format(model_file))

    # Write the global class file required by the HLDA estimation.
    class_filename = os.path.join(model_dir, 'global')
    with open(class_filename, 'w') as class_file:
        class_file.write(get_global_class_string(n_comps, n_states))

    # Write the HLDA-specific HTK configuration file.
    config_filename = os.path.join(model_dir, 'config.hlda')
    hlda_conf_map = config_hlda(n_nuisance_dims, max_iterations)
    write_htk_config(hlda_conf_map, config_filename)

    # Call HERest to estimate the HLDA transform.
    args = ['-H', os.path.join(model_dir, 'newMacros'), '-u', 's',
            '-C', config_filename, '-L', lab_dir, '-K', model_dir,
            '-J', model_dir, '-S', training_list, '-M', model_dir, model_list]
    if min_var_macro is not None and os.path.exists(min_var_macro):
        # The variance-floor macro file must be loaded before newMacros.
        args.insert(0, '-H')
        args.insert(1, min_var_macro)
    herest(args)

    # Re-estimate the models after the HLDA estimation. The vFloors file is
    # produced in model_dir, so only its existence needs checking here.
    min_var_macro = os.path.join(model_dir, 'vFloors')
    args = ['-H', os.path.join(model_dir, 'newMacros'), '-C', config_filename,
            '-L', lab_dir, '-S', training_list]
    if os.path.exists(min_var_macro):
        args += ['-H', min_var_macro]
    args += ['-M', model_dir, model_list]

    # Two additional re-estimation passes (range(1, 3) -> 2 iterations).
    for _ in range(1, 3):
        herest(args)
# Example 2
def train_gmm_set(
    model_list,
    training_list,
    lab_dir,
    model_dir,
    orig_model_file,
    n_train_iterations=10,
    min_var_macro=None,
    update_transitions=False,
):
    """
    Train GMM files for different classes.

    Input:
    model_list : file with names of the models to be trained (one per line)
                 the model names should match the labels in the annotations
    training_list : list of training feature files (absolute paths)
    lab_dir : directory with the label (annotation) files
    model_dir : directory where the output models will be stored
    orig_model_file : initial model file (HTK mmf-formatted file is required)
    n_train_iterations : number of Baum-Welch iterations
    min_var_macro : file with the minimum variance macros (vFloors)
    update_transitions : not used by this implementation; kept for
                         interface compatibility
    """

    def _build_args(model_file):
        # Assemble the HERest argument list for one re-estimation pass.
        # The variance-floor macro file (when present) is loaded with an
        # additional -H flag after the model definitions.
        args = [
            "-u",
            "mvwt",
            "-S",
            training_list,
            "-L",
            lab_dir,
            "-H",
            model_file,
        ]
        if min_var_macro is not None and os.path.exists(min_var_macro):
            args += ["-H", min_var_macro]
        args += ["-M", model_dir, "-m", str(1), model_list]
        return args

    # First Baum-Welch iteration starts from the original model file.
    herest.herest(_build_args(orig_model_file))

    # Subsequent iterations refine the freshly written model file; HERest
    # names its output either 'newMacros' or 'hmmdef'.
    model_file = os.path.join(model_dir, "newMacros")
    if not os.path.exists(model_file):
        model_file = os.path.join(model_dir, "hmmdef")
    assert os.path.exists(model_file)

    args = _build_args(model_file)

    # Ideally a convergence criterion should be applied instead.
    # range(1, n) yields n-1 passes, giving n_train_iterations in total
    # including the initial pass above.
    for _ in range(1, n_train_iterations):
        herest.herest(args)