Example #1
0
def train_model(in_filename, mlf_filename, scp_filename, out_filename, scp_prefix_subst=None):
    """Train an acoustic model on labeled utterances and write the result.

    The models are read from ``in_filename``, statistics are accumulated for
    every utterance that ``process_htk_files`` hands to the per-utterance
    callback, the accumulators are applied, and the resulting models are
    written to ``out_filename``.

    Args:
        in_filename: Path of the acoustic model file to read.
        mlf_filename: Path of the file passed to ``process_htk_files`` as the
            MLF (label) stream.
        scp_filename: Path of the file passed to ``process_htk_files`` as the
            SCP stream.
        out_filename: Path the trained models are written to.
        scp_prefix_subst: Passed through unchanged to ``process_htk_files``.

    Raises:
        AssertionError: If the HMM manager's input and output arity sets
            differ, or do not contain exactly one arity.
    """
    # Read models and create managers
    with open(in_filename) as f:
        model_dict, gmm_mgr, hmm_mgr = read_acoustic_model(f, log_domain=True)

    # Some tricky business to make sure we have an epsilon Hmm to use later
    assert hmm_mgr.input_arity_set == hmm_mgr.output_arity_set
    assert len(hmm_mgr.input_arity_set) == 1
    # NOTE(review): pop() removes the element from whatever set
    # input_arity_set returns; if that is the manager's own set this mutates
    # it -- confirm that is intended.
    in_out_arity = hmm_mgr.input_arity_set.pop()
    epsilon_index = hmm_mgr.add_epsilon_model(gmm_mgr, in_out_arity, log_domain=True)

    # Initialize managers for training
    gmm_mgr.set_adaptation_state("INITIALIZING")
    hmm_mgr.set_adaptation_state("INITIALIZING")
    gmm_mgr.clear_all_accumulators()

    # One-element list so the nested callback can update the counter in place
    # (works without ``nonlocal``).
    utterance_num = [1]

    def train_one_utterance(labeled_data_event):
        """Accumulate training statistics for a single labeled utterance."""
        print("Beginning training on utterance %d (%s) (%d frames)" % (utterance_num[0], labeled_data_event.name,
                                                                  labeled_data_event.data.shape[0]))
        sys.stdout.flush()
        label_lattice = labels_to_lattice(labeled_data_event.labels)
        # Build model lattice
        model_lattice = build_model_lattice(label_lattice, model_dict, epsilon_index)
        # Build training graph
        training_graph = TrainingGraph(model_lattice, hmm_mgr, dict())

        # Train on data.  Every utterance after the first finds the managers
        # still in "ACCUMULATING" (set below), so put them back into
        # "INITIALIZING" before begin_training() is called.
        if gmm_mgr.get_adaptation_state() != "INITIALIZING":
            gmm_mgr.set_adaptation_state("INITIALIZING")
            hmm_mgr.set_adaptation_state("INITIALIZING")
        training_graph.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        hmm_mgr.set_adaptation_state("ACCUMULATING")

        training_graph.train_one_sequence(labeled_data_event.data)
        training_graph.end_training()
        print("Finished training on utterance %d (%s)" % (utterance_num[0], labeled_data_event.name))
        sys.stdout.flush()
        utterance_num[0] += 1

    # Do processing.  A multi-item ``with`` is used instead of
    # contextlib.nested(): nested() is gone in Python 3, and it leaks the
    # first file if opening the second one raises.
    with open(mlf_filename) as mlf_file, open(scp_filename) as scp_file:
        process_htk_files(mlf_file, scp_file, train_one_utterance, scp_prefix_subst)

    # Finalize training
    gmm_mgr.set_adaptation_state("APPLYING")
    hmm_mgr.set_adaptation_state("APPLYING")
    gmm_mgr.apply_all_accumulators()
    hmm_mgr.apply_all_accumulators()
    gmm_mgr.set_adaptation_state("NOT_ADAPTING")
    hmm_mgr.set_adaptation_state("NOT_ADAPTING")

    # Write out models
    with open(out_filename, "w") as f:
        write_acoustic_model(model_dict, gmm_mgr, hmm_mgr, f)
Example #2
0
def train_model(in_filename,
                mlf_filename,
                scp_filename,
                out_filename,
                scp_prefix_subst=None):
    """Run one training pass over labeled utterances and save the models.

    Models are loaded from ``in_filename``; each utterance delivered by
    ``process_htk_files`` is accumulated into the GMM/HMM managers; the
    accumulators are then applied and the models written to
    ``out_filename``.

    Args:
        in_filename: Path of the acoustic model file to read.
        mlf_filename: Path opened and given to ``process_htk_files`` as the
            MLF (label) stream.
        scp_filename: Path opened and given to ``process_htk_files`` as the
            SCP stream.
        out_filename: Path the trained models are written to.
        scp_prefix_subst: Forwarded as-is to ``process_htk_files``.

    Raises:
        AssertionError: If the HMM manager's input and output arity sets
            differ, or do not hold exactly one arity.
    """
    # Read models and create managers
    with open(in_filename) as f:
        model_dict, gmm_mgr, hmm_mgr = read_acoustic_model(f, log_domain=True)

    # Some tricky business to make sure we have an epsilon Hmm to use later
    assert hmm_mgr.input_arity_set == hmm_mgr.output_arity_set
    assert len(hmm_mgr.input_arity_set) == 1
    # NOTE(review): pop() removes the single arity from the set object that
    # input_arity_set returns -- confirm mutating it is intended.
    in_out_arity = hmm_mgr.input_arity_set.pop()
    epsilon_index = hmm_mgr.add_epsilon_model(gmm_mgr,
                                              in_out_arity,
                                              log_domain=True)

    # Initialize managers for training
    gmm_mgr.set_adaptation_state("INITIALIZING")
    hmm_mgr.set_adaptation_state("INITIALIZING")
    gmm_mgr.clear_all_accumulators()

    # Held in a list so the closure below can increment it in place.
    utterance_num = [1]

    def train_one_utterance(labeled_data_event):
        """Accumulate statistics for one labeled utterance."""
        print("Beginning training on utterance %d (%s) (%d frames)" %
              (utterance_num[0], labeled_data_event.name,
               labeled_data_event.data.shape[0]))
        sys.stdout.flush()
        label_lattice = labels_to_lattice(labeled_data_event.labels)
        # Build model lattice
        model_lattice = build_model_lattice(label_lattice, model_dict,
                                            epsilon_index)
        # Build training graph
        training_graph = TrainingGraph(model_lattice, hmm_mgr, dict())

        # Train on data.  The previous utterance leaves both managers in
        # "ACCUMULATING", so reset them before begin_training().
        if gmm_mgr.get_adaptation_state() != "INITIALIZING":
            gmm_mgr.set_adaptation_state("INITIALIZING")
            hmm_mgr.set_adaptation_state("INITIALIZING")
        training_graph.begin_training()
        gmm_mgr.set_adaptation_state("ACCUMULATING")
        hmm_mgr.set_adaptation_state("ACCUMULATING")

        training_graph.train_one_sequence(labeled_data_event.data)
        training_graph.end_training()
        print("Finished training on utterance %d (%s)" %
              (utterance_num[0], labeled_data_event.name))
        sys.stdout.flush()
        utterance_num[0] += 1

    # Do processing.  contextlib.nested() was removed in Python 3 and, with
    # open() calls as arguments, leaves the first file open when the second
    # open() fails; the multi-item ``with`` closes it correctly.
    with open(mlf_filename) as mlf_file, open(scp_filename) as scp_file:
        process_htk_files(mlf_file, scp_file, train_one_utterance,
                          scp_prefix_subst)

    # Finalize training
    gmm_mgr.set_adaptation_state("APPLYING")
    hmm_mgr.set_adaptation_state("APPLYING")
    gmm_mgr.apply_all_accumulators()
    hmm_mgr.apply_all_accumulators()
    gmm_mgr.set_adaptation_state("NOT_ADAPTING")
    hmm_mgr.set_adaptation_state("NOT_ADAPTING")

    # Write out models
    with open(out_filename, "w") as f:
        write_acoustic_model(model_dict, gmm_mgr, hmm_mgr, f)
def htkmmf2native(htk_file_name, native_file_name):
    """Convert an HTK MMF file into the native acoustic-model format.

    The file at ``htk_file_name`` is parsed with ``read_htk_mmf_file`` and
    the result is written to ``native_file_name`` (opened in binary mode)
    with ``write_acoustic_model``.
    """
    with open(htk_file_name) as htk_stream:
        parsed = read_htk_mmf_file(htk_stream)
    # read_htk_mmf_file's result is unpacked as (models, hmm_mgr, gmm_mgr),
    # while write_acoustic_model takes the GMM manager before the HMM manager.
    model_dict, hmm_manager, gmm_manager = parsed

    with open(native_file_name, 'wb') as native_stream:
        write_acoustic_model(model_dict, gmm_manager, hmm_manager,
                             native_stream)
Example #4
0
def htkmmf2native(htk_file_name, native_file_name):
    """Read an HTK MMF file and re-save its models in native format.

    Args:
        htk_file_name: Path of the HTK MMF file, read via
            ``read_htk_mmf_file``.
        native_file_name: Path of the output file, opened in binary mode and
            written via ``write_acoustic_model``.
    """
    with open(htk_file_name) as source:
        # The reader's result is unpacked HMM manager first, then GMM manager.
        model_set, hmm_manager, gmm_manager = read_htk_mmf_file(source)

    with open(native_file_name, 'wb') as sink:
        # The writer takes the GMM manager before the HMM manager.
        write_acoustic_model(model_set, gmm_manager, hmm_manager, sink)