Example 1
        map_file_to_start_end = {}
        mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
        map_mfcc_file_name = 'tmp_allen_map_file_to_start_end_' + str(
            int(input_n_frames)) + '.pickle'
        try:
            # load the cached concatenated MFCCs and the per-file offset map
            print("loading concat MFCC from pickled file")
            with open(mfcc_file_name, 'rb') as concat_mfcc:
                all_mfcc = np.load(concat_mfcc)
            with open(map_mfcc_file_name, 'rb') as map_mfcc:
                map_file_to_start_end = pickle.load(map_mfcc)
        except:  # cache missing or unreadable: rebuild it below
            for ind, mfcc_file in enumerate(list_of_mfcc_files):
                start = all_mfcc.shape[0]
                x = htkmfc.open(mfcc_file).getall()
                if input_n_frames > 1:
                    x = padding(input_n_frames, x)
                all_mfcc = np.append(all_mfcc, x, axis=0)
                map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
                print("did", mfcc_file, "ind", ind)
            with open(mfcc_file_name, 'wb') as concat_mfcc:
                np.save(concat_mfcc, all_mfcc)
            with open(map_mfcc_file_name, 'wb') as map_mfcc:
                pickle.dump(map_file_to_start_end, map_mfcc)

        tmp_likelihoods = likelihoods_computer(all_mfcc)
        depth_1_likelihoods = depth_1_computer(all_mfcc)
        depth_2_likelihoods = depth_2_computer(all_mfcc)
        #depth_3_likelihoods = depth_1_computer(all_mfcc) TODO
        print(map_states_to_phones)
        print(dbn_phones_to_states)
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                             for i in range(tmp_likelihoods.shape[1])]
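
The `padding(input_n_frames, x)` helper used above is not shown in these examples; a minimal sketch of what such a frame-stacking step presumably does (assuming it concatenates each frame with its n-1 neighbours, repeating the edge frames so the number of rows is preserved) could look like this:

import numpy as np

def stack_adjacent_frames(n_frames, x):
    """Return an array of shape (T, n_frames * d) from x of shape (T, d)."""
    half = n_frames // 2
    # repeat the first and last frame so every row keeps a full context window
    padded = np.vstack([x[0:1]] * half + [x] + [x[-1:]] * half)
    return np.hstack([padded[i:i + x.shape[0]] for i in range(n_frames)])

x = np.arange(12, dtype='float32').reshape(4, 3)  # 4 frames of 3 coefficients
print(stack_adjacent_frames(3, x).shape)          # -> (4, 9)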
Example 2
def process(ofname, iscpfname, ihmmfname, 
        ilmfname=None, iwdnetfname=None, unibifname=None, 
        idbnfname=None, idbndictstuple=None):

    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_) #TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname != None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # as called here = for a GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname != None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf) # parse the word network (wdnet)
    elif ilmfname != None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf) # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(transitions, ilmf) # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname != None: # our own unigram and bigram counts,
                             # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf, 
                    unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions, 
            insertion_penalty=INSERTION_PENALTY, scale_factor=SCALE_FACTOR)


    dummy = np.ndarray((2,2)) # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {}) # also useful for debugging that compilation
    
    if dbn != None:
        input_n_frames_mfcc = dbn.rbm_layers[0].n_visible / 39 # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[1].n_visible / 59 # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try: # TODO remove?
            print "loading concat MFCC from pickled file"
            with open(input_file_name) as concat:
                all_input = np.load(concat)
            with open(map_input_file_name) as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except:
            print "concatenating MFCC and articulatory files" # TODO parallelize + use np.concatenate
            all_input = np.ndarray((0, dbn.rbm_layers[0].n_visible + dbn.rbm_layers[1].n_visible), dtype='float32')
            map_file_to_start_end = {}
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    start = all_input.shape[0]
                    # get the single-frame (1-frame) signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and delta-deltas for the articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # do feature transformations if any
                    # TODO with mocha_timit_params.json params
                    # concatenate
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    all_input = np.append(all_input, x_mfcc_arti, axis=0)
                    map_file_to_start_end[cline] = (start, all_input.shape[0])
            with open(input_file_name, 'w') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'w') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else: # GMM
        all_mfcc = np.ndarray((0, 39), dtype='float32') # TODO generalize

    print "computing likelihoods"
    if dbn != None: # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        #mean_dbns = np.mean(tmp_likelihoods, 0)
        #tmp_likelihoods *= (mean_gmms / mean_dbns)
        print tmp_likelihoods
        print tmp_likelihoods.shape
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])]
        print columns_remapping
        likelihoods = (tmp_likelihoods[:, columns_remapping],
            map_file_to_start_end)
        print likelihoods[0]
        print likelihoods[0].shape
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print "computing viterbi paths"
    list_mlf_string = []
    with open(iscpfname) as iscpf:
        il = InnerLoop(likelihoods,
                map_states_to_phones, transitions,
                using_bigram=(ilmfname != None 
                    or iwdnetfname != None 
                    or unibifname != None))
        p = Pool(cpu_count())
        list_mlf_string = p.map(il, iscpf)
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        for line in list_mlf_string:
            of.write(line)
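
For reference, the `functools.partial` calls at the top of `process` bind the model parameters once so that downstream code only passes the feature matrix. A self-contained sketch of that pattern with a toy scoring function (the real `compute_likelihoods` is not shown in these examples):

import functools
import numpy as np

def toy_likelihoods(params, frames):
    # stand-in for compute_likelihoods(gmms_, frames): one score per (frame, state)
    return frames.dot(params)

params = np.ones((3, 5), dtype='float32')             # 3-dim frames, 5 "states"
likelihoods_computer = functools.partial(toy_likelihoods, params)
print(likelihoods_computer(np.zeros((10, 3), dtype='float32')).shape)  # -> (10, 5)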
Example 3
    all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')
    map_file_to_start_end = {}
    mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
    map_mfcc_file_name = 'tmp_allen_map_file_to_start_end_' + str(int(input_n_frames)) + '.pickle'
    try:
        print "loading concat MFCC from pickled file"
        with open(mfcc_file_name) as concat_mfcc:
            all_mfcc = np.load(concat_mfcc)
        with open(map_mfcc_file_name) as map_mfcc:
            map_file_to_start_end = cPickle.load(map_mfcc)
    except:
        for ind, mfcc_file in enumerate(list_of_mfcc_files):
            start = all_mfcc.shape[0]
            x = htkmfc.open(mfcc_file).getall()
            if input_n_frames > 1:
                x = padding(input_n_frames, x)
            all_mfcc = np.append(all_mfcc, x, axis=0)
            map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
            print "did", mfcc_file, "ind", ind
        with open(mfcc_file_name, 'w') as concat_mfcc:
            np.save(concat_mfcc, all_mfcc)
        with open(map_mfcc_file_name, 'w') as map_mfcc:
            cPickle.dump(map_file_to_start_end, map_mfcc)

    tmp_likelihoods = likelihoods_computer(all_mfcc)
    columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])]
    likelihoods = (tmp_likelihoods[:, columns_remapping],
        map_file_to_start_end)
else:
    all_mfcc = np.ndarray((0, 39), dtype='float32')
    map_file_to_start_end = {}
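
The `columns_remapping` step above reorders the DBN's output columns so that column i holds the likelihood of the i-th HMM state, using NumPy fancy indexing. A toy illustration (the state/phone mappings below are made up for the example):

import numpy as np

tmp_likelihoods = np.array([[0.1, 0.7, 0.2],
                            [0.5, 0.3, 0.2]])
map_states_to_phones = {0: 'aa[2]', 1: 'aa[3]', 2: 'aa[4]'}  # HMM column -> phone state
dbn_phones_to_states = {'aa[2]': 2, 'aa[3]': 0, 'aa[4]': 1}  # phone state -> DBN column
columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                     for i in range(tmp_likelihoods.shape[1])]
likelihoods = tmp_likelihoods[:, columns_remapping]  # column i now matches HMM state i
print(likelihoods)   # [[0.2 0.1 0.7]
                     #  [0.2 0.5 0.3]]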
Example 4
def process(ofname,
            iscpfname,
            ihmmfname,
            ilmfname=None,
            iwdnetfname=None,
            unibifname=None,
            idbnfname=None,
            idbndictstuple=None):

    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods,
                                                 gmms_)  #TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname != None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # as called here = for a GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname != None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)  # parse the word network (wdnet)
    elif ilmfname != None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(
                    transitions,
                    ilmf)  # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(
                    transitions, ilmf)  # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname != None:  # our own unigram and bigram counts,
        # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions,
                                                 ubf,
                                                 unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also useful for debugging that compilation

    if dbn != None:
        input_n_frames_mfcc = dbn.rbm_layers[
            0].n_visible / 39  # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[
            1].n_visible / 59  # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try:  # TODO remove?
            print "loading concat MFCC from pickled file"
            with open(input_file_name) as concat:
                all_input = np.load(concat)
            with open(map_input_file_name) as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except:
            print "concatenating MFCC and articulatory files"  # TODO parallelize + use np.concatenate
            all_input = np.ndarray(
                (0, dbn.rbm_layers[0].n_visible + dbn.rbm_layers[1].n_visible),
                dtype='float32')
            map_file_to_start_end = {}
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    start = all_input.shape[0]
                    # get the single-frame (1-frame) signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and delta-deltas for the articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(
                        x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # do feature transformations if any
                    # TODO with mocha_timit_params.json params
                    # concatenate
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    all_input = np.append(all_input, x_mfcc_arti, axis=0)
                    map_file_to_start_end[cline] = (start, all_input.shape[0])
            with open(input_file_name, 'w') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'w') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else:  # GMM
        all_mfcc = np.ndarray((0, 39), dtype='float32')  # TODO generalize

    print "computing likelihoods"
    if dbn != None:  # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        #mean_dbns = np.mean(tmp_likelihoods, 0)
        #tmp_likelihoods *= (mean_gmms / mean_dbns)
        print tmp_likelihoods
        print tmp_likelihoods.shape
        columns_remapping = [
            dbn_phones_to_states[map_states_to_phones[i]]
            for i in xrange(tmp_likelihoods.shape[1])
        ]
        print columns_remapping
        likelihoods = (tmp_likelihoods[:, columns_remapping],
                       map_file_to_start_end)
        print likelihoods[0]
        print likelihoods[0].shape
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print "computing viterbi paths"
    list_mlf_string = []
    with open(iscpfname) as iscpf:
        il = InnerLoop(likelihoods,
                       map_states_to_phones,
                       transitions,
                       using_bigram=(ilmfname != None or iwdnetfname != None
                                     or unibifname != None))
        p = Pool(cpu_count())
        list_mlf_string = p.map(il, iscpf)
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        for line in list_mlf_string:
            of.write(line)
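
Finally, the decoding loop at the end of `process` hands one .scp line per utterance to a pool of workers. A minimal, self-contained sketch of that pattern (`decode_line` here is only a stand-in for the `InnerLoop` callable, which is not shown in these examples):

from multiprocessing import Pool, cpu_count

def decode_line(line):
    # would run Viterbi on the utterance named by `line` and return its MLF block
    return '"%s"\n.\n' % line.strip()

if __name__ == '__main__':
    lines = ['utt1.lab\n', 'utt2.lab\n', 'utt3.lab\n']  # stands in for the open .scp file
    p = Pool(cpu_count())
    list_mlf_string = p.map(decode_line, lines)
    p.close()
    with open('out.mlf', 'w') as of:
        of.write('#!MLF!#\n')
        for block in list_mlf_string:
            of.write(block)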