Example #1
0
        # NOTE(review): this excerpt starts inside a branch of an enclosing
        # function; `dbn`, `dbn_to_int_to_state_tuple` and `transitions` are
        # bound before the first visible line.
        # The first element of the tuple is kept under its own name.
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        # Three callables bound to the same DBN; they differ only in the
        # `depth` keyword forwarded to compute_likelihoods_dbn (1, 2, None).
        depth_1_computer = functools.partial(compute_likelihoods_dbn,
                                             dbn,
                                             depth=1)
        depth_2_computer = functools.partial(compute_likelihoods_dbn,
                                             dbn,
                                             depth=2)
        likelihoods_computer = functools.partial(compute_likelihoods_dbn,
                                                 dbn,
                                                 depth=None)

    # TODO bigrams
    # initialize_transitions is called with the transitions alone (no
    # language-model file), then the insertion penalty and scale factor from
    # the module-level constants are applied.
    transitions = initialize_transitions(transitions)
    #print transitions
    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

    # Recursively collect every file under sys.argv[1] whose name ends with
    # '.mfc', stored as "<directory>/<file name>".
    list_of_mfcc_files = []
    for d, ds, fs in os.walk(sys.argv[1]):
        for fname in fs:
            if fname[-4:] != '.mfc':
                continue
            fullname = d.rstrip('/') + '/' + fname
            list_of_mfcc_files.append(fullname)

    if dbn != None:
        # Size of the first RBM layer's visible side divided by the hard-coded
        # 39 -- presumably the number of features per frame; TODO confirm.
        # NOTE(review): relies on `/` being integer division (Python 2).
        input_n_frames = dbn.rbm_layers[0].n_visible / 39  # TODO generalize
Example #2
0
def process(ofname, iscpfname, ihmmfname,
        ilmfname=None, iwdnetfname=None, unibifname=None,
        idbnfname=None, idbndictstuple=None):
    """Decode every feature file listed in an SCP file and write an MLF.

    Args:
        ofname: path of the MLF file to write. It starts with '#!MLF!#' and
            is followed by one string per SCP line, as returned by InnerLoop.
        iscpfname: path of the SCP file, one feature-file path per line.
        ihmmfname: path of the HMM definition handed to parse_hmm.
        ilmfname: optional bigram language model (matrix format when
            MATRIX_BIGRAM is set, otherwise handed to parse_lm).
        iwdnetfname: optional word network; takes precedence over ilmfname.
        unibifname: optional unigram/bigram counts file, used only when
            neither iwdnetfname nor ilmfname is given.
        idbnfname: optional pickled DBN; when given, likelihoods come from
            compute_likelihoods_dbn instead of the GMMs.
        idbndictstuple: pickled tuple whose first element maps phones to DBN
            output columns; required whenever idbnfname is given.

    Side effects:
        In DBN mode the concatenated input matrix and the file -> (start, end)
        row map are cached in the current directory ('tmp_input_mocha.npy',
        'tmp_map_file_to_start_end_mocha.pickle'). An existing cache is reused
        whatever iscpfname is, so delete those files when the SCP changes.
    """
    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname is not None:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    # Transition source, by priority: word network, bigram LM, own counts,
    # and finally uniform transitions between phones.
    if iwdnetfname is not None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)
    elif ilmfname is not None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf)
            else:
                transitions = parse_lm(transitions, ilmf) # ARPA-MIT format
    elif unibifname is not None: # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf,
                    unigrams_only=UNIGRAMS_ONLY)
    else:
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions,
            insertion_penalty=INSERTION_PENALTY, scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2,2)) # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {}) # also for this compile's debug purposes

    if dbn is not None:
        # `//` keeps the frame counts integral on every Python version.
        input_n_frames_mfcc = dbn.rbm_layers[0].n_visible // 39 # TODO generalize
        print("this is a DBN with %s MFCC frames" % input_n_frames_mfcc)
        input_n_frames_arti = dbn.rbm_layers[1].n_visible // 59 # 60 # TODO generalize
        print("this is a DBN with %s articulatory frames" % input_n_frames_arti)
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try: # TODO remove?
            print("loading concat MFCC from pickled file")
            # Binary mode: .npy data must not go through newline translation.
            with open(input_file_name, 'rb') as concat:
                all_input = np.load(concat)
            with open(map_input_file_name, 'rb') as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except Exception:
            # Missing or unreadable cache: rebuild it. `Exception` (not a bare
            # except) lets KeyboardInterrupt / SystemExit propagate.
            print("concatenating MFCC and articulatory files") # TODO parallelize
            n_features = (dbn.rbm_layers[0].n_visible
                    + dbn.rbm_layers[1].n_visible)
            # The empty float32 seed fixes the column count and keeps the
            # result well-defined when the SCP file lists nothing.
            chunks = [np.ndarray((0, n_features), dtype='float32')]
            map_file_to_start_end = {}
            n_rows = 0
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    # get the 1 framed signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy', 'rb') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and deltas deltas for articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # TODO feature transformations with mocha_timit_params.json
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    chunks.append(x_mfcc_arti)
                    end = n_rows + x_mfcc_arti.shape[0]
                    map_file_to_start_end[cline] = (n_rows, end)
                    n_rows = end
            # One concatenation instead of a quadratic np.append per file.
            all_input = np.concatenate(chunks, axis=0)
            with open(input_file_name, 'wb') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'wb') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else: # GMM
        # Stack the MFCC frames of every listed file and remember which rows
        # belong to which file; the Viterbi step needs both.
        chunks = [np.ndarray((0, 39), dtype='float32')] # TODO generalize
        map_file_to_start_end = {}
        n_rows = 0
        with open(iscpfname) as iscpf:
            for line in iscpf:
                cline = clean(line)
                x_mfcc = htkmfc.open(cline).getall()
                chunks.append(x_mfcc)
                end = n_rows + x_mfcc.shape[0]
                map_file_to_start_end[cline] = (n_rows, end)
                n_rows = end
        all_mfcc = np.concatenate(chunks, axis=0)

    print("computing likelihoods")
    if dbn is not None: # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        print(tmp_likelihoods)
        print(tmp_likelihoods.shape)
        # Reorder the DBN output columns into HMM state order.
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]]
                for i in xrange(tmp_likelihoods.shape[1])]
        print(columns_remapping)
        likelihoods = (tmp_likelihoods[:, columns_remapping],
            map_file_to_start_end)
        print(likelihoods[0])
        print(likelihoods[0].shape)
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print("computing viterbi paths")
    using_bigram = (ilmfname is not None
            or iwdnetfname is not None
            or unibifname is not None)
    il = InnerLoop(likelihoods, map_states_to_phones, transitions,
            using_bigram=using_bigram)
    pool = Pool(cpu_count())
    try:
        with open(iscpfname) as iscpf:
            list_mlf_string = pool.map(il, iscpf)
    finally:
        # Release the worker processes even when decoding fails.
        pool.close()
        pool.join()
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        of.writelines(list_mlf_string)
# NOTE(review): excerpt of a top-level script; `gmms_` and `transitions` are
# bound before the first visible line.
# Default likelihood function: compute_likelihoods with gmms_ pre-bound.
likelihoods_computer = functools.partial(compute_likelihoods, gmms_)

dbn = None
# Exactly five argv entries select DBN mode: argv[3] is the pickled DBN and
# argv[4] the pickled tuple whose first element becomes dbn_phones_to_states.
if len(sys.argv) == 5:
    # presumably imported so cPickle can resolve the DBN class while
    # unpickling -- TODO confirm
    from DBN_Gaussian_timit import DBN # not Gaussian if no GRBM
    with open(sys.argv[3]) as idbnf:
        dbn = cPickle.load(idbnf)
    with open(sys.argv[4]) as idbndtf:
        dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
    dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
    # Replace the GMM-based likelihood function by the DBN-based one.
    likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)

# TODO bigrams
# initialize_transitions is called with the transitions alone (no
# language-model file), then the insertion penalty and scale factor from the
# module-level constants are applied.
transitions = initialize_transitions(transitions)
#print transitions
transitions = penalty_scale(transitions, insertion_penalty=INSERTION_PENALTY,
        scale_factor=SCALE_FACTOR)

dummy = np.ndarray((2,2)) # to force only 1 compile of Viterbi's C
viterbi(dummy, [None, dummy], {}) # also for this compile's debug purposes

# Recursively collect every file under sys.argv[1] whose name ends with
# '.mfc', stored as "<directory>/<file name>".
list_of_mfcc_files = []
for d, ds, fs in os.walk(sys.argv[1]):
    for fname in fs:
        if fname[-4:] != '.mfc':
            continue
        fullname = d.rstrip('/') + '/' + fname
        list_of_mfcc_files.append(fullname)
#print list_of_mfcc_files

if dbn != None:
    # Size of the first RBM layer's visible side divided by the hard-coded
    # 39 -- presumably the number of features per frame; TODO confirm.
    # NOTE(review): relies on `/` being integer division (Python 2).
    input_n_frames = dbn.rbm_layers[0].n_visible / 39 # TODO generalize
Example #4
0
def process(ofname,
            iscpfname,
            ihmmfname,
            ilmfname=None,
            iwdnetfname=None,
            unibifname=None,
            idbnfname=None,
            idbndictstuple=None):
    """Run Viterbi decoding over the files of an SCP list and write an MLF.

    Args:
        ofname: output MLF path; receives the '#!MLF!#' header followed by
            the string InnerLoop returns for each SCP line.
        iscpfname: SCP file listing one feature-file path per line.
        ihmmfname: HMM definition file, parsed with parse_hmm.
        ilmfname: optional bigram language model file (parsed with
            parse_lm_matrix when MATRIX_BIGRAM is set, else with parse_lm).
        iwdnetfname: optional word network file; wins over ilmfname.
        unibifname: optional unigram/bigram counts file; only consulted when
            neither iwdnetfname nor ilmfname is given.
        idbnfname: optional pickled DBN. When given, likelihoods are computed
            with compute_likelihoods_dbn rather than with the GMMs.
        idbndictstuple: pickled tuple whose first element maps phones to DBN
            output columns; must be given together with idbnfname.

    Side effects:
        In DBN mode, the stacked input matrix and the file -> (start, end)
        row map are cached in the working directory as 'tmp_input_mocha.npy'
        and 'tmp_map_file_to_start_end_mocha.pickle'. A cache that is already
        present is reused regardless of iscpfname, so remove those files
        whenever the SCP list changes.
    """
    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname is not None:
        # NOTE: unpickling can execute arbitrary code; only load trusted files.
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    # Transition source, by priority: word network, bigram LM, own counts,
    # and finally uniform transitions between phones.
    if iwdnetfname is not None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)
    elif ilmfname is not None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf)
            else:
                # bigram LM in ARPA-MIT format
                transitions = parse_lm(transitions, ilmf)
    elif unibifname is not None:
        # our own unigram and bigram counts, c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions,
                                                 ubf,
                                                 unigrams_only=UNIGRAMS_ONLY)
    else:
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

    if dbn is not None:
        # `//` keeps the frame counts integral on every Python version.
        input_n_frames_mfcc = (dbn.rbm_layers[0].n_visible //
                               39)  # TODO generalize
        print("this is a DBN with %s MFCC frames" % input_n_frames_mfcc)
        input_n_frames_arti = (dbn.rbm_layers[1].n_visible //
                               59)  # 60 # TODO generalize
        print("this is a DBN with %s articulatory frames" %
              input_n_frames_arti)
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try:  # TODO remove?
            print("loading concat MFCC from pickled file")
            # Binary mode: .npy data must not go through newline translation.
            with open(input_file_name, 'rb') as concat:
                all_input = np.load(concat)
            with open(map_input_file_name, 'rb') as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except Exception:
            # Missing or unreadable cache: rebuild it. `Exception` (not a bare
            # except) lets KeyboardInterrupt / SystemExit propagate.
            print("concatenating MFCC and articulatory files"
                  )  # TODO parallelize
            n_features = (dbn.rbm_layers[0].n_visible +
                          dbn.rbm_layers[1].n_visible)
            # The empty float32 seed fixes the column count and keeps the
            # result well-defined when the SCP file lists nothing.
            chunks = [np.ndarray((0, n_features), dtype='float32')]
            map_file_to_start_end = {}
            n_rows = 0
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    # get the 1 framed signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy', 'rb') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and deltas deltas for articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(
                        x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # TODO feature transformations with mocha_timit_params.json
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    chunks.append(x_mfcc_arti)
                    end = n_rows + x_mfcc_arti.shape[0]
                    map_file_to_start_end[cline] = (n_rows, end)
                    n_rows = end
            # One concatenation instead of a quadratic np.append per file.
            all_input = np.concatenate(chunks, axis=0)
            with open(input_file_name, 'wb') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'wb') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else:  # GMM
        # Stack the MFCC frames of every listed file and remember which rows
        # belong to which file; the Viterbi step needs both.
        chunks = [np.ndarray((0, 39), dtype='float32')]  # TODO generalize
        map_file_to_start_end = {}
        n_rows = 0
        with open(iscpfname) as iscpf:
            for line in iscpf:
                cline = clean(line)
                x_mfcc = htkmfc.open(cline).getall()
                chunks.append(x_mfcc)
                end = n_rows + x_mfcc.shape[0]
                map_file_to_start_end[cline] = (n_rows, end)
                n_rows = end
        all_mfcc = np.concatenate(chunks, axis=0)

    print("computing likelihoods")
    if dbn is not None:  # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        print(tmp_likelihoods)
        print(tmp_likelihoods.shape)
        # Reorder the DBN output columns into HMM state order.
        columns_remapping = [
            dbn_phones_to_states[map_states_to_phones[i]]
            for i in xrange(tmp_likelihoods.shape[1])
        ]
        print(columns_remapping)
        likelihoods = (tmp_likelihoods[:, columns_remapping],
                       map_file_to_start_end)
        print(likelihoods[0])
        print(likelihoods[0].shape)
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print("computing viterbi paths")
    using_bigram = (ilmfname is not None or iwdnetfname is not None
                    or unibifname is not None)
    il = InnerLoop(likelihoods,
                   map_states_to_phones,
                   transitions,
                   using_bigram=using_bigram)
    pool = Pool(cpu_count())
    try:
        with open(iscpfname) as iscpf:
            list_mlf_string = pool.map(il, iscpf)
    finally:
        # Release the worker processes even when decoding fails.
        pool.close()
        pool.join()
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        of.writelines(list_mlf_string)