def __call__(self, mfcc_file):
    start, end = self.likelihoods[1][mfcc_file]
    if VERBOSE:
        print mfcc_file
        print start, end
    _, posteriorgrams = viterbi(self.likelihoods[0][start:end],
                                self.transitions,
                                self.map_states_to_phones,
                                using_bigram=self.using_bigram)
    # NaN checks use np.isnan: "x == np.NaN" would always be False
    assert not np.isnan(posteriorgrams).any()
    assert not np.isnan(self.likelihoods[0][start:end]).any()
    self.write_file(mfcc_file, start, end, posteriorgrams)

def __call__(self, mfcc_file):
    start, end = self.likelihoods[1][mfcc_file]
    if VERBOSE:
        print(mfcc_file)
        print(start, end)
    _, posteriorgrams = viterbi(self.likelihoods[0][start:end],
                                self.transitions,
                                self.map_states_to_phones,
                                using_bigram=self.using_bigram)
    # NaN checks use np.isnan: "x == np.NaN" would always be False
    assert not np.isnan(posteriorgrams).any()
    assert not (posteriorgrams == 0).all()
    assert not np.isnan(self.likelihoods[0][start:end]).any()
    self.write_file(mfcc_file, start, end, posteriorgrams)

def __call__(self, mfcc_file): print("doing", mfcc_file) start, end = self.likelihoods[1][mfcc_file] if VERBOSE: print(mfcc_file) print(start, end) _, posteriorgrams = viterbi(self.likelihoods[0][start:end], self.transitions, self.map_states_to_phones, using_bigram=self.using_bigram) if DEBUG: assert (not (posteriorgrams == np.NaN).any()) assert (not (posteriorgrams < -1000.0).all()) assert (not (self.depth_1_likelihoods[start:end] == np.NaN).any()) assert (not (self.depth_2_likelihoods[start:end] == np.NaN).any()) assert (not (self.likelihoods[0][start:end] == np.NaN).any()) assert (not (self.likelihoods[0][start:end] < -31.0).all()) self.write_file(mfcc_file, start, end, posteriorgrams)
def __call__(self, mfcc_file): print "doing", mfcc_file start, end = self.likelihoods[1][mfcc_file] if VERBOSE: print mfcc_file print start, end _, posteriorgrams = viterbi(self.likelihoods[0][start:end], self.transitions, self.map_states_to_phones, using_bigram=self.using_bigram) if DEBUG: assert(not (posteriorgrams == np.NaN).any()) assert(not (posteriorgrams < -1000.0).all()) assert(not (self.depth_1_likelihoods[start:end] == np.NaN).any()) assert(not (self.depth_2_likelihoods[start:end] == np.NaN).any()) assert(not (self.likelihoods[0][start:end] == np.NaN).any()) assert(not (self.likelihoods[0][start:end] < -31.0).all()) self.write_file(mfcc_file, start, end, posteriorgrams)
depth_2_computer = functools.partial(compute_likelihoods_dbn, dbn, depth=2)
likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn, depth=None)

# TODO bigrams
transitions = initialize_transitions(transitions)
#print transitions
transitions = penalty_scale(transitions,
                            insertion_penalty=INSERTION_PENALTY,
                            scale_factor=SCALE_FACTOR)

dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

list_of_mfcc_files = []
for d, ds, fs in os.walk(sys.argv[1]):
    for fname in fs:
        if fname[-4:] != '.mfc':
            continue
        fullname = d.rstrip('/') + '/' + fname
        list_of_mfcc_files.append(fullname)

if dbn is not None:
    input_n_frames = dbn.rbm_layers[0].n_visible / 39  # TODO generalize
    print("this is a DBN with", input_n_frames, "frames on the input layer")
    print("concatenating MFCC files")
    all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')

from DBN_Gaussian_timit import DBN  # not Gaussian if no GRBM
with open(sys.argv[3]) as idbnf:
    dbn = cPickle.load(idbnf)
with open(sys.argv[4]) as idbndtf:
    dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)

# TODO bigrams
transitions = initialize_transitions(transitions)
#print transitions
transitions = penalty_scale(transitions,
                            insertion_penalty=INSERTION_PENALTY,
                            scale_factor=SCALE_FACTOR)

dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

list_of_mfcc_files = []
for d, ds, fs in os.walk(sys.argv[1]):
    for fname in fs:
        if fname[-4:] != '.mfc':
            continue
        fullname = d.rstrip('/') + '/' + fname
        list_of_mfcc_files.append(fullname)
#print list_of_mfcc_files

if dbn is not None:
    input_n_frames = dbn.rbm_layers[0].n_visible / 39  # TODO generalize
    print "this is a DBN with", input_n_frames, "frames on the input layer"
    print "concatenating MFCC files"
    all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')

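# The padding() helper called in the mocha-timit variant below is not part of
# this excerpt. Since the DBN input width is rbm_layers[0].n_visible ==
# n_frames * 39, it presumably stacks each 39-dim MFCC frame with its
# neighbours. The sketch here is purely illustrative; the name
# stack_adjacent_frames and the edge handling are assumptions, not the
# project's actual implementation.
import numpy as np

def stack_adjacent_frames(n_frames, x):
    """Concatenate each frame with its neighbours: (T, 39) -> (T, n_frames * 39).

    The sequence is edge-padded by repeating the first and last frames so that
    every output row has the same width.
    """
    half = n_frames // 2
    padded = np.concatenate([np.repeat(x[:1], half, axis=0),
                             x,
                             np.repeat(x[-1:], half, axis=0)], axis=0)
    return np.concatenate([padded[i:i + x.shape[0]]
                           for i in range(n_frames)], axis=1)
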
def process(ofname, iscpfname, ihmmfname,
            ilmfname=None, iwdnetfname=None, unibifname=None,
            idbnfname=None, idbndictstuple=None):
    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_)  # TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname is not None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname is not None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)  # parse wordnet
    elif ilmfname is not None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf)  # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(transitions, ilmf)  # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname is not None:  # our own unigram and bigram counts,
        # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf,
                                                 unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

    if dbn is not None:
        input_n_frames_mfcc = dbn.rbm_layers[0].n_visible / 39  # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[1].n_visible / 59  # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try:  # TODO remove?
print "loading concat MFCC from pickled file" with open(input_file_name) as concat: all_input = np.load(concat) with open(map_input_file_name) as map_input: map_file_to_start_end = cPickle.load(map_input) except: print "concatenating MFCC and articulatory files" # TODO parallelize + use np.concatenate all_input = np.ndarray((0, dbn.rbm_layers[0].n_visible + dbn.rbm_layers[1].n_visible), dtype='float32') map_file_to_start_end = {} with open(iscpfname) as iscpf: for line in iscpf: cline = clean(line) start = all_input.shape[0] # get the 1 framed signals x_mfcc = htkmfc.open(cline).getall() with open(cline[:-4] + '_ema.npy') as ema: x_arti = np.load(ema)[:, 2:] # compute deltas and deltas deltas for articulatory features _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(x_mfcc, x_arti) # add the adjacent frames if input_n_frames_mfcc > 1: x_mfcc = padding(input_n_frames_mfcc, x_mfcc) if input_n_frames_arti > 1: x_arti = padding(input_n_frames_arti, x_arti) # do feature transformations if any # TODO with mocha_timit_params.json params # concatenate x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1) all_input = np.append(all_input, x_mfcc_arti, axis=0) map_file_to_start_end[cline] = (start, all_input.shape[0]) with open(input_file_name, 'w') as concat: np.save(concat, all_input) with open(map_input_file_name, 'w') as map_input: cPickle.dump(map_file_to_start_end, map_input) else: # GMM all_mfcc = np.ndarray((0, 39), dtype='float32') # TODO generalize print "computing likelihoods" if dbn != None: # TODO clean tmp_likelihoods = likelihoods_computer(all_input) #mean_dbns = np.mean(tmp_likelihoods, 0) #tmp_likelihoods *= (mean_gmms / mean_dbns) print tmp_likelihoods print tmp_likelihoods.shape columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])] print columns_remapping likelihoods = (tmp_likelihoods[:, columns_remapping], map_file_to_start_end) print likelihoods[0] print likelihoods[0].shape else: likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end) print "computing viterbi paths" list_mlf_string = [] with open(iscpfname) as iscpf: il = InnerLoop(likelihoods, map_states_to_phones, transitions, using_bigram=(ilmfname != None or iwdnetfname != None or unibifname != None)) p = Pool(cpu_count()) list_mlf_string = p.map(il, iscpf) with open(ofname, 'w') as of: of.write('#!MLF!#\n') for line in list_mlf_string: of.write(line)
def process(ofname, iscpfname, ihmmfname,
            ilmfname=None, iwdnetfname=None, unibifname=None,
            idbnfname=None, idbndictstuple=None):
    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_)  # TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname is not None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname is not None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf)  # parse wordnet
    elif ilmfname is not None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(
                    transitions, ilmf)  # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(
                    transitions, ilmf)  # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname is not None:  # our own unigram and bigram counts,
        # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf,
                                                 unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions,
                                insertion_penalty=INSERTION_PENALTY,
                                scale_factor=SCALE_FACTOR)

    dummy = np.ndarray((2, 2))  # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {})  # also for this compile's debug purposes

    if dbn is not None:
        input_n_frames_mfcc = dbn.rbm_layers[
            0].n_visible / 39  # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[
            1].n_visible / 59  # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try:  # TODO remove?
print "loading concat MFCC from pickled file" with open(input_file_name) as concat: all_input = np.load(concat) with open(map_input_file_name) as map_input: map_file_to_start_end = cPickle.load(map_input) except: print "concatenating MFCC and articulatory files" # TODO parallelize + use np.concatenate all_input = np.ndarray( (0, dbn.rbm_layers[0].n_visible + dbn.rbm_layers[1].n_visible), dtype='float32') map_file_to_start_end = {} with open(iscpfname) as iscpf: for line in iscpf: cline = clean(line) start = all_input.shape[0] # get the 1 framed signals x_mfcc = htkmfc.open(cline).getall() with open(cline[:-4] + '_ema.npy') as ema: x_arti = np.load(ema)[:, 2:] # compute deltas and deltas deltas for articulatory features _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple( x_mfcc, x_arti) # add the adjacent frames if input_n_frames_mfcc > 1: x_mfcc = padding(input_n_frames_mfcc, x_mfcc) if input_n_frames_arti > 1: x_arti = padding(input_n_frames_arti, x_arti) # do feature transformations if any # TODO with mocha_timit_params.json params # concatenate x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1) all_input = np.append(all_input, x_mfcc_arti, axis=0) map_file_to_start_end[cline] = (start, all_input.shape[0]) with open(input_file_name, 'w') as concat: np.save(concat, all_input) with open(map_input_file_name, 'w') as map_input: cPickle.dump(map_file_to_start_end, map_input) else: # GMM all_mfcc = np.ndarray((0, 39), dtype='float32') # TODO generalize print "computing likelihoods" if dbn != None: # TODO clean tmp_likelihoods = likelihoods_computer(all_input) #mean_dbns = np.mean(tmp_likelihoods, 0) #tmp_likelihoods *= (mean_gmms / mean_dbns) print tmp_likelihoods print tmp_likelihoods.shape columns_remapping = [ dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1]) ] print columns_remapping likelihoods = (tmp_likelihoods[:, columns_remapping], map_file_to_start_end) print likelihoods[0] print likelihoods[0].shape else: likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end) print "computing viterbi paths" list_mlf_string = [] with open(iscpfname) as iscpf: il = InnerLoop(likelihoods, map_states_to_phones, transitions, using_bigram=(ilmfname != None or iwdnetfname != None or unibifname != None)) p = Pool(cpu_count()) list_mlf_string = p.map(il, iscpf) with open(ofname, 'w') as of: of.write('#!MLF!#\n') for line in list_mlf_string: of.write(line)