Example #1
def context_gen(i):

    # get feature filename
    filename = f[i][len(feature_addr):-4]
    print(filename)

    # read using htk: nframes X channels
    data_read = htk.open(feature_addr + filename + '.htk')
    x = data_read.getall()

    # apply cmvn
    varnorm = 1
    # get mean across time along each channel
    mu = np.mean(x, 0)
    mu = mu.reshape(1, mu.shape[0])

    # get standard deviation across time along each channel
    eps = np.spacing(np.float32(1.0))
    if (varnorm == 1):
        stddev = np.std(x, 0)
        stddev = stddev.reshape(1, stddev.shape[0])
    else:
        stddev = 1
    y = (x - mu) / (stddev + eps)  # uses broadcasting for element-wise division

    # store feature
    writer = htk.open(store_addr + filename + '.htk',
                      mode='w',
                      veclen=y.shape[1])
    writer.writeall(y)
    writer.close()
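For reference, the CMVN applied above simply standardizes each channel across time. A minimal self-contained sketch with plain NumPy (no HTK I/O; the toy array is illustrative):

import numpy as np

# toy feature matrix: 100 frames x 13 channels
x = np.random.randn(100, 13) * 3.0 + 5.0

eps = np.spacing(np.float32(1.0))           # tiny constant, avoids division by zero
mu = np.mean(x, axis=0, keepdims=True)      # per-channel mean across time
stddev = np.std(x, axis=0, keepdims=True)   # per-channel standard deviation
y = (x - mu) / (stddev + eps)               # broadcasts over frames

print(np.allclose(y.mean(axis=0), 0.0))     # True: ~zero mean per channel
print(np.allclose(y.std(axis=0), 1.0))      # True: ~unit variance per channel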
Example #2
def Data_Getter(trainfile, valfile):
    print('Getting and Prepping Data')
    train = htk.open(trainfile)
    train_data = train.getall()
    np.random.shuffle(train_data)
    print('train data loaded')
    print(train_data.shape)

    val = htk.open(valfile)
    val_data = val.getall()
    np.random.shuffle(val_data)
    print('val data loaded')
    print(val_data.shape)

    Y_train = train_data[:, -1]
    X_train = train_data[:, :-1]
    del train_data
    time.sleep(5)
    Y_train = Y_train.reshape(Y_train.shape[0], 1)
    Y_train = Y_train.astype(np.int8)
    Y_train = np_utils.to_categorical(Y_train, 3)

    Y_val = val_data[:, -1]
    X_val = val_data[:, :-1]
    del val_data
    time.sleep(5)
    Y_val = Y_val.reshape(Y_val.shape[0], 1)
    Y_val = Y_val.astype(np.int8)
    Y_val = np_utils.to_categorical(Y_val, 3)

    print('Shapes of train and val data')
    print(X_train.shape, X_val.shape, Y_train.shape, Y_val.shape)
    return (X_train, X_val, Y_train, Y_val)
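np_utils.to_categorical here is the old Keras helper that one-hot encodes integer class labels (three classes in this example); a plain-NumPy equivalent, shown for illustration only:

import numpy as np

def to_categorical(labels, num_classes):
    # one-hot encode a column (or flat array) of integer class labels
    labels = np.asarray(labels).ravel().astype(int)
    one_hot = np.zeros((labels.shape[0], num_classes), dtype=np.float32)
    one_hot[np.arange(labels.shape[0]), labels] = 1.0
    return one_hot

print(to_categorical([0, 2, 1, 2], 3))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]
#  [0. 0. 1.]]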
Example #3
def data_creator(num, addr, file_reader, filename):
    corrupt_files = 0
    noscdlab = 0
    scdlab = 0
    matrix = np.empty((0, num))
    changedir()
    writer = htk.open(
        filename + '.htk', mode='w', veclen=num
    )  # num is the final feature vector size to be written (including the label; check the bottom entry)
    for i in range(len(file_reader)):
        print "Starting with file: ", i
        data_read = htk.open(addr + file_reader[i] +
                             '.htk')  # opening the Gamma-Label HTK file
        print(file_reader[i])
        # kurt_matrix=sio.loadmat(kurt_addr+file_reader[i]+'.mat')['kurt'] #opening the kurtosis matrix for a file
        # sfm_matrix=sio.loadmat(sfm_addr+file_reader[i]+'.mat')['sfm'] #opening the sfm_matrix file
        # labels_this_file=sio.loadmat(label_addr+file_reader[i]+'.mat')['labels']

        ### Kurtosis and sfm are row vectors, that is (1,Number of frames)
        ### GAMMATONE -- LABEL   <--- Structure of the final matrix
        try:
            read_data = data_read.getall()
            id1 = 2 if file_reader[i][0] == 'M' else 1
            temp_index = file_reader[i].index("-")
            id2 = 2 if file_reader[i][temp_index + 1] == 'M' else 1
            print("ID1: ", id1, " ID2: ", id2)
            gender_label = return_vec(read_data[:, -1], id1, id2)
            read_data = np.hstack((read_data, gender_label))
            # print "Raw shape: ",read_data.shape
            read_data = filter_data(
                read_data
            )  #We lose the structure of the file because of shuffling
            # print "Filtered data shape: ",read_data.shape
            scdlab += len(np.where(read_data[:, -1] == 1)[0])
            noscdlab += read_data.shape[0] - len(
                np.where(read_data[:, -1] == 1)[0])
            # id1 and id2 are integers: 2 if male, 1 if female
            # kurt_vector=np.transpose(kurt_matrix)
            # sfm_vector=np.transpose(sfm_matrix)
            # label_vector=np.transpose(labels_this_file)
            # final_vector=np.hstack((read_data,kurt_vector,sfm_vector,label_vector))
            final_vector = read_data
            # matrix=np.vstack((matrix,final_vector))
            del read_data
        except:
            print "In the corrupt file section"
            corrupt_files += 1
            continue
            # ind=ind+read_data.shape[0]
        #HTK supports concatenation, so we don't have to deal with numpy matrix again and again
        writer.writeall(final_vector)
    print('corrupt_files', corrupt_files)
    f = open(save_extra, 'w')
    write_string = str(scdlab) + "," + str(noscdlab) + ", Corrupt: " + str(
        corrupt_files)
    f.write(write_string)
    f.close()
def data_creator(num, addr, file_reader, filename):
    corrupt_files = 0
    noscdlab = 0
    scdlab = 0
    changedir()
    writer = htk.open(filename + '.htk', mode='w', veclen=num)  # num is the final feature vector size to be written (including the label; check the bottom entry)
    # for i in range(1):
    for i in range(len(file_reader)):
        print("Starting with file: ", i)
        data_read = htk.open(addr + file_reader[i] + '.htk')  # opening the Gamma-Label HTK file
        pitch_read = htk.open(paddr + file_reader[i] + '.htk')  # opening the pitch variance file
        # kurt_matrix=sio.loadmat(kurt_addr+file_reader[i]+'.mat')['kurt'] # opening the kurtosis matrix for a file
        # sfm_matrix=sio.loadmat(sfm_addr+file_reader[i]+'.mat')['sfm'] # opening the sfm_matrix file
        # labels_this_file=sio.loadmat(label_addr+file_reader[i]+'.mat')['labels']

        ### Kurtosis and sfm are row vectors, that is (1, Number of frames)
        ### GAMMATONE -- LABEL -- GenderLabel  <--- Structure of the final matrix
        try:
            read_data = data_read.getall()
            read_pitch = pitch_read.getall()
            variance_vector = read_pitch[:, -2]  # getting the variance vector
            variance_vector = variance_vector[0:read_data.shape[0]]
            # print("Variance obtained")
            # print("read_data shape: ", read_data.shape)
            # print("variance shape: ", variance_vector.shape)
            read_data = np.insert(read_data, -1, variance_vector, axis=1)
            print("Variance inserted")
            id1 = 2 if file_reader[i][0] == 'M' else 1
            temp_index = file_reader[i].index("-")
            id2 = 2 if file_reader[i][temp_index + 1] == 'M' else 1
            read_data[:, -1] = read_data[:, -1] - 1
            gender_label = return_vec(read_data[:, -1], id1, id2)
            read_data = np.hstack((read_data, gender_label))
            read_data = filter_data(read_data)
            scdlab += len(np.where(read_data[:, -2] == 1)[0])
            noscdlab += len(np.where(read_data[:, -2] == 0)[0])
            # id1 and id2 are integers: 1 if male, 0 if female
            # kurt_vector=np.transpose(kurt_matrix)
            # sfm_vector=np.transpose(sfm_matrix)
            # label_vector=np.transpose(labels_this_file)
            # final_vector=np.hstack((read_data,kurt_vector,sfm_vector,label_vector))
            final_vector = read_data
            # matrix=np.vstack((matrix,final_vector))
            del read_data
        except:
            corrupt_files += 1
            print("In the corrupt file section", corrupt_files)
            continue
            # ind=ind+read_data.shape[0]
        # HTK supports concatenation, so we don't have to deal with numpy matrices again and again
        writer.writeall(final_vector)
    print('Corrupt_files', corrupt_files)
    f = open(save_extra, 'w')
    write_string = str(scdlab) + "," + str(noscdlab) + ", Corrupt: " + str(corrupt_files)
    f.write(write_string)
    f.close()
Example #5
def Data_Getter(filename):
    gamma = htk.open('/home/siddharthm/scd/context/600/gamma/train/' + filename + '.htk')  # getting gamma context feats
    pitch = htk.open('/home/siddharthm/scd/context/600/pitch/train/' + filename + '.htk')  # getting pitch context feats
    temp_gamma = gamma.getall()
    temp_pitch = pitch.getall()
    only_pitch = temp_pitch[:, 0]  # extracting only the pitch value
    x_val = temp_gamma[:, :-1]  # only gammatone values, here 64*61
    y_val = temp_gamma[:, -1]  # the real labels, i.e. from the ground truth
    y_val = y_val.reshape(y_val.shape[0], 1)
    y_val = y_val.astype(np.int8)
    print(x_val.shape, y_val.shape)
    return (x_val, only_pitch, y_val)
Example #6
def loadData(inputData):
	featsReader = htk.open(inputData)
	trainData = featsReader.getall()
	yTrain = trainData[:, -1]
	xTrain = np.delete(trainData, -1, 1)
	del trainData
	return (xTrain, yTrain)
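np.delete(trainData, -1, 1) returns a copy of the matrix without its last column, so xTrain is equivalent to trainData[:, :-1]; a quick self-contained check (toy array, illustrative):

import numpy as np

trainData = np.arange(12.0).reshape(3, 4)
assert np.array_equal(np.delete(trainData, -1, 1), trainData[:, :-1])
print('last column removed either way')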
Example #7
def generate_file_statistics(filename, mixture_number):
    """
        Load GMM at current iteration/mixture and calculate the statistics
        for filename. 
        At the end, store the statistics in the 'stats' directory. 
    """
    # Here we use a sci-kit format gmm, so to be able to use its methods

    model_file_name = 'models/gmm' + mixture_number
    gmm = pickle.load(open(model_file_name, 'rb'))

    # Load features:
    features = htkmfc.open(filename)
    data = features.getall()

    # Calculate Prob(X/gmm)
    prob_data_given_model = gmm.scikitGmm.predict_proba(data)

    # Calculate 0th, 1st, and 2nd order statistics
    zeroth_order_stats = np.sum(prob_data_given_model, axis=0)
    first_order_stats = np.dot(data.T, prob_data_given_model)
    second_order_stats = np.dot(np.power(data.T, 2), prob_data_given_model)

    print(zeroth_order_stats.shape, first_order_stats.shape, second_order_stats.shape)
    file_stats = [zeroth_order_stats, first_order_stats, second_order_stats]

    # Store statistics:
    basename = filename.split('/')[-1]
    output_name = '/erasable/nxs113020/stats/' + basename + '.stats'
    with open(output_name, 'wb') as pickle_file:
        pickle.dump(file_stats, pickle_file, protocol=pickle.HIGHEST_PROTOCOL)
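These are the standard zeroth-, first-, and second-order GMM sufficient statistics; a quick shape check with random stand-in responsibilities (NumPy only, all sizes illustrative):

import numpy as np

n_frames, n_dims, n_mix = 200, 39, 8
data = np.random.randn(n_frames, n_dims)    # stand-in for the HTK features
resp = np.random.rand(n_frames, n_mix)
resp /= resp.sum(axis=1, keepdims=True)     # rows sum to 1, like predict_proba output

zeroth = np.sum(resp, axis=0)               # (n_mix,)  soft counts per mixture
first = np.dot(data.T, resp)                # (n_dims, n_mix)
second = np.dot(np.power(data.T, 2), resp)  # (n_dims, n_mix)
print(zeroth.shape, first.shape, second.shape)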
Example #9
    def loadMFCCs(self, URI_recording_noExt, extractedPitchList, sectionLink):
        '''
        for now loaded as extracted with HTK, read in MATLAB and serialized to a txt file
        '''

        URI_recording = URI_recording_noExt + '.wav'

        URIRecordingChunkResynthesized = sectionLink.URIRecordingChunk + '.wav'

        logging.info("working on sectionLink: {}".format(
            URIRecordingChunkResynthesized))

        # resynthesize audio chunk:
        if ParametersAlgo.POLYPHONIC:
            if not os.path.isfile(URIRecordingChunkResynthesized
                                  ):  # only if resynth file does not exist
                logging.info(
                    "doing harmonic models and resynthesis for segment: {} ..."
                    .format(URIRecordingChunkResynthesized))

                if extractedPitchList == None:
                    extractedPitchList = extractPredominantPitch(
                        URI_recording_noExt,
                        2048,
                        128,
                        jointAnalysis=True,
                    )
                hfreq, hmag, hphase, fs, hopSizeMelodia, inputAudioFromTsToTs = extractHarmSpec(
                    URI_recording, extractedPitchList, sectionLink.beginTs,
                    sectionLink.endTs, ParametersAlgo.THRESHOLD_PEAKS)
                resynthesize(hfreq, hmag, hphase, fs, hopSizeMelodia,
                             URIRecordingChunkResynthesized)
        else:
            sampleRate = 44100
            loader = essentia.standard.MonoLoader(filename=URI_recording,
                                                  sampleRate=sampleRate)
            audio = loader()
            audioChunk = audio[sectionLink.beginTs *
                               sampleRate:sectionLink.endTs * sampleRate]
            monoWriter = essentia.standard.MonoWriter(
                filename=URIRecordingChunkResynthesized)
            monoWriter(audioChunk)

        # call htk to extract features
        URImfcFile = self._extractMFCCs(URIRecordingChunkResynthesized)

        # read features from binary htk file
        logging.debug("reading MFCCs from {} ...".format(URImfcFile))
        HTKFeat_reader = htkmfc.open(URImfcFile, 'rb')
        mfccsFeatrues = HTKFeat_reader.getall()

        if ParametersAlgo.FOR_MAKAM and ParametersAlgo.OBS_MODEL == 'GMM':  # makam models are trained with 25-dim features (no energy, no delta-deltas)
            mfccs_no_energy = mfccsFeatrues[:, 0:12]
            mfccDeltas = mfccsFeatrues[:, 13:26]
            mfccsFeatrues = np.hstack((mfccs_no_energy, mfccDeltas))

        return mfccsFeatrues
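The makam branch above keeps columns 0:12 (12 static coefficients, energy dropped) and 13:26 (13 deltas), giving the 25-dim vectors the comment mentions; the same column selection on a stand-in array (sizes assumed, for illustration):

import numpy as np

mfccsFeatrues = np.random.randn(500, 39)   # stand-in for the HTK feature matrix
mfccs_no_energy = mfccsFeatrues[:, 0:12]   # 12 static coefficients
mfccDeltas = mfccsFeatrues[:, 13:26]       # 13 delta coefficients
reduced = np.hstack((mfccs_no_energy, mfccDeltas))
print(reduced.shape)                       # (500, 25)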
Example #10
def data_creator(num, addr, file_reader, filename):
    corrupt_files = 0
    ind = 0
    writer = htk.open(filename + '.htk', mode='w', veclen=num)
    for i in range(int(len(file_reader))):
        print(i)
        data_read = htk.open(addr + file_reader[i] + '.htk')
        try:
            read_data = data_read.getall()

        except:
            corrupt_files += 1
            continue

        ind = ind + read_data.shape[0]
        print(read_data.shape)
        writer.writeall(read_data)
    print('corrupt_files', corrupt_files)
def load_data_val(valfile):
    a = htk.open(valfile)
    data = a.getall()
    print("Done loading the validation data: ", data.shape)
    data = filter_data(data)
    x_val = data[:, :-1]
    Y_val = data[:, -1]
    Y_val = np.reshape(Y_val, (Y_val.shape[0], 1))
    y_val = np_utils.to_categorical(Y_val, 2)
    del data
    return x_val, y_val
def load_data_test(testfile):
    a = htk.open(testfile)
    data = a.getall()
    print("Done loading the testing data: ", data.shape)
    data = filter_data(data)
    x_test = data[:, :-1]
    Y_test = data[:, -1]
    print(np.where(Y_test == 2))
    # Y_test=np.reshape(Y_test,(Y_test.shape[0],1))
    # y_test=np_utils.to_categorical(Y_test,2)
    del data
    return x_test, Y_test
Example #13
    def __call__(self, line):
        cline = clean(line)
        if VERBOSE:
            print(cline)
        likelihoods = self.comp_likelihoods(htkmfc.open(cline).getall())
        s = '"' + cline[:-3] + 'rec"\n' + \
                string_mlf(self.map_states_to_phones,
                           viterbi(likelihoods, self.transitions,
                                   self.map_states_to_phones,
                                   using_bigram=self.using_bigram)[0],
                           phones_only=True) + '.\n'
        return s
Example #14
def load_data_test(testfile):
    a = htk.open(testfile)
    data = a.getall()
    print "Done loading the testing data: ", data.shape
    x_test = cnn_reshaper(data[:, :-2])
    Y_test = data[:, -2]
    print np.where(Y_test == 2)
    # Y_test=np.reshape(Y_test,(Y_test.shape[0],1))
    # y_test=np_utils.to_categorical(Y_test,2)
    gender_labels = data[:, -1]
    del data
    return x_test, Y_test, gender_labels
Example #15
    def __call__(self, line):
        cline = clean(line)
        if VERBOSE:
            print(cline)
        likelihoods = self.comp_likelihoods(htkmfc.open(cline).getall())
        s = '"' + cline[:-3] + 'rec"\n' + \
                string_mlf(self.map_states_to_phones,
                           viterbi(likelihoods, self.transitions,
                                   self.map_states_to_phones,
                                   using_bigram=self.using_bigram)[0],
                           phones_only=True) + '.\n'
        return s
Example #16
def loadData(inputData):
	featsReader = htk.open(inputData)
	trainData = featsReader.getall()
	np.random.shuffle(trainData)
	yUtt = trainData[:, -1]
	trainData = np.delete(trainData, -1, 1)
	ySpkTrain = trainData[:, -1]
	trainData = np.delete(trainData, -1, 1)
	yKwTrain = trainData[:, -1]
	xTrain = np.delete(trainData, -1, 1)
	del trainData
	return (xTrain, ySpkTrain.astype(int), yKwTrain.astype(int) ,yUtt.astype(int))
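loadData above peels three trailing label columns off the feature matrix one np.delete at a time; the same split written with slices, on a toy matrix (column order inferred from the deletions: keyword, speaker, utterance):

import numpy as np

# 4 frames of 5 features plus 3 trailing label columns
trainData = np.hstack([np.random.randn(4, 5), np.ones((4, 3))])

xTrain = trainData[:, :-3]                # features only
yKwTrain = trainData[:, -3].astype(int)   # keyword label
ySpkTrain = trainData[:, -2].astype(int)  # speaker label
yUtt = trainData[:, -1].astype(int)       # utterance label
print(xTrain.shape, yKwTrain.shape, ySpkTrain.shape, yUtt.shape)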
Example #17
def data_getter(testfile):

    print('getting and prepping data')
    val = htk.open(testfile)
    val_data = val.getall()
    Y_test = val_data[:, -1]
    X_test = val_data[:, :-1]
    del val_data
    time.sleep(5)
    Y_test = Y_test.reshape(Y_test.shape[0], 1)
    Y_test = Y_test.astype(np.int8)

    return X_test, Y_test
Example #18
def extract_from_mlf(mlf):
    x = np.ndarray((0, N_MFCC_COEFFS + N_EMA_COEFFS), dtype='float32')
    y = []

    with open(mlf) as f:
        tmp_len_x = 0  # verify sizes
        for line in f:
            line = line.rstrip('\n')
            if len(line) < 1:
                continue
            if line[0] == '"':
                if tmp_len_x != 0:
                    print(
                        "the file above this one was mismatching x and y lengths",
                        line)
                t = htkmfc.open(line.strip('"')[:-3] + 'mfc')  # .lab -> .mfc
                mfc_file = t.getall()
                with open(line.strip('"')[:-4] +
                          '_ema.npy') as ema_f:  # .lab -> _ema.npy
                    ema_file = np.load(ema_f)[:, 2:]
                x_file = np.concatenate(from_mfcc_ema_to_mfcc_arti_tuple(
                    mfc_file, ema_file),
                                        axis=1)
                x = np.append(x, x_file, axis=0)
                tmp_len_x = mfc_file.shape[0]
            elif line[0].isdigit():
                start, end, state = line.split()[:3]
                start = (int(start) + 1) // (MFCC_TIMESTEP * 10000)  # HTK times are in 100 ns units
                end = (int(end) + 1) // (MFCC_TIMESTEP * 10000)  # htk
                for i in range(start, end):
                    tmp_len_x -= 1
                    y.append(state)

    assert (len(y) == x.shape[0])
    rootname = mlf[:-4]
    np.save(rootname + '_xdata.npy', x)
    yy = np.array(y)
    np.save(rootname + '_ylabels.npy', yy)

    print("length x:", len(x), " length y:", len(y))
    print("shape x:", x.shape, "shape yy:", yy.shape)

    if TEST:
        tx = np.load(rootname + '_xdata.npy')
        ty = np.load(rootname + '_ylabels.npy')
        if np.all(tx == x) and np.all(ty == yy):
            print("SUCCESS: serialized and current in-memory arrays are equal")
            sys.exit(0)
        else:
            print("ERROR: serialized and current in-memory arrays differ!")
            sys.exit(-1)
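HTK label files give start/end times in 100 ns units, which is why extract_from_mlf divides by MFCC_TIMESTEP * 10000 (MFCC_TIMESTEP being the frame shift in milliseconds); a small helper making the conversion explicit (names illustrative):

def htk_time_to_frame(htk_time, frame_shift_ms=10):
    # 1 ms = 10000 HTK units of 100 ns, so one frame spans frame_shift_ms * 10000 units
    return int(htk_time) // (frame_shift_ms * 10000)

print(htk_time_to_frame(0))        # 0
print(htk_time_to_frame(100000))   # 1  (10 ms)
print(htk_time_to_frame(2500000))  # 25 (250 ms)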
Example #19
    def load_features(self, file_list):
        """Read a text file containing a list of file names and load the
        feature files as numpy arrays."""
        fin = open(file_list, 'r')
        for i in fin:
            filename = i.strip()
            features = htkmfc.open(filename)
            data = features.getall()
            if self.training_data is None:
                self.training_data = data
            else:
                self.training_data = np.vstack((self.training_data, data))
        self.number_of_frames, self.feature_dimension = self.training_data.shape
        fin.close()
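np.vstack inside the loop copies the whole accumulated array once per file, which is quadratic in the number of files; collecting the per-file arrays in a list and stacking once is linear. A sketch under the same assumptions (htkmfc available as above):

import numpy as np
import htkmfc

def load_features_fast(file_list):
    # load every HTK feature file named in file_list, then stack once
    chunks = []
    with open(file_list, 'r') as fin:
        for line in fin:
            filename = line.strip()
            if filename:
                chunks.append(htkmfc.open(filename).getall())
    return np.vstack(chunks)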
Example #20
def load_data_val(valfile):
    a = htk.open(valfile)
    data = a.getall()
    print "Done loading the validation data: ", data.shape
    data = filter_data_val(data)
    x_val = data[:, :-2]
    Y_val = data[:, -2]
    # print np.where(Y_val==1)
    Y_val = np.reshape(Y_val, (Y_val.shape[0], 1))
    y_val = np_utils.to_categorical(Y_val, 2)
    # print np.where(y_val[:,1]==1)
    gender_val = data[:, -1]
    del data
    return x_val, y_val, gender_val
Example #21
    def load_features(self, file_list):
        """Read a text file containing a list of file names and load the
        feature files as numpy arrays."""
        fin = open(file_list, 'r')
        for i in fin:
            filename = i.strip()
            features = htkmfc.open(filename)
            data = features.getall()
            if self.training_data is None:
                self.training_data = data
            else:
                self.training_data = np.vstack((self.training_data, data))
        self.number_of_frames, self.feature_dimension = self.training_data.shape
        fin.close()
Example #22
def GetHTKfea(wav_fd, fe_fd, n_delete):
    print('Entered GetHTKfea')
    print(wav_fd)
    names = [na for na in os.listdir(wav_fd) if na.endswith('.16kHz.fb40')]
    print(names)
    extlen = len('16kHz.fb40') + 1
    names = sorted(names)
    for na in names:
        print(na)
        path = wav_fd + '/' + na
        print(path)
        mfc_reader = htkmfc.open(path, mode='rb')
        X = mfc_reader.getall()
        X = X[:, n_delete:]
        # write the trimmed features out, as in Example #27
        out_path = fe_fd + '/' + na[0:-extlen] + '.f'
        cPickle.dump(X, open(out_path, 'wb'), protocol=cPickle.HIGHEST_PROTOCOL)
def load_data_train(trainfile):
    print("Getting the training data")
    a = htk.open(trainfile)
    train_data = a.getall()
    print("Done with loading the training data: ", train_data.shape)
    data = filter_data(train_data)
    x_train = data[:, :-1]  # set to a different column based on the model
    Y_train = data[:, -1]
    print(Y_train.shape)
    print(np.where(Y_train == 2))
    Y_train = Y_train.reshape(Y_train.shape[0], 1)
    Y_train = Y_train.astype(np.int8)
    y_train = np_utils.to_categorical(Y_train, 2)
    del data
    return x_train, y_train
Example #24
def get_frames(bname, warpfreq):
    """Return concatenated vad frames from bname
    """
    key = (bname, warpfreq)
    if key not in _cache:
        r = []
        for start, end in intervals[bname]:
            mfcfile = path.join(english_vtln_dir,
                                'warp_freq_{:.2f}'.format(warpfreq),
                                bname + '.mfc')
            mfc = htkmfc.open(mfcfile).getall()
            start_fr = start * FRATE
            end_fr = end * FRATE
            r.append(mfc[start_fr:end_fr])
        _cache[key] = np.vstack(r)
    return _cache[key]
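The module-level _cache dict is hand-rolled memoization keyed on (bname, warpfreq); since both arguments are hashable, functools.lru_cache gives the same behavior. A sketch assuming the same module globals as above (intervals, english_vtln_dir, FRATE, htkmfc, np, path):

import functools

@functools.lru_cache(maxsize=None)
def get_frames(bname, warpfreq):
    # results are cached per (bname, warpfreq), as with the explicit _cache dict
    r = []
    for start, end in intervals[bname]:
        mfcfile = path.join(english_vtln_dir,
                            'warp_freq_{:.2f}'.format(warpfreq),
                            bname + '.mfc')
        mfc = htkmfc.open(mfcfile).getall()
        r.append(mfc[start * FRATE:end * FRATE])
    return np.vstack(r)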
Example #25
def load_data_val(valfile, scaler):
    a = htk.open(valfile)
    data = a.getall()
    print("Done loading the validation data: ", data.shape)
    data = filter_data_val(data)
    x_val = data[:, :-2]
    # x_val=scaler.transform(x_val)
    Y_val = data[:, -2]
    # print(np.where(Y_val==1))
    Y_val = np.reshape(Y_val, (Y_val.shape[0], 1))
    y_val = np_utils.to_categorical(Y_val, 2)
    # print(np.where(y_val[:,1]==1))
    gender_val = data[:, -1]
    del data
    # x_val has the pitch variances and also the gammatone values
    return x_val, y_val, gender_val
Example #27
def GetHTKfea(wav_fd, fe_fd, n_delete):
    names = [na for na in os.listdir(wav_fd) if na.endswith('.16kHz.fb40')]
    extlen = len('16kHz.fb40') + 1
    names = sorted(names)
    for na in names:
        print(na)
        path = wav_fd + '/' + na
        print(path)
        mfc_reader = htkmfc.open(path, mode='rb')
        X = mfc_reader.getall()
        X = X[:, n_delete:]
        print(X.shape)  # (1291, 40)

        out_path = fe_fd + '/' + na[0:-extlen] + '.f'  #### change na[0:-4]
        cPickle.dump(X,
                     open(out_path, 'wb'),
                     protocol=cPickle.HIGHEST_PROTOCOL)
Example #28
File: vtln.py Project: mwv/vtln
def get_frames(bname, warpfreq):
    """Return concatenated vad frames from bname
    """
    key = (bname, warpfreq)
    if key not in _cache:
        r = []
        for start, end in intervals[bname]:
            mfcfile = path.join(
                english_vtln_dir,
                'warp_freq_{:.2f}'.format(warpfreq),
                bname + '.mfc')
            mfc = htkmfc.open(mfcfile).getall()
            start_fr = start * FRATE
            end_fr = end * FRATE
            r.append(mfc[start_fr:end_fr])
        _cache[key] = np.vstack(r)
    return _cache[key]
Example #29
def load_data_train(trainfile):
    print("Getting the training data")
    a = htk.open(trainfile)
    train_data = a.getall()
    print("Done with loading the training data: ", train_data.shape)
    data = filter_data_train(train_data)
    # x_train=cnn_reshaper(data[:,:-2]) # set to a different column based on the model
    x_train = data[:, :-2]  # set to a different column based on the model
    Y_train = data[:, -2]
    print(Y_train.shape)
    # print(np.where(Y_train==2))
    Y_train = Y_train.reshape(Y_train.shape[0], 1)
    y_train = np_utils.to_categorical(Y_train, 2)
    print(y_train[0:5, :])
    gender_train = data[:, -1]
    del data
    return x_train, y_train, gender_train
Example #30
def concat_all(folder):
    l = []
    for d, ds, fs in os.walk(folder):
        for fname in fs:
            if fname[-4:] != '.mfc':
                continue
            fullfname = d + '/' + fname
            print(fullfname)
            t = htkmfc.open(fullfname)
            l.append(t.getall())
    stats = np.concatenate(l)
    mean = np.mean(stats, 0)
    stddev = np.std(stats, 0)
    for i, e in enumerate(l):
        l[i] = padding(NFRAMES_DBN, (e - mean) / stddev)
    a = np.concatenate(l) 
    np.save(folder + '/' + 'x_all_mfcc.npy', a)
Example #31
def concat_all(folder):
    l = []
    for d, ds, fs in os.walk(folder):
        for fname in fs:
            if fname[-4:] != '.mfc':
                continue
            fullfname = d + '/' + fname
            print(fullfname)
            t = htkmfc.open(fullfname)
            l.append(t.getall())
    stats = np.concatenate(l)
    mean = np.mean(stats, 0)
    stddev = np.std(stats, 0)
    for i, e in enumerate(l):
        l[i] = padding(NFRAMES_DBN, (e - mean) / stddev)
    a = np.concatenate(l)
    np.save(folder + '/' + 'x_all_mfcc.npy', a)
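concat_all normalizes every file with corpus-level statistics: one mean/stddev vector computed over all frames of all files, then applied file by file. The statistics step in isolation (toy data; the padding/NFRAMES_DBN step is omitted):

import numpy as np

# toy corpus: three files with different frame counts, 13 coefficients each
files = [np.random.randn(n, 13) for n in (50, 80, 120)]

stats = np.concatenate(files)                  # all frames stacked: (250, 13)
mean = np.mean(stats, 0)                       # per-coefficient corpus mean
stddev = np.std(stats, 0)                      # per-coefficient corpus stddev
normed = [(f - mean) / stddev for f in files]  # same stats applied to every file
print(np.concatenate(normed).mean(0))          # ~0 for every coefficient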
Example #32
def extract_MFCC(audio_URI, output_URI):

    fe = FeatureExtractor(
        '/usr/local/bin/HCopy',
        None)  ## TODO: replace htk-mfcc extraction with essentia
    # call htk to extract features
    URImfcFile = fe._extractMFCCs(audio_URI)

    # read features form binary htk file
    logging.debug("reading MFCCs from {} ...".format(URImfcFile))
    HTKFeat_reader = htkmfc.open(URImfcFile, 'rb')
    mfccsFeatrues = HTKFeat_reader.getall()

    labels = numpy.zeros(len(mfccsFeatrues), dtype='float32')

    with open(output_URI, 'wb') as f:  # binary mode for pickle
        pickle.dump((mfccsFeatrues, labels), f)
    return output_URI
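Since the file above is written as a binary pickle, reading the (features, labels) tuple back looks like this (a usage sketch; output_URI as returned by the function):

import pickle

with open(output_URI, 'rb') as f:
    mfccsFeatrues, labels = pickle.load(f)
print(mfccsFeatrues.shape, labels.shape)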
Example #33
def load_data_train(trainfile):
    print("Getting the training data")
    a = htk.open(trainfile)
    train_data = a.getall()
    print("Done with loading the training data: ", train_data.shape)
    data = filter_data_train(train_data)
    # x_train=cnn_reshaper(data[:,:-2]) # set to a different column based on the model
    x_train = data[:, :-2]  # set to a different column based on the model
    scaler = StandardScaler().fit(x_train)
    # x_train=scaler.transform(x_train)
    Y_train = data[:, -2]
    print(Y_train.shape)
    # print(np.where(Y_train==2))
    Y_train = Y_train.reshape(Y_train.shape[0], 1)
    y_train = np_utils.to_categorical(Y_train, 2)
    print(y_train[0:5, :])
    gender_train = data[:, -1]
    del data
    # x_train has the complete data, i.e. gammatone and also the pitch variance values
    return x_train, y_train, gender_train, scaler
Example #34
def normalize(folder):
    corpus = {}
    full = np.ndarray((0, 39))

    for d, ds, fs in os.walk(folder):
        for fname in fs:
            if fname[-11:] != '.mfc_unnorm':
                continue
            fullfname = d + '/' + fname
            t = htkmfc.open(fullfname)
            corpus[fullfname[:-11] + '_mfc.npy'] = copy.deepcopy(t.getall())
            full = np.append(full, t.getall(), axis=0)

    mean = np.mean(full)
    stddev = sss.tstd(full)
    if stddev == 0:
        print("*** null stddev, no *.mfc_unnorm file ??? ***", file=sys.stderr)
        sys.exit(-1)

    for key, val in corpus.items():
        corpus[key] = (val - mean) / stddev

    # verification:
    ### full = np.ndarray((0,39))
    ### for key,val in corpus.items():
    ###     full = np.append(full, val, axis=0)
    ### print("verification of 0-mean 1-stddev")
    ### print("mean (big numeric errors, beware)")
    ### print(np.mean(full))
    ### print("stddev")
    ### print(sss.tvar(full))
    # /verification

    for key, val in corpus.items():
        print("Dealt with:", key)
        np.save(key, val)
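Note that np.mean(full) with no axis argument is a single scalar over all frames and all 39 coefficients, unlike the per-coefficient np.mean(stats, 0) used in concat_all above; the difference in one line (illustrative):

import numpy as np

full = np.random.randn(100, 39)
print(np.mean(full).shape, np.mean(full, 0).shape)  # () vs (39,)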
Example #35
def normalize(folder):
    corpus = {}
    full = np.ndarray((0, 39))

    for d, ds, fs in os.walk(folder):
        for fname in fs:
            if fname[-11:] != '.mfc_unnorm':
                continue
            fullfname = d + '/' + fname
            t = htkmfc.open(fullfname)
            corpus[fullfname[:-11] + '_mfc.npy'] = copy.deepcopy(t.getall())
            full = np.append(full, t.getall(), axis=0)

    mean = np.mean(full)
    stddev = sss.tstd(full)
    if stddev == 0:
        print("*** null stddev, no *.mfc_unnorm file ??? ***", file=sys.stderr)
        sys.exit(-1)

    for key, val in corpus.items():
        corpus[key] = (val - mean) / stddev

    # verification:
    ### full = np.ndarray((0,39))
    ### for key,val in corpus.items():
    ###     full = np.append(full, val, axis=0)
    ### print("verification of 0-mean 1-stddev")
    ### print("mean (big numeric errors, beware)")
    ### print(np.mean(full))
    ### print("stddev")
    ### print(sss.tvar(full))
    # /verification

    for key, val in corpus.items():
        print("Dealt with:", key)
        np.save(key, val)
Example #36
def process(ofname, iscpfname, ihmmfname, 
        ilmfname=None, iwdnetfname=None, unibifname=None, 
        idbnfname=None, idbndictstuple=None):

    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_) #TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname != None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname != None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf) # parse wordnet
    elif ilmfname != None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf) # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(transitions, ilmf) # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname != None: # our own unigram and bigram counts,
                             # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf, 
                    unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions, 
            insertion_penalty=INSERTION_PENALTY, scale_factor=SCALE_FACTOR)


    dummy = np.ndarray((2,2)) # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {}) # also for this compile's debug purposes
    
    if dbn != None:
        input_n_frames_mfcc = dbn.rbm_layers[0].n_visible / 39 # TODO generalize
        print "this is a DBN with", input_n_frames_mfcc, "MFCC frames"
        input_n_frames_arti = dbn.rbm_layers[1].n_visible / 59 # 60 # TODO generalize
        print "this is a DBN with", input_n_frames_arti, "articulatory frames"
        input_file_name = 'tmp_input_mocha.npy'
        map_input_file_name = 'tmp_map_file_to_start_end_mocha.pickle'
        try: # TODO remove?
            print "loading concat MFCC from pickled file"
            with open(input_file_name) as concat:
                all_input = np.load(concat)
            with open(map_input_file_name) as map_input:
                map_file_to_start_end = cPickle.load(map_input)
        except:
            print "concatenating MFCC and articulatory files" # TODO parallelize + use np.concatenate
            all_input = np.ndarray((0, dbn.rbm_layers[0].n_visible + dbn.rbm_layers[1].n_visible), dtype='float32')
            map_file_to_start_end = {}
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    start = all_input.shape[0]
                    # get the 1 framed signals
                    x_mfcc = htkmfc.open(cline).getall()
                    with open(cline[:-4] + '_ema.npy') as ema:
                        x_arti = np.load(ema)[:, 2:]
                    # compute deltas and deltas deltas for articulatory features
                    _, x_arti = from_mfcc_ema_to_mfcc_arti_tuple(x_mfcc, x_arti)
                    # add the adjacent frames
                    if input_n_frames_mfcc > 1:
                        x_mfcc = padding(input_n_frames_mfcc, x_mfcc)
                    if input_n_frames_arti > 1:
                        x_arti = padding(input_n_frames_arti, x_arti)
                    # do feature transformations if any
                    # TODO with mocha_timit_params.json params
                    # concatenate
                    x_mfcc_arti = np.concatenate((x_mfcc, x_arti), axis=1)
                    all_input = np.append(all_input, x_mfcc_arti, axis=0)
                    map_file_to_start_end[cline] = (start, all_input.shape[0])
            with open(input_file_name, 'w') as concat:
                np.save(concat, all_input)
            with open(map_input_file_name, 'w') as map_input:
                cPickle.dump(map_file_to_start_end, map_input)
    else: # GMM
        all_mfcc = np.ndarray((0, 39), dtype='float32') # TODO generalize

    print "computing likelihoods"
    if dbn != None: # TODO clean
        tmp_likelihoods = likelihoods_computer(all_input)
        #mean_dbns = np.mean(tmp_likelihoods, 0)
        #tmp_likelihoods *= (mean_gmms / mean_dbns)
        print tmp_likelihoods
        print tmp_likelihoods.shape
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])]
        print columns_remapping
        likelihoods = (tmp_likelihoods[:, columns_remapping],
            map_file_to_start_end)
        print likelihoods[0]
        print likelihoods[0].shape
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print "computing viterbi paths"
    list_mlf_string = []
    with open(iscpfname) as iscpf:
        il = InnerLoop(likelihoods,
                map_states_to_phones, transitions,
                using_bigram=(ilmfname != None 
                    or iwdnetfname != None 
                    or unibifname != None))
        p = Pool(cpu_count())
        list_mlf_string = p.map(il, iscpf)
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        for line in list_mlf_string:
            of.write(line)
Example #37
def extract_from_mlf(mlf, do_gammatones):
    x = np.ndarray((0, N_MFCC_COEFFS), dtype='float32')
    x_fbank = np.ndarray((0, N_FILTERBANK_COEFFS), dtype='float32')
    x_gamma = np.ndarray((0, N_GAMMATONES*3), dtype='float32')
    y = []
    y_spkr = []
    
    with open(mlf) as f:
        tmp_len_x = 0 # verify sizes
        len_x = 0
        end = 0
        speaker_label = ''
        for line in f:
            line = line.rstrip('\n')
            if len(line) < 1:
                continue
            if line[0] == '"':
                assert tmp_len_x == 0, "the file above this one %s was mismatching x (%d frames) and y (%d frames) lengths by %d" % (line, 
                        len_x, end, tmp_len_x)
                speaker_label = line.split('/')[-2]

                # load HTK's MFCC
                t = htkmfc.open(line.strip('"')[:-3] + 'mfc') # .lab -> .mfc
                x = np.append(x, t.getall(), axis=0)
                len_x = t.getall().shape[0]
                tmp_len_x = len_x

                if TALKBOX_FBANKS:  # do our own filterbanks TODO
                    fr, snd = wavfile.read(line.strip('"')[:-3] + 'wav') # .lab -> .wav
                    assert fr == SAMPLING_RATE, "SAMPLING_RATE is not what is found in the wav file"
                    _, fbank, _ = tbmfcc(snd, nwin=HAMMING_SIZE/1000.*SAMPLING_RATE, nfft=2048, fs=SAMPLING_RATE, nceps=13)
                    x_fbank = np.append(x_fbank, fbank, axis=0)
                    assert t.getall().shape[0] == fbank.shape[0], "MFCC and filterbank not of the same length (not on the same sampling rate)"
                else:
                    fbank = None
                    with open(line.strip('"')[:-4] + '_fbanks.npy') as fbanksf:
                        fbank = np.load(fbanksf)
                    if fbank != None:
                        # it seems filterbanks obtained with spectral are a little longer at the end
                        if DEBUG:
                            print "cutting the last", fbank.shape[0] - t.getall().shape[0], "frames from the filterbank"
                        fbank = fbank[:t.getall().shape[0]]
                        x_fbank = np.append(x_fbank, fbank, axis=0)
                        assert t.getall().shape[0] == fbank.shape[0], "MFCC and filterbank not of the same length (not on the same sampling rate)"

                if do_gammatones:
                    # load the wav sound (with Brian)
                    sound = loadsound(line.strip('"')[:-3] + 'wav') # .lab -> .wav
                    # compute the gammatones (see Brian's doc)
                    bw = 10**(0.037+0.785*log10(center_frequencies))
                    gammatone = ApproximateGammatone(sound, center_frequencies, 
                                                     bw, order=3)
                    g = gammatone.process()
                    # subsample the gammatones at the same rate than the MFCC's
                    # (just for practicality so that they are aligned...)
                    n_samples = g.shape[0]*1./(t.getall().shape[0] + 1) # TODO check "+1"
                    ### # do the harmonic mean (nth root of the product of the terms)
                    ### g_sub = subsample_apply_f(g, n_samples, lambda z: np.power(np.prod(z), 1./n_samples))
                    g_sub = subsample_apply_f(g, n_samples, lambda z: np.sqrt(np.sum(np.square(z))))
                    # compute the delta and delta of the subsampled gammatones
                    gamma_speed_accel = compute_speed_and_accel(g_sub)
                    # append
                    tmp = gamma_speed_accel[:t.getall().shape[0]] # TODO check
                    if tmp.shape[0] != t.getall().shape[0]: # TODO remove
                        print line
                        print tmp.shape
                        print t.getall().shape
                        print n_samples
                        print g.shape
                        print "exiting because of the mismatch"
                        sys.exit(-1)
                    x_gamma = np.append(x_gamma, tmp, axis=0)

            elif line[0].isdigit():
                start, end, state = line.split()[:3]
                start = (int(start)+9999)/(MFCC_TIMESTEP * 10000) # htk
                end = (int(end)+9999)/(MFCC_TIMESTEP * 10000) # htk
                for i in xrange(start, end):
                    tmp_len_x -= 1
                    y.append(state)
                    y_spkr.append(speaker_label)
                
    assert(len(y) == x.shape[0])
    assert(len(y_spkr) == x.shape[0])
    rootname = mlf[:-4] 
    np.save(rootname + '_xdata.npy', x)
    np.save(rootname + '_xfbank.npy', x_fbank)
    if do_gammatones:
        np.save(rootname + '_xgamma.npy', x_gamma)
    yy = np.array(y)
    yy_spkr = np.array(y_spkr)
    np.save(rootname + '_ylabels.npy', yy)
    np.save(rootname + '_yspeakers.npy', yy_spkr)

    print "length x:", len(x), "length y:", len(y), "length y_spkr:", len(y_spkr)
    print "shape x:", x.shape, "shape yy:", yy.shape, "shape yy_spkr:", yy_spkr.shape

    if TEST:
        tx = np.load(rootname + '_xdata.npy')
        tx_fbank = np.load(rootname + '_xfbank.npy')
        if do_gammatones:
            tx_gamma = np.load(rootname + '_xgamma.npy')
        ty = np.load(rootname + '_ylabels.npy')
        ty_spkr = np.load(rootname + '_yspeakers.npy')
        if np.all(tx==x) and np.all(ty==yy) and np.all(ty_spkr==yy_spkr):
            assert_allclose(tx_fbank, x_fbank, err_msg="x_fbank and its serialized version are not allclose")
            if do_gammatones:
                assert_allclose(tx_gamma, x_gamma, err_msg="x_gamma and its serialized version are not allclose")
            print "SUCCESS: serialized and current in-memory arrays are equal"
            sys.exit(0)
        else:
            print "ERROR: serialized and current X (MFCC) or Y in-memory arrays differ!"
            print "x (MFCC):", np.all(tx==x)
            print "y (labels):", np.all(ty==yy)
            print "y (speakers):", np.all(ty_spkr==yy_spkr)
            sys.exit(-1)
Example #38
        all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible),
                              dtype='float32')
        map_file_to_start_end = {}
        mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
        map_mfcc_file_name = 'tmp_allen_map_file_to_start_end_' + str(
            int(input_n_frames)) + '.pickle'
        try:
            print("loading concat MFCC from pickled file")
            with open(mfcc_file_name) as concat_mfcc:
                all_mfcc = np.load(concat_mfcc)
            with open(map_mfcc_file_name) as map_mfcc:
                map_file_to_start_end = pickle.load(map_mfcc)
        except:
            for ind, mfcc_file in enumerate(list_of_mfcc_files):
                start = all_mfcc.shape[0]
                x = htkmfc.open(mfcc_file).getall()
                if input_n_frames > 1:
                    x = padding(input_n_frames, x)
                all_mfcc = np.append(all_mfcc, x, axis=0)
                map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
                print("did", mfcc_file, "ind", ind)
            with open(mfcc_file_name, 'w') as concat_mfcc:
                np.save(concat_mfcc, all_mfcc)
            with open(map_mfcc_file_name, 'w') as map_mfcc:
                pickle.dump(map_file_to_start_end, map_mfcc)

        tmp_likelihoods = likelihoods_computer(all_mfcc)
        depth_1_likelihoods = depth_1_computer(all_mfcc)
        depth_2_likelihoods = depth_2_computer(all_mfcc)
        #depth_3_likelihoods = depth_1_computer(all_mfcc) TODO
        print(map_states_to_phones)
def load_htkfile_full(input_file):
    feat_reader = htk.open(input_file)  #extracting features from the htk files
    feat1 = feat_reader.getall()
    feat = np.reshape(feat1, (1, -1, 60))
    return feat

    print "this is a DBN with", input_n_frames, "frames on the input layer"
    print "concatenating MFCC files" 
    all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')
    map_file_to_start_end = {}
    mfcc_file_name = 'tmp_allen_mfcc_' + str(int(input_n_frames)) + '.npy'
    map_mfcc_file_name = 'tmp_allen_map_file_to_start_end_' + str(int(input_n_frames)) + '.pickle'
    try:
        print "loading concat MFCC from pickled file"
        with open(mfcc_file_name) as concat_mfcc:
            all_mfcc = np.load(concat_mfcc)
        with open(map_mfcc_file_name) as map_mfcc:
            map_file_to_start_end = cPickle.load(map_mfcc)
    except:
        for ind, mfcc_file in enumerate(list_of_mfcc_files):
            start = all_mfcc.shape[0]
            x = htkmfc.open(mfcc_file).getall()
            if input_n_frames > 1:
                x = padding(input_n_frames, x)
            all_mfcc = np.append(all_mfcc, x, axis=0)
            map_file_to_start_end[mfcc_file] = (start, all_mfcc.shape[0])
            print "did", mfcc_file, "ind", ind
        with open(mfcc_file_name, 'w') as concat_mfcc:
            np.save(concat_mfcc, all_mfcc)
        with open(map_mfcc_file_name, 'w') as map_mfcc:
            cPickle.dump(map_file_to_start_end, map_mfcc)

    tmp_likelihoods = likelihoods_computer(all_mfcc)
    columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])]
    likelihoods = (tmp_likelihoods[:, columns_remapping],
        map_file_to_start_end)
else:
Example #41
def process(ofname, iscpfname, ihmmfname, 
        ilmfname=None, iwdnetfname=None, unibifname=None, 
        idbnfname=None, idbndictstuple=None):

    with open(ihmmfname) as ihmmf:
        n_states, transitions, gmms = parse_hmm(ihmmf)

    gmms_ = precompute_det_inv(gmms)
    map_states_to_phones = phones_mapping(gmms)
    likelihoods_computer = functools.partial(compute_likelihoods, gmms_)
    gmm_likelihoods_computer = functools.partial(compute_likelihoods, gmms_) #TODO REMOVE

    dbn = None
    dbn_to_int_to_state_tuple = None
    if idbnfname != None:
        with open(idbnfname) as idbnf:
            dbn = cPickle.load(idbnf)
        with open(idbndictstuple) as idbndtf:
            dbn_to_int_to_state_tuple = cPickle.load(idbndtf)
        dbn_phones_to_states = dbn_to_int_to_state_tuple[0]
        likelihoods_computer = functools.partial(compute_likelihoods_dbn, dbn)
        # like that = for GRBM first layer (normalize=True, unit=False)
        # TODO correct the normalize/unit to work on full test dataset

    if iwdnetfname != None:
        with open(iwdnetfname) as iwdnf:
            transitions = parse_wdnet(transitions, iwdnf) # parse wordnet
    elif ilmfname != None:
        with open(ilmfname) as ilmf:
            if MATRIX_BIGRAM:
                transitions = parse_lm_matrix(transitions, ilmf) # parse bigram LM in matrix format in ilmf
            else:
                transitions = parse_lm(transitions, ilmf) # parse bigram LM in ARPA-MIT in ilmf
    elif unibifname != None: # our own unigram and bigram counts,
                             # c.f. src/produce_LM.py
        with open(unibifname) as ubf:
            transitions = initialize_transitions(transitions, ubf, 
                    unigrams_only=UNIGRAMS_ONLY)
    else:
        # uniform transitions between phones
        transitions = initialize_transitions(transitions)
    transitions = penalty_scale(transitions, 
            insertion_penalty=INSERTION_PENALTY, scale_factor=SCALE_FACTOR)


    dummy = np.ndarray((2,2)) # to force only 1 compile of Viterbi's C
    viterbi(dummy, [None, dummy], {}) # also for this compile's debug purposes
    
    if dbn != None:
        input_n_frames = dbn.rbm_layers[0].n_visible / 39 # TODO generalize
        print "this is a DBN with", input_n_frames, "frames on the input layer"
        mfcc_file_name = 'tmp_mfcc_' + str(int(input_n_frames)) + '.npy'
        map_mfcc_file_name = 'tmp_map_file_to_start_end_' + str(int(input_n_frames)) + '.pickle'
        try: # TODO remove?
            print "loading concat MFCC from pickled file", mfcc_file_name
            with open(mfcc_file_name) as concat_mfcc:
                all_mfcc = np.load(concat_mfcc)
            with open(map_mfcc_file_name) as map_mfcc:
                map_file_to_start_end = cPickle.load(map_mfcc)
        except:
            print "concatenating MFCC files" # TODO parallelize + use np.concatenate
            all_mfcc = np.ndarray((0, dbn.rbm_layers[0].n_visible), dtype='float32')
            map_file_to_start_end = {}
            with open(iscpfname) as iscpf:
                for line in iscpf:
                    cline = clean(line)
                    start = all_mfcc.shape[0]
                    x = htkmfc.open(cline).getall()
                    if input_n_frames > 1:
                        x = padding(input_n_frames, x)
                    print all_mfcc.shape
                    print x.shape
                    all_mfcc = np.append(all_mfcc, x, axis=0)
                    map_file_to_start_end[cline] = (start, all_mfcc.shape[0])

            with open(mfcc_file_name, 'w') as concat_mfcc:
                np.save(concat_mfcc, all_mfcc)
            with open(map_mfcc_file_name, 'w') as map_mfcc:
                cPickle.dump(map_file_to_start_end, map_mfcc)
    else: # GMM
        all_mfcc = np.ndarray((0, 39), dtype='float32') # TODO generalize

    print "computing likelihoods"
    if dbn != None: # TODO clean
        # TODO REMOVE
        #gmm_likelihoods = gmm_likelihoods_computer(all_mfcc[:, xrange(195,234)])
        #mean_gmms = np.mean(gmm_likelihoods, 0)
        #print gmm_likelihoods
        #print gmm_likelihoods.shape
        tmp_likelihoods = likelihoods_computer(all_mfcc)
        #mean_dbns = np.mean(tmp_likelihoods, 0)
        #tmp_likelihoods *= (mean_gmms / mean_dbns)
        if VERBOSE:
            print tmp_likelihoods
            print tmp_likelihoods.shape
        print map_states_to_phones
        print dbn_phones_to_states
        assert set(map_states_to_phones.values()) == set(dbn_phones_to_states.keys()), "Phones differ between the HMM and the DBN"
        columns_remapping = [dbn_phones_to_states[map_states_to_phones[i]] for i in xrange(tmp_likelihoods.shape[1])]
        if VERBOSE:
            print columns_remapping
        likelihoods = (tmp_likelihoods[:, columns_remapping],
            map_file_to_start_end)
        #if VERBOSE:
            #print map_file_to_start_end
            #print len(map_file_to_start_end)
            #print likelihoods[0]
            #print likelihoods[0].shape
    else:
        likelihoods = (likelihoods_computer(all_mfcc), map_file_to_start_end)

    print "computing viterbi paths"
    list_mlf_string = []
    with open(iscpfname) as iscpf:
        il = InnerLoop(likelihoods,
                map_states_to_phones, transitions,
                using_bigram=(ilmfname != None 
                    or iwdnetfname != None 
                    or unibifname != None))
        #p = Pool(1)
        p = Pool(cpu_count())
        list_mlf_string = p.map(il, iscpf)
    with open(ofname, 'w') as of:
        of.write('#!MLF!#\n')
        for line in list_mlf_string:
            of.write(line)
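The columns_remapping step above reorders the network's output columns so that column i lines up with HMM state i; NumPy integer fancy indexing performs the permutation, as in this toy illustration:

import numpy as np

likelihoods = np.arange(12.0).reshape(3, 4)  # 3 frames x 4 output columns
columns_remapping = [2, 0, 3, 1]             # new column i comes from old column remapping[i]
print(likelihoods[:, columns_remapping])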
Example #42
speakers = sorted(set([bname2speaker(bname)
                       for bname in bnames]))

bnames_per_speaker = {
    speaker: [bname for bname in bnames if bname2speaker(bname) == speaker]
    for speaker in speakers
}

if __name__ == '__main__':
    ideal_warpfreq_file = 'ideal_warpfreq_5.txt'
    ideal_warps = pd.read_csv(ideal_warpfreq_file)

    # outrawdir = path.join(datadir, 'raw')
    outwarpeddir = path.join(english_vtln_dir, 'vtln2')
    try:
        os.makedirs(outwarpeddir)
    except OSError:
        pass

    for ix, (_, filename, warpfreq) in ideal_warps.iterrows():
        # print('{} ({}/{})'.format(speaker, ix+1, len(ideal_warps)))
        # for filename in bnames_per_speaker[speaker]:
        print('{} ({}/{})'.format(filename, ix + 1, len(ideal_warps)))

        infile_warp = path.join(
            english_vtln_dir, 'warp_freq_{:.2f}'.format(warpfreq),
            filename+'.mfc'
        )
        mfc_warp = htkmfc.open(infile_warp).getall()
        np.save(path.join(outwarpeddir, filename+'.npy'), mfc_warp)