Python calcfeat_delta_delta Exemples, core.calcmfcc.calcfeat_delta_delta Python Exemples

Exemple #1

0

Afficher le fichier

def feature_generation(audio_path, save_path, win_length=0.02, win_step=0.01, mode='fbank',
                       feature_len=40, noise_name='clean', noiseSNR=0.5, csv=None):
    '''
    Extract features from every '.wav' file found in the label directories.

    <input>
    audio_path: dataset root, e.g. '/sda3/DATA/jsbae/Google_Speech_Command'.
        Must contain 'testing_list.txt' and 'validation_list.txt'. Label
        directories are read from audio_path when noise_name == 'clean',
        otherwise from os.path.join(audio_path, noise_name).
    save_path: output root, e.g. '/home/jsbae/STT2/KWS/feature_saved'
    win_length: default=0.02, window length passed to calcfeat_delta_delta
    win_step: default=0.01, window step passed to calcfeat_delta_delta
    mode: choices=['mfcc', 'fbank']
    feature_len: default=40, feature length
    noise_name: default='clean', selects the input sub-directory (see above)
    noiseSNR: default=0.5, only used to name the output directory
        '<noise_name>_SNR<noiseSNR>'; no noise is mixed in by this function
    csv: unused, kept so existing callers keep working
    <output>
    No return value. Feature arrays of files listed in testing_list.txt are
    saved as .npy under save_path/TEST/<mode>/<noise_name>_SNR<noiseSNR>/.
    <raises>
    ValueError: if a label equals 30, if a wav file cannot be read, or if a
        file belongs to the VALID/TRAIN split (saving those is disabled).
    AssertionError: if a TEST label differs from the label file on disk.
    '''
    def read_lines(path):
        # Context manager so the list files are closed right after reading.
        with open(path, 'r') as text_file:
            return text_file.read().split('\n')

    # The split lists always live in the dataset root, so read them before
    # audio_path is redirected to a noise sub-directory below.
    # Sets give O(1) membership tests inside the per-file loop.
    testing_list = set(read_lines(os.path.join(audio_path, 'testing_list.txt')))
    validation_list = set(read_lines(os.path.join(audio_path, 'validation_list.txt')))

    # Each sub-directory name is a label.
    if noise_name == 'clean':
        dirs = [f for f in os.listdir(audio_path)
                if os.path.isdir(os.path.join(audio_path, f))]
        dirs.sort()
        if '_background_noise_' in dirs:
            dirs.remove('_background_noise_')
    else:
        audio_path = os.path.join(audio_path, noise_name)
        dirs = [f for f in os.listdir(audio_path)
                if os.path.isdir(os.path.join(audio_path, f))]
    print(dirs)
    print('Number of labels: ' + str(len(dirs)))

    # Output directories carry the SNR in their name.
    noise_name = noise_name + '_SNR' + str(noiseSNR)

    def make_dir(path):
        if not os.path.exists(path):
            os.makedirs(path)

    for split in ('TEST', 'VALID', 'TRAIN'):
        make_dir(os.path.join(save_path, split, mode, noise_name))
    for split in ('TEST', 'VALID', 'TRAIN'):
        make_dir(os.path.join(save_path, split, 'label'))

    for dirname in dirs:
        full_dirname = os.path.join(audio_path, dirname)
        cprint('Processing in ' + full_dirname, 'yellow')
        teCount, vaCount, trCount = 0, 0, 0
        for filename in os.listdir(full_dirname):
            stem, ext = os.path.splitext(filename)
            if ext != '.wav':
                continue
            full_filename = os.path.join(full_dirname, filename)

            try:
                (rate, sig) = wav.read(full_filename)
            except ValueError as e:
                # Only the "File format 'NIST'... not understood." failure has
                # a fallback reader; anything else is a real error and must
                # not continue with rate/sig unset.
                # str(e) replaces the Python-2-only e.message attribute.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(full_filename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig, rate, win_length=win_length, win_step=win_step,
                                        mode=mode, feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            label = text_to_label(dirname)
            if label == 30:
                raise ValueError('wrong')

            npy_name = dirname + '_' + stem + '.npy'
            relative_name = os.path.join(dirname, filename)
            if relative_name in testing_list:
                featureFilename = os.path.join(save_path, 'TEST', mode, noise_name, npy_name)
                labelFilename = os.path.join(save_path, 'TEST', 'label', npy_name)
                # The label file must already exist: labels are only verified
                # here, not written.
                assert label == np.load(labelFilename)
                print(featureFilename)
                np.save(featureFilename, feat)
                teCount += 1
            elif relative_name in validation_list:
                # NOTE(review): saving VALID data is disabled and the run
                # aborts on the first such file -- confirm this is intended.
                vaCount += 1
                raise ValueError('VALID feature export is disabled: ' + relative_name)
            else:
                # NOTE(review): same as above for the TRAIN split.
                trCount += 1
                raise ValueError('TRAIN feature export is disabled: ' + relative_name)
        # Formatted explicitly so Python 2 and 3 print the same text.
        print('%s %s %s' % (trCount, vaCount, teCount))

Exemple #2

0

Afficher le fichier

Fichier : libri_preprocess.py Projet : deep2dream/Automatic_Speech_Recognition

def wav2feature(root_directory, save_directory, name, win_len, win_step, mode,
                feature_len, seq2seq, save):
    """Convert every '.wav' file under root_directory/name to features and targets.

    For each wav file a sibling '<stem>.label' file is read; its first line is
    lower-cased and encoded character by character: ' ' -> 0, "'" -> 27, any
    other character -> ord(c) - 96. When seq2seq is True the sequence is
    wrapped in a start token (28) and an end token (29).

    Args:
        root_directory: directory that contains the `name` sub-directory.
        save_directory: root of the output tree.
        name: sub-directory of root_directory to process; also used in the
            output path.
        win_len: window length passed to calcfeat_delta_delta.
        win_step: window step passed to calcfeat_delta_delta.
        mode: feature mode passed to calcfeat_delta_delta.
        feature_len: feature length passed to calcfeat_delta_delta.
        seq2seq: True adds start/end tokens; output goes under 'seq2seq'
            unless seq2seq is exactly False, in which case under 'cha'.
        save: when truthy, features and targets are written as .npy to
            save_directory/<level>/<name>/<dirid>/{feature,label}/.

    Raises:
        ValueError: if a wav file cannot be read and the failure is not the
            NIST-format case that has a fallback reader.
    """
    count = 0
    dirid = 0
    level = 'cha' if seq2seq is False else 'seq2seq'
    data_dir = os.path.join(root_directory, name)
    print(data_dir)
    preprocess(data_dir)
    for subdir, dirs, files in os.walk(data_dir):
        for f in files:
            if not f.endswith('.wav'):
                continue
            fullFilename = os.path.join(subdir, f)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError as e:
                # Only the "File format 'NIST'... not understood." failure has
                # a fallback. Re-raise everything else instead of falling
                # through to an undefined (or stale, from a previous file)
                # `sf` object. str(e) replaces the Python-2-only e.message.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(fullFilename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig,
                                        rate,
                                        win_length=win_len,
                                        win_step=win_step,
                                        mode=mode,
                                        feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            labelFilename = filenameNoSuffix + '.label'
            # Distinct handle name so the loop variable `f` is not shadowed.
            with open(labelFilename, 'r') as label_file:
                characters = label_file.readline().strip().lower()

            targets = []
            if seq2seq is True:
                targets.append(28)  # start token
            for c in characters:
                if c == ' ':
                    targets.append(0)
                elif c == "'":
                    targets.append(27)
                else:
                    # 'a'..'z' -> 1..26
                    targets.append(ord(c) - 96)
            if seq2seq is True:
                targets.append(29)  # end token
            print(targets)

            if save:
                count += 1
                # Start a new numbered output directory every 4000th file.
                if count % 4000 == 0:
                    dirid += 1
                print('file index:', count)
                print('dir index:', dirid)
                label_dir = os.path.join(save_directory, level, name,
                                         str(dirid), 'label')
                feat_dir = os.path.join(save_directory, level, name,
                                        str(dirid), 'feature')
                if not os.path.isdir(label_dir):
                    os.makedirs(label_dir)
                if not os.path.isdir(feat_dir):
                    os.makedirs(feat_dir)
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                t_f = os.path.join(label_dir, npy_name)
                print(t_f)
                np.save(t_f, targets)

Exemple #3

0

Afficher le fichier

Fichier : timit_preprocess.py Projet : deep2dream/Automatic_Speech_Recognition

def wav2feature(rootdir, save_directory, mode, feature_len, level, keywords, win_len, win_step, seq2seq, save):
    """Convert every '.WAV' file under rootdir to features and label sequences.

    Labels come from the sibling '<stem>.PHN' file (level == 'phn') or
    '<stem>.WRD' file (level == 'cha'); the third space-separated field of
    each line is used.

    * 'phn': each symbol is mapped to its index in the module-level `phn`
      list. With seq2seq the sequence is wrapped in len(phn) (start) and
      len(phn) + 1 (end).
    * 'cha': each word is encoded per character ("'" -> 27, otherwise
      ord(c) - 96), words are separated by 0. With seq2seq the sequence is
      wrapped in 28 (start) and 29 (end).

    Args:
        rootdir: directory tree that is walked for '.WAV' files.
        save_directory: root of the output tree; arrays go to
            save_directory/<level>/<keywords>/<mode> and .../label.
        mode: feature mode passed to calcfeat_delta_delta; also the name of
            the feature output directory.
        feature_len: feature length passed to calcfeat_delta_delta.
        level: 'phn' or 'cha'.
        keywords: path component of the output directories.
        win_len: window length passed to calcfeat_delta_delta.
        win_step: window step passed to calcfeat_delta_delta.
        seq2seq: True adds start/end tokens to the label sequence.
        save: when truthy, features and labels are written as .npy files.

    Raises:
        ValueError: if a wav file cannot be read (and is not the NIST case
            with a fallback reader), or if save is requested for a level
            other than 'phn' / 'cha'.
    """
    feat_dir = os.path.join(save_directory, level, keywords, mode)
    label_dir = os.path.join(save_directory, level, keywords, 'label')
    if not os.path.exists(label_dir):
        os.makedirs(label_dir)
    if not os.path.exists(feat_dir):
        os.makedirs(feat_dir)
    count = 0
    for subdir, dirs, files in os.walk(rootdir):
        for fname in files:
            if not fname.endswith('.WAV'):
                continue
            fullFilename = os.path.join(subdir, fname)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError as e:
                # Only the "File format 'NIST'... not understood." failure has
                # a fallback reader; other errors must not continue with
                # rate/sig unset. str(e) replaces the Python-2-only e.message.
                if 'NIST' not in str(e):
                    raise
                sf = Sndfile(fullFilename, 'r')
                sig = sf.read_frames(sf.nframes)
                rate = sf.samplerate

            feat = calcfeat_delta_delta(sig, rate, win_length=win_len, win_step=win_step,
                                        mode=mode, feature_len=feature_len)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            # Reset per file so an unsupported level is detected at save time
            # instead of surfacing as a NameError.
            phenome = None

            if level == 'phn':
                labelFilename = filenameNoSuffix + '.PHN'
                phenome = []
                with open(labelFilename, 'r') as f:
                    if seq2seq is True:
                        phenome.append(len(phn))  # <start token>
                    for line in f.read().splitlines():
                        s = line.split(' ')[2]
                        phenome.append(phn.index(s))
                    if seq2seq is True:
                        phenome.append(len(phn) + 1)  # <end token>
                    print(phenome)
                phenome = np.array(phenome)

            elif level == 'cha':
                labelFilename = filenameNoSuffix + '.WRD'
                with open(labelFilename, 'r') as f:
                    words = [line.split(' ')[2] for line in f.read().splitlines()]
                sentence = ''.join(word + ' ' for word in words)
                phenome = []
                for position, word in enumerate(words):
                    if position:
                        phenome.append(0)  # word separator
                    for c in word:
                        if c == "'":
                            phenome.append(27)
                        else:
                            phenome.append(ord(c) - 96)
                if seq2seq is True:
                    # One start and one end token per utterance. The start
                    # token used to be added in front of every word, which
                    # did not match the single end token.
                    phenome = [28] + phenome + [29]
                print(phenome)
                print(sentence)

            count += 1
            print('file index:', count)
            if save:
                if phenome is None:
                    raise ValueError("unsupported level %r; expected 'phn' or 'cha'" % (level,))
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                labelFilename = os.path.join(label_dir, npy_name)
                print(featureFilename, labelFilename)
                np.save(labelFilename, phenome)

Exemple #4

0

Afficher le fichier

Fichier : wsj_preprocess.py Projet : yuzhe630/Automatic_Speech_Recognition

def wav2feature(root_directory, save_directory, name, win_len, win_step, mode,
                feature_len, seq2seq, save):
    """Convert every '.wv1' / '.wav' file under root_directory to features and targets.

    To run for WSJ corpus, you should download sph2pipe_v2.5 first!

    A file that wav.read() rejects with ValueError is converted with the
    `sph2pipe` binary from the module-level `sph2pipe_dir` to
    '<stem>.wav'. The original '.wv1' file is deleted once the converted
    file has been read successfully.

    For each audio file a sibling '<stem>.label' file is read; its first line
    is lower-cased and encoded per character: ' ' -> 0, "'" -> 27, any other
    character -> ord(c) - 96. When seq2seq is True the sequence is wrapped in
    a start token (28) and an end token (29).

    Args:
        root_directory: directory tree that is walked for audio files.
        save_directory: root of the output tree.
        name: path component of the output directories.
        win_len: window length passed to calcfeat_delta_delta.
        win_step: window step passed to calcfeat_delta_delta.
        mode: feature mode passed to calcfeat_delta_delta; also the name of
            the feature output directory.
        feature_len: feature length passed to calcfeat_delta_delta.
        seq2seq: True adds start/end tokens; output goes under 'seq2seq'
            unless seq2seq is exactly False, in which case under 'cha'.
        save: when truthy, features and targets are written as .npy to
            save_directory/<level>/<name>/<dirid>/{<mode>,label}/.

    Raises:
        ValueError: if a '.wav' file cannot be read (there is no conversion
            fallback for files that already carry the target extension).
    """
    count = 0
    dirid = 0
    level = 'cha' if seq2seq is False else 'seq2seq'
    for subdir, dirs, files in os.walk(root_directory):
        for f in files:
            if not f.endswith(('.wv1', '.wav')):
                continue
            fullFilename = os.path.join(subdir, f)
            filenameNoSuffix = os.path.splitext(fullFilename)[0]

            try:
                (rate, sig) = wav.read(fullFilename)
            except ValueError:
                if not f.endswith('.wv1'):
                    # Converting a '.wav' would write the output onto its own
                    # input and then delete it; surface the read error instead.
                    raise
                sph2pipe = os.path.join(sph2pipe_dir, 'sph2pipe')
                # Swap only the extension; a blanket replace('wv1', 'wav')
                # would also rewrite matching directory names in the path.
                wav_name = filenameNoSuffix + '.wav'
                check_call([sph2pipe, '-f', 'rif', fullFilename, wav_name])
                print(wav_name)
                (rate, sig) = wav.read(wav_name)
                # Remove the source exactly once, and only after the converted
                # file proved readable. Removing it twice raised an error.
                os.remove(fullFilename)

            feat = calcfeat_delta_delta(sig,
                                        rate,
                                        win_length=win_len,
                                        win_step=win_step,
                                        feature_len=feature_len,
                                        mode=mode)
            feat = preprocessing.scale(feat)
            feat = np.transpose(feat)
            print(feat.shape)

            labelFilename = filenameNoSuffix + '.label'
            # Distinct handle name so the loop variable `f` is not shadowed.
            with open(labelFilename, 'r') as label_file:
                characters = label_file.readline().strip().lower()

            targets = []
            if seq2seq is True:
                targets.append(28)  # start token
            for c in characters:
                if c == ' ':
                    targets.append(0)
                elif c == "'":
                    targets.append(27)
                else:
                    # 'a'..'z' -> 1..26
                    targets.append(ord(c) - 96)
            if seq2seq is True:
                targets.append(29)  # end token
            targets = np.array(targets)
            print(targets)

            if save:
                count += 1
                # Start a new numbered output directory every 1000th file.
                if count % 1000 == 0:
                    dirid += 1
                print('file index:', count)
                print('dir index:', dirid)
                label_dir = os.path.join(save_directory, level, name,
                                         str(dirid), 'label')
                feat_dir = os.path.join(save_directory, level, name,
                                        str(dirid), mode)
                if not os.path.isdir(label_dir):
                    os.makedirs(label_dir)
                if not os.path.isdir(feat_dir):
                    os.makedirs(feat_dir)
                npy_name = os.path.basename(filenameNoSuffix) + '.npy'
                featureFilename = os.path.join(feat_dir, npy_name)
                np.save(featureFilename, feat)
                t_f = os.path.join(label_dir, npy_name)
                print(t_f)
                np.save(t_f, targets)