import h5py
from hyp_data_reader import HypDataReader
import numpy as np

"""
Author: Jeff Lai, Jesus
JHU hltcoe 2017

Read mfcc data from 'mfcc_cmvn.h5' to train, validate, test a lstm.
Using h5py library and Jesus' hyp_data_reader.
"""
# NOTE(review): the header above mentions 'mfcc_cmvn.h5', but the code below
# opens the four ark2h5_* files -- confirm and refresh the header text.

# Module-level readers, one per HDF5 file. They are created at import time.
ark2h5_08_16k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_08_16k.h5'
)  # an instance of hyp_data_reader
ark2h5_08_8k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_08_8k.h5'
)
ark2h5_10_16k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_10_16k.h5'
)
ark2h5_10_8k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_10_8k.h5'
)


def test():
    """Print the number of datasets exposed by each module-level reader.

    The dataset names of every reader are fetched and sorted first; the
    four counts are then printed one per line, in the order
    08_8k, 08_16k, 10_8k, 10_16k.
    """
    key_08_8k = sorted(ark2h5_08_8k.get_datasets())
    key_08_16k = sorted(ark2h5_08_16k.get_datasets())
    key_10_8k = sorted(ark2h5_10_8k.get_datasets())
    key_10_16k = sorted(ark2h5_10_16k.get_datasets())

    # Not read by any statement visible in this function; kept as-is.
    count = 0

    for sorted_keys in (key_08_8k, key_08_16k, key_10_8k, key_10_16k):
        print(len(sorted_keys))
import h5py
from hyp_data_reader import HypDataReader
import numpy as np

"""
Author: Jeff Lai, Jesus
JHU hltcoe 2017

Read mfcc data from 'mfcc_cmvn.h5' to train, validate, test a lstm.
Using h5py library and Jesus' hyp_data_reader.
"""

# Shared reader over the MFCC HDF5 file; created once at import time.
mfcc_h5 = HypDataReader('/export/b14/jlai/scale/vad/kaldi/mfcc_cmvn.h5')  # an instance of hyp_data_reader


def get_utt():
    """Return the dataset names stored in ``mfcc_h5``.

    According to the original author these names are the .wav file names.
    The value is whatever ``mfcc_h5.get_datasets()`` returns.
    """
    return mfcc_h5.get_datasets()


def get_frame():
    """Return a dictionary mapping each .wav file name to its MFCC matrix.

    Every item produced by ``mfcc_h5.read(keys)`` is converted with
    ``np.asarray`` and stored under the key at the same position in
    ``keys`` (the original author describes each value as a
    frame*dimension matrix).

    Returns:
        dict: ``{name: numpy.ndarray}`` with one entry per key/matrix pair.
    """
    keys = get_utt()
    # Pair names and matrices positionally instead of indexing keys[i].
    return {
        key: np.asarray(matrix_id)
        for key, matrix_id in zip(keys, mfcc_h5.read(keys))
    }
import h5py
from hyp_data_reader import HypDataReader
from dev2vad import vad_import
import numpy as np
import math

"""
Author: Jeff Lai, Jesus
JHU hltcoe 2017

Read mfcc data from 'mfcc_cmvn.h5' to train, validate, test a lstm.
Using h5py library and Jesus' hyp_data_reader.

*modification: return a generator for mfcc and vad respectively. Speeds up the process.
"""

# Shared reader over the MFCC HDF5 file; created once at import time.
mfcc_h5 = HypDataReader('/export/b13/jlai/scale/vad/open_sat/mfcc_cmvn.h5' ) # an instance of hyp_data_reader

def get_utt():
    """
    Return the dataset names stored in ``mfcc_h5``.

    According to the original author these are the .wav file names; the
    value is whatever ``mfcc_h5.get_datasets()`` returns.
    """
    return mfcc_h5.get_datasets()

def get_frame():
    """
    Per the original author: returns a dictionary with the .wav file name
    as key and its mfcc (frame*dimension) matrix as value.

    NOTE(review): the module header says this version returns generators
    for mfcc and vad instead -- confirm which description is current.
    """
    # Accumulator for the per-utterance results.
    all_frames = {}
    # Dataset names obtained from the shared reader.
    keys = get_utt()