예제 #1
0
import h5py
from hyp_data_reader import HypDataReader
import numpy as np
"""
Author: Jeff Lai, Jesus 
JHU hltcoe 2017 

Read MFCC data from 'mfcc_cmvn.h5' to train, validate, and test an LSTM, using the h5py library and Jesus' hyp_data_reader.
"""

# Module-level readers, one per HDF5 feature file under .../8_16/mfcc/.
# Each HypDataReader is constructed when this module is imported.
# NOTE(review): the 08/10 and 8k/16k suffixes presumably identify the data
# set and the sampling rate -- confirm with the data preparation scripts.
ark2h5_08_16k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_08_16k.h5'
)  # a HypDataReader instance (class imported from hyp_data_reader)
ark2h5_08_8k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_08_8k.h5')
ark2h5_10_16k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_10_16k.h5')
ark2h5_10_8k = HypDataReader(
    '/export/b13/jlai/scale/super_res/8_16/mfcc/ark2h5_10_8k.h5')


def test():
    """Print how many datasets each of the four module-level readers holds.

    The dataset names of every reader are fetched and sorted first; the four
    counts are then printed in the order 08_8k, 08_16k, 10_8k, 10_16k.
    """
    readers = (ark2h5_08_8k, ark2h5_08_16k, ark2h5_10_8k, ark2h5_10_16k)
    # Collect every sorted name list before printing anything, so nothing is
    # emitted unless all four readers could be queried.
    sorted_names = [sorted(reader.get_datasets()) for reader in readers]
    count = 0  # initialised here but not read anywhere in this function
    for names in sorted_names:
        print(len(names))
예제 #2
0
import h5py
from hyp_data_reader import HypDataReader
import numpy as np
"""
Author: Jeff Lai, Jesus 
JHU hltcoe 2017 

Read MFCC data from 'mfcc_cmvn.h5' to train, validate, and test an LSTM, using the h5py library and Jesus' hyp_data_reader.
"""

# Module-level reader for the 'mfcc_cmvn.h5' feature file; it is constructed
# when this module is imported and shared by the functions below.
mfcc_h5 = HypDataReader('/export/b14/jlai/scale/vad/kaldi/mfcc_cmvn.h5'
                        )  # a HypDataReader instance (class imported from hyp_data_reader)


def get_utt():
    """Return the dataset names held by the module-level ``mfcc_h5`` reader.

    The result is whatever ``mfcc_h5.get_datasets()`` returns; per the
    original author's note these names are the .wav file names.
    """
    utterance_names = mfcc_h5.get_datasets()
    return utterance_names


def get_frame():
    """Build a mapping from utterance name to its MFCC matrix.

    Every dataset name reported by ``get_utt()`` is read through
    ``mfcc_h5.read`` and converted to a NumPy array with ``np.asarray``.

    Returns:
        dict: .wav file name -> NumPy array. Per the original author's note
        each array is a (frame x dimension) MFCC matrix.
    """
    keys = get_utt()
    # read(keys) is consumed positionally: the i-th item it yields is stored
    # under the i-th key, exactly as the index-based pairing did before.
    all_frames = {
        key: np.asarray(matrix)
        for key, matrix in zip(keys, mfcc_h5.read(keys))
    }
    # Hand the mapping back to the caller; without this the function would
    # return None despite its documented contract.
    return all_frames
예제 #3
0
import h5py
from hyp_data_reader import HypDataReader
from dev2vad import vad_import
import numpy as np
import math
"""
Author: Jeff Lai, Jesus 
JHU hltcoe 2017 

Read MFCC data from 'mfcc_cmvn.h5' to train, validate, and test an LSTM, using the h5py library and Jesus' hyp_data_reader.

*Modification:
Return a generator for MFCC and VAD respectively, which speeds up the process.
"""

# Module-level reader for the 'mfcc_cmvn.h5' feature file; it is constructed
# when this module is imported and shared by the functions below.
mfcc_h5 = HypDataReader('/export/b13/jlai/scale/vad/open_sat/mfcc_cmvn.h5'
                        )  # a HypDataReader instance (class imported from hyp_data_reader)


def get_utt():
    """Return the dataset names held by the module-level ``mfcc_h5`` reader.

    The result is whatever ``mfcc_h5.get_datasets()`` returns; per the
    original author's note these names are the .wav file names.
    """
    utterance_names = mfcc_h5.get_datasets()
    return utterance_names


def get_frame():
    """
	-returns a dictionary with the .wav file name as key and its mfcc (frame*dimension) matrix as value 
	"""
    all_frames = {}
    keys = get_utt()