def test_multiple_mlf_files():
    os.chdir(data_path)
    feature_dim = 33
    num_classes = 132
    context = 2
    test_mlf_path = "../../../../Tests/EndToEndTests/Speech/Data/glob_00001.mlf"

    features_file = "glob_0000.scp"
    label_files = ["glob_0000.mlf", test_mlf_path]
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=label_files)))

    # Make sure we can read at least one minibatch.
    mbsource = MinibatchSource([fd, ld])
    mbsource.next_minibatch(1)

    os.chdir(abs_path)
def test_htk_deserializers():
    mbsize = 640
    epoch_size = 1000 * mbsize
    lr = [0.001]

    feature_dim = 33
    num_classes = 132
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"
    labels_file = "glob_0000.mlf"
    label_mapping_file = "state.list"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(
        label_mapping_file,
        StreamDefs(
            awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    reader = MinibatchSource([fd, ld])

    features = C.input_variable(((2 * context + 1) * feature_dim))
    labels = C.input_variable((num_classes))

    model = Sequential(
        [For(range(3), lambda: Recurrence(LSTM(256))), Dense(num_classes)])
    z = model(features)
    ce = C.cross_entropy_with_softmax(z, labels)
    errs = C.classification_error(z, labels)

    learner = C.adam_sgd(z.parameters,
                         lr=C.learning_rate_schedule(lr, C.UnitType.sample, epoch_size),
                         momentum=C.momentum_as_time_constant_schedule(1000),
                         low_memory=True,
                         gradient_clipping_threshold_per_sample=15,
                         gradient_clipping_with_truncation=True)
    trainer = C.Trainer(z, (ce, errs), learner)

    input_map = {
        features: reader.streams.amazing_features,
        labels: reader.streams.awesome_labels
    }

    pp = C.ProgressPrinter(freq=0)
    # just run and verify it doesn't crash
    for i in range(3):
        mb_data = reader.next_minibatch(mbsize, input_map=input_map)
        trainer.train_minibatch(mb_data)
        pp.update_with_trainer(trainer, with_metric=True)
    assert True
    os.chdir(abs_path)
def create_mb_source(features_file, labels_file, label_mapping_file, total_number_of_samples):
    # feature_dim, num_classes and context are assumed to be module-level constants.
    for file_name in [features_file, labels_file, label_mapping_file]:
        if not os.path.exists(file_name):
            raise RuntimeError("File '%s' does not exist. Please check that datadir argument is set correctly." % (file_name))

    fd = HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFDeserializer(label_mapping_file, StreamDefs(
        awesome_labels=StreamDef(shape=num_classes, mlf=labels_file)))

    # Enabling BPTT with truncation_length > 0
    return MinibatchSource([fd, ld], truncation_length=250, epoch_size=total_number_of_samples)
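# A minimal, hypothetical usage sketch for create_mb_source (not part of the
# original sources): the file names and sample count below are placeholders,
# and feature_dim, num_classes, context are assumed to be defined at module
# level as in the snippet above.
def demo_create_mb_source():
    mb_source = create_mb_source("glob_0000.scp", "glob_0000.mlf", "state.list",
                                 total_number_of_samples=1000000)
    # Pull one truncated-BPTT minibatch just to confirm the readers are wired up.
    mb = mb_source.next_minibatch(640)
    return mb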
def decode_model(use_gpu=True, gpu_id=0):
    # use GPU or CPU according to parameters
    try_set_default_device(gpu(gpu_id) if use_gpu else cpu())

    model_dnn = load_model("./model/speech_enhancement.model")
    features_file = "./test_normed.scp"
    feature_dim = 257
    test_reader = MinibatchSource(HTKFeatureDeserializer(StreamDefs(
        amazing_features=StreamDef(
            shape=feature_dim, context=(3, 3), scp=features_file))),
        randomize=False, frame_mode=False)
    eval_input_map = {input: test_reader.streams.amazing_features}

    f = open(features_file)
    line = f.readline()
    while line:
        temp_input_path = line.split(']')[0]
        mb_size = temp_input_path.split(',')[-1]
        mb_size = int(mb_size) + 1
        noisy_fea = test_reader.next_minibatch(
            mb_size, input_map=eval_input_map)
        real_noisy_fea = noisy_fea[input].data

        node_in_graph = model_dnn.find_by_name('irm')
        output_nodes = combine([node_in_graph.owner])

        out_noisy_fea = output_nodes.eval(real_noisy_fea)
        # out_noisy_fea = as_composite(model_dnn.output1[0].owner).eval(
        #     real_noisy_fea)

        out_SE_noisy_fea = np.concatenate((out_noisy_fea), axis=0)

        out_file_path = line.split('=')[0]
        out_file_name = os.path.join('./enhanced_norm_fea_mat', out_file_path)
        out_file_fullpath = os.path.split(out_file_name)[0]
        # print (out_file_fullpath)
        if not os.path.exists(out_file_fullpath):
            os.makedirs(out_file_fullpath)
        sio.savemat(out_file_name, {'SE': out_SE_noisy_fea})
        line = f.readline()
    f.close()
def test_multiple_streams_in_htk():
    feature_dim = 33
    context = 2

    os.chdir(data_path)

    features_file = "glob_0000.scp"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(shape=feature_dim,
                                              context=(context, context),
                                              scp=features_file),
                   amazing_features2=StreamDef(shape=feature_dim,
                                               context=(context, context),
                                               scp=features_file)))

    mbs = MinibatchSource([fd])
    mb = mbs.next_minibatch(1)
    assert (mb[mbs.streams.amazing_features].asarray() == mb[
        mbs.streams.amazing_features2].asarray()).all()
    os.chdir(abs_path)
def test_mlf_binary_files():
    os.chdir(data_path)
    feature_dim = 33
    num_classes = 132
    context = 2

    features_file = "glob_0000.scp"

    fd = HTKFeatureDeserializer(
        StreamDefs(amazing_features=StreamDef(
            shape=feature_dim, context=(context, context), scp=features_file)))

    ld = HTKMLFBinaryDeserializer(
        StreamDefs(awesome_labels=StreamDef(shape=num_classes,
                                            mlf=e2e_data_path + "mlf2.bin")))

    # Make sure we can read at least one minibatch.
    mbsource = MinibatchSource([fd, ld])
    mbsource.next_minibatch(1)

    os.chdir(abs_path)
def decode_model(features_file, irm_mat_dir, feature_dim, use_gpu=True,
                 gpu_id=0):
    """Applies model to LPS features to generate ideal ratio mask.

    Parameters
    ----------
    features_file : str
        Path to HTK script file for chunks of LPS features to be processed.

    irm_mat_dir : str
        Path to output directory for ``.mat`` files containing ideal ratio
        masks.

    feature_dim : int
        Feature dimensionality. Needed to parse HTK binary file containing
        features.

    use_gpu : bool, optional
        If True and GPU is available, perform all processing on GPU.
        (Default: True)

    gpu_id : int, optional
        Id of GPU on which to do computation.
        (Default: 0)
    """
    if not os.path.exists(irm_mat_dir):
        os.makedirs(irm_mat_dir)

    # Load model.
    with wurlitzer.pipes() as (stdout, stderr):
        try_set_default_device(gpu(gpu_id) if use_gpu else cpu())
        model_dnn = load_model(MODELF)

    # Compute ideal ratio masks for all chunks of LPS features specified in
    # the script file and save as .mat files in irm_mat_dir.
    with wurlitzer.pipes() as (stdout, stderr):
        test_reader = MinibatchSource(HTKFeatureDeserializer(
            StreamDefs(amazing_features=StreamDef(
                shape=feature_dim, context=(3, 3), scp=features_file))),
            randomize=False, frame_mode=False, trace_level=0)
    eval_input_map = {input: test_reader.streams.amazing_features}
    with open(features_file, 'r') as f:
        for line in f:
            # Parse line of script file to get id for chunk and location of
            # corresponding LPS features. Each line has the format:
            #
            #     {CHUNK_ID}={PATH_TO_HTK_BIN}[{START_FRAME_INDEX},{END_FRAME_INDEX}]
            line = line.strip()
            chunk_id, htk_bin_path, start_ind, end_ind = re.match(
                r'(\S+)=(\S+)\[(\d+),(\d+)\]$', line).groups()
            start_ind = int(start_ind)
            end_ind = int(end_ind)
            mb_size = end_ind - start_ind + 1

            # Determine IRM features for frames in chunk.
            noisy_fea = test_reader.next_minibatch(
                mb_size, input_map=eval_input_map)
            real_noisy_fea = noisy_fea[input].data
            node_name = b'irm' if PY2 else 'irm'
            node_in_graph = model_dnn.find_by_name(node_name)
            output_nodes = combine([node_in_graph.owner])
            with wurlitzer.pipes() as (stdout, stderr):
                irm = output_nodes.eval(real_noisy_fea)
            if len(irm) == 1:
                irm = irm[0]
            else:
                raise Exception("Unexpected IRM shape: " + str(np.shape(irm)))

            # Write .mat file.
            sio.savemat(os.path.join(irm_mat_dir, chunk_id + '.mat'),
                        {'IRM': irm})
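# A hypothetical invocation sketch for the decode_model function above (not
# part of the original sources): the script-file path and output directory are
# placeholders, and feature_dim=257 matches the LPS feature dimensionality
# used in the other snippets in this collection.
if __name__ == "__main__":
    decode_model(features_file="./test_normed.scp",
                 irm_mat_dir="./enhanced_irm_mat",
                 feature_dim=257,
                 use_gpu=True,
                 gpu_id=0)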
def decode_model(features_file, irm_mat_dir, feature_dim, use_gpu=True,
                 gpu_id=0, mode=1, model_select='400h', stage_select=3):
    """Applies model to LPS features to generate ideal ratio mask.

    Parameters
    ----------
    features_file : str
        Path to HTK script file for chunks of LPS features to be processed.

    irm_mat_dir : str
        Path to output directory for ``.mat`` files containing ideal ratio
        masks.

    feature_dim : int
        Feature dimensionality. Needed to parse HTK binary file containing
        features.

    use_gpu : bool, optional
        If True and GPU is available, perform all processing on GPU.
        (Default: True)

    gpu_id : int, optional
        Id of GPU on which to do computation.
        (Default: 0)
    """
    if not os.path.exists(irm_mat_dir):
        os.makedirs(irm_mat_dir)
    model_select = str(model_select)

    # Load model.
    with wurlitzer.pipes() as (stdout, stderr):
        try_set_default_device(gpu(gpu_id) if use_gpu else cpu())
        if model_select.lower() == '400h':
            MODELF = os.path.join(HERE, "model", "speech_enhancement_400h.model")
            model_dnn = load_model(MODELF)
        elif model_select.lower() == '1000h':
            MODELF = os.path.join(HERE, "model", "speech_enhancement_1000h.model")
            model_dnn = load_model(MODELF)

    # Compute ideal ratio masks for all chunks of LPS features specified in
    # the script file and save as .mat files in irm_mat_dir.
    with wurlitzer.pipes() as (stdout, stderr):
        test_reader = MinibatchSource(HTKFeatureDeserializer(
            StreamDefs(amazing_features=StreamDef(
                shape=feature_dim, context=(3, 3), scp=features_file))),
            randomize=False, frame_mode=False, trace_level=0)
    eval_input_map = {input: test_reader.streams.amazing_features}
    with open(features_file, 'r') as f:
        for line in f:
            # Parse line of script file to get id for chunk and location of
            # corresponding LPS features. Each line has the format:
            #
            #     {CHUNK_ID}={PATH_TO_HTK_BIN}[{START_FRAME_INDEX},{END_FRAME_INDEX}]
            line = line.strip()
            chunk_id, htk_bin_path, start_ind, end_ind = re.match(
                r'(\S+)=(\S+)\[(\d+),(\d+)\]$', line).groups()
            start_ind = int(start_ind)
            end_ind = int(end_ind)
            mb_size = end_ind - start_ind + 1

            # Determine IRM features for frames in chunk.
            noisy_fea = test_reader.next_minibatch(
                mb_size, input_map=eval_input_map)
            real_noisy_fea = noisy_fea[input].data
            if model_select.lower() == '400h':
                node_names = [
                    b'irm' if PY2 else 'irm',
                    b'lps' if PY2 else 'lps'
                ]
            elif model_select.lower() == '1000h':
                node_names = [
                    b'irm_s' + str(stage_select) if PY2 else 'irm_s' + str(stage_select),
                    b'lps_s' + str(stage_select) if PY2 else 'lps_s' + str(stage_select)
                ]
            else:
                utils.error('Invalid parameter of model_select!!!!!')
            outputs_dict = {}
            for node_name in node_names:
                node_in_graph = model_dnn.find_by_name(node_name)
                output_nodes = combine([node_in_graph.owner])
                with wurlitzer.pipes() as (stdout, stderr):
                    value = output_nodes.eval(real_noisy_fea)
                value = np.concatenate((value), axis=0)
                outputs_dict[node_name] = value

            if model_select.lower() == '400h':
                sio.savemat(os.path.join(irm_mat_dir, chunk_id + '.mat'),
                            {'IRM': outputs_dict['irm'],
                             'LPS': outputs_dict['lps']})
            elif model_select.lower() == '1000h':
                sio.savemat(os.path.join(irm_mat_dir, chunk_id + '.mat'),
                            {'IRM': outputs_dict['irm_s' + str(stage_select)],
                             'LPS': outputs_dict['lps_s' + str(stage_select)]})
import cntk as C
import numpy as np
from cntk.io import MinibatchSource, HTKFeatureDeserializer, StreamDef, StreamDefs
from cntk import load_model, combine
from cntk.device import gpu, try_set_default_device, cpu
from cntk.ops import as_composite
import scipy.io as sio
import os
import sys

# Select the GPU from the first command-line argument.
GPU_id = int(sys.argv[1])
try_set_default_device(gpu(GPU_id))

model_dnn = load_model("./model/speech_enhancement.model")

features_file = "./test_normed.scp"
feature_dim = 257

test_reader = MinibatchSource(HTKFeatureDeserializer(StreamDefs(
    amazing_features=StreamDef(shape=feature_dim, context=(3, 3),
                               scp=features_file))),
    randomize=False, frame_mode=False)
eval_input_map = {input: test_reader.streams.amazing_features}

f = open(features_file)
line = f.readline()
while line:
    # Each .scp line ends in "[start,end]"; the chunk's frame count is end - start + 1.
    temp_input_path = line.split(']')[0]
    mb_size = temp_input_path.split(',')[-1]
    mb_size = int(mb_size) + 1
    noisy_fea = test_reader.next_minibatch(mb_size, input_map=eval_input_map)
    real_noisy_fea = noisy_fea[input].data

    node_in_graph = model_dnn.find_by_name('irm')
    output_nodes = combine([node_in_graph.owner])

    out_noisy_fea = output_nodes.eval(real_noisy_fea)
    # out_noisy_fea = as_composite(model_dnn.output1[0].owner).eval(real_noisy_fea)