truncate_len = 20
data_test = TruncatedSentenceIter(test_sets, batch_size, init_states,
                                  truncate_len, feat_dim=feat_dim,
                                  do_shuffling=False, pad_zeros=True,
                                  has_label=True)
sym = lstm_unroll(num_lstm_layer, truncate_len, feat_dim,
                  num_hidden=num_hidden, num_label=label_dim,
                  output_states=True, num_hidden_proj=num_hidden_proj)
data_names = [x[0] for x in data_test.provide_data]
label_names = ['softmax_label']
module = mx.mod.Module(sym, context=contexts, data_names=data_names,
                       label_names=label_names)

# set the parameters: bind for inference only, then load the trained weights
module.bind(data_shapes=data_test.provide_data, label_shapes=None,
            for_training=False)
module.set_params(arg_params=arg_params, aux_params=aux_params)

# write the network outputs as a Kaldi scp/ark pair
kaldiWriter = KaldiWriteOut(out_dir + "/feats.scp", out_dir + "/feats.ark")
kaldiWriter.open()
for preds, i_batch, batch in module.iter_predict(data_test):
    label = batch.label[0].asnumpy().astype('int32')
    posteriors = preds[0].asnumpy().astype('float32')
    # copy over states
    if decoding_method == METHOD_BUCKETING:
        for (ind, utt) in enumerate(batch.utt_id):
            if utt != "GAP_UTT":
                # truncate to the utterance length, drop the first class
                # column, and subtract the log label priors to turn the
                # posteriors into scaled log-likelihoods (the 1e-20 floor
                # avoids log(0))
                posteriors = (np.log(posteriors[:label[0][0], 1:] + 1e-20)
                              - np.log(data_train.label_mean).T)
                kaldiWriter.write(utt, posteriors)
    elif decoding_method == METHOD_SIMPLE:
        for (ind, utt) in enumerate(batch.utt_id):
            if utt != "GAP_UTT":
                # assumed completion: write this utterance's posteriors,
                # truncated the same way, without the prior division
                kaldiWriter.write(utt, posteriors[:label[0][0], 1:])
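# --- A minimal, self-contained sketch of the prior-division step above ---
# (toy shapes and hypothetical values; not part of the pipeline). Hybrid
# NN/HMM decoders expect scaled log-likelihoods, log p(x|s) = log p(s|x)
# - log p(s) + const, so the decode loop subtracts the log label priors
# (label_mean) from the log softmax posteriors.
import numpy as np

T, num_classes = 4, 3                                     # toy sizes
post = np.random.dirichlet(np.ones(num_classes), size=T).astype('float32')
prior = np.array([[0.5], [0.3], [0.2]], dtype='float32')  # class counts / total
loglik = np.log(post + 1e-20) - np.log(prior).T           # broadcasts to (T, num_classes)
assert loglik.shape == (T, num_classes)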
init_states, test_sets = prepare_data(args)
state_names = [x[0] for x in init_states]

batch_size = args.config.getint('train', 'batch_size')
num_hidden = args.config.getint('arch', 'num_hidden')
num_lstm_layer = args.config.getint('arch', 'num_lstm_layer')
feat_dim = args.config.getint('data', 'xdim')
label_dim = args.config.getint('data', 'ydim')
out_file = args.config.get('data', 'out_file')
num_epoch = args.config.getint('train', 'num_epoch')
model_name = get_checkpoint_path(args)
logging.basicConfig(level=logging.DEBUG, format='%(asctime)-15s %(message)s')

# accumulate per-class label counts over the test set; these become the
# label priors ("label_mean") used for the prior division at decode time
label_mean = np.zeros((label_dim, 1), dtype='float32')
data_test = TruncatedSentenceIter(test_sets, batch_size, init_states, 20,
                                  feat_dim=feat_dim, do_shuffling=False,
                                  pad_zeros=True, has_label=True)
for i, batch in enumerate(data_test.labels):
    hist, edges = np.histogram(batch.flat, bins=range(0, label_dim + 1))
    label_mean += hist.reshape(label_dim, 1)

kaldiWriter = KaldiWriteOut(None, out_file)
kaldiWriter.open_or_fd()
kaldiWriter.write("label_mean", label_mean)

args.config.write(sys.stderr)
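# --- A minimal sketch of the label-count accumulation above ---
# (toy data and hypothetical sizes). np.histogram with integer bin edges
# 0..label_dim counts how often each label id occurs in a batch; summing
# over batches gives the per-class counts written out as "label_mean".
import numpy as np

label_dim = 4                                   # toy number of classes
label_mean = np.zeros((label_dim, 1), dtype='float32')
toy_batches = [np.array([[0, 1, 1], [2, 3, 1]]),  # stand-in label batches
               np.array([[3, 3, 0]])]
for batch in toy_batches:
    hist, _ = np.histogram(batch.flat, bins=range(0, label_dim + 1))
    label_mean += hist.reshape(label_dim, 1)

print(label_mean.ravel())   # per-class counts: [2. 3. 1. 3.]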