def main(args):
    """Feed a dataset through a trained deep bidirectional LSTM and write
    per-utterance log-probability matrices with a Kaldi ark writer.

    Expects ``args`` to carry: input/output dims, network size, ``model``
    (pickled parameters, required), data-stream options and ``wxfilename``.
    Exits with status 1 if no model file is given.
    """
    if args.use_ivectors:
        # i-vectors are concatenated to the acoustic features, so the
        # network input dimension grows accordingly.
        args.input_dim = args.input_dim + args.ivector_dim

    # Symbolic Theano inputs: (batch, time, feat) features and a (batch, time)
    # validity mask.
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')

    network = models.deep_bidir_lstm_alex(
        input_var=input_data,
        mask_var=input_mask,
        input_dim=args.input_dim,
        num_units_list=[args.num_nodes] * args.num_layers,
        output_dim=args.output_dim)
    network_params = get_all_params(network, trainable=True)

    print('Loading Parameters...', file=sys.stderr)
    if args.model:
        with open(args.model, 'rb') as f:
            pretrain_network_params_val, pretrain_update_params_val, \
                pretrain_total_epoch_cnt = pickle.load(f)
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        # Fix: corrected the "specfiy" typo in the error message.
        print('Must specify network to load', file=sys.stderr)
        sys.exit(1)

    # Compiled feed-forward function.
    ff_fn = ff(input_data=input_data, input_mask=input_mask, network=network)

    feat_stream = fuel_utils.get_feat_stream(
        args.data_path, args.dataset, args.batch_size,
        use_ivectors=args.use_ivectors)
    uttid_stream = fuel_utils.get_uttid_stream(
        args.data_path, args.dataset, args.batch_size)

    writer = kaldi_io.BaseFloatMatrixWriter(args.wxfilename)
    try:
        for batch_idx, (feat_batch, uttid_batch) in enumerate(
                zip(feat_stream.get_epoch_iterator(),
                    uttid_stream.get_epoch_iterator())):
            input_data, input_mask = feat_batch
            # Number of valid (unpadded) frames per utterance.
            feat_lens = input_mask.sum(axis=1)

            print('Feed-forwarding...', file=sys.stderr)
            net_output = ff_fn(input_data, input_mask)

            print('Writing outputs...', file=sys.stderr)
            for out_idx, (output, uttid) in enumerate(
                    zip(net_output[0], uttid_batch[0])):
                # Fix: the mask sum is a float; slicing with a float index is
                # rejected by modern NumPy, so cast to int explicitly.
                valid_len = int(feat_lens[out_idx])
                # Write log-probabilities, truncated to the valid length.
                writer.write(uttid.encode('ascii'),
                             numpy.log(output[:valid_len]))
    finally:
        # Fix: close the ark writer even if feed-forward/writing fails.
        writer.close()
def main(options):
    """Feed a dataset through a network built by ``build_network`` and write
    per-utterance output matrices with a Kaldi ark writer.

    ``options`` is a dict carrying network hyper-parameters, data-stream
    settings, ``reload_model`` (pickled parameters, required) and
    ``save_path`` (ark wspecifier). Exits with status 1 without a model.
    """
    # Symbolic Theano inputs. target_data/target_mask are declared for
    # interface parity with training code but are unused here.
    input_data = T.ftensor3('input_data')
    input_mask = T.fmatrix('input_mask')
    target_data = T.imatrix('target_data')
    target_mask = T.fmatrix('target_mask')

    network = build_network(input_data=input_data,
                            input_mask=input_mask,
                            num_inputs=options['num_inputs'],
                            num_units_list=options['num_units_list'],
                            num_outputs=options['num_outputs'],
                            dropout_ratio=options['dropout_ratio'],
                            use_layer_norm=options['use_layer_norm'],
                            learn_init=True,
                            grad_clipping=1.0)
    network_params = get_all_params(network, trainable=True)

    if options['reload_model']:
        print('Loading model...')
        # Fix: the inline open(...) leaked its file handle; load inside a
        # context manager instead.
        with open(options['reload_model'], 'rb') as f:
            (pretrain_network_params_val,
             pretrain_update_params_val,
             pretrain_total_batch_cnt) = pickle.load(f)
        set_model_param_value(network_params, pretrain_network_params_val)
    else:
        # Fix: Python 2 print statement -> print() call (the function already
        # uses print() above, so the old form was a syntax error in any
        # single interpreter); also corrected the "specfiy" typo.
        print('Must specify network to load')
        sys.exit(1)

    # Compiled feed-forward function.
    ff_fn = ff(input_data=input_data, input_mask=input_mask, network=network)

    feat_stream = get_feat_stream(options['data_path'], options['dataset'],
                                  options['batch_size'])
    uttid_stream = get_uttid_stream(options['data_path'], options['dataset'],
                                    options['batch_size'])

    writer = kaldi_io.BaseFloatMatrixWriter(options['save_path'])
    try:
        for batch_idx, (feat_batch, uttid_batch) in enumerate(
                zip(feat_stream.get_epoch_iterator(),
                    uttid_stream.get_epoch_iterator())):
            # Fix: Python 2 print statement -> print() call.
            print('Processing batch {}'.format(batch_idx))
            input_data, input_mask = feat_batch
            net_output = ff_fn(input_data, input_mask)
            for output, uttid in zip(net_output[0], uttid_batch[0]):
                writer.write(uttid.encode('ascii'), output)
    finally:
        # Fix: close the ark writer even on error.
        writer.close()
# Feature-padding script chunk.
# NOTE(review): this chunk starts mid-script — `args`, `padding`, and
# presumably an earlier `padding_left = padding` default are defined above
# this view; the loop body that fills `padded` continues below it. Kept
# byte-identical (reformatted only), with comments added.
if args.padding_left is not None:
    padding_left = int(args.padding_left)
# Default the right padding to the shared `padding` value, then allow an
# explicit override.
padding_right = padding
if args.padding_right is not None:
    padding_right = int(args.padding_right)
if padding_left < 0 or padding_right < 0:
    logging.error("Padding can't be negative!")
    sys.exit(1)
count = 0
logging.info("Padding with %d in the left and %d on the right",
             padding_left, padding_right)
# should use with, but if something happens the files will get closed anyways
reader = kaldi_io.SequentialBaseFloatMatrixReader(args.in_rxfilename)
writer = kaldi_io.BaseFloatMatrixWriter(args.out_wxfilename)
size_writer = None
if args.orig_size_wxfilename is not None:
    # Optionally record the original (pre-padding) sizes.
    size_writer = kaldi_io.PythonWriter(args.orig_size_wxfilename)
for name, value in reader:
    count += 1
    if padding_left + padding_right == 0:
        # Nothing to do: pass the matrix through unchanged.
        padded = value
    else:
        num_frames, frame_dim = value.shape
        # Allocate the enlarged matrix; it is filled below this chunk.
        padded = np.empty(shape=(num_frames + padding_left + padding_right,
                                 frame_dim),
                          dtype=value.dtype)
# NOTE(review): this chunk starts mid-script — `config`, `keras`, `tf`,
# `np`, `kaldi_io` and the signal names are set up above this view.
config.inter_op_parallelism_threads = 1
keras.backend.tensorflow_backend.set_session(tf.Session(config=config))

if __name__ == '__main__':
    # Usage: <model.h5> <left_context> <right_context>; features arrive on
    # stdin as a Kaldi ark and log-probs are written to stdout as text ark.
    model = sys.argv[1]
    left_context = int(sys.argv[2])
    right_context = int(sys.argv[3])
    if not model.endswith('.h5'):
        raise TypeError(
            'Unsupported model type. Please use h5 format. Update Keras if needed'
        )
    m = keras.models.load_model(model)
    with kaldi_io.SequentialBaseFloatMatrixReader("ark:-") as arkIn, \
            kaldi_io.BaseFloatMatrixWriter("ark,t:-") as arkOut:
        # Die quietly if the downstream pipe closes.
        signal(SIGPIPE, SIG_DFL)
        for utt, utt_feats in arkIn:
            num_frames = utt_feats.shape[0]
            feats = np.zeros((num_frames + left_context + right_context,
                              utt_feats.shape[1]))
            # Fix: the original used feats[-right_context:] and
            # feats[left_context:-right_context]. With right_context == 0,
            # feats[-0:] is the WHOLE array (clobbering every row with the
            # last frame) and feats[left_context:-0] is EMPTY. Use explicit
            # end indices and guard the edge-replication on zero contexts.
            if left_context:
                feats[:left_context, :] = utt_feats[0]
            if right_context:
                feats[left_context + num_frames:, :] = utt_feats[-1]
            feats[left_context:left_context + num_frames, :] = utt_feats
            feats = np.expand_dims(feats, 0)
            logProbMat = np.log(m.predict(feats)[0])
            # Replace -inf (log of an exact zero probability) with a large
            # negative but finite value.
            logProbMat[logProbMat == -np.inf] = -100
            arkOut.write(utt, logProbMat)
def main():
    """Run the model over a test subset and dump per-utterance per-frame
    log-probability matrices to a Kaldi ark writer.

    NOTE(review): the source arrived with all indentation collapsed; the
    nesting of the imitate_biphones / block_normalize / block_marginalize
    branches below is a reconstruction — confirm against the original
    formatting. Tokens are otherwise unchanged.
    """
    parser = get_parser()
    args = parser.parse_args()
    config, model = get_config_and_model(args)
    dataset = config['Datasets'][args.subset]
    # Remove the training graph generator from the dataset. The testing
    # data may have characters for which we don't have CD symbols
    # and the graphs are not needed to compute the logits
    dataset.dataset.graph_gen = None
    owriter = kaldi_io.BaseFloatMatrixWriter(args.out_wspec)
    for j, batch in enumerate(dataset):
        sys.stderr.write("Processing batch %d/%d\n" % (j + 1, len(dataset)))
        feature_lens = Variable(batch['features'][1])
        features = Variable(batch['features'][0])
        speakers = batch['spkids']
        if Globals.cuda:
            features = features.cuda()
        with torch.no_grad():
            encoded, encoded_lens = model.encoder(features, feature_lens,
                                                  speakers)
            # t x bsz x num_classes
            logprobs = model.decoder.logits(encoded, encoded_lens)
            logprobs = logprobs.data.cpu().numpy()
        # transfer probability mass from hash `#` to blank `<pad>`
        if args.transfer_hash_prob:
            # NOTE(review): indices 0 and 3 are presumably the blank/<pad>
            # and `#` symbols in the decoder's alphabet — verify upstream.
            blank_probs = np.exp(logprobs[:, :, 0])
            hash_probs = np.exp(logprobs[:, :, 3])
            blank_probs += hash_probs - EPSILON
            # The hash class is left with a tiny residual probability.
            hash_probs = EPSILON
            logprobs[:, :, 0] = np.log(blank_probs)
            logprobs[:, :, 3] = np.log(hash_probs)
        t, bsz, num_classes = logprobs.shape
        if args.imitate_biphones:
            # Replicate the monophone scores into a num_classes**2 grid so
            # downstream biphone code can consume them.
            logprobs = np.tile(logprobs, (1, 1, num_classes))
            num_classes = num_classes**2
            if not args.block_normalize:
                num_mono = int(np.round(num_classes**0.5))
                z = np.exp(logprobs).sum(axis=2, keepdims=True)
                # This epsilon has to be really tiny,
                # otherwise not normalizes properly
                logprobs -= np.log(z + EPSILON)
            elif args.block_normalize:
                # Normalize within each num_mono-sized block instead of over
                # the full tiled distribution.
                num_mono = int(np.round(num_classes**0.5))
                z = np.exp(logprobs).reshape(t, bsz, num_mono, num_mono)
                z = z.sum(axis=3).repeat(num_mono, axis=2)
                logprobs -= np.log(z + EPSILON)
        elif args.block_marginalize:
            print("Block-marginalizing probabilities.")
            num_symbols = int(np.round(num_classes**0.5))
            probs = np.exp(logprobs)
            # Average out one factor of the (symbol x symbol) grid.
            probs = (
                probs.reshape(t, bsz, num_symbols, num_symbols).sum(axis=2)
                / num_symbols)
            logprobs = np.log(probs)
        assert not np.any(np.isnan(logprobs))
        # Write utterances in sorted-uttid order; truncate each matrix to
        # its encoded length.
        for i in np.argsort(batch['uttids']):
            example_len = encoded_lens[i]
            owriter[batch['uttids'][i]] = logprobs[:example_len, i, :]
# NOTE(review): this chunk starts mid-script — `nnet`, `args`, `feats`
# (the base rspecifier string), `splice`, `srcdir` and `logger` are defined
# above this view.
# Fix: read the model path via a context manager instead of leaking the file
# handle from open(...).read().
with open(args.model_file, 'r') as model_file:
    model_name = model_file.read()
nnet.read(model_name)

if args.prior_counts is not None:
    # Class priors used to turn posteriors into scaled log-likelihoods.
    prior_counts = np.genfromtxt(args.prior_counts)
    priors = prior_counts / prior_counts.sum()
    log_priors = np.log(priors)

# here we are doing context window and feature normalization
feats += ' splice-feats --print-args=false --left-context=' + str(splice) + \
    ' --right-context=' + str(splice) + ' ark:- ark:-|'
feats += ' apply-cmvn --print-args=false --norm-vars=true ' + srcdir + '/cmvn.mat ark:- ark:- |'

count = 0
reader = kaldi_io.SequentialBaseFloatMatrixReader(feats)
writer = kaldi_io.BaseFloatMatrixWriter('ark:-')
for uid, feats in reader:
    nnet_out = nnet.predict(feats, no_softmax=args.no_softmax)
    if args.apply_log:
        nnet_out = np.log(nnet_out)
    if args.prior_counts is not None:
        # Subtract log-priors: posterior -> scaled likelihood.
        log_likes = nnet_out - log_priors
        nnet_out = log_likes
    writer.write(uid, nnet_out)
    count += 1
    if args.verbose and count % 10 == 0:
        logger.info("LOG (nnet_forward.py) %d utterances processed" % count)
def main():
    """Extract complex spectrogram features for every entry of a wav.scp and
    write them as Kaldi float matrices.

    Complex values are emitted either as stacked (real, imaginary) or
    (magnitude, phase) matrices depending on --complex-format.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config', type=str, default=None,
                        help='config file (Kaldi format)')
    parser.add_argument(
        '--frame-length', type=float, default=25,
        help='Frame length in milliseconds (float, default = 25)')
    parser.add_argument(
        '--frame-shift', type=float, default=10,
        help='Frame shift in milliseconds (float, default = 10)')
    parser.add_argument(
        '--window-type', type=str, default='hamming',
        help=
        'Type of window ("hamming"|"hanning") (string, default = "hamming")')
    parser.add_argument(
        '--complex-format', type=str, default='real-imaginary',
        help='Format of complex numbers ("real-imaginary"|"magnitude-phase") '
        + '(string, default = "real-imaginary")')
    parser.add_argument('wav_scp', metavar='IN', type=str,
                        help='WAV scp files (do not accept command line)')
    parser.add_argument('feats_wspecifier', metavar='OUT', type=str,
                        help='<feats-wspecifier>')
    args = parser.parse_args()

    # config parser without section
    if args.config is not None:
        # Fix: read the config through a context manager (the inline open()
        # leaked its handle).
        with open(args.config, 'r') as cfg:
            ini_str = '[root]\n' + cfg.read()
        ini_str = ini_str.replace('--', '').replace(
            '-', '_')  # remove '--' in the kaldi config
        ini_fp = StringIO.StringIO(ini_str)
        config = ConfigParser.RawConfigParser()
        config.readfp(ini_fp)
        # set config file values as defaults
        parser.set_defaults(**dict(config.items('root')))
        args = parser.parse_args()

    # logging info
    logging.basicConfig(
        level=logging.INFO,
        format="%(asctime)s (%(module)s:%(lineno)d) %(levelname)s: %(message)s"
    )
    for arg in vars(args):
        logging.info(arg + ": " + str(getattr(args, arg)))

    with open(args.wav_scp, 'r') as f:
        scp = [x.split() for x in f.readlines()]  # list of [utt_id, wav_name]

    writer = kaldi_io.BaseFloatMatrixWriter(args.feats_wspecifier)
    for x in scp:
        if len(x) != 2:
            sys.exit("wav.scp must be (utt_id, WAV)")
        (rate, sig) = wav.read(x[1])
        feat = cspec(sig, samplerate=rate)
        # Fix: the original compared strings with `is`, which tests object
        # identity and is generally False for argparse-supplied values, so
        # both branches were skipped and the script always exited. Use `==`.
        if args.complex_format == 'real-imaginary':
            feat = np.hstack((feat.real, feat.imag))
        elif args.complex_format == 'magnitude-phase':
            # Fix: ndarrays have no `.absolute`/`.angles` attributes (this
            # branch raised AttributeError); use the NumPy functions.
            feat = np.hstack((np.absolute(feat), np.angle(feat)))
        else:
            sys.exit("do not support a complex number format of " +
                     args.complex_format)
        writer.write(x[0], feat)
def start(self):
    """Open the Kaldi ark writer ('ark:<filename>') this object streams
    float matrices to, storing it on ``self.owriter``."""
    wspecifier = 'ark:' + self.filename
    self.owriter = kaldi_io.BaseFloatMatrixWriter(wspecifier)