def test_lm():
    """Compare Chainer and PyTorch RNNLM predictions on one random batch.

    A tiny RNNLM is built for each backend and ``transfer_lm`` is called on
    the two predictors (presumably to make their parameters identical --
    that helper is not visible here).  The same batch of random token ids is
    then fed to both predictors and every returned state entry, as well as
    the output, must agree to a relative tolerance of 1e-5.
    """
    import numpy

    vocab_size = 3
    hidden_units = 2
    n_tokens = 5

    # Chainer model is built first, then the PyTorch one.
    rnnlm_ch = lm_chainer.ClassifierWithState(
        lm_chainer.RNNLM(vocab_size, hidden_units))
    rnnlm_th = lm_pytorch.ClassifierWithState(
        lm_pytorch.RNNLM(vocab_size, hidden_units))
    transfer_lm(rnnlm_ch.predictor, rnnlm_th.predictor)

    # TODO(karita) implement weight transfer
    # numpy.testing.assert_equal(rnnlm_ch.predictor.embed.W.data, rnnlm_th.predictor.embed.weight.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.upward.b.data, rnnlm_th.predictor.l1.bias_ih.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.upward.W.data, rnnlm_th.predictor.l1.weight_ih.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.lateral.W.data, rnnlm_th.predictor.l1.weight_hh.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.upward.b.data, rnnlm_th.predictor.l2.bias_ih.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.upward.W.data, rnnlm_th.predictor.l2.weight_ih.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.lateral.W.data, rnnlm_th.predictor.l2.weight_hh.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.lo.b.data, rnnlm_th.predictor.lo.bias.data.numpy())
    # numpy.testing.assert_equal(rnnlm_ch.predictor.lo.W.data, rnnlm_th.predictor.lo.weight.data.numpy())

    # test prediction equality
    random_ids = numpy.random.randint(vocab_size, size=(n_tokens))
    x = torch.from_numpy(random_ids).long()

    with torch.no_grad(), chainer.no_backprop_mode(), \
            chainer.using_config('train', False):
        rnnlm_th.predictor.eval()
        state_th, y_th = rnnlm_th.predictor(None, x.long())
        state_ch, y_ch = rnnlm_ch.predictor(None, x.data.numpy())

        # The Chainer state decides which entries are compared.
        for key in state_ch.keys():
            print(key)
            th_array = state_th[key].data.numpy()
            print(th_array)
            ch_array = state_ch[key].data
            print(ch_array)
            numpy.testing.assert_allclose(th_array, ch_array, rtol=1e-5)

        print("y")
        th_output = y_th.data.numpy()
        print(th_output)
        ch_output = y_ch.data
        print(ch_output)
        numpy.testing.assert_allclose(th_output, ch_output, rtol=1e-5)
def test_recognition_results_with_lm(etype, m_str, text_idx1):
    """Decode random features with constant-weight models and an RNNLM.

    For each CTC weight in (0.0, 0.5, 1.0) an E2E model from the module named
    by ``m_str`` and a small RNNLM are built, all their weights are set to the
    same constant, and one random 100x40 feature matrix is decoded.  The
    1-best hypothesis (first symbol dropped) must match the reference string
    selected by ``text_idx1`` and the position of the CTC weight.

    Args:
        etype: Encoder type forwarded to ``make_arg``.
        m_str: Importable module name; a name containing ``"_th"`` selects
            the PyTorch LM and initialiser, anything else the Chainer ones.
        text_idx1: Row index into the table of expected transcriptions.
    """
    fill_value = 1e-4
    numpy.random.seed(1)

    expected_table = [
        ["o", "iuiuiuiuiuiuiuiuo", "aiaiaiaiaiaiaiaio"],
        ["o", "uiuiuiuiuiuiuiuio", "aiaiaiaiaiaiaiaio"],
        ["o", "iuiuiuiuiuiuiuiuo", "aiaiaiaiaiaiaiaio"],
        ["o", "uiuiuiuiuiuiuiuio", "aiaiaiaiaiaiaiaio"],
    ]
    expected_row = expected_table[text_idx1]

    # Choose the LM package and the matching weight initialiser once.
    if "_th" in m_str:
        lm_backend, fill_weights = lm_pytorch, init_torch_weight_const
    else:
        lm_backend, fill_weights = lm_chainer, init_chainer_weight_const

    # ctc_weight: 0.0 (attention), 0.5 (hybrid CTC/attention), 1.0 (CTC)
    for ctc_weight, reference in zip([0.0, 0.5, 1.0], expected_row):
        args = make_arg(etype=etype, rnnlm="dummy",
                        ctc_weight=ctc_weight, lm_weight=0.3)
        module = importlib.import_module(m_str)
        model = module.Loss(module.E2E(40, 5, args), 0.5)

        # Construction order (model, LM, then features) is kept on purpose:
        # each step may draw from the seeded numpy RNG.
        rnnlm = lm_backend.ClassifierWithState(
            lm_backend.RNNLM(len(args.char_list), 2, 10))
        fill_weights(model, fill_value)
        fill_weights(rnnlm, fill_value)

        feat = numpy.random.randn(100, 40).astype(numpy.float32)
        nbest_hyps = model.predictor.recognize(
            feat, args, args.char_list, rnnlm)

        # Drop the leading symbol of the best hypothesis before decoding.
        best_ids = nbest_hyps[0]['yseq'][1:]
        decoded = "".join(args.char_list[int(i)] for i in best_ids)
        decoded = decoded.replace('<space>', ' ')

        assert decoded == reference
def recog(args):
    '''Run recognition.

    Loads the pickled training configuration and the trained model, plus an
    optional character RNNLM and an optional word RNNLM, decodes every
    utterance listed in ``args.recog_json`` and writes the results as JSON
    to ``args.result_label``.

    Args:
        args: Namespace-like object.  Attributes read here: ``seed``,
            ``model_conf``, ``model``, ``rnnlm``, ``word_rnnlm``,
            ``word_dict``, ``recog_json``, ``beam_size`` and
            ``result_label``.  The whole object is also handed to
            ``e2e.recognize``.

    Exits the process with status 1 when ``args.word_rnnlm`` is set but
    ``args.word_dict`` is not.
    '''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    # NOTE(review): pickle.load can execute arbitrary code -- only point
    # args.model_conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args)):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    char_list = train_args.char_list

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    chainer.serializers.load_npz(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(char_list), 650))
        chainer.serializers.load_npz(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(char_list)}
        word_rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(word_dict), 650))
        chainer.serializers.load_npz(args.word_rnnlm, word_rnnlm)

        # Wrap the word LM, together with the character LM when one was
        # loaded, so that the decoder sees a single LM object.
        if rnnlm is not None:
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, utt in recog_json.items():
        feat = kaldi_io_py.read_mat(utt['input'][0]['feat'])
        logging.info('decoding ' + name)
        nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm)

        # get 1best and remove sos
        y_hat = nbest_hyps[0]['yseq'][1:]
        ref_output = utt['output'][0]
        y_true = [int(tok) for tok in ref_output['tokenid'].split()]

        # print out decoding result
        seq_hat = [char_list[int(idx)] for idx in y_hat]
        seq_true = [char_list[idx] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # The transcripts are passed as arguments rather than concatenated
        # into the format string, so a literal '%' in the text cannot break
        # the log record.
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info
        new_json[name] = dict()
        new_json[name]['utt2spk'] = utt['utt2spk']

        # add 1-best recognition results to json
        logging.debug("dump token id")
        out_dic = dict(ref_output)  # shallow copy of the reference entry

        # TODO(karita) make consistent to chainer as idx[0] not idx
        out_dic['rec_tokenid'] = " ".join([str(idx[0]) for idx in y_hat])
        logging.debug("dump token")
        out_dic['rec_token'] = " ".join(seq_hat)
        logging.debug("dump text")
        out_dic['rec_text'] = seq_hat_text

        new_json[name]['output'] = [out_dic]

        # TODO(nelson): Modify this part when saving more than 1 hyp is enabled
        # add n-best recognition results with scores
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                suffix = '[{:05d}]'.format(i)
                y_hat = hyp['yseq'][1:]
                seq_hat = [char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx[0]) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_json
            }, indent=4, sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition.

    Loads the pickled training configuration, the trained model and an
    optional RNNLM, decodes every utterance yielded by the Kaldi ark reader
    on ``args.recog_feat`` and writes the label JSON, extended with the
    recognition results, to ``args.result_label``.

    Args:
        args: Namespace-like object.  Attributes read here: ``seed``,
            ``model_conf``, ``model``, ``rnnlm``, ``recog_feat``,
            ``recog_label``, ``beam_size`` and ``result_label``.  The whole
            object is also handed to ``e2e.recognize``.
    '''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    # NOTE(review): pickle.load can execute arbitrary code -- only point
    # args.model_conf at files from a trusted source.
    with open(args.model_conf, "rb") as f:
        logging.info('reading a model config file from ' + args.model_conf)
        idim, odim, train_args = pickle.load(f)

    for key in sorted(vars(args)):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    char_list = train_args.char_list

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    chainer.serializers.load_npz(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(char_list), 650))
        chainer.serializers.load_npz(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    # prepare Kaldi reader
    reader = kaldi_io_py.read_mat_ark(args.recog_feat)

    # read json data
    with open(args.recog_label, 'rb') as f:
        recog_json = json.load(f)['utts']

    new_json = {}
    for name, feat in reader:
        logging.info('decoding ' + name)
        if args.beam_size == 1:
            # With beam size 1 the result is used directly as the token
            # sequence; nbest_hyps is never bound on this path.
            y_hat = e2e.recognize(feat, args, char_list, rnnlm)
        else:
            nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm)
            # get 1best and remove sos
            y_hat = nbest_hyps[0]['yseq'][1:]

        y_true = [int(tok) for tok in recog_json[name]['tokenid'].split()]

        # print out decoding result
        seq_hat = [char_list[int(idx)] for idx in y_hat]
        seq_true = [char_list[idx] for idx in y_true]
        seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
        seq_true_text = "".join(seq_true).replace('<space>', ' ')
        # The transcripts are passed as arguments rather than concatenated
        # into the format string, so a literal '%' in the text cannot break
        # the log record.
        logging.info("groundtruth[%s]: %s", name, seq_true_text)
        logging.info("prediction [%s]: %s", name, seq_hat_text)

        # copy old json info (same dict object as in recog_json, not a copy)
        new_json[name] = recog_json[name]

        # add 1-best recognition results to json
        new_json[name]['rec_tokenid'] = " ".join(
            [str(idx[0]) for idx in y_hat])
        new_json[name]['rec_token'] = " ".join(seq_hat)
        new_json[name]['rec_text'] = seq_hat_text

        # add n-best recognition results with scores
        # (the beam_size test short-circuits, so nbest_hyps is only read on
        # the path where it was assigned above)
        if args.beam_size > 1 and len(nbest_hyps) > 1:
            for i, hyp in enumerate(nbest_hyps):
                suffix = '[{:05d}]'.format(i)
                y_hat = hyp['yseq'][1:]
                seq_hat = [char_list[int(idx)] for idx in y_hat]
                seq_hat_text = "".join(seq_hat).replace('<space>', ' ')
                new_json[name]['rec_tokenid' + suffix] = \
                    " ".join([str(idx[0]) for idx in y_hat])
                new_json[name]['rec_token' + suffix] = " ".join(seq_hat)
                new_json[name]['rec_text' + suffix] = seq_hat_text
                new_json[name]['score' + suffix] = hyp['score']

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_json
            }, indent=4, sort_keys=True).encode('utf_8'))
def recog(args):
    '''Run recognition.

    Reads the training configuration through ``get_model_conf``, loads the
    trained model plus an optional character RNNLM and an optional word
    RNNLM, decodes every utterance listed in ``args.recog_json`` and writes
    the results as JSON to ``args.result_label``.

    Args:
        args: Namespace-like object.  Attributes read here: ``seed``,
            ``model``, ``model_conf``, ``rnnlm``, ``rnnlm_conf``,
            ``word_rnnlm``, ``word_dict``, ``recog_json`` and
            ``result_label``.  The whole object is also handed to
            ``e2e.recognize``.

    Exits the process with status 1 when ``args.word_rnnlm`` is set but
    ``args.word_dict`` is not.
    '''
    # display chainer version
    logging.info('chainer version = ' + chainer.__version__)

    # seed setting (chainer seed may not need it)
    os.environ["CHAINER_SEED"] = str(args.seed)
    logging.info('chainer seed = ' + os.environ['CHAINER_SEED'])

    # read training config
    idim, odim, train_args = get_model_conf(args.model, args.model_conf)

    for key in sorted(vars(args)):
        logging.info('ARGS: ' + key + ': ' + str(vars(args)[key]))

    char_list = train_args.char_list

    # specify model architecture
    logging.info('reading model parameters from ' + args.model)
    e2e = E2E(idim, odim, train_args)
    model = Loss(e2e, train_args.mtlalpha)
    chainer_load(args.model, model)

    # read rnnlm
    if args.rnnlm:
        rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf)
        rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(char_list), rnnlm_args.unit))
        chainer_load(args.rnnlm, rnnlm)
    else:
        rnnlm = None

    if args.word_rnnlm:
        if not args.word_dict:
            logging.error(
                'word dictionary file is not specified for the word RNNLM.')
            sys.exit(1)

        # NOTE(review): the word RNNLM reuses args.rnnlm_conf, the same
        # option as the character RNNLM above -- confirm this is intended
        # when both models are given.
        rnnlm_args = get_model_conf(args.word_rnnlm, args.rnnlm_conf)
        word_dict = load_labeldict(args.word_dict)
        char_dict = {x: i for i, x in enumerate(char_list)}
        word_rnnlm = lm_chainer.ClassifierWithState(
            lm_chainer.RNNLM(len(word_dict), rnnlm_args.unit))
        chainer_load(args.word_rnnlm, word_rnnlm)

        # Wrap the word LM, together with the character LM when one was
        # loaded, so that the decoder sees a single LM object.
        if rnnlm is not None:
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.MultiLevelLM(word_rnnlm.predictor,
                                           rnnlm.predictor,
                                           word_dict, char_dict))
        else:
            rnnlm = lm_chainer.ClassifierWithState(
                extlm_chainer.LookAheadWordLM(word_rnnlm.predictor,
                                              word_dict, char_dict))

    # read json data
    with open(args.recog_json, 'rb') as f:
        js = json.load(f)['utts']

    # decode each utterance
    new_js = {}
    n_utts = len(js)
    with chainer.no_backprop_mode():
        for idx, (name, utt) in enumerate(js.items(), 1):
            # The utterance id is passed as an argument rather than
            # concatenated into the format string, so a literal '%' in the
            # id cannot break the log record.
            logging.info('(%d/%d) decoding %s', idx, n_utts, name)
            feat = kaldi_io_py.read_mat(utt['input'][0]['feat'])
            nbest_hyps = e2e.recognize(feat, args, char_list, rnnlm)
            new_js[name] = add_results_to_json(utt, nbest_hyps, char_list)

    # TODO(watanabe) fix character coding problems when saving it
    with open(args.result_label, 'wb') as f:
        f.write(
            json.dumps({
                'utts': new_js
            }, indent=4, sort_keys=True).encode('utf_8'))