def test_recognition_results_with_lm(etype, m_str, text_idx1): const = 1e-4 numpy.random.seed(1) seq_true_texts = [["o", "iuiuiuiuiuiuiuiuo", "aiaiaiaiaiaiaiaio"], ["o", "uiuiuiuiuiuiuiuio", "aiaiaiaiaiaiaiaio"], ["o", "iuiuiuiuiuiuiuiuo", "aiaiaiaiaiaiaiaio"], ["o", "uiuiuiuiuiuiuiuio", "aiaiaiaiaiaiaiaio"]] # ctc_weight: 0.0 (attention), 0.5 (hybrid CTC/attention), 1.0 (CTC) for text_idx2, ctc_weight in enumerate([0.0, 0.5, 1.0]): seq_true_text = seq_true_texts[text_idx1][text_idx2] args = make_arg(etype=etype, rnnlm="dummy", ctc_weight=ctc_weight, lm_weight=0.3) m = importlib.import_module(m_str) model = m.Loss(m.E2E(40, 5, args), 0.5) if "_th" in m_str: rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(args.char_list), 2, 10)) init_torch_weight_const(model, const) init_torch_weight_const(rnnlm, const) else: rnnlm = lm_chainer.ClassifierWithState( lm_chainer.RNNLM(len(args.char_list), 2, 10)) init_chainer_weight_const(model, const) init_chainer_weight_const(rnnlm, const) data = [("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32), token=seq_true_text))] in_data = data[0][1]["feat"] nbest_hyps = model.predictor.recognize(in_data, args, args.char_list, rnnlm) y_hat = nbest_hyps[0]['yseq'][1:] seq_hat = [args.char_list[int(idx)] for idx in y_hat] seq_hat_text = "".join(seq_hat).replace('<space>', ' ') seq_true_text = data[0][1]["token"] assert seq_hat_text == seq_true_text
def test_lm(): n_vocab = 3 n_layers = 2 n_units = 2 batchsize = 5 rnnlm_ch = lm_chainer.ClassifierWithState( lm_chainer.RNNLM(n_vocab, n_layers, n_units)) rnnlm_th = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(n_vocab, n_layers, n_units)) transfer_lm(rnnlm_ch.predictor, rnnlm_th.predictor) import numpy # TODO(karita) implement weight transfer # numpy.testing.assert_equal(rnnlm_ch.predictor.embed.W.data, rnnlm_th.predictor.embed.weight.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.upward.b.data, rnnlm_th.predictor.l1.bias_ih.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.upward.W.data, rnnlm_th.predictor.l1.weight_ih.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l1.lateral.W.data, rnnlm_th.predictor.l1.weight_hh.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.upward.b.data, rnnlm_th.predictor.l2.bias_ih.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.upward.W.data, rnnlm_th.predictor.l2.weight_ih.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.l2.lateral.W.data, rnnlm_th.predictor.l2.weight_hh.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.lo.b.data, rnnlm_th.predictor.lo.bias.data.numpy()) # numpy.testing.assert_equal(rnnlm_ch.predictor.lo.W.data, rnnlm_th.predictor.lo.weight.data.numpy()) # test prediction equality x = torch.from_numpy(numpy.random.randint(n_vocab, size=(batchsize))).long() with torch.no_grad(), chainer.no_backprop_mode(), chainer.using_config( 'train', False): rnnlm_th.predictor.eval() state_th, y_th = rnnlm_th.predictor(None, x.long()) state_ch, y_ch = rnnlm_ch.predictor(None, x.data.numpy()) for k in state_ch.keys(): for n in range(len(state_th[k])): print(k, n) print(state_th[k][n].data.numpy()) print(state_ch[k][n].data) numpy.testing.assert_allclose(state_th[k][n].data.numpy(), state_ch[k][n].data, 1e-5) print("y") print(y_th.data.numpy()) print(y_ch.data) numpy.testing.assert_allclose(y_th.data.numpy(), y_ch.data, 1e-5)
def test_batch_beam_search(etype, m_str): const = 1e-4 numpy.random.seed(1) # ctc_weight: 0.0 (attention), 0.5 (hybrid CTC/attention), 1.0 (CTC) for ctc_weight in [0.0]: args = make_arg(etype=etype, rnnlm="dummy", ctc_weight=ctc_weight, lm_weight=0.3) m = importlib.import_module(m_str) model = m.Loss(m.E2E(40, 5, args), 0.5) if "_th" in m_str: rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(args.char_list), 2, 10)) init_torch_weight_const(model, const) init_torch_weight_const(rnnlm, const) else: # chainer module continue data = [("aaa", dict(feat=numpy.random.randn(100, 40).astype(numpy.float32)))] in_data = data[0][1]["feat"] for lm_weight in [0.0, 0.3]: if lm_weight == 0.0: s_nbest_hyps = model.predictor.recognize( in_data, args, args.char_list) b_nbest_hyps = model.predictor.recognize_batch([in_data], args, args.char_list) else: s_nbest_hyps = model.predictor.recognize( in_data, args, args.char_list, rnnlm) b_nbest_hyps = model.predictor.recognize_batch([in_data], args, args.char_list, rnnlm) assert s_nbest_hyps[0]['yseq'] == b_nbest_hyps[0][0]['yseq']
def recog(args): '''Run recognition''' # seed setting torch.manual_seed(args.seed) # read training config idim, odim, train_args = get_model_conf(args.model, args.model_conf) # load trained model parameters logging.info('reading model parameters from ' + args.model) e2e = E2E(idim, odim, train_args) model = Loss(e2e, train_args.mtlalpha) torch_load(args.model, model) e2e.recog_args = args # read rnnlm if args.rnnlm: rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf) rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(train_args.char_list), rnnlm_args.layer, rnnlm_args.unit)) torch_load(args.rnnlm, rnnlm) rnnlm.eval() else: rnnlm = None if args.word_rnnlm: rnnlm_args = get_model_conf(args.word_rnnlm, args.word_rnnlm_conf) word_dict = rnnlm_args.char_list_dict char_dict = {x: i for i, x in enumerate(train_args.char_list)} word_rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(word_dict), rnnlm_args.layer, rnnlm_args.unit)) torch_load(args.word_rnnlm, word_rnnlm) word_rnnlm.eval() if rnnlm is not None: rnnlm = lm_pytorch.ClassifierWithState( extlm_pytorch.MultiLevelLM(word_rnnlm.predictor, rnnlm.predictor, word_dict, char_dict)) else: rnnlm = lm_pytorch.ClassifierWithState( extlm_pytorch.LookAheadWordLM(word_rnnlm.predictor, word_dict, char_dict)) # gpu if args.ngpu == 1: gpu_id = range(args.ngpu) logging.info('gpu id: ' + str(gpu_id)) model.cuda() if rnnlm: rnnlm.cuda() # read json data with open(args.recog_json, 'rb') as f: js = json.load(f)['utts'] new_js = {} if args.batchsize == 0: with torch.no_grad(): for idx, name in enumerate(js.keys(), 1): logging.info('(%d/%d) decoding ' + name, idx, len(js.keys())) feat = kaldi_io_py.read_mat(js[name]['input'][0]['feat']) nbest_hyps = e2e.recognize(feat, args, train_args.char_list, rnnlm) new_js[name] = add_results_to_json(js[name], nbest_hyps, train_args.char_list) else: try: from itertools import zip_longest as zip_longest except Exception: from itertools import izip_longest as zip_longest def grouper(n, iterable, fillvalue=None): kargs = [iter(iterable)] * n return zip_longest(*kargs, fillvalue=fillvalue) # sort data keys = list(js.keys()) feat_lens = [js[key]['input'][0]['shape'][0] for key in keys] sorted_index = sorted(range(len(feat_lens)), key=lambda i: -feat_lens[i]) keys = [keys[i] for i in sorted_index] with torch.no_grad(): for names in grouper(args.batchsize, keys, None): names = [name for name in names if name] feats = [ kaldi_io_py.read_mat(js[name]['input'][0]['feat']) for name in names ] nbest_hyps = e2e.recognize_batch(feats, args, train_args.char_list, rnnlm=rnnlm) for i, nbest_hyp in enumerate(nbest_hyps): name = names[i] new_js[name] = add_results_to_json(js[name], nbest_hyp, train_args.char_list) # TODO(watanabe) fix character coding problems when saving it with open(args.result_label, 'wb') as f: f.write( json.dumps({ 'utts': new_js }, indent=4, sort_keys=True).encode('utf_8'))
def train(args): '''Run training''' # seed setting torch.manual_seed(args.seed) # debug mode setting # 0 would be fastest, but 1 seems to be reasonable # by considering reproducability # revmoe type check if args.debugmode < 2: chainer.config.type_check = False logging.info('torch type check is disabled') # use determinisitic computation or not if args.debugmode < 1: torch.backends.cudnn.deterministic = False logging.info('torch cudnn deterministic is disabled') else: torch.backends.cudnn.deterministic = True # check cuda availability if not torch.cuda.is_available(): logging.warning('cuda is not available') # get input and output dimension info with open(args.valid_json, 'rb') as f: valid_json = json.load(f)['utts'] utts = list(valid_json.keys()) idim = int(valid_json[utts[0]]['input'][0]['shape'][1]) odim = int(valid_json[utts[0]]['output'][0]['shape'][1]) logging.info('#input dims : ' + str(idim)) logging.info('#output dims: ' + str(odim)) # specify attention, CTC, hybrid mode if args.mtlalpha == 1.0: mtl_mode = 'ctc' logging.info('Pure CTC mode') elif args.mtlalpha == 0.0: mtl_mode = 'att' logging.info('Pure attention mode') else: mtl_mode = 'mtl' logging.info('Multitask learning mode') # specify model architecture e2e = E2E(idim, odim, args) model = Loss(e2e, args.mtlalpha) if args.rnnlm is not None: rnnlm_args = get_model_conf(args.rnnlm, args.rnnlm_conf) rnnlm = lm_pytorch.ClassifierWithState( lm_pytorch.RNNLM(len(args.char_list), rnnlm_args.layer, rnnlm_args.unit)) torch.load(args.rnnlm, rnnlm) e2e.rnnlm = rnnlm # write model config if not os.path.exists(args.outdir): os.makedirs(args.outdir) model_conf = args.outdir + '/model.json' with open(model_conf, 'wb') as f: logging.info('writing a model config file to ' + model_conf) f.write( json.dumps((idim, odim, vars(args)), indent=4, sort_keys=True).encode('utf_8')) for key in sorted(vars(args).keys()): logging.info('ARGS: ' + key + ': ' + str(vars(args)[key])) reporter = model.reporter # check the use of multi-gpu if args.ngpu > 1: model = torch.nn.DataParallel(model, device_ids=list(range(args.ngpu))) logging.info('batch size is automatically increased (%d -> %d)' % (args.batch_size, args.batch_size * args.ngpu)) args.batch_size *= args.ngpu # set torch device device = torch.device("cuda" if args.ngpu > 0 else "cpu") model = model.to(device) # Setup an optimizer if args.opt == 'adadelta': optimizer = torch.optim.Adadelta(model.parameters(), rho=0.95, eps=args.eps) elif args.opt == 'adam': optimizer = torch.optim.Adam(model.parameters()) # FIXME: TOO DIRTY HACK setattr(optimizer, "target", reporter) setattr(optimizer, "serialize", lambda s: reporter.serialize(s)) # Setup a converter converter = CustomConverter(e2e.subsample[0]) # read json data with open(args.train_json, 'rb') as f: train_json = json.load(f)['utts'] with open(args.valid_json, 'rb') as f: valid_json = json.load(f)['utts'] # make minibatch list (variable length) train = make_batchset(train_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1) valid = make_batchset(valid_json, args.batch_size, args.maxlen_in, args.maxlen_out, args.minibatches, min_batch_size=args.ngpu if args.ngpu > 1 else 1) # hack to make batchsze argument as 1 # actual bathsize is included in a list if args.n_iter_processes > 0: train_iter = chainer.iterators.MultiprocessIterator( TransformDataset(train, converter.transform), batch_size=1, n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20) valid_iter = chainer.iterators.MultiprocessIterator( TransformDataset(valid, converter.transform), batch_size=1, repeat=False, shuffle=False, n_processes=args.n_iter_processes, n_prefetch=8, maxtasksperchild=20) else: train_iter = chainer.iterators.SerialIterator(TransformDataset( train, converter.transform), batch_size=1) valid_iter = chainer.iterators.SerialIterator(TransformDataset( valid, converter.transform), batch_size=1, repeat=False, shuffle=False) # Set up a trainer updater = CustomUpdater(model, args.grad_clip, train_iter, optimizer, converter, device, args.ngpu) trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.outdir) # Resume from a snapshot if args.resume: logging.info('resumed from %s' % args.resume) torch_resume(args.resume, trainer) # Evaluate the model with the test dataset for each epoch trainer.extend( CustomEvaluator(model, valid_iter, reporter, converter, device)) # Save attention weight each epoch if args.num_save_attention > 0 and args.mtlalpha != 1.0: data = sorted(list(valid_json.items())[:args.num_save_attention], key=lambda x: int(x[1]['input'][0]['shape'][1]), reverse=True) if hasattr(model, "module"): att_vis_fn = model.module.predictor.calculate_all_attentions else: att_vis_fn = model.predictor.calculate_all_attentions trainer.extend(PlotAttentionReport(att_vis_fn, data, args.outdir + "/att_ws", converter=converter, device=device), trigger=(1, 'epoch')) # Make a plot for training and validation values trainer.extend( extensions.PlotReport([ 'main/loss', 'validation/main/loss', 'main/loss_ctc', 'validation/main/loss_ctc', 'main/loss_att', 'validation/main/loss_att' ], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport(['main/acc', 'validation/main/acc'], 'epoch', file_name='acc.png')) # Save best models trainer.extend( extensions.snapshot_object(model, 'model.loss.best', savefun=torch_save), trigger=training.triggers.MinValueTrigger('validation/main/loss')) if mtl_mode is not 'ctc': trainer.extend( extensions.snapshot_object(model, 'model.acc.best', savefun=torch_save), trigger=training.triggers.MaxValueTrigger('validation/main/acc')) # save snapshot which contains model and optimizer states trainer.extend(torch_snapshot(), trigger=(1, 'epoch')) # epsilon decay in the optimizer if args.opt == 'adadelta': if args.criterion == 'acc' and mtl_mode is not 'ctc': trainer.extend(restore_snapshot(model, args.outdir + '/model.acc.best', load_fn=torch_load), trigger=CompareValueTrigger( 'validation/main/acc', lambda best_value, current_value: best_value > current_value)) trainer.extend(adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( 'validation/main/acc', lambda best_value, current_value: best_value > current_value)) elif args.criterion == 'loss': trainer.extend(restore_snapshot(model, args.outdir + '/model.loss.best', load_fn=torch_load), trigger=CompareValueTrigger( 'validation/main/loss', lambda best_value, current_value: best_value < current_value)) trainer.extend(adadelta_eps_decay(args.eps_decay), trigger=CompareValueTrigger( 'validation/main/loss', lambda best_value, current_value: best_value < current_value)) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport(trigger=(REPORT_INTERVAL, 'iteration'))) report_keys = [ 'epoch', 'iteration', 'main/loss', 'main/loss_ctc', 'main/loss_att', 'validation/main/loss', 'validation/main/loss_ctc', 'validation/main/loss_att', 'main/acc', 'validation/main/acc', 'elapsed_time' ] if args.opt == 'adadelta': trainer.extend(extensions.observe_value( 'eps', lambda trainer: trainer.updater.get_optimizer('main'). param_groups[0]["eps"]), trigger=(REPORT_INTERVAL, 'iteration')) report_keys.append('eps') if args.report_cer: report_keys.append('validation/main/cer') if args.report_wer: report_keys.append('validation/main/wer') trainer.extend(extensions.PrintReport(report_keys), trigger=(REPORT_INTERVAL, 'iteration')) trainer.extend(extensions.ProgressBar(update_interval=REPORT_INTERVAL)) # Run the training trainer.run()