def test_sa_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step on the SA transducer, then decode one utterance."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, x, ilens, y, data = prepare(train_args)

    # Single optimization step to exercise the training path.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(x, ilens, y).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        beam_size=recog_args.beam_size,
        lm=None,
        lm_weight=0.0,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        nbest = model.recognize(x[0, : ilens[0]].numpy(), beam_search)

    print(y[0])

    # With beam_size == 1 recognize returns a single hypothesis dict,
    # otherwise an n-best list of hypothesis dicts.
    if recog_args.beam_size == 1:
        print(nbest["yseq"][1:-1])
    else:
        print(nbest[0]["yseq"][1:-1])
def __init__(
    self,
    decoder,
    token_list,
    sym_space,
    sym_blank,
    report_cer=False,
    report_wer=False,
):
    """Construct an ErrorCalculator object for transducer model.

    Args:
        decoder: Decoder module.
        token_list: List of output tokens.
        sym_space: Space symbol.
        sym_blank: Blank symbol.
        report_cer: Whether to report CER.
        report_wer: Whether to report WER.
    """
    super().__init__()

    # A greedy (beam_size=1) search is enough for error reporting.
    self.beam_search = BeamSearchTransducer(decoder=decoder, beam_size=1)
    self.decoder = decoder

    self.token_list = token_list
    self.space = sym_space
    self.blank = sym_blank

    self.report_cer = report_cer
    self.report_wer = report_wer
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one backward pass on the transducer model, then decode random input."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # One forward/backward pass verifies the loss is differentiable.
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        model.recognize(np.random.randn(20, idim), beam_search)
def test_custom_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step on the custom transducer, then decode one utterance."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, feats, feats_len, labels, data, uttid_list = prepare(train_args)

    # Single optimization step to exercise the training path.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(feats, feats_len, labels).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        softmax_temperature=recog_args.softmax_temperature,
    )

    with torch.no_grad():
        nbest = model.recognize(feats[0, : feats_len[0]].numpy(), beam_search)

    print(nbest[0]["yseq"][1:-1])
def __init__(
    self,
    decoder: Union[RNNDecoder, CustomDecoder],
    joint_network: JointNetwork,
    token_list: List[int],
    sym_space: str,
    sym_blank: str,
    report_cer: bool = False,
    report_wer: bool = False,
):
    """Construct an ErrorCalculator object for Transducer model.

    Args:
        decoder: Decoder module.
        joint_network: Joint network module.
        token_list: List of token IDs.
        sym_space: Space symbol.
        sym_blank: Blank symbol.
        report_cer: Whether to report CER.
        report_wer: Whether to report WER.
    """
    super().__init__()

    # Small fixed-size default beam search used only for error reporting.
    self.beam_search = BeamSearchTransducer(
        decoder=decoder,
        joint_network=joint_network,
        beam_size=2,
        search_type="default",
    )
    self.decoder = decoder

    self.token_list = token_list
    self.space = sym_space
    self.blank = sym_blank

    self.report_cer = report_cer
    self.report_wer = report_wer
def test_dynamic_quantization(train_dic, recog_dic, quantize_dic):
    """Check dynamic quantization of the transducer model and decoding.

    Older torch versions cannot quantize some module/dtype combinations,
    so those paths are expected to raise AssertionError.
    """
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    if not is_torch_1_5_plus and (
        torch.nn.Linear in quantize_dic["mod"]
        and quantize_dic["dtype"] == torch.float16
    ):
        # In recognize(...) from asr.py we raise ValueError, however
        # AssertionError is originally raised by torch.
        with pytest.raises(AssertionError):
            model = torch.quantization.quantize_dynamic(
                model,
                quantize_dic["mod"],
                dtype=quantize_dic["dtype"],
            )
        pytest.skip("Skip rest of the test after checking AssertionError")
    else:
        # dtype passed as keyword for consistency with the branch above.
        model = torch.quantization.quantize_dynamic(
            model,
            quantize_dic["mod"],
            dtype=quantize_dic["dtype"],
        )

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        quantization=True,
    )

    with torch.no_grad():
        in_data = np.random.randn(20, idim)

        if not is_torch_1_4_plus and torch.nn.LSTM in quantize_dic["mod"]:
            # Cf. previous comment.
            with pytest.raises(AssertionError):
                model.recognize(in_data, beam_search)
        else:
            model.recognize(in_data, beam_search)
def test_auxiliary_task(train_dic):
    """Train one step with an auxiliary task, reload the model, and decode."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args()

    model, x, ilens, y, data, uttid_list = prepare(train_args)

    # Single optimization step to exercise the training path.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(x, ilens, y).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    # Snapshot the model plus a minimal model.json so load_trained_model
    # can go through get_model_conf(...).
    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(model.state_dict(), tmpdir + "/model.dummy.best")

    # NOTE(review): (12, 5) look like hard-coded (idim, odim) — confirm
    # they match the dimensions used by prepare(train_args).
    model_json = json.dumps(
        (12, 5, vars(train_args)),
        indent=4,
        ensure_ascii=False,
        sort_keys=True,
    )
    with open(tmpdir + "/model.json", "wb") as f:
        f.write(model_json.encode("utf_8"))

    with torch.no_grad():
        model, _ = load_trained_model(tmpdir + "/model.dummy.best", training=False)

        nbest = model.recognize(x[0, : ilens[0]].numpy(), beam_search)

    print(y[0])
    print(nbest[0]["yseq"][1:-1])
def test_auxiliary_task(train_dic):
    """Train one step with an auxiliary task, reload the model, and decode."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args()

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # One forward/backward pass verifies the loss is differentiable.
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    # Snapshot the model plus a minimal model.json so load_trained_model
    # can go through get_model_conf(...).
    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(model.state_dict(), tmpdir + "/model.dummy.best")

    model_json = json.dumps(
        (idim, odim, vars(train_args)),
        indent=4,
        ensure_ascii=False,
        sort_keys=True,
    )
    with open(tmpdir + "/model.json", "wb") as f:
        f.write(model_json.encode("utf_8"))

    with torch.no_grad():
        model, _ = load_trained_model(tmpdir + "/model.dummy.best", training=False)

        model.recognize(np.random.randn(20, idim), beam_search)
def test_dynamic_quantization(train_dic, recog_dic, quantize_dic):
    """Check dynamic quantization of the custom transducer model and decoding.

    Older torch versions cannot quantize some module/dtype combinations,
    so those paths are expected to raise AssertionError.
    """
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, feats, feats_len, _, _, _ = prepare(train_args)

    unsupported_fp16_linear = not is_torch_1_5_plus and (
        torch.nn.Linear in quantize_dic["mod"]
        and quantize_dic["dtype"] == torch.float16
    )

    if unsupported_fp16_linear:
        # In recognize(...) from asr.py we raise ValueError, however
        # AssertionError is originally raised by torch.
        with pytest.raises(AssertionError):
            model = torch.quantization.quantize_dynamic(
                model,
                quantize_dic["mod"],
                dtype=quantize_dic["dtype"],
            )
        pytest.skip("Skip rest of the test after checking AssertionError")

    model = torch.quantization.quantize_dynamic(
        model,
        quantize_dic["mod"],
        dtype=quantize_dic["dtype"],
    )

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        quantization=True,
    )

    with torch.no_grad():
        model.recognize(feats[0, : feats_len[0]].numpy(), beam_search)
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one backward pass on the transducer model, then decode random input."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # to avoid huge training time, cer/wer report
    # is only enabled at validation steps
    # NOTE(review): this flips only the top-level flag, not submodules
    # (unlike model.eval()) — confirm keeping dropout active is intended.
    if train_args.report_cer or train_args.report_wer:
        model.training = False

    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        model.recognize(np.random.randn(20, idim), beam_search)
def test_pytorch_trainable_and_transferable(model_type, finetune_dic):
    """Train a model, transfer weights via a snapshot, fine-tune, and decode.

    Args:
        model_type: Either "rnn" (attention model) or anything else for the
            transducer E2E.
        finetune_dic: Overrides for the fine-tuning configuration; its
            "enc_init"/"dec_init" entries are rewritten to the snapshot path.
    """
    idim, odim, ilens, olens = get_default_scope_inputs()

    if model_type == "rnn":
        from espnet.nets.pytorch_backend.e2e_asr import E2E

        arg_function = get_rnn_args
    else:
        from espnet.nets.pytorch_backend.e2e_asr_transducer import E2E

        arg_function = get_rnnt_args

    args = arg_function()
    model = E2E(idim, odim, args)

    batch = pytorch_prepare_inputs(idim, odim, ilens, olens)

    loss = model(*batch)
    loss.backward()

    if not os.path.exists(".pytest_cache"):
        os.makedirs(".pytest_cache")

    # tempfile.mktemp() is deprecated and race-prone; create a private
    # directory instead and build the snapshot path inside it.
    tmppath = os.path.join(tempfile.mkdtemp(), "model")

    # NOTE(review): "use_lm" is compared to None rather than truth-tested —
    # confirm the parametrization uses None/True, not False.
    if finetune_dic["use_lm"] is not None:
        lm = get_lm(args.dlayers, args.dunits, args.char_list)

        tmppath += "_rnnlm"
        torch_save(tmppath, lm)
    else:
        torch_save(tmppath, model)

    if finetune_dic["enc_init"] is not None:
        finetune_dic["enc_init"] = tmppath
    if finetune_dic["dec_init"] is not None:
        finetune_dic["dec_init"] = tmppath

    finetune_args = arg_function(**finetune_dic)

    # create dummy model.json for saved model to go through
    # get_model_conf(...) called in load_trained_modules method.
    model_conf = os.path.dirname(tmppath) + "/model.json"
    with open(model_conf, "wb") as f:
        f.write(
            json.dumps(
                (idim, odim, vars(finetune_args)),
                indent=4,
                ensure_ascii=False,
                sort_keys=True,
            ).encode("utf_8")
        )

    model = load_trained_modules(idim, odim, finetune_args)

    loss = model(*batch)
    loss.backward()

    if model_type == "rnnt":
        beam_search = BeamSearchTransducer(
            decoder=model.dec,
            joint_network=model.joint_network,
            beam_size=1,
            lm=None,
            lm_weight=0.0,
            search_type="default",
            max_sym_exp=2,
            u_max=10,
            nstep=1,
            prefix_alpha=1,
            score_norm=False,
        )

        with torch.no_grad():
            in_data = np.random.randn(10, idim)

            model.recognize(in_data, beam_search)
    else:
        with torch.no_grad():
            in_data = np.random.randn(10, idim)

            model.recognize(in_data, args, args.char_list)