def test_invalid_aux_transducer_loss_enc_layers():
    """E2E should raise ValueError for invalid aux transducer loss encoder configs."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    # Aux loss enabled without any encoder output layers specified.
    train_args = get_default_train_args(use_aux_transducer_loss=True)
    with pytest.raises(ValueError):
        E2E(idim, odim, train_args)

    # Output layers given as a string instead of a list.
    train_args = get_default_train_args(
        use_aux_transducer_loss=True,
        aux_transducer_loss_enc_output_layers="foo",
    )
    with pytest.raises(ValueError):
        E2E(idim, odim, train_args)

    # Output layers list rejected by E2E (index presumably out of range).
    train_args = get_default_train_args(
        use_aux_transducer_loss=True,
        aux_transducer_loss_enc_output_layers=[0, 4],
    )
    with pytest.raises(ValueError):
        E2E(idim, odim, train_args)

    # Symmetric KL-div loss combined with a subsampled blstmp encoder config.
    train_args = get_default_train_args(
        use_aux_transducer_loss=True,
        use_symm_kl_div_loss=True,
        aux_transducer_loss_enc_output_layers=[0],
        elayers=3,
        etype="blstmp",
        subsample="1_2_1",
    )
    with pytest.raises(ValueError):
        E2E(idim, odim, train_args)
def test_no_block_arch():
    """E2E should raise ValueError when a block architecture is missing."""
    _, idim, odim, _, _ = get_default_scope_inputs()

    # Check encoder first, then decoder, matching the original order.
    for missing_arch in ("enc_block_arch", "dec_block_arch"):
        args = make_train_args(**{missing_arch: None})
        with pytest.raises(ValueError):
            E2E(idim, odim, args)
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step, then decode random input with beam search."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    # Single forward/backward pass to verify trainability.
    batch = prepare_inputs(idim, odim, ilens, olens)
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        in_data = np.random.randn(20, idim)
        model.recognize(in_data, beam_search)
def prepare(backend, args):
    """Build a model plus padded random inputs/targets and per-utterance metadata.

    Returns (model, x, ilens tensor, y, data) where data is a list of
    (uttid, shape-info dict) tuples.
    """
    bs, idim, odim, ilens, olens = get_default_scope_inputs()
    n_token = odim - 1

    model = E2E(idim, odim, args)

    x = torch.randn(bs, max(ilens), idim)
    y = (torch.rand(bs, max(olens)) * n_token % n_token).long()

    # Mask the padded tail of each utterance.
    for i in range(bs):
        x[i, ilens[i]:] = -1
        y[i, olens[i]:] = model.ignore_id

    data = [
        (
            "utt%d" % i,
            {
                "input": [{"shape": [ilens[i], idim]}],
                "output": [{"shape": [olens[i]]}],
            },
        )
        for i in range(bs)
    ]

    return model, x, torch.tensor(ilens), y, data
def prepare(args):
    """Build a model, padded feature/label tensors, a data dict and utterance ids.

    Returns (model, feats, ilens tensor, labels, data, uttid_list) where data
    maps each uttid to its input/output shape metadata.
    """
    bs, idim, odim, ilens, olens = get_default_scope_inputs()
    n_token = odim - 1

    model = E2E(idim, odim, args)

    feats = torch.randn(bs, max(ilens), idim)
    labels = (torch.rand(bs, max(olens)) * n_token % n_token).long()

    # Mask the padded tail of each utterance.
    for i in range(bs):
        feats[i, ilens[i]:] = -1
        labels[i, olens[i]:] = model.ignore_id

    data = {}
    uttid_list = []
    for i in range(bs):
        uttid = "utt%d" % i
        data[uttid] = {
            "input": [{"shape": [ilens[i], idim]}],
            "output": [{"shape": [olens[i]]}],
        }
        uttid_list.append(uttid)

    return model, feats, torch.tensor(ilens), labels, data, uttid_list
def test_subsampling(train_dic, subsample):
    """The model should report the expected total subsampling factor."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    model = E2E(idim, odim, get_default_train_args(**train_dic))
    assert model.get_total_subsampling_factor() == subsample
def test_pytorch_transducer_gpu_trainable(backend, trans_type):
    """Single-GPU forward/backward pass for the given transducer loss type."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(trans_type=trans_type)

    # warp-rnnt import fails unless the CUDA version matches 10.0
    # (presumably the version its wheels were built against).
    if trans_type == "warp-rnnt" and torch.version.cuda != "10.0":
        with pytest.raises(ImportError):
            E2E(idim, odim, train_args)
        return

    model = E2E(idim, odim, train_args)
    model.cuda()

    batch = prepare_inputs(backend, idim, odim, ilens, olens, is_cuda=True)
    model(*batch).backward()
def test_invalid_aux_task_layer_list():
    """E2E should raise ValueError for malformed aux task layer lists."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    # Same order as before: missing list, wrong type, rejected list.
    invalid_overrides = [
        {},
        {"aux_task_layer_list": "foo"},
        {"aux_task_layer_list": [0, 4]},
    ]
    for overrides in invalid_overrides:
        train_args = get_default_train_args(aux_task_type="default", **overrides)
        with pytest.raises(ValueError):
            E2E(idim, odim, train_args)
def test_calculate_plot_attention():
    """calculate_all_attentions should return an empty list for this model."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    model = E2E(idim, odim, get_default_train_args())

    batch = prepare_inputs(idim, odim, ilens, olens, is_cuda=False)
    assert model.calculate_all_attentions(*batch) == []
def test_pytorch_calculate_attentions(atype, backend="pytorch"):
    """Smoke-test attention weight computation for the given attention type."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(rnnt_mode="rnnt-att", atype=atype)
    model = E2E(idim, odim, train_args)

    batch = prepare_inputs(backend, idim, odim, ilens, olens, is_cuda=False)
    attention_weights = model.calculate_all_attentions(*batch)[0]
    print(attention_weights.shape)
def test_dynamic_quantization(train_dic, recog_dic, quantize_dic):
    """Dynamically quantize the model, then decode with beam search."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    fp16_linear = (
        torch.nn.Linear in quantize_dic["mod"]
        and quantize_dic["dtype"] == torch.float16
    )

    if not is_torch_1_5_plus and fp16_linear:
        # In recognize(...) from asr.py we raise ValueError however
        # AssertionError is originally raised by torch.
        with pytest.raises(AssertionError):
            model = torch.quantization.quantize_dynamic(
                model,
                quantize_dic["mod"],
                dtype=quantize_dic["dtype"],
            )
        pytest.skip("Skip rest of the test after checking AssertionError")
    else:
        model = torch.quantization.quantize_dynamic(
            model, quantize_dic["mod"], quantize_dic["dtype"]
        )

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        quantization=True,
    )

    with torch.no_grad():
        in_data = np.random.randn(20, idim)

        if not is_torch_1_4_plus and torch.nn.LSTM in quantize_dic["mod"]:
            # As above: torch itself raises AssertionError on older versions.
            with pytest.raises(AssertionError):
                model.recognize(in_data, beam_search)
        else:
            model.recognize(in_data, beam_search)
def test_pytorch_multi_gpu_trainable(backend):
    """Forward/backward pass across two GPUs via DataParallel."""
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args()

    ngpu = 2
    model = torch.nn.DataParallel(E2E(idim, odim, train_args), list(range(ngpu)))
    model.cuda()

    batch = prepare_inputs(backend, idim, odim, ilens, olens, is_cuda=True)

    # Scale the replicated loss by the number of GPUs before backward.
    loss = 1.0 / ngpu * model(*batch)
    loss.backward(loss.new_ones(ngpu))
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step, then decode via model.recognize with recog args."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    # Single forward/backward pass to verify trainability.
    batch = prepare_inputs(idim, odim, ilens, olens)
    model(*batch).backward()

    with torch.no_grad():
        in_data = np.random.randn(20, idim)
        model.recognize(in_data, recog_args, train_args.char_list, recog_args.rnnlm)
def test_auxiliary_task(train_dic):
    """Train with an auxiliary task, save/reload the model, then decode.

    Runs one forward/backward pass, serializes the model state dict and its
    json config to a temporary directory, reloads it through
    load_trained_model, and decodes random input with beam search.
    """
    idim, odim, ilens, olens = get_default_scope_inputs()
    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args()

    model = E2E(idim, odim, train_args)

    batch = prepare_inputs(idim, odim, ilens, olens)

    loss = model(*batch)
    loss.backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    # Fix: use the platform's default temp location instead of hard-coding
    # dir="/tmp", which does not exist on non-POSIX systems.
    tmpdir = tempfile.mkdtemp(prefix="tmp_")
    torch.save(model.state_dict(), tmpdir + "/model.dummy.best")

    with open(tmpdir + "/model.json", "wb") as f:
        f.write(
            json.dumps(
                (idim, odim, vars(train_args)),
                indent=4,
                ensure_ascii=False,
                sort_keys=True,
            ).encode("utf_8"))

    with torch.no_grad():
        in_data = np.random.randn(20, idim)

        # Reload in inference mode and decode with the saved weights.
        model, _ = load_trained_model(tmpdir + "/model.dummy.best", training=False)
        model.recognize(in_data, beam_search)
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """One training step followed by beam-search decoding."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    batch = prepare_inputs(idim, odim, ilens, olens)

    # to avoid huge training time, cer/wer report
    # is only enabled at validation steps
    if train_args.report_cer or train_args.report_wer:
        model.training = False

    loss = model(*batch)
    loss.backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        in_data = np.random.randn(20, idim)
        model.recognize(in_data, beam_search)