Example #1
0
def test_sa_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step on the SA-Transducer model, then decode."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, x, ilens, y, data = prepare(train_args)

    # A single optimization step is enough to check gradients flow.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(x, ilens, y).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        beam_size=recog_args.beam_size,
        lm=None,
        lm_weight=0.0,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        nbest = model.recognize(x[0, :ilens[0]].numpy(), beam_search)

        print(y[0])
        # Greedy search returns a single hypothesis dict; beam search
        # returns a score-sorted list of hypotheses.
        best = nbest if recog_args.beam_size == 1 else nbest[0]
        print(best["yseq"][1:-1])
    def __init__(
        self,
        decoder,
        token_list,
        sym_space,
        sym_blank,
        report_cer=False,
        report_wer=False,
    ):
        """Construct an ErrorCalculator object for transducer model."""
        super().__init__()

        self.decoder = decoder
        # Greedy decoding (beam size 1) keeps validation-time scoring cheap.
        self.beam_search = BeamSearchTransducer(decoder=decoder, beam_size=1)

        # Vocabulary and special symbols used to render hypotheses as text.
        self.token_list = token_list
        self.space = sym_space
        self.blank = sym_blank

        # Flags controlling which error rates are reported.
        self.report_cer = report_cer
        self.report_wer = report_wer
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one backward pass on the RNN-Transducer E2E model, then decode."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # One backward pass verifies the training graph is well-formed.
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        # Decode a random feature sequence of 20 frames.
        model.recognize(np.random.randn(20, idim), beam_search)
def test_custom_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one training step on the custom transducer model, then decode."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, feats, feats_len, labels, data, uttid_list = prepare(train_args)

    # Single optimization step to confirm the model trains end-to-end.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(feats, feats_len, labels).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        softmax_temperature=recog_args.softmax_temperature,
    )

    with torch.no_grad():
        # Decode the first (unpadded) utterance of the batch.
        first_utt = feats[0, : feats_len[0]].numpy()
        nbest = model.recognize(first_utt, beam_search)

        print(nbest[0]["yseq"][1:-1])
Example #5
0
    def __init__(
        self,
        decoder: Union[RNNDecoder, CustomDecoder],
        joint_network: JointNetwork,
        token_list: List[int],
        sym_space: str,
        sym_blank: str,
        report_cer: bool = False,
        report_wer: bool = False,
    ):
        """Construct an ErrorCalculator object for Transducer model."""
        super().__init__()

        self.decoder = decoder
        # A small fixed beam keeps validation-time scoring inexpensive.
        self.beam_search = BeamSearchTransducer(
            decoder=decoder,
            joint_network=joint_network,
            beam_size=2,
            search_type="default",
        )

        # Vocabulary and special symbols used to render hypotheses as text.
        self.token_list = token_list
        self.space = sym_space
        self.blank = sym_blank

        # Flags controlling which error rates are reported.
        self.report_cer = report_cer
        self.report_wer = report_wer
Example #6
0
def test_dynamic_quantization(train_dic, recog_dic, quantize_dic):
    """Check dynamic quantization of the E2E transducer model and decoding.

    Older torch versions do not support every (module, dtype) combination;
    those cases are expected to raise AssertionError and the test is skipped.
    """
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)

    if not is_torch_1_5_plus and (
        torch.nn.Linear in quantize_dic["mod"]
        and quantize_dic["dtype"] == torch.float16
    ):
        # In recognize(...) from asr.py we raise ValueError however
        # AssertionError is originally raised by torch.
        with pytest.raises(AssertionError):
            model = torch.quantization.quantize_dynamic(
                model,
                quantize_dic["mod"],
                dtype=quantize_dic["dtype"],
            )
        pytest.skip("Skip rest of the test after checking AssertionError")
    else:
        model = torch.quantization.quantize_dynamic(
            model,
            quantize_dic["mod"],
            # Pass dtype by keyword for consistency with the branch above
            # (and with the sibling custom-transducer quantization test).
            dtype=quantize_dic["dtype"],
        )

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        quantization=True,
    )

    with torch.no_grad():
        in_data = np.random.randn(20, idim)

        if not is_torch_1_4_plus and torch.nn.LSTM in quantize_dic["mod"]:
            # Cf. previous comment: quantized LSTM decoding needs torch >= 1.4.
            with pytest.raises(AssertionError):
                model.recognize(in_data, beam_search)
        else:
            model.recognize(in_data, beam_search)
def test_auxiliary_task(train_dic):
    """Train with auxiliary tasks, save/reload the model, then decode."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args()

    model, x, ilens, y, data, uttid_list = prepare(train_args)

    # Single optimization step to confirm the model trains end-to-end.
    optimizer = torch.optim.Adam(model.parameters(), 0.01)
    optimizer.zero_grad()
    model(x, ilens, y).backward()
    optimizer.step()

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(model.state_dict(), tmpdir + "/model.dummy.best")

    # Dummy model.json so load_trained_model(...) can restore the config.
    config = json.dumps(
        (12, 5, vars(train_args)),
        indent=4,
        ensure_ascii=False,
        sort_keys=True,
    )
    with open(tmpdir + "/model.json", "wb") as f:
        f.write(config.encode("utf_8"))

    with torch.no_grad():
        model, _ = load_trained_model(tmpdir + "/model.dummy.best",
                                      training=False)

        nbest = model.recognize(x[0, :ilens[0]].numpy(), beam_search)

        print(y[0])
        print(nbest[0]["yseq"][1:-1])
Example #8
0
def test_auxiliary_task(train_dic):
    """Train the E2E model with auxiliary tasks, save/reload, then decode."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args()

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # One backward pass verifies the training graph is well-formed.
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    tmpdir = tempfile.mkdtemp(prefix="tmp_", dir="/tmp")
    torch.save(model.state_dict(), tmpdir + "/model.dummy.best")

    # Dummy model.json so load_trained_model(...) can restore the config.
    config = json.dumps(
        (idim, odim, vars(train_args)),
        indent=4,
        ensure_ascii=False,
        sort_keys=True,
    )
    with open(tmpdir + "/model.json", "wb") as f:
        f.write(config.encode("utf_8"))

    with torch.no_grad():
        in_data = np.random.randn(20, idim)

        model, _ = load_trained_model(tmpdir + "/model.dummy.best",
                                      training=False)

        model.recognize(in_data, beam_search)
def test_dynamic_quantization(train_dic, recog_dic, quantize_dic):
    """Check dynamic quantization of the custom transducer and decoding."""
    train_args = make_train_args(**train_dic)
    recog_args = make_recog_args(**recog_dic)

    model, feats, feats_len, _, _, _ = prepare(train_args)

    # Pre-1.5 torch cannot quantize Linear modules to float16.
    unsupported = not is_torch_1_5_plus and (
        torch.nn.Linear in quantize_dic["mod"]
        and quantize_dic["dtype"] == torch.float16
    )

    if unsupported:
        # In recognize(...) from asr.py we raise ValueError however
        # AssertionError is originally raised by torch.
        with pytest.raises(AssertionError):
            model = torch.quantization.quantize_dynamic(
                model,
                quantize_dic["mod"],
                dtype=quantize_dic["dtype"],
            )
        # skip() raises, so the remainder only runs on supported setups.
        pytest.skip("Skip rest of the test after checking AssertionError")

    model = torch.quantization.quantize_dynamic(
        model,
        quantize_dic["mod"],
        dtype=quantize_dic["dtype"],
    )

    beam_search = BeamSearchTransducer(
        decoder=model.decoder,
        joint_network=model.transducer_tasks.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
        quantization=True,
    )

    with torch.no_grad():
        model.recognize(feats[0, : feats_len[0]].numpy(), beam_search)
Example #10
0
def test_pytorch_transducer_trainable_and_decodable(train_dic, recog_dic):
    """Run one backward pass on the transducer E2E model, then decode."""
    idim, odim, ilens, olens = get_default_scope_inputs()

    train_args = get_default_train_args(**train_dic)
    recog_args = get_default_recog_args(**recog_dic)

    model = E2E(idim, odim, train_args)
    batch = prepare_inputs(idim, odim, ilens, olens)

    # to avoid huge training time, cer/wer report
    # is only enabled at validation steps
    if train_args.report_cer or train_args.report_wer:
        model.training = False

    # One backward pass verifies the training graph is well-formed.
    model(*batch).backward()

    beam_search = BeamSearchTransducer(
        decoder=model.dec,
        joint_network=model.joint_network,
        beam_size=recog_args.beam_size,
        lm=recog_args.rnnlm,
        lm_weight=recog_args.lm_weight,
        search_type=recog_args.search_type,
        max_sym_exp=recog_args.max_sym_exp,
        u_max=recog_args.u_max,
        nstep=recog_args.nstep,
        prefix_alpha=recog_args.prefix_alpha,
        score_norm=recog_args.score_norm_transducer,
    )

    with torch.no_grad():
        # Decode a random feature sequence of 20 frames.
        model.recognize(np.random.randn(20, idim), beam_search)
Example #11
0
def test_pytorch_trainable_and_transferable(model_type, finetune_dic):
    """Check that a saved model can seed fine-tuning and still decode.

    A first model (RNN or RNN-T) is trained for one step and saved; its
    weights then initialize the encoder and/or decoder of a second model
    via load_trained_modules, which is trained for one step and decoded.
    """
    idim, odim, ilens, olens = get_default_scope_inputs()

    if model_type == "rnn":
        from espnet.nets.pytorch_backend.e2e_asr import E2E

        arg_function = get_rnn_args
    else:
        from espnet.nets.pytorch_backend.e2e_asr_transducer import E2E

        arg_function = get_rnnt_args

    args = arg_function()

    model = E2E(idim, odim, args)

    batch = pytorch_prepare_inputs(idim, odim, ilens, olens)

    loss = model(*batch)
    loss.backward()

    os.makedirs(".pytest_cache", exist_ok=True)

    # tempfile.mktemp() is deprecated and race-prone: create a private
    # directory instead and build the snapshot path inside it.
    tmppath = os.path.join(tempfile.mkdtemp(), "model")

    if finetune_dic["use_lm"] is not None:
        # NOTE(review): "use_lm" is compared against None, not truthiness —
        # a False value would still take this branch; confirm intended.
        lm = get_lm(args.dlayers, args.dunits, args.char_list)
        tmppath += "_rnnlm"

        torch_save(tmppath, lm)
    else:
        torch_save(tmppath, model)

    # Point the requested init sources at the saved snapshot.
    if finetune_dic["enc_init"] is not None:
        finetune_dic["enc_init"] = tmppath
    if finetune_dic["dec_init"] is not None:
        finetune_dic["dec_init"] = tmppath

    finetune_args = arg_function(**finetune_dic)

    # create dummy model.json for saved model to go through
    # get_model_conf(...) called in load_trained_modules method.
    model_conf = os.path.dirname(tmppath) + "/model.json"
    with open(model_conf, "wb") as f:
        f.write(
            json.dumps(
                (idim, odim, vars(finetune_args)),
                indent=4,
                ensure_ascii=False,
                sort_keys=True,
            ).encode("utf_8"))

    model = load_trained_modules(idim, odim, finetune_args)

    loss = model(*batch)
    loss.backward()

    if model_type == "rnnt":
        beam_search = BeamSearchTransducer(
            decoder=model.dec,
            joint_network=model.joint_network,
            beam_size=1,
            lm=None,
            lm_weight=0.0,
            search_type="default",
            max_sym_exp=2,
            u_max=10,
            nstep=1,
            prefix_alpha=1,
            score_norm=False,
        )

        with torch.no_grad():
            in_data = np.random.randn(10, idim)
            model.recognize(in_data, beam_search)
    else:
        with torch.no_grad():
            in_data = np.random.randn(10, idim)
            model.recognize(in_data, args, args.char_list)