예제 #1
0
    def _test_full_beam_decoder(self, test_args):
        """Smoke-test exporting an ensemble ``BeamSearch`` through ONNX.

        Builds three models from ``test_args``, wraps them in a
        ``BeamSearch`` module, runs it once eagerly, exports it with the
        ``torch.onnx`` helpers, then loads the serialized graph and runs it
        through ``caffe2_backend``.  No outputs are compared; the test
        passes as long as nothing raises.

        Args:
            test_args: model configuration handed to
                ``test_utils.prepare_inputs`` and ``models.build_model``.
        """
        beam_size = 6
        num_models = 3

        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        net_input = next(samples)['net_input']
        # Only the first entry of the batch is used; tokens are transposed.
        src_tokens = net_input['src_tokens'][0:1].t()
        src_lengths = net_input['src_lengths'][0:1].int()

        model_list = [
            models.build_model(test_args, src_dict, tgt_dict)
            for _ in range(num_models)
        ]

        bs = BeamSearch(
            model_list, src_tokens, src_lengths, beam_size=beam_size)
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(11)
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

        def fresh_inputs():
            # A new num_steps tensor is created for every invocation.
            return (src_tokens, src_lengths, prev_token, prev_scores,
                    attn_weights, prev_hypos_indices, torch.LongTensor([20]))

        outs = bs(*fresh_inputs())

        import io
        f = io.BytesIO()
        export_options = dict(
            export_params=True,
            verbose=False,
            example_outputs=outs,
        )
        torch.onnx._export(bs, fresh_inputs(), f, **export_options)
        torch.onnx._export_to_pretty_string(
            bs, fresh_inputs(), f, **export_options)

        # Rewind the buffer so the exported model can be read back.
        f.seek(0)
        import onnx
        onnx_model = onnx.load(f)
        c2_model = caffe2_backend.prepare(onnx_model)
        tensor_inputs = (src_tokens, src_lengths, prev_token, prev_scores,
                         attn_weights, prev_hypos_indices)
        numpy_inputs = tuple(t.numpy() for t in tensor_inputs)
        c2_model.run(numpy_inputs + (np.array([20]),))
예제 #2
0
def main():
    """Build a ``BeamSearch`` from checkpoints and save it to a PyTorch file.

    Arguments come from the parser returned by ``get_parser_with_args()``.
    If ``output_file`` is empty, a notice and the help text are printed and
    nothing is exported.  Otherwise the checkpoint paths in ``args.path``
    are split on ``CHECKPOINT_PATHS_DELIMITER``, the beam search is built
    and saved, and, when ``output_graph_file`` is set, the string form of
    ``beam_search.graph`` is written to that file.
    """
    parser = get_parser_with_args()
    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    checkpoint_filenames = args.path.split(CHECKPOINT_PATHS_DELIMITER)

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_pytorch(output_path=args.output_file)
    if args.output_graph_file:
        # A plain string option has no ``.path`` attribute, so fall back to
        # the value itself instead of raising AttributeError.
        # NOTE(review): the parser is defined elsewhere -- confirm the
        # option's type.
        graph_path = getattr(
            args.output_graph_file, "path", args.output_graph_file)
        with open(graph_path, "w") as f:
            f.write(str(beam_search.graph))
    def test_basic_generate(self):
        """
        Check that SequenceGenerator and the BeamSearch + BeamDecode pipeline
        produce matching tokens, token-level scores and attention weights
        for the same single "rnn" model.
        """
        # Settings shared by SequenceGenerator and BeamSearch/BeamDecode
        model_args = test_utils.ModelParamsDict(arch="rnn")
        model_args.sequence_lstm = True
        beam_size = 1
        word_reward = 0
        unk_reward = 0
        length_penalty = 0

        placeholder_len = 5
        nbest = 2
        max_seq_len = 7

        src_tokens = torch.LongTensor([[0, 0, 0]])
        src_lengths = torch.LongTensor([3])

        # --- Hypotheses from SequenceGenerator ---
        _, src_dict, tgt_dict = test_utils.prepare_inputs(model_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(model_args)
        translator = SequenceGenerator(
            [model],
            task.target_dictionary,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
        )

        # generate() output is indexed by [0] to get the hypotheses compared
        # below.
        seq_gen_hypos = translator.generate(
            {"src_tokens": src_tokens, "src_lengths": src_lengths},
            maxlen=max_seq_len,
        )[0]

        # --- Hypotheses from BeamSearch followed by BeamDecode ---
        # BeamSearch is constructed with placeholder source inputs; the real
        # source sentence is only supplied when the module is called.
        placeholder_src_tokens = torch.LongTensor(
            np.ones((placeholder_len, 1), dtype="int64"))
        placeholder_src_lengths = torch.IntTensor(
            np.array([placeholder_len], dtype="int32"))

        beam_search = BeamSearch(
            [model],
            tgt_dict,
            placeholder_src_tokens,
            placeholder_src_lengths,
            beam_size=beam_size,
            word_reward=word_reward,
            unk_reward=unk_reward,
            quantize=False,
        )
        beam_decode = BeamDecode(
            eos_token_id=tgt_dict.eos(),
            length_penalty=length_penalty,
            nbest=nbest,
            beam_size=beam_size,
            stop_at_eos=True,
        )

        # Remaining inputs expected by BeamSearch
        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_lengths[0].item())
        prev_hypos_indices = torch.zeros(1, dtype=torch.int64)
        num_steps = torch.LongTensor([max_seq_len])

        search_outputs = beam_search(
            src_tokens.transpose(0, 1),
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps,
        )
        all_tokens, all_scores, all_weights, all_prev_indices = search_outputs
        beam_decode_output = beam_decode(
            all_tokens, all_scores, all_weights, all_prev_indices,
            num_steps[0])

        # Only positions 0..max_seq_len are compared, because the sequence
        # generator adds an EOS at the end after max_seq_len.
        window = slice(0, max_seq_len)
        for seq_gen_hypo, beam_hypo in zip(seq_gen_hypos, beam_decode_output):
            # Token ids
            expected_tokens = seq_gen_hypo["tokens"].tolist()[window]
            actual_tokens = beam_hypo[0].tolist()[window]
            np.testing.assert_array_equal(expected_tokens, actual_tokens)

            # Token-level scores
            expected_scores = (
                seq_gen_hypo["positional_scores"].tolist()[window])
            actual_scores = beam_hypo[2][window]
            np.testing.assert_array_almost_equal(
                expected_scores, actual_scores, decimal=1)

            # Attention weights
            expected_attention = seq_gen_hypo["attention"].numpy()[:, window]
            actual_attention = torch.stack(beam_hypo[3]).transpose(
                0, 1).numpy()[:, window]
            np.testing.assert_array_almost_equal(
                expected_attention, actual_attention, decimal=1)
예제 #4
0
def main():
    """Export PyTorch-trained FBTranslate checkpoints as a beam search net.

    Command-line flags:
        --checkpoint: checkpoint file(s); may be repeated and/or given
            several files per flag.  At least one is needed to export.
        --output_file: destination passed to ``save_to_db``.  When empty, a
            notice and the help text are printed and nothing is exported.
        --src_dict / --dst_dict: source / target dictionary files (required).
        --beam_size, --word_penalty, --unk_penalty: forwarded to
            ``BeamSearch.build_from_checkpoints``.

    Exits through ``parser.error`` (status 2) when an output file is given
    but no checkpoint was supplied.
    """
    parser = argparse.ArgumentParser(
        description=(
            'Export PyTorch-trained FBTranslate models to caffe2'
        ),
    )
    parser.add_argument(
        '--checkpoint',
        action='append',
        nargs='+',
        help='PyTorch checkpoint file (at least one required)',
    )
    parser.add_argument(
        '--output_file',
        default='',
        help='File name to which to save beam search network',
    )
    parser.add_argument(
        '--src_dict',
        required=True,
        help='File encoding PyTorch dictionary for source language',
    )
    parser.add_argument(
        '--dst_dict',
        required=True,
        help='File encoding PyTorch dictionary for target language',
    )
    parser.add_argument(
        '--beam_size',
        type=int,
        default=6,
        help='Number of top candidates returned by each decoder step',
    )
    parser.add_argument(
        '--word_penalty',
        type=float,
        default=0.0,
        help='Value to add for each word (besides EOS)',
    )
    parser.add_argument(
        '--unk_penalty',
        type=float,
        default=0.0,
        help='Value to add for each word UNK token',
    )

    args = parser.parse_args()

    if args.output_file == '':
        print('No action taken. Need output_file to be specified.')
        parser.print_help()
        return

    # Without any --checkpoint flag argparse leaves the value as None.
    if not args.checkpoint:
        parser.error('at least one --checkpoint is required')

    # action='append' combined with nargs='+' yields a list of lists (one
    # inner list per flag occurrence); flatten it so no file is dropped.
    checkpoint_filenames = [
        filename for group in args.checkpoint for filename in group
    ]

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_penalty=args.word_penalty,
        unk_penalty=args.unk_penalty,
    )
    beam_search.save_to_db(
        args.output_file,
    )
예제 #5
0
    def _test_beam_component_equivalence(self, test_args):
        """Check that ``BeamSearch`` matches stepping its components by hand.

        Runs the full ``BeamSearch`` module for two steps, then reproduces
        those steps with ``EncoderEnsemble`` followed by two calls to
        ``DecoderBatchedStepEnsemble``.  Tokens, scores, previous-hypothesis
        indices and attention weights from both paths must agree.

        Args:
            test_args: model configuration handed to
                ``test_utils.prepare_inputs`` and ``task.build_model``.
        """
        beam_size = 5
        num_models = 3

        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model_list = [task.build_model(test_args) for _ in range(num_models)]

        # Inputs used to initialize the BeamSearch object
        net_input = next(samples)["net_input"]
        # [seq len, batch size=1]
        src_tokens = net_input["src_tokens"][0:1].t()
        # first entry of the batch's lengths only
        src_lengths = net_input["src_lengths"][0:1].long()

        full_beam_search = BeamSearch(
            model_list,
            tgt_dict,
            src_tokens,
            src_lengths,
            beam_size=beam_size,
        )

        encoder_ensemble = EncoderEnsemble(model_list)

        # Encoder outputs feed the first decoder step further down
        with torch.no_grad():
            pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(
            model_list,
            tgt_dict,
            beam_size=beam_size,
        )

        prev_token = torch.LongTensor([tgt_dict.eos()])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
        num_steps = torch.LongTensor([2])

        with torch.no_grad():
            beam_search_outputs = full_beam_search(
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                num_steps,
            )
        (
            bs_out_tokens,
            bs_out_scores,
            bs_out_weights,
            bs_out_prev_indices,
        ) = beam_search_outputs

        # Arrays filled by the component path: row 0 keeps its initial
        # values, rows 1 and 2 are written after each decoder step.
        num_rows = num_steps + 1  # one-element tensor used as a dimension
        comp_out_tokens = (
            np.ones([num_rows, beam_size], dtype="int64") * tgt_dict.eos())
        comp_out_scores = np.zeros([num_rows, beam_size])
        comp_out_weights = np.zeros(
            [num_rows, beam_size, src_lengths.numpy()[0]])
        comp_out_prev_indices = np.zeros([num_rows, beam_size], dtype="int64")

        def record_step(row, step_outputs):
            # The first four outputs are tokens, scores, prev indices, weights.
            comp_out_tokens[row, :] = step_outputs[0]
            comp_out_scores[row, :] = step_outputs[1]
            comp_out_prev_indices[row, :] = step_outputs[2]
            comp_out_weights[row, :, :] = step_outputs[3]

        # First step: single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        with torch.no_grad():
            first_step_outputs = decoder_step_ensemble(
                input_tokens, prev_scores, timestep, *pytorch_encoder_outputs)
        record_step(1, first_step_outputs)

        timestep += 1

        # Tile states after first timestep; only the first len(model_list)
        # state tensors are repeated across the beam.
        next_states = list(first_step_outputs[4:])
        for model_index in range(len(model_list)):
            next_states[model_index] = next_states[model_index].repeat(
                1, beam_size, 1)

        with torch.no_grad():
            second_step_outputs = decoder_step_ensemble(
                first_step_outputs[0],
                first_step_outputs[1],
                timestep,
                *next_states)
        record_step(2, second_step_outputs)

        np.testing.assert_array_equal(comp_out_tokens, bs_out_tokens.numpy())
        np.testing.assert_allclose(
            comp_out_scores, bs_out_scores.numpy(), rtol=1e-4, atol=1e-6)
        np.testing.assert_array_equal(
            comp_out_prev_indices, bs_out_prev_indices.numpy())
        np.testing.assert_allclose(
            comp_out_weights, bs_out_weights.numpy(), rtol=1e-4, atol=1e-6)
예제 #6
0
    def _test_full_beam_decoder(self, test_args, quantize=False):
        """Check that a saved ``BeamSearch`` gives the same outputs once reloaded.

        A three-model ``BeamSearch`` is built and written to an in-memory
        buffer with ``save_to_pytorch``.  The original module and the copy
        restored by ``torch.jit.load`` are then called on an 11-token input
        and their outputs are compared with ``assert_allclose``.

        Args:
            test_args: model configuration; ``test_args.arch`` decides
                whether character-level inputs are generated.
            quantize: forwarded to ``BeamSearch``.
        """
        num_models = 3
        beam_size = 6
        length, word_length = 11, 7

        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        net_input = next(samples)["net_input"]
        # [seq len, batch size=1]
        src_tokens = net_input["src_tokens"][0:1].t()
        # first entry of the batch's lengths only
        src_lengths = net_input["src_lengths"][0:1].long()

        model_list = [task.build_model(test_args) for _ in range(num_models)]

        # Character inputs are only created for char-source architectures.
        char_inds, word_lengths = None, None
        if test_args.arch in constants.ARCHS_FOR_CHAR_SOURCE:
            random_chars = np.random.randint(
                0, 126, (1, length, word_length), dtype="int64")
            char_inds = torch.LongTensor(random_chars)
            per_word_lengths = np.array([word_length] * length, dtype="int32")
            word_lengths = torch.IntTensor(per_word_lengths).reshape(
                (1, length))

        bs = BeamSearch(
            model_list,
            tgt_dict,
            src_tokens,
            src_lengths,
            beam_size=beam_size,
            quantize=quantize,
            char_inds=char_inds,
            word_lengths=word_lengths,
        )
        f = io.BytesIO()
        bs.save_to_pytorch(f)

        # Test generalization with a different sequence length
        src_tokens = torch.LongTensor(list(range(1, 12))).unsqueeze(1)
        src_lengths = torch.LongTensor([11])
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(src_tokens.shape[0])
        prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

        def run(module):
            # Both modules receive the same inputs; num_steps is rebuilt
            # for every call.
            return module(
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                torch.LongTensor([20]),
                char_inds=char_inds,
                word_lengths=word_lengths,
            )

        def unpack_if_possible(submodule):
            # Call _unpack() only on submodules that define it.
            if hasattr(submodule, "_unpack"):
                return submodule._unpack()
            return None

        outs = run(bs)

        # Rewind the buffer before loading the serialized module back.
        f.seek(0)
        deserialized_bs = torch.jit.load(f)
        deserialized_bs.apply(unpack_if_possible)
        outs_deserialized = run(deserialized_bs)

        for loaded_out, eager_out in zip(outs_deserialized, outs):
            np.testing.assert_allclose(
                loaded_out.detach().numpy(), eager_out.detach().numpy())
예제 #7
0
def main():
    """Export PyTorch-trained FBTranslate checkpoints as a beam search net.

    Command-line flags:
        --checkpoint: checkpoint file(s); may be repeated and/or given
            several files per flag.  At least one is needed to export.
        --output_file: destination passed to ``save_to_db``.  When empty, a
            notice and the help text are printed and nothing is exported.
        --src_dict / --dst_dict: source / target dictionary files (required).
        --beam_size, --word_reward, --unk_reward: forwarded to
            ``BeamSearch.build_from_checkpoints``.

    Exits through ``parser.error`` (status 2) when an output file is given
    but no checkpoint was supplied.
    """
    parser = argparse.ArgumentParser(
        description=("Export PyTorch-trained FBTranslate models to caffe2"))
    parser.add_argument(
        "--checkpoint",
        action="append",
        nargs="+",
        help="PyTorch checkpoint file (at least one required)",
    )
    parser.add_argument(
        "--output_file",
        default="",
        help="File name to which to save beam search network",
    )
    parser.add_argument(
        "--src_dict",
        required=True,
        help="File encoding PyTorch dictionary for source language",
    )
    parser.add_argument(
        "--dst_dict",
        required=True,
        help="File encoding PyTorch dictionary for target language",
    )
    parser.add_argument(
        "--beam_size",
        type=int,
        default=6,
        help="Number of top candidates returned by each decoder step",
    )
    parser.add_argument(
        "--word_reward",
        type=float,
        default=0.0,
        help="Value to add for each word (besides EOS)",
    )
    parser.add_argument(
        "--unk_reward",
        type=float,
        default=0.0,
        help="Value to add for each word UNK token",
    )

    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    # Without any --checkpoint flag argparse leaves the value as None.
    if not args.checkpoint:
        parser.error("at least one --checkpoint is required")

    # action="append" combined with nargs="+" yields a list of lists (one
    # inner list per flag occurrence); flatten it so no file is dropped.
    checkpoint_filenames = [
        filename for group in args.checkpoint for filename in group
    ]

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_db(args.output_file)
예제 #8
0
def main():
    """Export PyTorch-trained FBTranslate checkpoints to a PyTorch file.

    Command-line flags:
        --path / --checkpoint: colon-separated checkpoint file path(s).
        --output-file: destination passed to ``save_to_pytorch``.  When
            empty, a notice and the help text are printed and nothing is
            exported.
        --output-graph-file: optional file that receives the string form of
            ``beam_search.graph``.
        --src-dict / --dst-dict: source / target dictionary files (required).
        --beam-size, --word-reward, --unk-reward: forwarded to
            ``BeamSearch.build_from_checkpoints``.

    Exits through ``parser.error`` (status 2) when an output file is given
    but no checkpoint path was supplied.
    """
    parser = argparse.ArgumentParser(
        description=("Export PyTorch-trained FBTranslate models"))
    parser.add_argument(
        "--path",
        "--checkpoint",
        metavar="FILE",
        help="path(s) to model file(s), colon separated",
    )
    parser.add_argument(
        "--output-file",
        default="",
        help="File name to which to save beam search network",
    )
    parser.add_argument(
        "--output-graph-file",
        default="",
        help="File name to which to save the beam search graph for debugging",
    )
    parser.add_argument(
        "--src-dict",
        required=True,
        help="File encoding PyTorch dictionary for source language",
    )
    parser.add_argument(
        "--dst-dict",
        required=True,
        help="File encoding PyTorch dictionary for target language",
    )
    parser.add_argument(
        "--beam-size",
        type=int,
        default=6,
        help="Number of top candidates returned by each decoder step",
    )
    parser.add_argument(
        "--word-reward",
        type=float,
        default=0.0,
        help="Value to add for each word (besides EOS)",
    )
    parser.add_argument(
        "--unk-reward",
        type=float,
        default=0.0,
        help="Value to add for each word UNK token",
    )

    args = parser.parse_args()

    if args.output_file == "":
        print("No action taken. Need output_file to be specified.")
        parser.print_help()
        return

    # --path has no default, so it is None when the flag is omitted.
    if not args.path:
        parser.error("--path/--checkpoint is required to export a model")

    checkpoint_filenames = args.path.split(":")

    beam_search = BeamSearch.build_from_checkpoints(
        checkpoint_filenames=checkpoint_filenames,
        src_dict_filename=args.src_dict,
        dst_dict_filename=args.dst_dict,
        beam_size=args.beam_size,
        word_reward=args.word_reward,
        unk_reward=args.unk_reward,
    )
    beam_search.save_to_pytorch(output_path=args.output_file)
    if args.output_graph_file:
        # The option is a plain string (it has no ``.path`` attribute), so
        # it is used directly as the file name.
        with open(args.output_graph_file, "w") as f:
            f.write(str(beam_search.graph))