Example #1
def load_models_from_checkpoints(checkpoint_filenames,
                                 src_dict_filename,
                                 dst_dict_filename,
                                 lexical_dict_paths=None):
    src_dict = dictionary.Dictionary.load(src_dict_filename)
    dst_dict = dictionary.Dictionary.load(dst_dict_filename)
    models = []
    for filename in checkpoint_filenames:
        checkpoint_data = torch.load(filename, map_location="cpu")
        if lexical_dict_paths is not None:
            assert (
                checkpoint_data["args"].vocab_reduction_params is not None
            ), "lexical dictionaries can only be replaced in vocab-reduction models"
            checkpoint_data["args"].vocab_reduction_params[
                "lexical_dictionaries"] = lexical_dict_paths
        task = tasks.DictionaryHolderTask(src_dict, dst_dict)

        architecture = checkpoint_data["args"].arch
        if architecture == "rnn":
            model = rnn.RNNModel.build_model(checkpoint_data["args"], task)
        elif architecture == "char_source":
            model = char_source_model.CharSourceModel.build_model(
                checkpoint_data["args"], task)
        elif architecture == "rnn_word_pred":
            model = word_prediction_model.RNNWordPredictionModel.build_model(
                checkpoint_data["args"], task)
        elif architecture == "ptt_transformer":
            model = transformer.TransformerModel.build_model(
                checkpoint_data["args"], task)
        else:
            raise RuntimeError("Architecture not supported: {architecture}")
        model.load_state_dict(checkpoint_data["model"])
        models.append(model)

    return models, src_dict, dst_dict
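
A minimal usage sketch for the loader above; the checkpoint and dictionary paths are hypothetical placeholders, and eval() is called before inference to disable dropout:

checkpoint_filenames = ["averaged_checkpoint.pt"]  # hypothetical path
models, src_dict, dst_dict = load_models_from_checkpoints(
    checkpoint_filenames,
    src_dict_filename="dictionary-src.txt",  # hypothetical path
    dst_dict_filename="dictionary-dst.txt",  # hypothetical path
)
for model in models:
    model.eval()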
Example #2
def load_models_from_checkpoints(checkpoint_filenames,
                                 src_dict_filename,
                                 dst_dict_filename,
                                 lexical_dict_paths=None):
    src_dict = dictionary.Dictionary.load(src_dict_filename)
    dst_dict = dictionary.Dictionary.load(dst_dict_filename)
    models = []
    for filename in checkpoint_filenames:
        checkpoint_data = torch.load(filename, map_location="cpu")
        if lexical_dict_paths is not None:
            assert (
                checkpoint_data["args"].vocab_reduction_params is not None
            ), "lexical dictionaries can only be replaced in vocab-reduction models"
            checkpoint_data["args"].vocab_reduction_params[
                "lexical_dictionaries"] = lexical_dict_paths
        task = tasks.DictionaryHolderTask(src_dict, dst_dict)
        if checkpoint_data["args"].arch == "char_source":
            model = char_source_model.CharSourceModel.build_model(
                checkpoint_data["args"], task)
        else:
            model = rnn.RNNModel.build_model(checkpoint_data["args"], task)
        model.load_state_dict(checkpoint_data["model"])
        models.append(model)

    return models, src_dict, dst_dict
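
This variant only distinguishes "char_source" checkpoints from the RNN fallback. To see which branch a given checkpoint will take, its stored args can be inspected directly (path hypothetical):

checkpoint_data = torch.load("averaged_checkpoint.pt", map_location="cpu")
print(checkpoint_data["args"].arch)  # e.g. "char_source" or "rnn"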
Example #3
    def _gpu_train_step(self, test_args):
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        criterion = task.build_criterion(test_args)
        trainer = Trainer(test_args, task, model, criterion)
        logging_dict = trainer.train_step(next(samples))
        return trainer, logging_dict
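
This helper would typically be called from a CUDA-gated test method; a sketch of such a wrapper, assuming the unittest-style test classes and the ModelParamsDict constructor used in the other examples:

import unittest

class TestGPUTrainStep(unittest.TestCase):
    @unittest.skipIf(not torch.cuda.is_available(), "no CUDA device available")
    def test_gpu_train_step(self):
        test_args = test_utils.ModelParamsDict()
        trainer, logging_dict = self._gpu_train_step(test_args)
        self.assertIsNotNone(logging_dict)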
Example #4
    def test_char_rnn_equivalent(self):
        """Ensure that the CharRNNEncoder.onnx_export_model path does not
        change computation"""
        test_args = test_utils.ModelParamsDict(
            encoder_bidirectional=True, sequence_lstm=True
        )
        lexical_dictionaries = test_utils.create_lexical_dictionaries()
        test_args.vocab_reduction_params = {
            "lexical_dictionaries": lexical_dictionaries,
            "num_top_words": 5,
            "max_translation_candidates_per_word": 1,
        }

        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_rnn_units = 12
        test_args.char_rnn_layers = 2

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = CharSourceEncoderEnsemble(model_list)

        length = 5
        src_tokens = torch.LongTensor(
            np.random.randint(0, len(src_dict), (length, 1), dtype="int64")
        )
        src_lengths = torch.IntTensor(np.array([length], dtype="int32"))
        word_length = 3
        char_inds = torch.LongTensor(
            np.random.randint(0, 126, (1, length, word_length), dtype="int64")
        )
        word_lengths = torch.IntTensor(
            np.array([word_length] * length, dtype="int32")
        ).reshape((1, length))

        onnx_path_outputs = encoder_ensemble(
            src_tokens, src_lengths, char_inds, word_lengths
        )

        for model in encoder_ensemble.models:
            model.encoder.onnx_export_model = False

        original_path_outputs = encoder_ensemble(
            src_tokens, src_lengths, char_inds, word_lengths
        )

        for (onnx_out, original_out) in zip(onnx_path_outputs, original_path_outputs):
            onnx_array = onnx_out.detach().numpy()
            original_array = original_out.detach().numpy()
            assert onnx_array.shape == original_array.shape
            np.testing.assert_allclose(onnx_array, original_array)
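
The final comparison loop is a pattern repeated throughout these tests; it could be factored into a small helper (the name and signature here are mine, not from the library; the tolerance defaults match np.testing.assert_allclose):

def assert_tensor_lists_close(outputs_a, outputs_b, rtol=1e-7, atol=0):
    # Elementwise comparison of two equal-length sequences of tensors.
    assert len(outputs_a) == len(outputs_b)
    for a, b in zip(outputs_a, outputs_b):
        a = a.detach().numpy()
        b = b.detach().numpy()
        assert a.shape == b.shape
        np.testing.assert_allclose(a, b, rtol=rtol, atol=atol)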
Example #5
    def test_basic_generate(self):
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        translator = beam_decode.SequenceGenerator([model], task.target_dictionary)
        src_tokens = torch.LongTensor([[0, 0, 0], [0, 0, 0]])
        src_lengths = torch.LongTensor([3, 3])
        encoder_input = (src_tokens, src_lengths)
        translator.generate(encoder_input, maxlen=7)
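
generate() also returns the hypotheses it produced. A sketch of consuming them, assuming the fairseq-style return format (one list of hypothesis dicts per input sentence, sorted best-first):

hypos = translator.generate(encoder_input, maxlen=7)
for sentence_hypos in hypos:
    best = sentence_hypos[0]
    print(best["tokens"], best["score"])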
Example #6
    def test_load_pretrained_embedding(self):
        test_args = test_utils.ModelParamsDict()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        encoder_embed_path, embed_weights = test_utils.create_pretrained_embed(
            src_dict, test_args.encoder_hidden_dim)
        test_args.encoder_pretrained_embed = encoder_embed_path
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        assert np.allclose(model.encoder.embed_tokens.weight.data.numpy(),
                           embed_weights)
        os.remove(encoder_embed_path)
Example #7
    def test_load_pretrained_embedding(self):
        encoder_embedding = open(test_utils.make_temp_file(), "wb")
        test_args = test_utils.ModelParamsDict(
            encoder_pretrained_embed=encoder_embedding.name,
        )
        # The vocabulary defaults to 103 in test_utils.prepare_inputs.
        embed_array = np.random.random((103, test_args.encoder_embed_dim))
        np.save(encoder_embedding, embed_array)
        encoder_embedding.close()
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        assert np.allclose(
            model.encoder.embed_tokens.weight.data.numpy(),
            embed_array,
        )
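
Because np.save received an open file object rather than a string path, no ".npy" suffix is appended, so the array can be read back under the same name. A hedged sanity-check-and-cleanup addendum (Example #6 does the equivalent os.remove itself):

assert np.allclose(np.load(encoder_embedding.name), embed_array)
os.remove(encoder_embedding.name)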
Example #8
    def _test_forced_decoder_export(self, test_args):
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))

        forced_decoder_ensemble = ForcedDecoder(
            model_list, tgt_dict, word_reward=0.25, unk_reward=-0.5
        )

        tmp_dir = tempfile.mkdtemp()
        forced_decoder_pb_path = os.path.join(tmp_dir, "forced_decoder.pb")
        forced_decoder_ensemble.onnx_export(forced_decoder_pb_path)
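
tempfile.mkdtemp() leaves the directory behind after the test; if that matters, the same export can be wrapped in a context manager (a sketch, not how the library's tests do it):

with tempfile.TemporaryDirectory() as tmp_dir:
    forced_decoder_pb_path = os.path.join(tmp_dir, "forced_decoder.pb")
    forced_decoder_ensemble.onnx_export(forced_decoder_pb_path)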
Example #9
    def _test_ensemble_encoder_export_char_source(self, test_args):
        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = CharSourceEncoderEnsemble(model_list)

        tmp_dir = tempfile.mkdtemp()
        encoder_pb_path = os.path.join(tmp_dir, "char_encoder.pb")
        encoder_ensemble.onnx_export(encoder_pb_path)

        length = 5
        src_tokens = torch.LongTensor(np.ones((length, 1), dtype="int64"))
        src_lengths = torch.IntTensor(np.array([length], dtype="int32"))
        word_length = 3
        char_inds = torch.LongTensor(np.ones((1, length, word_length), dtype="int64"))
        word_lengths = torch.IntTensor(
            np.array([word_length] * length, dtype="int32")
        ).reshape((1, length))

        pytorch_encoder_outputs = encoder_ensemble(
            src_tokens, src_lengths, char_inds, word_lengths
        )

        onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

        caffe2_encoder_outputs = onnx_encoder.run(
            (
                src_tokens.numpy(),
                src_lengths.numpy(),
                char_inds.numpy(),
                word_lengths.numpy(),
            )
        )

        for i in range(len(pytorch_encoder_outputs)):
            caffe2_out_value = caffe2_encoder_outputs[i]
            pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
            np.testing.assert_allclose(
                caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6
            )

        encoder_ensemble.save_to_db(os.path.join(tmp_dir, "encoder.predictor_export"))
Example #10
    def test_char_rnn_generate(self):
        test_args = test_utils.ModelParamsDict(sequence_lstm=True)
        test_args.arch = "char_source"
        test_args.char_source_dict_size = 126
        test_args.char_embed_dim = 8
        test_args.char_rnn_units = 12
        test_args.char_rnn_layers = 2

        _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        model = task.build_model(test_args)
        translator = beam_decode.SequenceGenerator([model], task.target_dictionary)
        src_tokens = torch.LongTensor([[0, 0, 0], [0, 0, 0]])
        src_lengths = torch.LongTensor([3, 3])
        char_inds = torch.LongTensor(np.zeros((2, 3, 5)))
        word_lengths = torch.LongTensor([[5, 5, 5], [5, 5, 5]])
        encoder_input = (src_tokens, src_lengths, char_inds, word_lengths)
        translator.generate(encoder_input, maxlen=7)
Example #11
    def _test_ensemble_encoder_export(self, test_args):
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = EncoderEnsemble(model_list)

        tmp_dir = tempfile.mkdtemp()
        encoder_pb_path = os.path.join(tmp_dir, "encoder.pb")
        encoder_ensemble.onnx_export(encoder_pb_path)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)

        caffe2_encoder_outputs = onnx_encoder.run(
            (src_tokens.numpy(), src_lengths.numpy()))

        for i in range(len(pytorch_encoder_outputs)):
            caffe2_out_value = caffe2_encoder_outputs[i]
            pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
            np.testing.assert_allclose(caffe2_out_value,
                                       pytorch_out_value,
                                       rtol=1e-4,
                                       atol=1e-6)

        encoder_ensemble.save_to_db(
            os.path.join(tmp_dir, "encoder.predictor_export"))
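
The int64/int32 juggling that the comment describes, in isolation: batch tensors arrive as int64 because PyTorch indexing requires it, while the traced pack_padded_sequence() path wants int32 lengths, hence the .int() cast above. A minimal illustration:

src_lengths = torch.LongTensor([5])    # int64, as produced by the batcher
src_lengths_int32 = src_lengths.int()  # int32, as the traced graph expects
assert src_lengths_int32.dtype == torch.int32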
Example #12
    def _test_full_beam_decoder(self, test_args):
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))

        bs = BeamSearch(model_list, tgt_dict, src_tokens, src_lengths, beam_size=6)
        prev_token = torch.LongTensor([0])
        prev_scores = torch.FloatTensor([0.0])
        attn_weights = torch.zeros(11)
        prev_hypos_indices = torch.zeros(6, dtype=torch.int64)

        outs = bs(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        )

        import io

        f = io.BytesIO()
        torch.onnx._export(
            bs,
            (
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                torch.LongTensor([20]),
            ),
            f,
            export_params=True,
            verbose=False,
            example_outputs=outs,
        )

        torch.onnx._export_to_pretty_string(
            bs,
            (
                src_tokens,
                src_lengths,
                prev_token,
                prev_scores,
                attn_weights,
                prev_hypos_indices,
                torch.LongTensor([20]),
            ),
            f,
            export_params=True,
            verbose=False,
            example_outputs=outs,
        )

        f.seek(0)

        onnx_model = onnx.load(f)
        c2_model = caffe2_backend.prepare(onnx_model)
        c2_model.run(
            (
                src_tokens.numpy(),
                src_lengths.numpy(),
                prev_token.numpy(),
                prev_scores.numpy(),
                attn_weights.numpy(),
                prev_hypos_indices.numpy(),
                np.array([20]),
            )
        )
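
torch.onnx._export is an internal entry point. With the public API, the same call would look roughly like this (a sketch; argument support, including example_outputs, varies across torch versions):

torch.onnx.export(
    bs,
    (src_tokens, src_lengths, prev_token, prev_scores,
     attn_weights, prev_hypos_indices, torch.LongTensor([20])),
    f,
    export_params=True,
    verbose=False,
)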
Example #13
    def _test_batched_beam_decoder_step(self, test_args):
        beam_size = 5
        samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
        task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

        num_models = 3
        model_list = []
        for _ in range(num_models):
            model_list.append(task.build_model(test_args))
        encoder_ensemble = EncoderEnsemble(model_list)

        # test equivalence
        # The discrepancy in types here is a temporary expedient.
        # PyTorch indexing requires int64 while support for tracing
        # pack_padded_sequence() requires int32.
        sample = next(samples)
        src_tokens = sample["net_input"]["src_tokens"][0:1].t()
        src_lengths = sample["net_input"]["src_lengths"][0:1].int()

        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

        decoder_step_ensemble = DecoderBatchedStepEnsemble(
            model_list, tgt_dict, beam_size=beam_size
        )

        tmp_dir = tempfile.mkdtemp()
        decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
        decoder_step_ensemble.onnx_export(decoder_step_pb_path, pytorch_encoder_outputs)

        # single EOS in flat array
        input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
        prev_scores = torch.FloatTensor(np.array([0.0]))
        timestep = torch.LongTensor(np.array([0]))

        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs
        )

        # next step inputs (input_tokens shape: [beam_size])
        next_input_tokens = torch.LongTensor(np.arange(4, 9))

        next_prev_scores = pytorch_first_step_outputs[1]
        next_timestep = timestep + 1
        next_states = list(pytorch_first_step_outputs[4:])

        # Tile these for the next timestep
        for i in range(len(model_list)):
            next_states[i] = next_states[i].repeat(1, beam_size, 1)

        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, next_timestep, *next_states
        )

        onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)

        decoder_inputs_numpy = [
            next_input_tokens.numpy(),
            next_prev_scores.detach().numpy(),
            next_timestep.detach().numpy(),
        ]
        for tensor in next_states:
            decoder_inputs_numpy.append(tensor.detach().numpy())

        caffe2_next_step_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

        for i in range(len(pytorch_next_step_outputs)):
            caffe2_out_value = caffe2_next_step_outputs[i]
            pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
            np.testing.assert_allclose(
                caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6
            )
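
The shape arithmetic behind the tiling step above, with hypothetical sizes: each first-step state is laid out with the hypothesis dimension in the middle, so repeat(1, beam_size, 1) replicates the single first-step hypothesis into every beam slot.

state = torch.zeros(2, 1, 12)  # hypothetical (layers=2, hypos=1, hidden=12)
tiled = state.repeat(1, 5, 1)  # -> (2, 5, 12) for beam_size=5
assert tiled.shape == (2, 5, 12)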