# NOTE: the original snippets omit their imports. The module paths below
# follow the pytorch/translate repository layout and are assumptions.
import io
import os
import tempfile
import unittest

import numpy as np
import onnx
import torch

import caffe2.python.onnx.backend as caffe2_backend
from fairseq.trainer import Trainer
from pytorch_translate import (
    beam_decode,
    char_source_model,
    dictionary,
    rnn,
    tasks,
    transformer,
)
from pytorch_translate.ensemble_export import (
    BeamSearch,
    CharSourceEncoderEnsemble,
    DecoderBatchedStepEnsemble,
    EncoderEnsemble,
    ForcedDecoder,
)
from pytorch_translate.test import utils as test_utils
from pytorch_translate.word_prediction import word_prediction_model


def load_models_from_checkpoints(
    checkpoint_filenames,
    src_dict_filename,
    dst_dict_filename,
    lexical_dict_paths=None,
):
    src_dict = dictionary.Dictionary.load(src_dict_filename)
    dst_dict = dictionary.Dictionary.load(dst_dict_filename)
    models = []
    for filename in checkpoint_filenames:
        checkpoint_data = torch.load(filename, map_location="cpu")
        if lexical_dict_paths is not None:
            assert (
                checkpoint_data["args"].vocab_reduction_params is not None
            ), "lexical dictionaries can only be replaced in vocab-reduction models"
            checkpoint_data["args"].vocab_reduction_params[
                "lexical_dictionaries"
            ] = lexical_dict_paths
        task = tasks.DictionaryHolderTask(src_dict, dst_dict)
        architecture = checkpoint_data["args"].arch
        if architecture == "rnn":
            model = rnn.RNNModel.build_model(checkpoint_data["args"], task)
        elif architecture == "char_source":
            model = char_source_model.CharSourceModel.build_model(
                checkpoint_data["args"], task
            )
        elif architecture == "rnn_word_pred":
            model = word_prediction_model.RNNWordPredictionModel.build_model(
                checkpoint_data["args"], task
            )
        elif architecture == "ptt_transformer":
            model = transformer.TransformerModel.build_model(
                checkpoint_data["args"], task
            )
        else:
            # Use an f-string so the architecture name is interpolated
            # (the original raised a plain string literal).
            raise RuntimeError(f"Architecture not supported: {architecture}")
        model.load_state_dict(checkpoint_data["model"])
        models.append(model)
    return models, src_dict, dst_dict

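# A minimal, hypothetical usage sketch for load_models_from_checkpoints.
# The checkpoint and dictionary paths are placeholder assumptions, not
# artifacts that ship with the repository.
def example_load_ensemble():
    models, src_dict, dst_dict = load_models_from_checkpoints(
        checkpoint_filenames=["model1.pt", "model2.pt"],
        src_dict_filename="dictionary-src.txt",
        dst_dict_filename="dictionary-tgt.txt",
    )
    # Each checkpoint must have been trained against matching dictionaries;
    # the returned models can then be wrapped in the ensemble exporters
    # below (EncoderEnsemble, BeamSearch, etc.).
    return models, src_dict, dst_dict
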
def _gpu_train_step(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    criterion = task.build_criterion(test_args)
    trainer = Trainer(test_args, task, model, criterion)
    logging_dict = trainer.train_step(next(samples))
    return trainer, logging_dict

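# A sketch of how _gpu_train_step might be driven from a test method. The
# skipIf guard and the presence of a "loss" entry in the aggregated
# logging output are assumptions about this fairseq version.
@unittest.skipIf(not torch.cuda.is_available(), "No GPU available for test.")
def test_gpu_train_step(self):
    test_args = test_utils.ModelParamsDict()
    trainer, logging_dict = self._gpu_train_step(test_args)
    assert "loss" in logging_dict  # one optimizer step ran and was logged
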
def test_char_rnn_equivalent(self):
    """Ensure that the CharRNNEncoder.onnx_export_model path does not
    change computation."""
    test_args = test_utils.ModelParamsDict(
        encoder_bidirectional=True, sequence_lstm=True
    )
    lexical_dictionaries = test_utils.create_lexical_dictionaries()
    test_args.vocab_reduction_params = {
        "lexical_dictionaries": lexical_dictionaries,
        "num_top_words": 5,
        "max_translation_candidates_per_word": 1,
    }
    test_args.arch = "char_source"
    test_args.char_source_dict_size = 126
    test_args.char_embed_dim = 8
    test_args.char_rnn_units = 12
    test_args.char_rnn_layers = 2

    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = CharSourceEncoderEnsemble(model_list)

    length = 5
    src_tokens = torch.LongTensor(
        np.random.randint(0, len(src_dict), (length, 1), dtype="int64")
    )
    src_lengths = torch.IntTensor(np.array([length], dtype="int32"))
    word_length = 3
    char_inds = torch.LongTensor(
        np.random.randint(0, 126, (1, length, word_length), dtype="int64")
    )
    word_lengths = torch.IntTensor(
        np.array([word_length] * length, dtype="int32")
    ).reshape((1, length))

    # Run once along the ONNX-export code path...
    onnx_path_outputs = encoder_ensemble(
        src_tokens, src_lengths, char_inds, word_lengths
    )

    # ...then disable that path and rerun the original computation.
    for model in encoder_ensemble.models:
        model.encoder.onnx_export_model = False
    original_path_outputs = encoder_ensemble(
        src_tokens, src_lengths, char_inds, word_lengths
    )

    for onnx_out, original_out in zip(onnx_path_outputs, original_path_outputs):
        onnx_array = onnx_out.detach().numpy()
        original_array = original_out.detach().numpy()
        assert onnx_array.shape == original_array.shape
        np.testing.assert_allclose(onnx_array, original_array)

def test_basic_generate(self):
    test_args = test_utils.ModelParamsDict()
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    translator = beam_decode.SequenceGenerator([model], task.target_dictionary)
    src_tokens = torch.LongTensor([[0, 0, 0], [0, 0, 0]])
    src_lengths = torch.LongTensor([3, 3])
    encoder_input = (src_tokens, src_lengths)
    translator.generate(encoder_input, maxlen=7)

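# A sketch (not part of the original tests) of how the generator's output
# might be consumed. It assumes the fairseq-era convention that generate()
# returns, per source sentence, a list of hypothesis dicts with "tokens"
# and "score" entries; treat that structure as an assumption.
def example_print_hypotheses(translator, encoder_input, tgt_dict):
    for sent_hypos in translator.generate(encoder_input, maxlen=7):
        for hypo in sent_hypos:
            # Dictionary.string() detokenizes an index tensor back to text.
            print(hypo["score"], tgt_dict.string(hypo["tokens"]))
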
def test_load_pretrained_embedding(self):
    test_args = test_utils.ModelParamsDict()
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    encoder_embed_path, embed_weights = test_utils.create_pretrained_embed(
        src_dict, test_args.encoder_hidden_dim
    )
    test_args.encoder_pretrained_embed = encoder_embed_path
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    assert np.allclose(
        model.encoder.embed_tokens.weight.data.numpy(), embed_weights
    )
    os.remove(encoder_embed_path)

def test_load_pretrained_embedding_from_file(self):
    # Variant of the test above that writes the embedding array to a temp
    # file inline instead of using test_utils.create_pretrained_embed().
    encoder_embedding = open(test_utils.make_temp_file(), "wb")
    test_args = test_utils.ModelParamsDict(
        encoder_pretrained_embed=encoder_embedding.name
    )
    # The vocabulary defaults to 103 in test_utils.prepare_inputs.
    embed_array = np.random.random((103, test_args.encoder_embed_dim))
    np.save(encoder_embedding, embed_array)
    encoder_embedding.close()

    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    assert np.allclose(
        model.encoder.embed_tokens.weight.data.numpy(), embed_array
    )

def _test_forced_decoder_export(self, test_args):
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    forced_decoder_ensemble = ForcedDecoder(
        model_list, tgt_dict, word_reward=0.25, unk_reward=-0.5
    )

    tmp_dir = tempfile.mkdtemp()
    forced_decoder_pb_path = os.path.join(tmp_dir, "forced_decoder.pb")
    forced_decoder_ensemble.onnx_export(forced_decoder_pb_path)

def _test_ensemble_encoder_export_char_source(self, test_args):
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = CharSourceEncoderEnsemble(model_list)

    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, "char_encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    length = 5
    src_tokens = torch.LongTensor(np.ones((length, 1), dtype="int64"))
    src_lengths = torch.IntTensor(np.array([length], dtype="int32"))
    word_length = 3
    char_inds = torch.LongTensor(np.ones((1, length, word_length), dtype="int64"))
    word_lengths = torch.IntTensor(
        np.array([word_length] * length, dtype="int32")
    ).reshape((1, length))

    pytorch_encoder_outputs = encoder_ensemble(
        src_tokens, src_lengths, char_inds, word_lengths
    )

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)
    caffe2_encoder_outputs = onnx_encoder.run(
        (
            src_tokens.numpy(),
            src_lengths.numpy(),
            char_inds.numpy(),
            word_lengths.numpy(),
        )
    )

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6
        )

    encoder_ensemble.save_to_db(os.path.join(tmp_dir, "encoder.predictor_export"))

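# The PyTorch-vs-Caffe2 comparison loop above recurs in several of these
# export tests; a small helper like the one below could factor it out.
# The name assert_backend_outputs_close is hypothetical, not an existing
# utility in the repository.
def assert_backend_outputs_close(pytorch_outputs, caffe2_outputs,
                                 rtol=1e-4, atol=1e-6):
    assert len(pytorch_outputs) == len(caffe2_outputs)
    for pytorch_out, caffe2_out in zip(pytorch_outputs, caffe2_outputs):
        np.testing.assert_allclose(
            caffe2_out, pytorch_out.detach().numpy(), rtol=rtol, atol=atol
        )
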
def test_char_rnn_generate(self):
    test_args = test_utils.ModelParamsDict(sequence_lstm=True)
    test_args.arch = "char_source"
    test_args.char_source_dict_size = 126
    test_args.char_embed_dim = 8
    test_args.char_rnn_units = 12
    test_args.char_rnn_layers = 2

    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    translator = beam_decode.SequenceGenerator([model], task.target_dictionary)

    src_tokens = torch.LongTensor([[0, 0, 0], [0, 0, 0]])
    src_lengths = torch.LongTensor([3, 3])
    # Build the char indices as int64 explicitly so the LongTensor
    # construction does not depend on an implicit float-to-int conversion.
    char_inds = torch.LongTensor(np.zeros((2, 3, 5), dtype="int64"))
    word_lengths = torch.LongTensor([[5, 5, 5], [5, 5, 5]])

    encoder_input = (src_tokens, src_lengths, char_inds, word_lengths)
    translator.generate(encoder_input, maxlen=7)

def _test_ensemble_encoder_export(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = EncoderEnsemble(model_list)

    tmp_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(tmp_dir, "encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    # Test equivalence. The discrepancy in types here is a temporary
    # expedient: PyTorch indexing requires int64, while support for
    # tracing pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)
    caffe2_encoder_outputs = onnx_encoder.run(
        (src_tokens.numpy(), src_lengths.numpy())
    )

    for i in range(len(pytorch_encoder_outputs)):
        caffe2_out_value = caffe2_encoder_outputs[i]
        pytorch_out_value = pytorch_encoder_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6
        )

    encoder_ensemble.save_to_db(
        os.path.join(tmp_dir, "encoder.predictor_export")
    )

def _test_full_beam_decoder(self, test_args):
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    sample = next(samples)
    # PyTorch indexing requires int64, while tracing pack_padded_sequence()
    # requires int32 lengths (see _test_ensemble_encoder_export above).
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    bs = BeamSearch(model_list, tgt_dict, src_tokens, src_lengths, beam_size=6)
    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(11)
    prev_hypos_indices = torch.zeros(6, dtype=torch.int64)

    outs = bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
    )

    f = io.BytesIO()
    torch.onnx._export(
        bs,
        (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        ),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs,
    )

    torch.onnx._export_to_pretty_string(
        bs,
        (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        ),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs,
    )

    f.seek(0)
    onnx_model = onnx.load(f)
    c2_model = caffe2_backend.prepare(onnx_model)
    c2_model.run(
        (
            src_tokens.numpy(),
            src_lengths.numpy(),
            prev_token.numpy(),
            prev_scores.numpy(),
            attn_weights.numpy(),
            prev_hypos_indices.numpy(),
            np.array([20]),
        )
    )

def _test_batched_beam_decoder_step(self, test_args):
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))
    encoder_ensemble = EncoderEnsemble(model_list)

    # Test equivalence. The discrepancy in types here is a temporary
    # expedient: PyTorch indexing requires int64, while support for
    # tracing pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        model_list, tgt_dict, beam_size=beam_size
    )

    tmp_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(tmp_dir, "decoder_step.pb")
    decoder_step_ensemble.onnx_export(decoder_step_pb_path, pytorch_encoder_outputs)

    # Single EOS in a flat array.
    input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    pytorch_first_step_outputs = decoder_step_ensemble(
        input_tokens, prev_scores, timestep, *pytorch_encoder_outputs
    )

    # Next-step inputs (input_tokens shape: [beam_size]).
    next_input_tokens = torch.LongTensor(np.arange(4, 9))
    next_prev_scores = pytorch_first_step_outputs[1]
    next_timestep = timestep + 1
    next_states = list(pytorch_first_step_outputs[4:])

    # Tile the per-model encoder states across the beam for the next step.
    for i in range(len(model_list)):
        next_states[i] = next_states[i].repeat(1, beam_size, 1)

    pytorch_next_step_outputs = decoder_step_ensemble(
        next_input_tokens, next_prev_scores, next_timestep, *next_states
    )

    onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)
    decoder_inputs_numpy = [
        next_input_tokens.numpy(),
        next_prev_scores.detach().numpy(),
        next_timestep.detach().numpy(),
    ]
    for tensor in next_states:
        decoder_inputs_numpy.append(tensor.detach().numpy())

    caffe2_next_step_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i in range(len(pytorch_next_step_outputs)):
        caffe2_out_value = caffe2_next_step_outputs[i]
        pytorch_out_value = pytorch_next_step_outputs[i].detach().numpy()
        np.testing.assert_allclose(
            caffe2_out_value, pytorch_out_value, rtol=1e-4, atol=1e-6
        )

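# A standalone sketch of the state tiling used above: a [T, 1, C] encoder
# state is repeated along the batch axis so each of the beam_size
# hypotheses reads the same source representation. The shapes here are
# illustrative assumptions matching the test, not API guarantees.
def example_tile_state_for_beam():
    beam_size = 5
    state = torch.randn(7, 1, 16)           # [src_len, batch=1, hidden]
    tiled = state.repeat(1, beam_size, 1)   # -> [src_len, beam_size, hidden]
    assert tiled.shape == (7, beam_size, 16)
    # Every beam column holds an identical copy of the single-sentence state.
    assert torch.equal(tiled[:, 0, :], tiled[:, 3, :])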