def _test_ensemble_encoder_export(self, test_args):
    """Export an ensemble encoder to ONNX, check Caffe2 parity against
    the PyTorch forward pass, then save the predictor export.
    """
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    ensemble_members = [task.build_model(test_args) for _ in range(3)]
    encoder_ensemble = EncoderEnsemble(ensemble_members)

    export_dir = tempfile.mkdtemp()
    encoder_pb_path = os.path.join(export_dir, "encoder.pb")
    encoder_ensemble.onnx_export(encoder_pb_path)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    pytorch_outputs = encoder_ensemble(src_tokens, src_lengths)

    onnx_encoder = caffe2_backend.prepare_zip_archive(encoder_pb_path)
    caffe2_outputs = onnx_encoder.run((src_tokens.numpy(), src_lengths.numpy()))

    for i, pytorch_tensor in enumerate(pytorch_outputs):
        np.testing.assert_allclose(
            caffe2_outputs[i],
            pytorch_tensor.detach().numpy(),
            rtol=1e-4,
            atol=1e-6,
        )

    encoder_ensemble.save_to_db(
        os.path.join(export_dir, "encoder.predictor_export")
    )
def test_batch_computation(self):
    """Rescorer scores should not depend on batch composition.

    Scores hypotheses for two source sentences in one batch, then
    rescores the first source sentence alone, and checks that the
    first sentence's scores are identical in both runs.
    """
    test_args = test_utils.ModelParamsDict("transformer")
    test_args.enable_rescoring = True
    test_args.l2r_model_path = "/tmp/test_rescorer_model.pt"
    test_args.l2r_model_weight = 1.0
    test_args.r2l_model_weight = 0.0
    test_args.reverse_model_weight = 0.0
    test_args.cloze_transformer_weight = 1.0
    test_args.lm_model_weight = 0.0
    # Previously assigned twice (1, then 1.0); a single assignment with the
    # final value is equivalent.
    test_args.length_penalty = 1.0

    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
    model = task.build_model(test_args)
    torch.save(model, test_args.l2r_model_path)

    # Patch model loading so the Rescorer reuses the in-memory model.
    with patch(
        "pytorch_translate.utils.load_diverse_ensemble_for_inference",
        return_value=([model], test_args, task),
    ):
        rescorer = Rescorer(test_args)

        src_tokens = torch.tensor([[1, 3, 3, 4, 2], [1, 3, 2, 0, 0]])
        hypos = [
            {"tokens": torch.tensor([1, 5, 2])},
            {"tokens": torch.tensor([6, 3, 5, 2])},
            {"tokens": torch.tensor([1, 2])},
            {"tokens": torch.tensor([1, 5, 6, 2])},
        ]
        scores = rescorer.score(src_tokens, hypos)

        src_tokens = torch.tensor([[1, 3, 3, 4, 2]])
        hypos = [
            {"tokens": torch.tensor([1, 5, 2])},
            {"tokens": torch.tensor([6, 3, 5, 2])},
        ]
        scores_single = rescorer.score(src_tokens, hypos)

        assert torch.equal(scores[0], scores_single[0])
def test_topk_kd_loss(self):
    """
    Builds the knowledge-distillation criterion and checks that its
    top-k KD loss matches a dense (full-vocabulary) recomputation.
    """
    test_args = test_utils.ModelParamsDict()
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    sample = self._dummy_sample()

    student = self.task.build_model(test_args)
    net_output = student(**sample["net_input"])
    student_lprobs = student.get_normalized_probs(net_output, log_probs=True)
    # [bsz, seqlen, vocab] -> [bsz*seqlen, vocab]
    flat_lprobs = student_lprobs.view(-1, student_lprobs.size(-1))

    teacher = self.task.build_model(test_args)
    teacher_probs = teacher.get_normalized_probs(net_output, log_probs=False)
    top_k_probs, top_k_indices = torch.topk(teacher_probs, k=3)
    normalized_top_k = F.normalize(top_k_probs, p=1, dim=2).detach()
    sample["top_k_scores"] = normalized_top_k
    sample["top_k_indices"] = top_k_indices

    kd_criterion = (
        knowledge_distillation_loss.KnowledgeDistillationCriterion.build_criterion(
            test_args, self.task
        )
    )
    kd_loss = kd_criterion.get_kd_loss(sample, student_lprobs, flat_lprobs)

    # Recompute the loss from a dense teacher distribution: scatter the
    # normalized top-k probabilities into a zero matrix and take the
    # cross-entropy against the student log-probs.
    dense_probs = (
        torch.zeros(student_lprobs.shape)
        .type_as(student_lprobs)
        .scatter(2, top_k_indices, normalized_top_k.float())
    )
    dense_flat = dense_probs.view(-1, dense_probs.size(-1))
    reference_loss = -torch.sum(dense_flat * flat_lprobs)

    np.testing.assert_almost_equal(kd_loss.item(), reference_loss.item(), decimal=4)
    assert kd_loss >= 0
def test_model_passing_as_parameter(self):
    """Rescorer should accept pre-built models passed directly in its
    third argument instead of loading them from checkpoint paths.

    NOTE(review): this test moves tensors to CUDA and therefore
    requires a GPU to run.
    """
    test_args = test_utils.ModelParamsDict("transformer")
    test_args.enable_rescoring = True
    test_args.l2r_model_weight = 1.0
    test_args.r2l_model_weight = 0.0
    test_args.reverse_model_weight = 0.0
    test_args.lm_model_weight = 1.01
    test_args.cloze_transformer_weight = 1.0
    # Previously assigned twice (1, then 1.0); a single assignment with the
    # final value is equivalent.
    test_args.length_penalty = 1.0

    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.PytorchTranslateTask(test_args, src_dict, tgt_dict)
    model = task.build_model(test_args)

    src_tokens = torch.tensor([1, 2, 3, 4, 5]).cuda()
    hypos = [
        {"tokens": torch.tensor([1, 2]).cuda()},
        {"tokens": torch.tensor([1, 2]).cuda()},
    ]
    rescorer = Rescorer(
        test_args, task, {"l2r_model": {"model": model, "task": task}}
    )
    scores = rescorer.score(src_tokens, hypos)
    assert scores.size()[1] == 5
def test_beam_search_and_decode_generate(self):
    """
    A basic test that the output given by BeamSearchAndDecode class
    is the same as SequenceGenerator.
    """
    test_args = test_utils.ModelParamsDict(arch="rnn")
    test_args.sequence_lstm = True

    BEAM_SIZE = 1
    WORD_REWARD = 1
    UNK_REWARD = -1
    LENGTH_PENALTY = 0
    PLACEHOLDER_SEQ_LENGTH = 5
    NBEST = 2
    MAX_SEQ_LEN = 7

    src_tokens = torch.LongTensor([[0, 0, 0]])
    src_lengths = torch.LongTensor([3])

    # Build model list
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    models = task.build_model(test_args)

    # Placeholder inputs for BeamSearchAndDecode. These were previously
    # constructed twice with identical values; building them once suffices.
    placeholder_src_tokens = torch.LongTensor(
        np.ones((PLACEHOLDER_SEQ_LENGTH, 1), dtype="int64")
    )
    placeholder_src_lengths = torch.IntTensor(
        np.array([PLACEHOLDER_SEQ_LENGTH], dtype="int32")
    )
    prev_token = torch.LongTensor([tgt_dict.eos()])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_lengths[0].item())
    prev_hypos_indices = torch.zeros(BEAM_SIZE, dtype=torch.int64)
    num_steps = torch.LongTensor([MAX_SEQ_LEN])

    # Generate output using SequenceGenerator
    translator = SequenceGenerator(
        [models],
        task.target_dictionary,
        beam_size=BEAM_SIZE,
        word_reward=WORD_REWARD,
        unk_reward=UNK_REWARD,
    )
    encoder_input = {"src_tokens": src_tokens, "src_lengths": src_lengths}
    top_seq_gen_hypothesis = translator.generate(
        encoder_input, beam_size=BEAM_SIZE, maxlen=MAX_SEQ_LEN
    )[0]

    # Generate output using BeamSearchAndDecode class
    beam_search_and_decode = BeamSearchAndDecode(
        [models],
        tgt_dict=tgt_dict,
        src_tokens=placeholder_src_tokens,
        src_lengths=placeholder_src_lengths,
        eos_token_id=tgt_dict.eos(),
        length_penalty=LENGTH_PENALTY,
        nbest=NBEST,
        beam_size=BEAM_SIZE,
        stop_at_eos=True,
        word_reward=WORD_REWARD,
        unk_reward=UNK_REWARD,
        quantize=True,
    )
    beam_search_and_decode_output = beam_search_and_decode(
        src_tokens.transpose(0, 1),
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        num_steps[0],
    )

    for hyp_index in range(
        min(len(beam_search_and_decode_output), len(top_seq_gen_hypothesis))
    ):
        beam_search_and_decode_hypothesis = beam_search_and_decode_output[hyp_index]

        # Compare two outputs
        # We always look only from 0 to MAX_SEQ_LEN, because sequence generator
        # adds an EOS at the end after MAX_SEQ_LEN

        # Compare two hypotheses
        np.testing.assert_array_equal(
            top_seq_gen_hypothesis[hyp_index]["tokens"].tolist()[0:MAX_SEQ_LEN],
            beam_search_and_decode_hypothesis[0].tolist()[0:MAX_SEQ_LEN],
        )
        # Compare token level scores
        np.testing.assert_array_almost_equal(
            top_seq_gen_hypothesis[hyp_index]["positional_scores"].tolist()[
                0:MAX_SEQ_LEN
            ],
            beam_search_and_decode_hypothesis[2][0:MAX_SEQ_LEN],
            decimal=1,
        )
        # Compare attention weights
        np.testing.assert_array_almost_equal(
            top_seq_gen_hypothesis[hyp_index]["attention"].numpy()[:, 0:MAX_SEQ_LEN],
            beam_search_and_decode_hypothesis[3].numpy()[:, 0:MAX_SEQ_LEN],
            decimal=1,
        )
def _test_batched_beam_decoder_step(self, test_args):
    """Export the batched decoder step to an ONNX file and compare the
    Caffe2 backend's outputs against PyTorch on the second beam step.
    """
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    ensemble_members = [
        models.build_model(test_args, src_dict, tgt_dict) for _ in range(3)
    ]
    encoder_ensemble = EncoderEnsemble(ensemble_members)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample['net_input']['src_tokens'][0:1].t()
    src_lengths = sample['net_input']['src_lengths'][0:1].int()

    encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        ensemble_members,
        beam_size=beam_size,
    )

    export_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(export_dir, 'decoder_step.pb')
    decoder_step_ensemble.onnx_export(decoder_step_pb_path, encoder_outputs)

    # single EOS in flat array
    input_tokens = torch.LongTensor(
        np.array([ensemble_members[0].dst_dict.eos()]),
    )
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    first_step_outputs = decoder_step_ensemble(
        input_tokens, prev_scores, timestep, *encoder_outputs
    )

    # next step inputs (input_tokens shape: [beam_size])
    next_input_tokens = torch.LongTensor(np.arange(4, 9))
    next_prev_scores = first_step_outputs[1]
    next_timestep = timestep + 1
    next_states = first_step_outputs[4:]

    # encoder outputs need to be replicated for each input hypothesis
    step_inputs = [
        encoder_rep.repeat(1, beam_size, 1)
        for encoder_rep in encoder_outputs[:len(ensemble_members)]
    ]
    if ensemble_members[0].decoder.vocab_reduction_module is not None:
        step_inputs.append(encoder_outputs[len(ensemble_members)])
    step_inputs.extend(list(next_states))

    next_step_outputs = decoder_step_ensemble(
        next_input_tokens, next_prev_scores, next_timestep, *step_inputs
    )

    with open(decoder_step_pb_path, 'r+b') as f:
        onnx_model = onnx.load(f)
    onnx_decoder = caffe2_backend.prepare(onnx_model)

    decoder_inputs_numpy = [
        next_input_tokens.numpy(),
        next_prev_scores.detach().numpy(),
        next_timestep.detach().numpy(),
    ]
    decoder_inputs_numpy.extend(t.detach().numpy() for t in step_inputs)

    caffe2_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i, expected in enumerate(next_step_outputs):
        np.testing.assert_allclose(
            caffe2_outputs[i],
            expected.data.numpy(),
            rtol=1e-4,
            atol=1e-6,
        )
def _test_full_ensemble_export(self, test_args):
    """Export a single-token decoder step to ONNX, check Caffe2 parity
    against PyTorch, then save the predictor export.
    """
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    ensemble_members = [
        models.build_model(test_args, src_dict, tgt_dict) for _ in range(3)
    ]
    encoder_ensemble = EncoderEnsemble(ensemble_members)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample['net_input']['src_tokens'][0:1].t()
    src_lengths = sample['net_input']['src_lengths'][0:1].int()

    encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderStepEnsemble(
        ensemble_members,
        beam_size=5,
    )

    export_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(export_dir, 'decoder_step.pb')
    decoder_step_ensemble.onnx_export(decoder_step_pb_path, encoder_outputs)

    # single EOS
    input_token = torch.LongTensor(
        np.array([[ensemble_members[0].dst_dict.eos()]]),
    )
    timestep = torch.LongTensor(np.array([[0]]))

    pytorch_outputs = decoder_step_ensemble(
        input_token, timestep, *encoder_outputs
    )

    with open(decoder_step_pb_path, 'r+b') as f:
        onnx_model = onnx.load(f)
    onnx_decoder = caffe2_backend.prepare(onnx_model)

    decoder_inputs_numpy = [input_token.numpy(), timestep.numpy()]
    decoder_inputs_numpy.extend(t.detach().numpy() for t in encoder_outputs)

    caffe2_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i, expected in enumerate(pytorch_outputs):
        np.testing.assert_allclose(
            caffe2_outputs[i],
            expected.data.numpy(),
            rtol=1e-4,
            atol=1e-6,
        )

    decoder_step_ensemble.save_to_db(
        os.path.join(export_dir, 'decoder_step.predictor_export'),
        encoder_outputs,
    )
def _test_full_beam_decoder(self, test_args):
    """Trace the full BeamSearch module with torch.onnx and run the
    exported graph through the Caffe2 backend.
    """
    # Hoisted from mid-function: imports belong at the top of their scope.
    import io

    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    bs = BeamSearch(model_list, tgt_dict, src_tokens, src_lengths, beam_size=6)

    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(11)
    prev_hypos_indices = torch.zeros(6, dtype=torch.int64)

    outs = bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
    )

    f = io.BytesIO()
    # NOTE(review): torch.onnx._export is a private API -- confirm it is
    # still available when upgrading torch.
    torch.onnx._export(
        bs,
        (
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            torch.LongTensor([20]),
        ),
        f,
        export_params=True,
        verbose=False,
        example_outputs=outs,
    )

    f.seek(0)
    onnx_model = onnx.load(f)
    c2_model = caffe2_backend.prepare(onnx_model)
    c2_model.run(
        (
            src_tokens.numpy(),
            src_lengths.numpy(),
            prev_token.numpy(),
            prev_scores.numpy(),
            attn_weights.numpy(),
            prev_hypos_indices.numpy(),
            np.array([20]),
        )
    )
def _test_batched_beam_decoder_step(self, test_args, return_caffe2_rep=False):
    """Export the batched decoder step as a zip archive and compare the
    Caffe2 run against the PyTorch ensemble on the second beam step.
    """
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    ensemble_members = [task.build_model(test_args) for _ in range(3)]
    encoder_ensemble = EncoderEnsemble(ensemble_members)

    # test equivalence
    # The discrepancy in types here is a temporary expedient.
    # PyTorch indexing requires int64 while support for tracing
    # pack_padded_sequence() requires int32.
    sample = next(samples)
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    src_lengths = sample["net_input"]["src_lengths"][0:1].int()

    encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        ensemble_members, tgt_dict, beam_size=beam_size
    )

    export_dir = tempfile.mkdtemp()
    decoder_step_pb_path = os.path.join(export_dir, "decoder_step.pb")
    decoder_step_ensemble.onnx_export(decoder_step_pb_path, encoder_outputs)

    # single EOS in flat array
    input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    first_step_outputs = decoder_step_ensemble(
        input_tokens, prev_scores, timestep, *encoder_outputs
    )

    # next step inputs (input_tokens shape: [beam_size])
    next_input_tokens = torch.LongTensor(np.arange(4, 9))
    next_prev_scores = first_step_outputs[1]
    next_timestep = timestep + 1

    # Tile decoder states for the next timestep
    next_states = list(first_step_outputs[4:])
    for i in range(len(ensemble_members)):
        next_states[i] = next_states[i].repeat(1, beam_size, 1)

    next_step_outputs = decoder_step_ensemble(
        next_input_tokens, next_prev_scores, next_timestep, *next_states
    )

    onnx_decoder = caffe2_backend.prepare_zip_archive(decoder_step_pb_path)
    if return_caffe2_rep:
        return onnx_decoder

    decoder_inputs_numpy = [
        next_input_tokens.numpy(),
        next_prev_scores.detach().numpy(),
        next_timestep.detach().numpy(),
    ]
    decoder_inputs_numpy.extend(t.detach().numpy() for t in next_states)

    caffe2_outputs = onnx_decoder.run(tuple(decoder_inputs_numpy))

    for i, expected in enumerate(next_step_outputs):
        np.testing.assert_allclose(
            caffe2_outputs[i],
            expected.detach().numpy(),
            rtol=1e-4,
            atol=1e-6,
        )

    decoder_step_ensemble.save_to_db(
        output_path=os.path.join(export_dir, "decoder.predictor_export"),
        encoder_ensemble_outputs=encoder_outputs,
    )
def _test_beam_component_equivalence(self, test_args):
    """Step EncoderEnsemble + DecoderBatchedStepEnsemble manually for two
    timesteps and check the results match the fused BeamSearch module.
    """
    # Previously assigned twice with the same value; one assignment suffices.
    beam_size = 5
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)

    num_models = 3
    model_list = []
    for _ in range(num_models):
        model_list.append(task.build_model(test_args))

    # to initialize BeamSearch object
    sample = next(samples)
    # [seq len, batch size=1]
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    # [seq len]
    src_lengths = sample["net_input"]["src_lengths"][0:1].long()

    full_beam_search = BeamSearch(
        model_list, tgt_dict, src_tokens, src_lengths, beam_size=beam_size
    )

    encoder_ensemble = EncoderEnsemble(model_list)

    # to initialize decoder_step_ensemble
    with torch.no_grad():
        pytorch_encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    decoder_step_ensemble = DecoderBatchedStepEnsemble(
        model_list, tgt_dict, beam_size=beam_size
    )

    prev_token = torch.LongTensor([tgt_dict.eos()])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)
    num_steps = torch.LongTensor([2])

    with torch.no_grad():
        (
            bs_out_tokens,
            bs_out_scores,
            bs_out_weights,
            bs_out_prev_indices,
        ) = full_beam_search(
            src_tokens,
            src_lengths,
            prev_token,
            prev_scores,
            attn_weights,
            prev_hypos_indices,
            num_steps,
        )

    comp_out_tokens = (
        np.ones([num_steps + 1, beam_size], dtype="int64") * tgt_dict.eos()
    )
    comp_out_scores = np.zeros([num_steps + 1, beam_size])
    comp_out_weights = np.zeros(
        [num_steps + 1, beam_size, src_lengths.numpy()[0]]
    )
    comp_out_prev_indices = np.zeros([num_steps + 1, beam_size], dtype="int64")

    # single EOS in flat array
    input_tokens = torch.LongTensor(np.array([tgt_dict.eos()]))
    prev_scores = torch.FloatTensor(np.array([0.0]))
    timestep = torch.LongTensor(np.array([0]))

    with torch.no_grad():
        pytorch_first_step_outputs = decoder_step_ensemble(
            input_tokens, prev_scores, timestep, *pytorch_encoder_outputs
        )

    comp_out_tokens[1, :] = pytorch_first_step_outputs[0]
    comp_out_scores[1, :] = pytorch_first_step_outputs[1]
    comp_out_prev_indices[1, :] = pytorch_first_step_outputs[2]
    comp_out_weights[1, :, :] = pytorch_first_step_outputs[3]

    next_input_tokens = pytorch_first_step_outputs[0]
    next_prev_scores = pytorch_first_step_outputs[1]
    timestep += 1

    # Tile states after first timestep
    next_states = list(pytorch_first_step_outputs[4:])
    for i in range(len(model_list)):
        next_states[i] = next_states[i].repeat(1, beam_size, 1)

    with torch.no_grad():
        pytorch_next_step_outputs = decoder_step_ensemble(
            next_input_tokens, next_prev_scores, timestep, *next_states
        )

    comp_out_tokens[2, :] = pytorch_next_step_outputs[0]
    comp_out_scores[2, :] = pytorch_next_step_outputs[1]
    comp_out_prev_indices[2, :] = pytorch_next_step_outputs[2]
    comp_out_weights[2, :, :] = pytorch_next_step_outputs[3]

    np.testing.assert_array_equal(comp_out_tokens, bs_out_tokens.numpy())
    np.testing.assert_allclose(
        comp_out_scores, bs_out_scores.numpy(), rtol=1e-4, atol=1e-6
    )
    np.testing.assert_array_equal(
        comp_out_prev_indices, bs_out_prev_indices.numpy()
    )
    np.testing.assert_allclose(
        comp_out_weights, bs_out_weights.numpy(), rtol=1e-4, atol=1e-6
    )
def _test_full_beam_search_decoder(self, test_args, quantize=False):
    """Serialize BeamSearchAndDecode with TorchScript, reload it, and
    verify the deserialized module reproduces the original outputs.
    """
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    sample = next(samples)
    # [seq len, batch size=1]
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    # [seq len]
    src_lengths = sample["net_input"]["src_lengths"][0:1].long()

    ensemble = [task.build_model(test_args) for _ in range(3)]

    eos_token_id = 8
    length_penalty = 0.25
    nbest = 3
    stop_at_eos = True
    num_steps = torch.LongTensor([20])
    beam_size = 6

    bsd = BeamSearchAndDecode(
        ensemble,
        tgt_dict,
        src_tokens,
        src_lengths,
        eos_token_id=eos_token_id,
        length_penalty=length_penalty,
        nbest=nbest,
        beam_size=beam_size,
        stop_at_eos=stop_at_eos,
        quantize=quantize,
    )
    f = io.BytesIO()
    bsd.save_to_pytorch(f)

    # Test generalization with a different sequence length
    src_tokens = torch.LongTensor([1, 2, 3, 4, 5, 6, 7, 9, 9, 10, 11]).unsqueeze(1)
    src_lengths = torch.LongTensor([11])
    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

    beam_inputs = (
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        num_steps[0],
    )
    outs = bsd(*beam_inputs)

    f.seek(0)
    deserialized_bsd = torch.jit.load(f)
    deserialized_bsd.apply(lambda s: s._unpack() if hasattr(s, "_unpack") else None)
    outs_deserialized = deserialized_bsd(*beam_inputs)

    for hypo, hypo_deserialized in zip(outs, outs_deserialized):
        np.testing.assert_array_equal(
            hypo[0].tolist(), hypo_deserialized[0].tolist()
        )
        np.testing.assert_array_almost_equal(
            hypo[2], hypo_deserialized[2], decimal=1
        )
        np.testing.assert_array_almost_equal(
            hypo[3].numpy(), hypo_deserialized[3].numpy(), decimal=1
        )
def _test_full_beam_decoder(self, test_args, quantize=False):
    """Round-trip BeamSearch through TorchScript serialization and check
    the reloaded module matches the original module's outputs exactly.
    """
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    sample = next(samples)
    # [seq len, batch size=1]
    src_tokens = sample["net_input"]["src_tokens"][0:1].t()
    # [seq len]
    src_lengths = sample["net_input"]["src_lengths"][0:1].long()

    ensemble = [task.build_model(test_args) for _ in range(3)]

    length, word_length = 11, 7
    if test_args.arch in constants.ARCHS_FOR_CHAR_SOURCE:
        char_inds = torch.LongTensor(
            np.random.randint(0, 126, (1, length, word_length), dtype="int64")
        )
        word_lengths = torch.IntTensor(
            np.array([word_length] * length, dtype="int32")
        ).reshape((1, length))
    else:
        char_inds, word_lengths = None, None

    beam_size = 6
    bs = BeamSearch(
        ensemble,
        tgt_dict,
        src_tokens,
        src_lengths,
        beam_size=beam_size,
        quantize=quantize,
        char_inds=char_inds,
        word_lengths=word_lengths,
    )
    f = io.BytesIO()
    bs.save_to_pytorch(f)

    # Test generalization with a different sequence length
    src_tokens = torch.LongTensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]).unsqueeze(1)
    src_lengths = torch.LongTensor([11])
    prev_token = torch.LongTensor([0])
    prev_scores = torch.FloatTensor([0.0])
    attn_weights = torch.zeros(src_tokens.shape[0])
    prev_hypos_indices = torch.zeros(beam_size, dtype=torch.int64)

    outs = bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
        char_inds=char_inds,
        word_lengths=word_lengths,
    )

    f.seek(0)
    deserialized_bs = torch.jit.load(f)
    deserialized_bs.apply(lambda s: s._unpack() if hasattr(s, "_unpack") else None)
    outs_deserialized = deserialized_bs(
        src_tokens,
        src_lengths,
        prev_token,
        prev_scores,
        attn_weights,
        prev_hypos_indices,
        torch.LongTensor([20]),
        char_inds=char_inds,
        word_lengths=word_lengths,
    )

    for got, expected in zip(outs_deserialized, outs):
        np.testing.assert_allclose(got.detach().numpy(), expected.detach().numpy())
def test_collate(self):
    """
    Makes sure that we can memoize in collate if we give a particular
    data index in different orders.
    """
    test_args = test_utils.ModelParamsDict()
    _, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    self.task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    teacher_model = pytorch_translate_utils.maybe_cuda(
        self.task.build_model(test_args)
    )
    d0, d1, d2, d3 = self._dummy_datasets(src_dict.eos(), tgt_dict.eos())
    dataset1 = [d0, d1]
    dataset2 = [d2, d3]
    dataset3 = [d3, d0]
    dataset4 = [d1, d2]
    top_k_teacher_scores = {}
    top_k_teacher_indices = {}
    b1 = TeacherDataset.collate(
        dataset1,
        [teacher_model],
        3,
        src_dict.pad(),
        src_dict.eos(),
        top_k_teacher_scores,
        top_k_teacher_indices,
    )
    TeacherDataset.collate(
        dataset2,
        [teacher_model],
        3,
        src_dict.pad(),
        src_dict.eos(),
        top_k_teacher_scores,
        top_k_teacher_indices,
    )
    before_scores = [top_k_teacher_scores[i].cpu().numpy() for i in range(4)]
    before_indices = [top_k_teacher_indices[i].cpu().numpy() for i in range(4)]
    TeacherDataset.collate(
        dataset3,
        [teacher_model],
        3,
        src_dict.pad(),
        src_dict.eos(),
        top_k_teacher_scores,
        top_k_teacher_indices,
    )
    TeacherDataset.collate(
        dataset4,
        [teacher_model],
        3,
        src_dict.pad(),
        src_dict.eos(),
        top_k_teacher_scores,
        top_k_teacher_indices,
    )
    after_scores = [top_k_teacher_scores[i].cpu().numpy() for i in range(4)]
    after_indices = [top_k_teacher_indices[i].cpu().numpy() for i in range(4)]
    # BUG FIX: previously the np.array_equal results were discarded, so the
    # memoization checks never asserted anything.
    for i in range(4):
        assert np.array_equal(after_scores[i], before_scores[i])
        assert np.array_equal(after_indices[i], before_indices[i])
    b5 = TeacherDataset.collate(
        dataset1,
        [teacher_model],
        3,
        src_dict.pad(),
        src_dict.eos(),
        top_k_teacher_scores,
        top_k_teacher_indices,
    )
    probs_before = b1["top_k_scores"].numpy()
    indices_before = b1["top_k_indices"].numpy()
    probs_after = b5["top_k_scores"].numpy()
    indices_after = b5["top_k_indices"].numpy()
    # The first example has a different length, so the last four positions of
    # the "before" batch hold irrelevant (padding) values; only compare the
    # overlapping prefix and check the tail of the rebuilt batch is zeroed.
    assert np.array_equal(probs_before[0][:-4], probs_after[0][:-4])
    assert np.array_equal(indices_before[0][:-4], indices_after[0][:-4])
    assert np.array_equal(probs_after[0][-4:], np.zeros((4, 3)))
    assert np.array_equal(indices_after[0][-4:], np.zeros((4, 3)))
    assert np.array_equal(probs_before[1], probs_after[1])
    assert np.array_equal(indices_before[1], indices_after[1])
def test_decoder_ensemble_with_eos(self):
    """
    Test DecoderBatchedStepEnsembleWithEOS: it should produce the same
    outputs as DecoderBatchedStepEnsemble on every step before the final
    one, and emit EOS tokens at the final step.
    """
    test_args = test_utils.ModelParamsDict(arch="rnn")
    samples, src_dict, tgt_dict = test_utils.prepare_inputs(test_args)
    task = tasks.DictionaryHolderTask(src_dict, tgt_dict)
    model = task.build_model(test_args)
    eos_token = tgt_dict.eos()

    model_list = [model]
    encoder_ensemble = EncoderEnsemble(model_list)
    src_tokens = torch.LongTensor([4, 5, 6, 7, 8]).unsqueeze(1)
    src_lengths = torch.LongTensor([5])
    encoder_outputs = encoder_ensemble(src_tokens, src_lengths)

    beam_size = 8
    word_reward = 1
    unk_reward = -1
    decoder_ensemble = DecoderBatchedStepEnsemble(
        models=model_list,
        tgt_dict=tgt_dict,
        beam_size=beam_size,
        word_reward=word_reward,
        unk_reward=unk_reward,
    )
    decoder_ensemble_with_eos = DecoderBatchedStepEnsembleWithEOS(
        models=model_list,
        tgt_dict=tgt_dict,
        beam_size=beam_size,
        word_reward=word_reward,
        unk_reward=unk_reward,
    )

    prev_tokens = torch.LongTensor([eos_token])
    prev_scores = torch.FloatTensor([0.0])
    timestep = torch.LongTensor([0])
    final_step = torch.tensor([False], dtype=torch.bool)
    max_len = 5
    num_steps = torch.LongTensor([max_len])

    decoder_first_step_outputs = decoder_ensemble(
        prev_tokens, prev_scores, timestep, *encoder_outputs
    )
    decoder_with_eos_first_step_outputs = decoder_ensemble_with_eos(
        prev_tokens, prev_scores, timestep, final_step, *encoder_outputs
    )
    # The two ensembles must agree on the first step
    self._test_base(decoder_first_step_outputs, decoder_with_eos_first_step_outputs)

    (
        prev_tokens,
        prev_scores,
        prev_hypos_indices,
        attn_weights,
        *states,
    ) = decoder_first_step_outputs

    # Decoder states must be tiled across the beam after the first step
    for i in range(len(model_list)):
        states[i] = states[i].repeat(1, beam_size, 1)

    (
        prev_tokens_with_eos,
        prev_scores_with_eos,
        prev_hypos_indices_with_eos,
        attn_weights_with_eos,
        *states_with_eos,
    ) = decoder_with_eos_first_step_outputs

    for i in range(len(model_list)):
        states_with_eos[i] = states_with_eos[i].repeat(1, beam_size, 1)

    for step in range(num_steps - 1):
        decoder_step_outputs = decoder_ensemble(
            prev_tokens, prev_scores, torch.tensor([step + 1]), *states
        )
        (
            prev_tokens,
            prev_scores,
            prev_hypos_indices,
            attn_weights,
            *states,
        ) = decoder_step_outputs

        decoder_step_with_eos_outputs = decoder_ensemble_with_eos(
            prev_tokens_with_eos,
            prev_scores_with_eos,
            torch.tensor([step + 1]),
            final_step,
            *states_with_eos,
        )
        (
            prev_tokens_with_eos,
            prev_scores_with_eos,
            prev_hypos_indices_with_eos,
            attn_weights_with_eos,
            *states_with_eos,
        ) = decoder_step_with_eos_outputs

        # The two ensembles must agree on every intermediate step
        self._test_base(decoder_step_outputs, decoder_step_with_eos_outputs)

    # Test the outputs of the final step: only EOS tokens should be emitted
    decoder_final_with_eos_outputs = decoder_ensemble_with_eos(
        prev_tokens_with_eos,
        prev_scores_with_eos,
        torch.tensor([num_steps]),
        torch.tensor([True]),
        *states_with_eos,
    )
    np.testing.assert_array_equal(
        decoder_final_with_eos_outputs[0],
        torch.LongTensor([eos_token]).repeat(beam_size),
    )
    np.testing.assert_array_equal(
        decoder_final_with_eos_outputs[2],
        torch.LongTensor(np.arange(beam_size)),
    )
def setUp(self):
    """Build the args, task, and model fixtures shared by the tests."""
    self.args = test_utils.ModelParamsDict()
    _, src_dict, tgt_dict = test_utils.prepare_inputs(self.args)
    self.task = tasks.PytorchTranslateTask(self.args, src_dict, tgt_dict)
    self.model = self.task.build_model(self.args)