def test_advanced_inputs(self):
    """Compare model outputs across several ways of supplying attention masks.

    Three things are checked:

    * Passing the decoder inputs and (inverted) decoder mask produced by
      ``_prepare_fsmt_decoder_inputs`` gives the same first output as letting
      the model build them itself.
    * Passing an all-zero decoder attention mask still returns a tensor of
      size ``(batch_size, seq_length, config.tgt_vocab_size)``.
    * Passing the encoder attention mask converted with ``.long()`` gives the
      same first output as the default call.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs()
    config.use_cache = False

    # Overwrite the last two positions of every row with the pad token id.
    source_ids = inputs_dict["input_ids"]
    source_ids[:, -2:] = config.pad_token_id

    # The third returned value (the causal mask) is not used by this test.
    decoder_input_ids, decoder_attn_mask, _causal_mask = _prepare_fsmt_decoder_inputs(config, source_ids)
    model = FSMTModel(config).to(torch_device).eval()

    # Baseline: the model derives the decoder inputs/mask on its own.
    features_default_mask = model(**inputs_dict)[0]

    # Same call, but with decoder inputs and mask passed explicitly.
    features_explicit_mask = model(
        decoder_attention_mask=invert_mask(decoder_attn_mask),
        decoder_input_ids=decoder_input_ids,
        **inputs_dict,
    )[0]
    _assert_tensors_equal(features_explicit_mask, features_default_mask)

    # An all-zero decoder mask must still yield a well-formed output.
    all_zero_mask = torch.zeros_like(decoder_attn_mask)
    features_zero_mask = model(decoder_attention_mask=all_zero_mask, **inputs_dict)[0]

    # no hidden states or attentions
    is_plain_tensor = isinstance(features_zero_mask, torch.Tensor)
    self.assertTrue(is_plain_tensor)

    expected_size = (
        self.model_tester.batch_size,
        self.model_tester.seq_length,
        config.tgt_vocab_size,
    )
    self.assertEqual(features_zero_mask.size(), expected_size)

    # Only when the mask holds a large negative value (i.e. some tokens were
    # masked) are the two outputs required to differ.
    some_tokens_masked = decoder_attn_mask.min().item() < -1e3
    if some_tokens_masked:
        outputs_identical = (features_default_mask == features_zero_mask).all().item()
        self.assertFalse(outputs_identical)

    # Test different encoder attention masks
    long_encoder_mask = inputs_dict["attention_mask"].long()
    features_long_encoder_mask = model(source_ids, attention_mask=long_encoder_mask)[0]
    _assert_tensors_equal(features_long_encoder_mask, features_default_mask)
def test_export_to_onnx(self):
    """Export a freshly constructed ``FSMTModel`` to ONNX.

    The model is exported with ``input_ids`` and ``attention_mask`` as its
    inputs into a temporary directory. There is no explicit assertion: the
    test passes as long as ``torch.onnx.export`` does not raise.
    """
    config, inputs_dict = self.model_tester.prepare_config_and_inputs()
    model = FSMTModel(config).to(torch_device)

    export_args = (inputs_dict["input_ids"], inputs_dict["attention_mask"])
    export_input_names = ["input_ids", "attention_mask"]

    # The temporary directory (and the exported file) is removed on exit.
    with tempfile.TemporaryDirectory() as tmpdirname:
        onnx_path = f"{tmpdirname}/fsmt_test.onnx"
        torch.onnx.export(
            model,
            export_args,
            onnx_path,
            export_params=True,
            opset_version=12,
            input_names=export_input_names,
        )
def test_inference_no_head(self):
    """Run the pretrained ``FSMTModel`` on one sentence and check its output.

    Verifies the shape of the first model output and compares its leading
    3x3 slice against stored reference values within ``TOLERANCE``.
    """
    tokenizer = self.default_tokenizer
    model = FSMTModel.from_pretrained(self.default_mname).to(torch_device)

    src_text = "My friend computer will translate this for me"
    encoded = tokenizer([src_text], return_tensors="pt")
    input_ids = _long_tensor(encoded["input_ids"]).to(torch_device)
    inputs_dict = prepare_fsmt_inputs_dict(model.config, input_ids)

    # Inference only; no gradients are needed.
    with torch.no_grad():
        output = model(**inputs_dict)[0]

    self.assertEqual(output.shape, torch.Size((1, 10, model.config.tgt_vocab_size)))

    # Reference values were produced with the en-ru model using only fairseq's
    # model4.pt; they may need updating if a different checkpoint is used.
    reference_rows = [
        [-1.5753, -1.5753, 2.8975],
        [-0.9540, -0.9540, 1.0299],
        [-3.3131, -3.3131, 0.5219],
    ]
    expected_slice = torch.tensor(reference_rows).to(torch_device)

    slice_matches = torch.allclose(output[:, :3, :3], expected_slice, atol=TOLERANCE)
    self.assertTrue(slice_matches)
def test_initialization_more(self):
    """Check the weight spread of a few freshly initialized modules.

    For the encoder token embedding and for the first encoder layer's
    ``self_attn.k_proj`` and ``fc1``, the standard deviation of ``weight``
    must equal ``config.init_std`` to two decimal places.
    """
    config, _inputs_dict = self.model_tester.prepare_config_and_inputs()
    model = FSMTModel(config).to(torch_device).eval()

    # test init
    # Disabled in the original test:
    # self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())

    encoder = model.encoder
    first_layer = encoder.layers[0]
    modules_to_check = (
        encoder.embed_tokens,
        first_layer.self_attn.k_proj,
        first_layer.fc1,
    )
    for module in modules_to_check:
        # Parameters are expected to be drawn from N(0, config.init_std).
        weight_std = torch.std(module.weight).item()
        self.assertAlmostEqual(weight_std, config.init_std, places=2)