def create_and_check_for_causal_lm( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, encoder_hidden_states, encoder_attention_mask, ): model = RobertaForCausalLM(config=config) model.to(torch_device) model.eval() result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=token_labels) self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
return line filename = "SUBTLEXus74286wordstextversion.txt" vocab = get_vocab(filename, 3000) rw_vocab = get_vocab(filename, 10000) filename2 = "SUBTLEX-US frequency list with PoS information text version.txt" pos_dict = get_pos_dict(filename2) GPT2 = ModelInfo(GPT2LMHeadModel.from_pretrained('gpt2', return_dict=True), GPT2Tokenizer.from_pretrained('gpt2'), "Ġ", vocab, "GTP2") Roberta = ModelInfo( RobertaForCausalLM.from_pretrained('roberta-base', return_dict=True), RobertaTokenizer.from_pretrained('roberta-base'), "_", vocab, "Roberta") XLM = ModelInfo( XLMWithLMHeadModel.from_pretrained('xlm-mlm-xnli15-1024', return_dict=True), XLMTokenizer.from_pretrained('xlm-mlm-xnli15-1024'), "_", vocab, "XLM") T5 = ModelInfo( T5ForConditionalGeneration.from_pretrained("t5-base", return_dict=True), T5Tokenizer.from_pretrained("t5-base"), "_", vocab, "T5") Albert = ModelInfo( AlbertForMaskedLM.from_pretrained('albert-base-v2', return_dict=True), AlbertTokenizer.from_pretrained('albert-base-v2'), "_", vocab, "Albert")
def get_encoder_decoder_model(self, config, decoder_config): encoder_model = RobertaModel(config) decoder_model = RobertaForCausalLM(decoder_config) return encoder_model, decoder_model
def create_and_check_decoder_model_past_large_inputs( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, encoder_hidden_states, encoder_attention_mask, ): config.is_decoder = True config.add_cross_attention = True model = RobertaForCausalLM(config=config).to(torch_device).eval() # make sure that ids don't start with pad token mask = input_ids.ne(config.pad_token_id).long() input_ids = input_ids * mask # first forward pass outputs = model( input_ids, attention_mask=input_mask, encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, use_cache=True, ) past_key_values = outputs.past_key_values # create hypothetical multiple next token and extent to next_input_ids next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size) # make sure that ids don't start with pad token mask = next_tokens.ne(config.pad_token_id).long() next_tokens = next_tokens * mask next_mask = ids_tensor((self.batch_size, 3), vocab_size=2) # append to next input_ids and next_input_ids = torch.cat([input_ids, next_tokens], dim=-1) next_attention_mask = torch.cat([input_mask, next_mask], dim=-1) output_from_no_past = model( next_input_ids, attention_mask=next_attention_mask, encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, output_hidden_states=True, )["hidden_states"][0] output_from_past = model( next_tokens, attention_mask=next_attention_mask, encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, past_key_values=past_key_values, output_hidden_states=True, )["hidden_states"][0] # select random slice random_slice_idx = ids_tensor((1, ), output_from_past.shape[-1]).item() output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx].detach( ) output_from_past_slice = output_from_past[:, :, random_slice_idx].detach() self.parent.assertTrue( output_from_past_slice.shape[1] == next_tokens.shape[1]) # test that outputs are equal for slice self.parent.assertTrue( torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))