def create_and_check_model_as_decoder( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, encoder_hidden_states, encoder_attention_mask, ): config.add_cross_attention = True model = BigBirdModel(config) model.to(torch_device) model.eval() result = model( input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states, encoder_attention_mask=encoder_attention_mask, ) result = model( input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, encoder_hidden_states=encoder_hidden_states, ) result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids) self.parent.assertEqual( result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_auto_padding( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, ): model = BigBirdModel(config) model.to(torch_device) model.eval() result = model(input_ids) self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
def create_and_check_for_change_to_full_attn( self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, ): model = BigBirdModel(config) model.to(torch_device) model.eval() result = model(input_ids) self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size)) # the config should not be changed self.parent.assertTrue(model.config.attention_type == "block_sparse")