def create_and_check_model_as_decoder(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
    encoder_hidden_states,
    encoder_attention_mask,
):
    # Enable cross-attention so the model can attend to the encoder states.
    config.add_cross_attention = True
    model = BigBirdModel(config)
    model.to(torch_device)
    model.eval()
    # Forward pass with both encoder hidden states and an encoder attention mask.
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
        encoder_attention_mask=encoder_attention_mask,
    )
    # Forward pass without an encoder attention mask (the model then attends to
    # all encoder positions).
    result = model(
        input_ids,
        attention_mask=input_mask,
        token_type_ids=token_type_ids,
        encoder_hidden_states=encoder_hidden_states,
    )
    # Forward pass without any encoder inputs at all.
    result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids)
    self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
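
# A sketch of how a decoder config typically reaches the check above, following
# the standard transformers tester pattern. prepare_config_and_inputs,
# floats_tensor, and ids_tensor are assumed helpers from that pattern and are
# not shown in this excerpt.
def prepare_config_and_inputs_for_decoder(self):
    (
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
    ) = self.prepare_config_and_inputs()
    # Mark the config as a decoder before the check enables cross-attention.
    config.is_decoder = True
    encoder_hidden_states = floats_tensor([self.batch_size, self.seq_length, self.hidden_size])
    encoder_attention_mask = ids_tensor([self.batch_size, self.seq_length], vocab_size=2)
    return (
        config,
        input_ids,
        token_type_ids,
        input_mask,
        sequence_labels,
        token_labels,
        choice_labels,
        encoder_hidden_states,
        encoder_attention_mask,
    )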

def create_and_check_for_auto_padding(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
):
    model = BigBirdModel(config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids)
    # BigBird pads the input to a multiple of the block size internally and
    # strips that padding again, so the output keeps the original sequence length.
    self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
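
# A sketch of the driver for the auto-padding check, as it would live in the
# companion ModelTest class (self.model_tester and the exact sequence length
# are assumptions): a length that is not a multiple of the block size forces
# BigBird to pad internally.
def test_auto_padding(self):
    self.model_tester.seq_length = 241  # deliberately not a multiple of block_size
    config_and_inputs = self.model_tester.prepare_config_and_inputs()
    self.model_tester.create_and_check_for_auto_padding(*config_and_inputs)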

def create_and_check_for_change_to_full_attn(
    self,
    config,
    input_ids,
    token_type_ids,
    input_mask,
    sequence_labels,
    token_labels,
    choice_labels,
):
    model = BigBirdModel(config)
    model.to(torch_device)
    model.eval()
    result = model(input_ids)
    self.parent.assertEqual(result.last_hidden_state.shape, (self.batch_size, self.seq_length, self.hidden_size))
    # Even if the model falls back to full attention internally, the
    # user-visible config must not be mutated.
    self.parent.assertEqual(model.config.attention_type, "block_sparse")
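
# A sketch of the driver for the fallback check, again for the companion
# ModelTest class (names and the exact length are assumptions): a sequence
# shorter than a single attention block cannot use block-sparse attention,
# so the model should switch to full attention internally.
def test_for_change_to_full_attn(self):
    self.model_tester.seq_length = 9  # too short for block-sparse attention
    config_and_inputs = self.model_tester.prepare_config_and_inputs()
    self.model_tester.create_and_check_for_change_to_full_attn(*config_and_inputs)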