    # (assumes the enclosing module imports numpy as np and tensorflow as tf)
    def _test_TFPegasus(self, size, large=False):
        from transformers import PegasusTokenizer, TFPegasusModel
        tokenizer = PegasusTokenizer.from_pretrained(size)
        model = TFPegasusModel.from_pretrained(size)
        input_ids = tokenizer(
            "Studies have been shown that owning a dog is good for you",
            return_tensors="tf").input_ids
        decoder_input_ids = tokenizer("Studies show that", return_tensors="tf").input_ids

        input_dict = {
            "input_ids": input_ids,
            "decoder_input_ids": decoder_input_ids
        }

        # this comes from TFPegasusEncoder/Decoder like:
        #   self.embed_scale = tf.math.sqrt(float(config.d_model)) if config.scale_embedding else 1.0
        # while this is meant to come from the config, tf tells us that these are model inputs;
        # this might be new in tensorflow-2.4.2, we did not notice it before
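        # note: for the d_model=1024 Pegasus checkpoints this scale is
        # sqrt(1024) = 32, which is where the 32.0 constants below come from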
        extra_input = {
            "tf_pegasus_model/model/decoder/mul/y:0": np.array([32.], dtype=np.float32),
            "tf_pegasus_model/model/encoder/mul/y:0": np.array([32.], dtype=np.float32),
        }
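        # spec_and_pad is expected to pad each input up to max_length and build
        # the matching input signature (tf.TensorSpec list) for run_test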
        spec, input_dict = self.spec_and_pad(
            input_dict, max_length=model.config.max_length)
        outputs = ["last_hidden_state"]
        self.run_test(model,
                      input_dict,
                      input_signature=spec,
                      outputs=outputs,
                      large=large,
                      extra_input=extra_input)
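
    # a minimal sketch of how this helper is usually driven; the checkpoint name
    # here is an illustrative assumption, not taken from the original file
    def test_TFPegasus(self):
        self._test_TFPegasus("google/pegasus-xsum", large=True)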

    def check_decoder_model_past_large_inputs(self, config, inputs_dict):
        model = TFPegasusModel(config=config).get_decoder()
        input_ids = inputs_dict["input_ids"]

        input_ids = input_ids[:1, :]
        attention_mask = inputs_dict["attention_mask"][:1, :]
        self.batch_size = 1

        # first forward pass
        outputs = model(input_ids, attention_mask=attention_mask, use_cache=True)

        output, past_key_values = outputs.to_tuple()
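        # in this transformers version the returned cache appears to be nested as
        # (encoder_state, decoder_cache), so keep only the decoder cache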
        past_key_values = past_key_values[1]

        # create hypothetical next tokens and extend next_input_ids with them
        next_tokens = ids_tensor((self.batch_size, 3), config.vocab_size)
        next_attn_mask = tf.cast(ids_tensor((self.batch_size, 3), 2), tf.int8)

        # append the new tokens to input_ids and attention_mask
        next_input_ids = tf.concat([input_ids, next_tokens], axis=-1)
        next_attention_mask = tf.concat([attention_mask, next_attn_mask], axis=-1)

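        # run the full sequence without the cache and only the new tokens with the
        # cache; the outputs for the last three positions should agree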
        output_from_no_past = model(next_input_ids, attention_mask=next_attention_mask)[0]
        output_from_past = model(next_tokens, attention_mask=next_attention_mask, past_key_values=past_key_values)[0]

        self.parent.assertEqual(next_tokens.shape[1], output_from_past.shape[1])

        # select random slice
        random_slice_idx = int(ids_tensor((1,), output_from_past.shape[-1]))
        output_from_no_past_slice = output_from_no_past[:, -3:, random_slice_idx]
        output_from_past_slice = output_from_past[:, :, random_slice_idx]

        # test that outputs are equal for slice
        tf.debugging.assert_near(output_from_past_slice, output_from_no_past_slice, rtol=1e-3)
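
# `ids_tensor` used above is assumed to be the usual transformers test helper; a
# minimal sketch under that assumption (uniform random int ids below vocab_size):
def ids_tensor(shape, vocab_size):
    # draw int32 ids uniformly from [0, vocab_size)
    return tf.random.uniform(shape, minval=0, maxval=vocab_size, dtype=tf.int32)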