Example #1
0
class ScheduleInitTest(unittest.TestCase):
    m = torch.nn.Linear(50, 50) if is_torch_available() else None
    optimizer = AdamW(m.parameters(), lr=10.) if is_torch_available() else None
    num_steps = 10

    def assertListAlmostEqual(self, list1, list2, tol):
        self.assertEqual(len(list1), len(list2))
        for a, b in zip(list1, list2):
            self.assertAlmostEqual(a, b, delta=tol)

    def test_constant_scheduler(self):
        scheduler = get_constant_schedule(self.optimizer)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [10.] * self.num_steps
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        scheduler = get_constant_schedule(self.optimizer)
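        # a fresh scheduler should reproduce the same LRs after a state_dict
        # save/reload roundtrip (unwrap_and_save_reload_schedule is assumed to
        # checkpoint and restore the scheduler mid-run)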
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_constant_scheduler(self):
        scheduler = get_constant_schedule_with_warmup(self.optimizer,
                                                      num_warmup_steps=4)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [
            2.5, 5.0, 7.5, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0
        ]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        scheduler = get_constant_schedule_with_warmup(self.optimizer,
                                                      num_warmup_steps=4)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_linear_scheduler(self):
        scheduler = get_linear_schedule_with_warmup(self.optimizer,
                                                    num_warmup_steps=2,
                                                    num_training_steps=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [
            5.0, 10.0, 8.75, 7.5, 6.25, 5.0, 3.75, 2.5, 1.25, 0.0
        ]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListEqual([l[0] for l in lrs], expected_learning_rates)

        scheduler = get_linear_schedule_with_warmup(self.optimizer,
                                                    num_warmup_steps=2,
                                                    num_training_steps=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_cosine_scheduler(self):
        scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                    num_warmup_steps=2,
                                                    num_training_steps=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [
            5.0, 10.0, 9.61, 8.53, 6.91, 5.0, 3.08, 1.46, 0.38, 0.0
        ]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListAlmostEqual([l[0] for l in lrs],
                                   expected_learning_rates,
                                   tol=1e-2)

        scheduler = get_cosine_schedule_with_warmup(self.optimizer,
                                                    num_warmup_steps=2,
                                                    num_training_steps=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])

    def test_warmup_cosine_hard_restart_scheduler(self):
        scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=2,
            num_cycles=2,
            num_training_steps=10)
        lrs = unwrap_schedule(scheduler, self.num_steps)
        expected_learning_rates = [
            5.0, 10.0, 8.53, 5.0, 1.46, 10.0, 8.53, 5.0, 1.46, 0.0
        ]
        self.assertEqual(len(lrs[0]), 1)
        self.assertListAlmostEqual([l[0] for l in lrs],
                                   expected_learning_rates,
                                   tol=1e-2)

        scheduler = get_cosine_with_hard_restarts_schedule_with_warmup(
            self.optimizer,
            num_warmup_steps=2,
            num_cycles=2,
            num_training_steps=10)
        lrs_2 = unwrap_and_save_reload_schedule(scheduler, self.num_steps)
        self.assertListEqual([l[0] for l in lrs], [l[0] for l in lrs_2])
Example #2
0
class BertModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (
        BertModel, BertForMaskedLM, BertForNextSentencePrediction,
        BertForPreTraining, BertForQuestionAnswering,
        BertForSequenceClassification,
        BertForTokenClassification) if is_torch_available() else ()

    class BertModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_mask=True,
            use_token_type_ids=True,
            use_labels=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
            device='cpu',
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_mask = use_input_mask
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
            self.device = device

            # superset of BERT input/output descriptions
            # (see the BertPreTrainedModel docs)
            self.input_ids_desc = IODescription(
                'input_ids', ['batch', 'max_seq_len_in_batch'],
                torch.int64,
                num_classes=self.vocab_size)
            self.attention_mask_desc = IODescription(
                'attention_mask', ['batch', 'max_seq_len_in_batch'],
                torch.int64,
                num_classes=2)
            self.token_type_ids_desc = IODescription(
                'token_type_ids', ['batch', 'max_seq_len_in_batch'],
                torch.int64,
                num_classes=2)
            self.position_ids_desc = IODescription(
                'position_ids', ['batch', 'max_seq_len_in_batch'],
                torch.int64,
                num_classes=self.max_position_embeddings)
            self.head_mask_desc = IODescription(
                'head_mask',
                [self.num_hidden_layers, self.num_attention_heads],
                torch.int64,
                num_classes=2)
            self.inputs_embeds_desc = IODescription(
                'inputs_embeds',
                ['batch', 'max_seq_len_in_batch', self.hidden_size],
                torch.float32)

            self.encoder_hidden_states_desc = IODescription(
                'encoder_hidden_states',
                ['batch', 'max_seq_len_in_batch', self.hidden_size],
                torch.float32)
            self.encoder_attention_mask_desc = IODescription(
                'encoder_attention_mask', ['batch', 'max_seq_len_in_batch'],
                torch.float32)

            # see BertForPreTraining doc
            self.masked_lm_labels_desc = IODescription(
                'masked_lm_labels', ['batch', 'max_seq_len_in_batch'],
                torch.int64,
                num_classes=self.vocab_size)
            self.next_sentence_label_desc = IODescription(
                'next_sentence_label', [
                    'batch',
                ], torch.int64, num_classes=2)

            # outputs
            self.loss_desc = IODescription('loss', [
                1,
            ], torch.float32)
            self.prediction_scores_desc = IODescription(
                'prediction_scores',
                ['batch', 'max_seq_len_in_batch', self.vocab_size],
                torch.float32)

            self.seq_relationship_scores_desc = IODescription(
                'seq_relationship_scores', ['batch', 2], torch.float32
            )  # IODescription('seq_relationship_scores', ['batch', 'max_seq_len_in_batch', 2], torch.float32)
            self.hidden_states_desc = IODescription('hidden_states', [
                self.num_hidden_layers, 'batch', 'max_seq_len_in_batch',
                self.hidden_size
            ], torch.float32)
            self.attentions_desc = IODescription('attentions', [
                self.num_hidden_layers, 'batch', self.num_attention_heads,
                'max_seq_len_in_batch', 'max_seq_len_in_batch'
            ], torch.float32)
            self.last_hidden_state_desc = IODescription(
                'last_hidden_state',
                ['batch', 'max_seq_len_in_batch', self.hidden_size],
                torch.float32)
            self.pooler_output_desc = IODescription(
                'pooler_output', ['batch', self.hidden_size], torch.float32)

            # BertForPreTraining forward:
            # def forward(self, input_ids=None, attention_mask=None, token_type_ids=None,
            #     position_ids=None, head_mask=None, inputs_embeds=None,
            #     masked_lm_labels=None, next_sentence_label=None):
            #
            # create_and_check_bert_for_pretraining calls BertForPreTraining:
            # model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids,
            #     masked_lm_labels=token_labels, next_sentence_label=sequence_labels)

        def BertForPreTraining_descs(self):
            return ModelDescription(
                [
                    self.input_ids_desc, self.attention_mask_desc,
                    self.token_type_ids_desc, self.masked_lm_labels_desc,
                    self.next_sentence_label_desc
                ],
                # loss_desc is returned only when both masked_lm_labels and next_sentence_label are provided;
                # hidden_states_desc and attentions_desc should be added depending on
                # config.output_hidden_states and config.output_attentions
                [
                    self.loss_desc,
                    self.prediction_scores_desc,
                    self.seq_relationship_scores_desc,
                    #hidden_states_desc, attentions_desc
                ])

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size).to(self.device)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2).to(self.device)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size).to(
                                                self.device)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size).to(
                                                 self.device)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels).to(self.device)
                choice_labels = ids_tensor([self.batch_size],
                                           self.num_choices).to(self.device)

            config = BertConfig(
                vocab_size_or_config_json_file=self.vocab_size,
                hidden_size=self.hidden_size,
                num_hidden_layers=self.num_hidden_layers,
                num_attention_heads=self.num_attention_heads,
                intermediate_size=self.intermediate_size,
                hidden_act=self.hidden_act,
                hidden_dropout_prob=self.hidden_dropout_prob,
                attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                max_position_embeddings=self.max_position_embeddings,
                type_vocab_size=self.type_vocab_size,
                is_decoder=False,
                initializer_range=self.initializer_range)

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels

        def prepare_config_and_inputs_for_decoder(self):
            config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels = self.prepare_config_and_inputs(
            )

            config.is_decoder = True
            encoder_hidden_states = floats_tensor(
                [self.batch_size, self.seq_length, self.hidden_size])
            encoder_attention_mask = ids_tensor(
                [self.batch_size, self.seq_length], vocab_size=2)

            return config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels, encoder_hidden_states, encoder_attention_mask

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_bert_model(self, config, input_ids,
                                        token_type_ids, input_mask,
                                        sequence_labels, token_labels,
                                        choice_labels):
            model = BertModel(config=config)
            model.to(input_ids.device)
            model.eval()

            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids)

            # note: this fails because the model description has no loss output
            model_desc = ModelDescription([
                self.input_ids_desc, self.attention_mask_desc,
                self.token_type_ids_desc
            ], [self.last_hidden_state_desc, self.pooler_output_desc])
            args_gradient_accumulation_steps = 8
            args_local_rank = 0
            args_world_size = 1
            args_fp16 = True
            args_allreduce_post_accumulation = True

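            # wrap the eager BertModel in ORTTrainer so the same forward pass
            # runs through ONNX Runtime (Lamb optimizer, mixed precision, and
            # gradient accumulation are configured below)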
            model = ORTTrainer(
                model,
                None,
                model_desc,
                "LambOptimizer",
                map_optimizer_attributes=map_optimizer_attributes,
                learning_rate_description=IODescription(
                    'Learning_Rate', [
                        1,
                    ], torch.float32),
                device=self.device,
                postprocess_model=postprocess_model,
                gradient_accumulation_steps=args_gradient_accumulation_steps,
                world_rank=args_local_rank,
                world_size=args_world_size,
                use_mixed_precision=args_fp16,
                allreduce_post_accumulation=args_allreduce_post_accumulation)

            sequence_output, pooled_output = model(
                input_ids, token_type_ids=token_type_ids)
            sequence_output, pooled_output = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(list(result["pooled_output"].size()),
                                        [self.batch_size, self.hidden_size])

        def create_and_check_bert_model_as_decoder(self, config, input_ids,
                                                   token_type_ids, input_mask,
                                                   sequence_labels,
                                                   token_labels, choice_labels,
                                                   encoder_hidden_states,
                                                   encoder_attention_mask):
            model = BertModel(config)
            model.eval()
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask)
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                encoder_hidden_states=encoder_hidden_states)
            sequence_output, pooled_output = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids)

            result = {
                "sequence_output": sequence_output,
                "pooled_output": pooled_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertListEqual(list(result["pooled_output"].size()),
                                        [self.batch_size, self.hidden_size])

        def create_and_check_bert_for_masked_lm(self, config, input_ids,
                                                token_type_ids, input_mask,
                                                sequence_labels, token_labels,
                                                choice_labels):
            model = BertForMaskedLM(config=config)
            model.eval()
            loss, prediction_scores = model(input_ids,
                                            attention_mask=input_mask,
                                            token_type_ids=token_type_ids,
                                            masked_lm_labels=token_labels)

            # wrap the same model in ORTTrainer and repeat the forward pass through ONNX Runtime
            model_desc = ModelDescription([
                self.input_ids_desc, self.attention_mask_desc,
                self.token_type_ids_desc, self.masked_lm_labels_desc
            ], [self.loss_desc, self.prediction_scores_desc])
            args_gradient_accumulation_steps = 8
            args_local_rank = 0
            args_world_size = 1
            args_fp16 = True
            args_allreduce_post_accumulation = True

            model = ORTTrainer(
                model,
                None,
                model_desc,
                "LambOptimizer",
                map_optimizer_attributes=map_optimizer_attributes,
                learning_rate_description=IODescription(
                    'Learning_Rate', [
                        1,
                    ], torch.float32),
                device=self.device,
                postprocess_model=postprocess_model,
                gradient_accumulation_steps=args_gradient_accumulation_steps,
                world_rank=args_local_rank,
                world_size=args_world_size,
                use_mixed_precision=args_fp16,
                allreduce_post_accumulation=args_allreduce_post_accumulation)
            model(input_ids,
                  attention_mask=input_mask,
                  token_type_ids=token_type_ids,
                  masked_lm_labels=token_labels)

        def create_and_check_bert_model_for_masked_lm_as_decoder(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels,
                encoder_hidden_states, encoder_attention_mask):
            model = BertForMaskedLM(config=config)
            model.eval()
            loss, prediction_scores = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                encoder_hidden_states=encoder_hidden_states,
                encoder_attention_mask=encoder_attention_mask)
            loss, prediction_scores = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                encoder_hidden_states=encoder_hidden_states)
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()),
                [self.batch_size, self.seq_length, self.vocab_size])
            self.check_loss_output(result)

        def create_and_check_bert_for_next_sequence_prediction(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = BertForNextSentencePrediction(config=config)
            model.eval()
            loss, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                next_sentence_label=sequence_labels)
            result = {
                "loss": loss,
                "seq_relationship_score": seq_relationship_score,
            }
            self.parent.assertListEqual(
                list(result["seq_relationship_score"].size()),
                [self.batch_size, 2])
            self.check_loss_output(result)

        def create_and_check_bert_for_pretraining(self, config, input_ids,
                                                  token_type_ids, input_mask,
                                                  sequence_labels,
                                                  token_labels, choice_labels):
            model = BertForPreTraining(config=config)
            model.eval()
            loss, prediction_scores, seq_relationship_score = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                masked_lm_labels=token_labels,
                next_sentence_label=sequence_labels)
            model_desc = ModelDescription([
                self.input_ids_desc, self.attention_mask_desc,
                self.token_type_ids_desc, self.masked_lm_labels_desc,
                self.next_sentence_label_desc
            ], [
                self.loss_desc, self.prediction_scores_desc,
                self.seq_relationship_scores_desc
            ])

            import argparse
            args_ = argparse.Namespace(fp16=True, amp_opt_level='O1')

            from collections import namedtuple
            MyArgs = namedtuple(
                "MyArgs",
                "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
            )
            args = MyArgs(local_rank=0,
                          world_size=1,
                          max_steps=100,
                          learning_rate=0.00001,
                          warmup_proportion=0.01,
                          batch_size=13,
                          seq_len=7)

            from train_with_ort_trainer import get_lr

            def get_lr_this_step(global_step):
                return get_lr(args, global_step)

            loss_scaler = LossScaler('loss_scale_input_name',
                                     True,
                                     up_scale_window=2000)

            option_gradient_accumulation_steps = [8]
            option_fp16 = [True, False]
            option_allreduce_post_accumulation = True
            option_use_internal_get_lr_this_step = False
            option_use_internal_loss_scaler = False
            # TODO: with fetches

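            # sweep gradient accumulation, fp16, and batch-splitting options;
            # run_test drives the ORTTrainer path and its loss / prediction /
            # seq-relationship outputs are printed for inspection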
            for gradient_accumulation_steps in option_gradient_accumulation_steps:
                for fp16 in option_fp16:
                    for option_split_batch in BatchArgsOption:
                        loss_ort, prediction_scores_ort, seq_relationship_score_ort =\
                            run_test(model, model_desc, self.device, args, gradient_accumulation_steps, fp16,
                                     option_allreduce_post_accumulation,
                                     get_lr_this_step, option_use_internal_get_lr_this_step,
                                     loss_scaler, option_use_internal_loss_scaler,
                                     option_split_batch)

                        print(loss_ort)
                        print(prediction_scores_ort)
                        print(seq_relationship_score_ort)

        def create_and_check_bert_for_question_answering(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            model = BertForQuestionAnswering(config=config)
            model.eval()
            loss, start_logits, end_logits = model(
                input_ids,
                attention_mask=input_mask,
                token_type_ids=token_type_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels)
            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()),
                                        [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()),
                                        [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_bert_for_sequence_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = BertForSequenceClassification(config)
            model.eval()
            loss, logits = model(input_ids,
                                 attention_mask=input_mask,
                                 token_type_ids=token_type_ids,
                                 labels=sequence_labels)
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()),
                                        [self.batch_size, self.num_labels])
            self.check_loss_output(result)

        def create_and_check_bert_for_token_classification(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_labels = self.num_labels
            model = BertForTokenClassification(config=config)
            model.eval()
            loss, logits = model(input_ids,
                                 attention_mask=input_mask,
                                 token_type_ids=token_type_ids,
                                 labels=token_labels)
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(
                list(result["logits"].size()),
                [self.batch_size, self.seq_length, self.num_labels])
            self.check_loss_output(result)

        def create_and_check_bert_for_multiple_choice(
                self, config, input_ids, token_type_ids, input_mask,
                sequence_labels, token_labels, choice_labels):
            config.num_choices = self.num_choices
            model = BertForMultipleChoice(config=config)
            model.eval()
            multiple_choice_inputs_ids = input_ids.unsqueeze(1).expand(
                -1, self.num_choices, -1).contiguous()
            multiple_choice_token_type_ids = token_type_ids.unsqueeze(
                1).expand(-1, self.num_choices, -1).contiguous()
            multiple_choice_input_mask = input_mask.unsqueeze(1).expand(
                -1, self.num_choices, -1).contiguous()
            loss, logits = model(multiple_choice_inputs_ids,
                                 attention_mask=multiple_choice_input_mask,
                                 token_type_ids=multiple_choice_token_type_ids,
                                 labels=choice_labels)
            result = {
                "loss": loss,
                "logits": logits,
            }
            self.parent.assertListEqual(list(result["logits"].size()),
                                        [self.batch_size, self.num_choices])
            self.check_loss_output(result)

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids, token_type_ids, input_mask, sequence_labels,
             token_labels, choice_labels) = config_and_inputs
            inputs_dict = {
                'input_ids': input_ids,
                'token_type_ids': token_type_ids,
                'attention_mask': input_mask
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = BertModelTest.BertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=BertConfig,
                                          hidden_size=37)

    # def test_config(self):
    #     self.config_tester.run_common_tests()

    # def test_bert_model(self, use_cuda=False):
    #     # ^^ This could be a real fixture
    #     if use_cuda:
    #         self.model_tester.device = "cuda"
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs()
    #     self.model_tester.create_and_check_bert_model(*config_and_inputs)

    # def test_bert_model_as_decoder(self):
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
    #     self.model_tester.create_and_check_bert_model_as_decoder(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_masked_lm(
            *config_and_inputs)

    # def test_for_masked_lm_decoder(self):
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
    #     self.model_tester.create_and_check_bert_model_for_masked_lm_as_decoder(*config_and_inputs)

    # def test_for_multiple_choice(self):
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs()
    #     self.model_tester.create_and_check_bert_for_multiple_choice(*config_and_inputs)

    # def test_for_next_sequence_prediction(self):
    #     config_and_inputs = self.model_tester.prepare_config_and_inputs()
    #     self.model_tester.create_and_check_bert_for_next_sequence_prediction(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_bert_for_pretraining(
            *config_and_inputs)
Example #3
0
class TapasModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        TapasModel,
        TapasForMaskedLM,
        TapasForQuestionAnswering,
        TapasForSequenceClassification,
    ) if is_torch_available() else ())
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = True
    test_head_masking = False

    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = copy.deepcopy(inputs_dict)
        if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
            inputs_dict = {
                k: v.unsqueeze(1).expand(-1, self.model_tester.num_choices,
                                         -1).contiguous()
                if isinstance(v, torch.Tensor) and v.ndim > 1 else v
                for k, v in inputs_dict.items()
            }

        if return_labels:
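            # build dummy labels with the shapes each head expects; the TAPAS QA
            # head additionally needs aggregation labels, numeric values/scales,
            # and a float answer per example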
            if model_class in get_values(MODEL_FOR_MULTIPLE_CHOICE_MAPPING):
                inputs_dict["labels"] = torch.ones(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in get_values(
                    MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING):
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["aggregation_labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["numeric_values"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.float,
                    device=torch_device,
                )
                inputs_dict["numeric_values_scale"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.float,
                    device=torch_device,
                )
                inputs_dict["float_answer"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.float,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_NEXT_SENTENCE_PREDICTION_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in [
                    *get_values(MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING),
                    *get_values(MODEL_FOR_CAUSAL_LM_MAPPING),
                    *get_values(MODEL_FOR_MASKED_LM_MAPPING),
                    *get_values(MODEL_FOR_SEQ_TO_SEQ_CAUSAL_LM_MAPPING),
            ]:
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = TapasModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=TapasConfig,
                                          dim=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)
Example #4
0
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        "--models",
        required=False,
        type=str,
        default="all",
        help="Model checkpoints to be provided "
        "to the AutoModel classes. Leave "
        "blank to benchmark the base version "
        "of all available model "
        "architectures.",
    )
    parser.add_argument("--torch",
                        required=False,
                        action="store_true",
                        help="Benchmark the Pytorch version of the "
                        "models")
    parser.add_argument("--torch_cuda",
                        required=False,
                        action="store_true",
                        help="Pytorch only: run on available "
                        "cuda devices")
    parser.add_argument(
        "--torchscript",
        required=False,
        action="store_true",
        help="Pytorch only: trace the models "
        "using torchscript",
    )
    parser.add_argument(
        "--tensorflow",
        required=False,
        action="store_true",
        help="Benchmark the TensorFlow version "
        "of the models. Will run on GPU if "
        "the correct dependencies are "
        "installed",
    )
    parser.add_argument("--xla",
                        required=False,
                        action="store_true",
                        help="TensorFlow only: use XLA acceleration.")
    parser.add_argument(
        "--amp",
        required=False,
        action="store_true",
        help="TensorFlow only: use automatic mixed precision acceleration.",
    )
    parser.add_argument("--fp16",
                        required=False,
                        action="store_true",
                        help="PyTorch only: use FP16 to accelerate inference.")
    parser.add_argument(
        "--keras_predict",
        required=False,
        action="store_true",
        help="Whether to use model.predict "
        "instead of model() to do a "
        "forward pass.",
    )
    parser.add_argument("--save_to_csv",
                        required=False,
                        action="store_true",
                        help="Save to a CSV file.")
    parser.add_argument("--csv_filename",
                        required=False,
                        default=None,
                        help="CSV filename used if saving results to csv.")
    parser.add_argument("--average_over",
                        required=False,
                        default=30,
                        type=int,
                        help="Times an experiment will be run.")

    args = parser.parse_args()
    if args.models == "all":
        args.models = [
            "gpt2",
            "bert-base-cased",
            "xlnet-base-cased",
            "xlm-mlm-en-2048",
            "transfo-xl-wt103",
            "openai-gpt",
            "distilbert-base-uncased",
            "distilgpt2",
            "roberta-base",
            "ctrl",
        ]
    else:
        args.models = args.models.split()

    print("Running with arguments", args)

    if args.torch:
        if is_torch_available():
            create_setup_and_compute(
                model_names=args.models,
                tensorflow=False,
                gpu=args.torch_cuda,
                torchscript=args.torchscript,
                fp16=args.fp16,
                save_to_csv=args.save_to_csv,
                csv_filename=args.csv_filename,
                average_over=args.average_over,
            )
        else:
            raise ImportError(
                "Trying to run a PyTorch benchmark but PyTorch was not found in the environment."
            )

    if args.tensorflow:
        if is_tf_available():
            create_setup_and_compute(
                model_names=args.models,
                tensorflow=True,
                xla=args.xla,
                amp=args.amp,
                save_to_csv=args.save_to_csv,
                csv_filename=args.csv_filename,
                average_over=args.average_over,
            )
        else:
            raise ImportError(
                "Trying to run a TensorFlow benchmark but TensorFlow was not found in the environment."
            )
Example #5
0
class T5ModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        T5Model, T5ForConditionalGeneration) if is_torch_available() else ()
    all_generative_model_classes = (
        T5ForConditionalGeneration, ) if is_torch_available() else ()
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    is_encoder_decoder = True

    def setUp(self):
        self.model_tester = T5ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=T5Config,
                                          d_model=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_shift_right(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_prepare_lm_labels_via_shift_left(
            *config_and_inputs)

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_with_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_with_lm_head(*config_and_inputs)

    def test_decoder_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_decoder_model_past(
            *config_and_inputs)

    def test_decoder_model_past_with_attn_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_decoder_model_attention_mask_past(
            *config_and_inputs)

    def test_generate_with_past_key_value_states(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_generate_with_past_key_value_states(
            *config_and_inputs)

    def test_encoder_decoder_shared_weights(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_encoder_decoder_shared_weights(
            *config_and_inputs)

    @unittest.skipIf(torch_device == "cpu", "Can't do half precision")
    def test_model_fp16_forward(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model_fp16_forward(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in T5_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = T5Model.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_export_to_onnx(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        model = T5Model(config_and_inputs[0]).to(torch_device)
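        # export the model with the prepared dummy inputs; export_params embeds
        # the weights in the ONNX file and opset_version pins the operator set
        # used by the exporter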
        with tempfile.TemporaryDirectory() as tmpdirname:
            torch.onnx.export(
                model,
                config_and_inputs[1],
                f"{tmpdirname}/t5_test.onnx",
                export_params=True,
                opset_version=9,
            )
Example #6
0
class ElectraModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        ElectraModel,
        ElectraForPreTraining,
        ElectraForMaskedLM,
        ElectraForCausalLM,
        ElectraForMultipleChoice,
        ElectraForTokenClassification,
        ElectraForSequenceClassification,
        ElectraForQuestionAnswering,
    ) if is_torch_available() else ())
    all_generative_model_classes = (
        ElectraForCausalLM, ) if is_torch_available() else ()

    fx_ready_model_classes = all_model_classes
    fx_dynamic_ready_model_classes = all_model_classes

    # special case for ForPreTraining model
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

        if return_labels:
            if model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = ElectraModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ElectraConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_electra_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_model(*config_and_inputs)

    def test_electra_model_as_decoder(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_electra_model_as_decoder(
            *config_and_inputs)

    def test_electra_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for emb_type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = emb_type
            self.model_tester.create_and_check_electra_model(
                *config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_masked_lm(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_token_classification(
            *config_and_inputs)

    def test_for_pre_training(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_pretraining(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_sequence_classification(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_question_answering(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_electra_for_multiple_choice(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in ELECTRA_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = ElectraModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_for_causal_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_electra_for_causal_lm(
            *config_and_inputs)
Example #7
0
class GPT2ModelTest(ModelTesterMixin, GenerationTesterMixin,
                    unittest.TestCase):

    all_model_classes = ((GPT2Model, GPT2LMHeadModel, GPT2DoubleHeadsModel,
                          GPT2ForSequenceClassification)
                         if is_torch_available() else ())
    all_generative_model_classes = (
        GPT2LMHeadModel, GPT2DoubleHeadsModel) if is_torch_available() else ()
    all_parallelizable_model_classes = (
        GPT2LMHeadModel, ) if is_torch_available() else ()
    test_missing_keys = False
    test_model_parallel = True

    # special case for DoubleHeads model
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

        if return_labels:
            if model_class.__name__ == "GPT2DoubleHeadsModel":
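                # GPT2DoubleHeadsModel expects (batch, num_choices, seq_len)
                # shaped inputs plus mc_token_ids/mc_labels, so the zero label
                # tensor is reused for input_ids and token_type_ids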
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.num_choices,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device,
                )
                inputs_dict["input_ids"] = inputs_dict["labels"]
                inputs_dict["token_type_ids"] = inputs_dict["labels"]
                inputs_dict["mc_token_ids"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.num_choices),
                    dtype=torch.long,
                    device=torch_device,
                )
                inputs_dict["mc_labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = GPT2ModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=GPT2Config,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_gpt2_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model(*config_and_inputs)

    def test_gpt2_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past(*config_and_inputs)

    def test_gpt2_model_att_mask_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_attention_mask_past(
            *config_and_inputs)

    def test_gpt2_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_model_past_large_inputs(
            *config_and_inputs)

    def test_gpt2_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

    def test_gpt2_double_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_double_lm_head_model(
            *config_and_inputs)

    def test_gpt2_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt2_for_sequence_classification(
            *config_and_inputs)

    def test_gpt2_gradient_checkpointing(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs(
            gradient_checkpointing=True)
        self.model_tester.create_and_check_forward_and_backwards(
            *config_and_inputs)

    @slow
    def test_batch_generation(self):
        model = GPT2LMHeadModel.from_pretrained("gpt2")
        model.to(torch_device)
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

        tokenizer.padding_side = "left"

        # Define PAD Token = EOS Token = 50256
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)
        token_type_ids = torch.cat(
            [
                input_ids.new_full(
                    (input_ids.shape[0], input_ids.shape[1] - 1), 0),
                input_ids.new_full((input_ids.shape[0], 1), 500),
            ],
            dim=-1,
        )

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        outputs_tt = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
            token_type_ids=token_type_ids,
        )

        inputs_non_padded = tokenizer(
            sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

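        # num_paddings = pad tokens added to the shorter sentence in the batch;
        # max_length is reduced by that amount so the single-sentence generation
        # stops at the same total length as its batched counterpart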
        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][
            -1].long().sum().cpu().item()
        inputs_padded = tokenizer(
            sentences[1], return_tensors="pt").input_ids.to(torch_device)
        output_padded = model.generate(input_ids=inputs_padded,
                                       max_length=model.config.max_length -
                                       num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs,
                                                    skip_special_tokens=True)
        batch_out_sentence_tt = tokenizer.batch_decode(
            outputs_tt, skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0],
                                               skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0],
                                           skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a mess. I'm not sure if he's going",
            "Today, I'm going to be doing a lot of research on this. I",
        ]
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertTrue(
            batch_out_sentence_tt !=
            batch_out_sentence)  # token_type_ids should change output
        self.assertListEqual(expected_output_sentence,
                             [non_padded_sentence, padded_sentence])

    @slow
    def test_batch_generation_2heads(self):
        model = GPT2DoubleHeadsModel.from_pretrained("gpt2")
        model.to(torch_device)
        tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

        tokenizer.padding_side = "left"

        # This tokenizer has no pad token, so we have to set it in some way
        # Define PAD Token = EOS Token = 50256
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

        # use different length sentences to test batching
        sentences = [
            "Hello, my dog is a little",
            "Today, I",
        ]

        inputs = tokenizer(sentences, return_tensors="pt", padding=True)
        input_ids = inputs["input_ids"].to(torch_device)
        token_type_ids = torch.cat(
            [
                input_ids.new_full(
                    (input_ids.shape[0], input_ids.shape[1] - 1), 0),
                input_ids.new_full((input_ids.shape[0], 1), 500),
            ],
            dim=-1,
        )

        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
        )

        outputs_tt = model.generate(
            input_ids=input_ids,
            attention_mask=inputs["attention_mask"].to(torch_device),
            token_type_ids=token_type_ids,
        )

        inputs_non_padded = tokenizer(
            sentences[0], return_tensors="pt").input_ids.to(torch_device)
        output_non_padded = model.generate(input_ids=inputs_non_padded)

        num_paddings = inputs_non_padded.shape[-1] - inputs["attention_mask"][
            -1].long().sum().cpu().item()
        inputs_padded = tokenizer(
            sentences[1], return_tensors="pt").input_ids.to(torch_device)
        output_padded = model.generate(input_ids=inputs_padded,
                                       max_length=model.config.max_length -
                                       num_paddings)

        batch_out_sentence = tokenizer.batch_decode(outputs,
                                                    skip_special_tokens=True)
        batch_out_sentence_tt = tokenizer.batch_decode(
            outputs_tt, skip_special_tokens=True)
        non_padded_sentence = tokenizer.decode(output_non_padded[0],
                                               skip_special_tokens=True)
        padded_sentence = tokenizer.decode(output_padded[0],
                                           skip_special_tokens=True)

        expected_output_sentence = [
            "Hello, my dog is a little bit of a mess. I'm not sure if he's going",
            "Today, I'm going to be doing a lot of research on this. I",
        ]
        self.assertListEqual(expected_output_sentence, batch_out_sentence)
        self.assertTrue(
            batch_out_sentence_tt !=
            batch_out_sentence)  # token_type_ids should change output
        self.assertListEqual(expected_output_sentence,
                             [non_padded_sentence, padded_sentence])

    @slow
    def test_model_from_pretrained(self):
        for model_name in GPT2_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = GPT2Model.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #8
0
class CTRLModelTest(CommonTestCases.CommonModelTester):

    all_model_classes = (CTRLModel,
                         CTRLLMHeadModel) if is_torch_available() else ()
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    test_head_masking = False

    class CTRLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_token_type_ids=True,
            use_input_mask=True,
            use_labels=True,
            use_mc_token_ids=True,
            vocab_size=99,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            intermediate_size=37,
            hidden_act="gelu",
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_token_type_ids = use_token_type_ids
            self.use_input_mask = use_input_mask
            self.use_labels = use_labels
            self.use_mc_token_ids = use_mc_token_ids
            self.vocab_size = vocab_size
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.intermediate_size = intermediate_size
            self.hidden_act = hidden_act
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.type_vocab_size = type_vocab_size
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)

            input_mask = None
            if self.use_input_mask:
                input_mask = ids_tensor([self.batch_size, self.seq_length],
                                        vocab_size=2)

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.type_vocab_size)

            mc_token_ids = None
            if self.use_mc_token_ids:
                mc_token_ids = ids_tensor([self.batch_size, self.num_choices],
                                          self.seq_length)

            sequence_labels = None
            token_labels = None
            choice_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                choice_labels = ids_tensor([self.batch_size], self.num_choices)

            config = CTRLConfig(
                vocab_size_or_config_json_file=self.vocab_size,
                n_embd=self.hidden_size,
                n_layer=self.num_hidden_layers,
                n_head=self.num_attention_heads,
                # intermediate_size=self.intermediate_size,
                # hidden_act=self.hidden_act,
                # hidden_dropout_prob=self.hidden_dropout_prob,
                # attention_probs_dropout_prob=self.attention_probs_dropout_prob,
                n_positions=self.max_position_embeddings,
                n_ctx=self.max_position_embeddings
                # type_vocab_size=self.type_vocab_size,
                # initializer_range=self.initializer_range
            )

            head_mask = ids_tensor(
                [self.num_hidden_layers, self.num_attention_heads], 2)
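            # random 0/1 mask over (layer, head) pairs for tests that exercise
            # head masking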

            return config, input_ids, input_mask, head_mask, token_type_ids, mc_token_ids, sequence_labels, token_labels, choice_labels

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_ctrl_model(self, config, input_ids, input_mask,
                                        head_mask, token_type_ids, *args):
            model = CTRLModel(config=config)
            model.eval()

            model(input_ids,
                  token_type_ids=token_type_ids,
                  head_mask=head_mask)
            model(input_ids, token_type_ids=token_type_ids)
            sequence_output, presents = model(input_ids)

            result = {
                "sequence_output": sequence_output,
                "presents": presents,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])
            self.parent.assertEqual(len(result["presents"]), config.n_layer)

        def create_and_check_lm_head_model(self, config, input_ids, input_mask,
                                           head_mask, token_type_ids, *args):
            model = CTRLLMHeadModel(config)
            model.eval()

            loss, lm_logits, _ = model(input_ids,
                                       token_type_ids=token_type_ids,
                                       labels=input_ids)

            result = {"loss": loss, "lm_logits": lm_logits}
            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["lm_logits"].size()),
                [self.batch_size, self.seq_length, self.vocab_size])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()

            (config, input_ids, input_mask, head_mask, token_type_ids,
             mc_token_ids, sequence_labels, token_labels,
             choice_labels) = config_and_inputs

            inputs_dict = {
                'input_ids': input_ids,
                'token_type_ids': token_type_ids,
                'head_mask': head_mask
            }

            return config, inputs_dict

    def setUp(self):
        self.model_tester = CTRLModelTest.CTRLModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=CTRLConfig,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_ctrl_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_ctrl_model(*config_and_inputs)

    def test_ctrl_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

    @pytest.mark.slow
    def test_model_from_pretrained(self):
        cache_dir = "/tmp/transformers_test/"
        for model_name in list(CTRL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = CTRLModel.from_pretrained(model_name, cache_dir=cache_dir)
            shutil.rmtree(cache_dir)
            self.assertIsNotNone(model)
Example #9
0
class GPTNeoModelTest(ModelTesterMixin, GenerationTesterMixin,
                      unittest.TestCase):

    all_model_classes = ((GPTNeoModel, GPTNeoForCausalLM,
                          GPTNeoForSequenceClassification)
                         if is_torch_available() else ())
    all_generative_model_classes = (
        GPTNeoForCausalLM, ) if is_torch_available() else ()
    fx_compatible = True
    test_missing_keys = False
    test_pruning = False
    test_model_parallel = False

    # GPT-Neo has no DoubleHeads model, so no special input handling is needed here
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)
        return inputs_dict

    def setUp(self):
        self.model_tester = GPTNeoModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=GPTNeoConfig,
                                          n_embd=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_gpt_neo_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt_neo_model(*config_and_inputs)

    def test_gpt_neo_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt_neo_model_past(
            *config_and_inputs)

    def test_gpt_neo_model_att_mask_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt_neo_model_attention_mask_past(
            *config_and_inputs)

    def test_gpt_neo_model_past_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt_neo_model_past_large_inputs(
            *config_and_inputs)

    def test_gpt_neo_lm_head_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lm_head_model(*config_and_inputs)

    def test_gpt_neo_sequence_classification_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_gpt_neo_for_sequence_classification(
            *config_and_inputs)

    def test_gpt_neo_gradient_checkpointing(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_forward_and_backwards(
            *config_and_inputs, gradient_checkpointing=True)

    def _get_hidden_states(self):
        return torch.tensor(
            [[
                [0.4983, -0.7584, -1.6944, 0.5440],
                [2.6918, 0.4206, 0.4176, 0.2055],
                [-0.0071, -0.0405, -1.4920, -0.3630],
                [1.0492, 0.1599, -1.7648, 0.2419],
                [-1.8348, 2.0514, -0.1946, 0.3203],
                [0.7672, -1.1600, -1.7118, -0.9056],
                [0.2986, 0.5372, 0.7729, -0.1927],
                [0.0285, 0.2629, -1.1156, -1.1992],
            ]],
            dtype=torch.float32,
            device=torch_device,
        )

    def test_local_attn_probs(self):
        model = GPTNeoModel.from_pretrained(
            "valhalla/gpt-neo-random-tiny").eval()
        layer = model.h[1].attn.attention.to(torch_device)
        hidden_states = self._get_hidden_states()
        hidden_states = torch.cat([hidden_states, hidden_states - 0.5], dim=2)

        batch_size, seq_length, _ = hidden_states.shape
        mask_tokens = 2
        attention_mask = torch.ones(batch_size,
                                    seq_length,
                                    device=torch_device,
                                    dtype=torch.long)
        attention_mask[:, -mask_tokens:] = 0  # don't attend to the last mask_tokens tokens

        attention_mask = attention_mask.view(batch_size, -1)
        attention_mask = attention_mask[:, None, None, :]
        attention_mask = (1.0 - attention_mask) * -10000.0
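        # additive mask: kept positions become 0.0, masked positions become -10000.0,
        # which the attention layer adds to the raw scores before the softmax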

        attn_probs = layer(hidden_states,
                           attention_mask=attention_mask,
                           output_attentions=True)[-1]

        # the last 2 tokens are masked, and should have 0 attn_probs
        self.assertTrue(
            torch.all(attn_probs[:, :, -mask_tokens:, -mask_tokens:] == 0))

        # in local attention each token can only attend to the previous window_size tokens (including itself)
        # here window_size is 4, so a token at index 5 can only attend to indices [2, 3, 4, 5]
        # and the attn_probs should be 0 for tokens [0, 1]
        self.assertTrue(torch.all(attn_probs[:, :, 5, 2:6] != 0))
        self.assertTrue(torch.all(attn_probs[:, :, 5, :2] == 0))
Example #10
0
class DPRModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        DPRContextEncoder,
        DPRQuestionEncoder,
        DPRReader,
    ) if is_torch_available() else ())

    test_resize_embeddings = False
    test_missing_keys = False  # why?
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = DPRModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=DPRConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_context_encoder_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_context_encoder(*config_and_inputs)

    def test_question_encoder_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_question_encoder(*config_and_inputs)

    def test_reader_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_reader(*config_and_inputs)

    def test_init_changed_config(self):
        config = self.model_tester.prepare_config_and_inputs()[0]

        model = DPRQuestionEncoder(config=config)
        model.to(torch_device)
        model.eval()

        with tempfile.TemporaryDirectory() as tmp_dirname:
            model.save_pretrained(tmp_dirname)
            model = DPRQuestionEncoder.from_pretrained(tmp_dirname,
                                                       projection_dim=512)

        self.assertIsNotNone(model)

    @slow
    def test_model_from_pretrained(self):
        for model_name in DPR_CONTEXT_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = DPRContextEncoder.from_pretrained(model_name)
            self.assertIsNotNone(model)

        for model_name in DPR_QUESTION_ENCODER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = DPRQuestionEncoder.from_pretrained(model_name)
            self.assertIsNotNone(model)

        for model_name in DPR_READER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = DPRReader.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #11
0
class XLMModelTest(ModelTesterMixin, GenerationTesterMixin, unittest.TestCase):

    all_model_classes = ((
        XLMModel,
        XLMWithLMHeadModel,
        XLMForQuestionAnswering,
        XLMForSequenceClassification,
        XLMForQuestionAnsweringSimple,
        XLMForTokenClassification,
        XLMForMultipleChoice,
    ) if is_torch_available() else ())
    all_generative_model_classes = (
        (XLMWithLMHeadModel, ) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable
    test_sequence_classification_problem_types = True

    # XLM has 2 QA models -> need to manually set the correct labels for one of them here
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

        if return_labels:
            if model_class.__name__ == "XLMForQuestionAnswering":
                inputs_dict["start_positions"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["end_positions"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)

        return inputs_dict

    def setUp(self):
        self.model_tester = XLMModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=XLMConfig,
                                          emb_dim=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlm_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_model(*config_and_inputs)

    def test_xlm_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_lm_head(*config_and_inputs)

    def test_xlm_simple_qa(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_simple_qa(*config_and_inputs)

    def test_xlm_qa(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_qa(*config_and_inputs)

    def test_xlm_sequence_classif(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_sequence_classif(
            *config_and_inputs)

    def test_xlm_token_classif(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_token_classif(
            *config_and_inputs)

    def test_xlm_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_for_multiple_choice(
            *config_and_inputs)

    def _check_attentions_for_generate(self,
                                       batch_size,
                                       attentions,
                                       min_length,
                                       max_length,
                                       config,
                                       use_cache=False,
                                       num_beam_groups=1):
        self.assertIsInstance(attentions, tuple)
        self.assertListEqual([
            isinstance(iter_attentions, tuple)
            for iter_attentions in attentions
        ], [True] * len(attentions))
        self.assertEqual(len(attentions),
                         (max_length - min_length) * num_beam_groups)

        for idx, iter_attentions in enumerate(attentions):
            # adds PAD dummy token
            tgt_len = min_length + idx + 1
            src_len = min_length + idx + 1

            expected_shape = (
                batch_size * num_beam_groups,
                config.num_attention_heads,
                tgt_len,
                src_len,
            )
            # check attn size
            self.assertListEqual(
                [layer_attention.shape for layer_attention in iter_attentions],
                [expected_shape] * len(iter_attentions))

    def _check_hidden_states_for_generate(self,
                                          batch_size,
                                          hidden_states,
                                          min_length,
                                          max_length,
                                          config,
                                          use_cache=False,
                                          num_beam_groups=1):
        self.assertIsInstance(hidden_states, tuple)
        self.assertListEqual(
            [
                isinstance(iter_hidden_states, tuple)
                for iter_hidden_states in hidden_states
            ],
            [True] * len(hidden_states),
        )
        self.assertEqual(len(hidden_states),
                         (max_length - min_length) * num_beam_groups)

        for idx, iter_hidden_states in enumerate(hidden_states):
            # adds PAD dummy token
            seq_len = min_length + idx + 1
            expected_shape = (batch_size * num_beam_groups, seq_len,
                              config.hidden_size)
            # check hidden size
            self.assertListEqual(
                [
                    layer_hidden_states.shape
                    for layer_hidden_states in iter_hidden_states
                ],
                [expected_shape] * len(iter_hidden_states),
            )
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in XLM_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = XLMModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #12
0
class T5ModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (T5Model, T5ForConditionalGeneration) if is_torch_available() else ()
    all_generative_model_classes = (T5ForConditionalGeneration,) if is_torch_available() else ()
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False
    is_encoder_decoder = True

    class T5ModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            encoder_seq_length=7,
            decoder_seq_length=9,
            is_training=True,
            use_attention_mask=True,
            use_labels=True,
            vocab_size=99,
            n_positions=14,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            d_ff=37,
            relative_attention_num_buckets=8,
            dropout_rate=0.1,
            initializer_factor=0.002,
            eos_token_id=1,
            pad_token_id=0,
            decoder_start_token_id=0,
            scope=None,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.encoder_seq_length = encoder_seq_length
            self.decoder_seq_length = decoder_seq_length
            self.is_training = is_training
            self.use_attention_mask = use_attention_mask
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.n_positions = n_positions
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.d_ff = d_ff
            self.relative_attention_num_buckets = relative_attention_num_buckets
            self.dropout_rate = dropout_rate
            self.initializer_factor = initializer_factor
            self.scope = scope
            self.eos_token_id = eos_token_id
            self.pad_token_id = pad_token_id
            self.decoder_start_token_id = decoder_start_token_id

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.encoder_seq_length], self.vocab_size)
            decoder_input_ids = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

            attention_mask = None
            decoder_attention_mask = None
            if self.use_attention_mask:
                attention_mask = ids_tensor([self.batch_size, self.encoder_seq_length], vocab_size=2)
                decoder_attention_mask = ids_tensor([self.batch_size, self.decoder_seq_length], vocab_size=2)

            lm_labels = None
            if self.use_labels:
                lm_labels = ids_tensor([self.batch_size, self.decoder_seq_length], self.vocab_size)

            config = T5Config(
                vocab_size=self.vocab_size,
                n_positions=self.n_positions,
                d_model=self.hidden_size,
                d_ff=self.d_ff,
                d_kv=self.hidden_size // self.num_attention_heads,
                num_layers=self.num_hidden_layers,
                num_heads=self.num_attention_heads,
                relative_attention_num_buckets=self.relative_attention_num_buckets,
                dropout_rate=self.dropout_rate,
                initializer_factor=self.initializer_factor,
                eos_token_id=self.eos_token_id,
                bos_token_id=self.pad_token_id,
                pad_token_id=self.pad_token_id,
                decoder_start_token_id=self.decoder_start_token_id,
            )

            return (
                config,
                input_ids,
                decoder_input_ids,
                attention_mask,
                decoder_attention_mask,
                lm_labels,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def check_prepare_lm_labels_via_shift_left(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5Model(config=config)
            model.to(torch_device)
            model.eval()

            # make sure that lm_labels are correctly padded from the right
            lm_labels.masked_fill_((lm_labels == self.decoder_start_token_id), self.eos_token_id)

            # add causal pad token mask
            triangular_mask = torch.tril(lm_labels.new_ones(lm_labels.shape)).logical_not()
            lm_labels.masked_fill_(triangular_mask, self.pad_token_id)
            decoder_input_ids = model._shift_right(lm_labels)
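            # _shift_right prepends decoder_start_token_id and drops the last label,
            # so decoder_input_ids[t] should equal lm_labels[t - 1]; the loop below
            # checks this position by position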

            for i, (decoder_input_ids_slice, lm_labels_slice) in enumerate(zip(decoder_input_ids, lm_labels)):
                # first item
                self.parent.assertEqual(decoder_input_ids_slice[0].item(), self.decoder_start_token_id)
                if i < decoder_input_ids_slice.shape[-1]:
                    if i < decoder_input_ids.shape[-1] - 1:
                        # items before diagonal
                        self.parent.assertListEqual(
                            decoder_input_ids_slice[1 : i + 1].tolist(), lm_labels_slice[:i].tolist()
                        )
                    # pad items after diagonal
                    if i < decoder_input_ids.shape[-1] - 2:
                        self.parent.assertListEqual(
                            decoder_input_ids_slice[i + 2 :].tolist(), lm_labels_slice[i + 1 : -1].tolist()
                        )
                else:
                    # all items after square
                    self.parent.assertListEqual(decoder_input_ids_slice[1:].tolist(), lm_labels_slice[:-1].tolist())

        def create_and_check_t5_model(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5Model(config=config)
            model.to(torch_device)
            model.eval()
            decoder_output, decoder_past, encoder_output = model(
                input_ids=input_ids,
                decoder_input_ids=decoder_input_ids,
                attention_mask=attention_mask,
                decoder_attention_mask=decoder_attention_mask,
            )
            decoder_output, decoder_past, encoder_output = model(
                input_ids=input_ids, decoder_input_ids=decoder_input_ids
            )

            result = {
                "encoder_output": encoder_output,
                "decoder_output": decoder_output,
                "decoder_past": decoder_past,
            }
            self.parent.assertListEqual(
                list(result["encoder_output"].size()), [self.batch_size, self.encoder_seq_length, self.hidden_size]
            )
            self.parent.assertListEqual(
                list(result["decoder_output"].size()), [self.batch_size, self.decoder_seq_length, self.hidden_size]
            )
            self.parent.assertEqual(len(decoder_past), 2)
            # decoder_past[0] should correspond to encoder output
            self.parent.assertTrue(torch.all(decoder_past[0][0] == encoder_output))
            # There should be `num_layers` key value embeddings stored in decoder_past[1]
            self.parent.assertEqual(len(decoder_past[1]), config.num_layers)
            # There should be a self attn key, a self attn value, a cross attn key and a cross attn value stored in each decoder_past[1] tuple
            self.parent.assertEqual(len(decoder_past[1][0]), 4)

        def create_and_check_t5_with_lm_head(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5ForConditionalGeneration(config=config)
            model.to(torch_device)
            model.eval()
            outputs = model(
                input_ids=input_ids,
                decoder_input_ids=decoder_input_ids,
                decoder_attention_mask=decoder_attention_mask,
                lm_labels=lm_labels,
            )
            loss, prediction_scores, _, _ = outputs
            self.parent.assertEqual(len(outputs), 4)
            result = {
                "loss": loss,
                "prediction_scores": prediction_scores,
            }
            self.parent.assertListEqual(
                list(result["prediction_scores"].size()), [self.batch_size, self.decoder_seq_length, self.vocab_size]
            )
            self.check_loss_output(result)

        def create_and_check_t5_decoder_model_past(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5Model(config=config).get_decoder()
            model.to(torch_device)
            model.eval()

            # first forward pass
            output, past_key_value_states = model(input_ids, use_cache=True)

            # create hypothetical next token and extend next_input_ids with it
            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

            # append it to input_ids to form next_input_ids
            next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)

            output_from_no_past = model(next_input_ids)[0]
            output_from_past = model(next_tokens, past_key_value_states=past_key_value_states)[0]
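            # output_from_no_past re-runs the full sequence, while output_from_past
            # feeds only the new token and reuses the cached key/values; the hidden
            # state at the final position should agree in both cases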

            # select random slice
            random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
            output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

            # test that outputs are equal for slice
            self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

        def create_and_check_t5_decoder_model_attention_mask_past(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5Model(config=config).get_decoder()
            model.to(torch_device)
            model.eval()

            # create attention mask
            attn_mask = torch.ones(input_ids.shape, dtype=torch.long, device=torch_device)

            half_seq_length = input_ids.shape[-1] // 2
            attn_mask[:, half_seq_length:] = 0

            # first forward pass
            output, past_key_value_states = model(input_ids, attention_mask=attn_mask, use_cache=True)

            # create hypothetical next token and extend next_input_ids with it
            next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size)

            # change a random masked slice from input_ids
            random_seq_idx_to_change = ids_tensor((1,), half_seq_length).item() + 1
            random_other_next_tokens = ids_tensor((self.batch_size, 1), config.vocab_size).squeeze(-1)
            input_ids[:, -random_seq_idx_to_change] = random_other_next_tokens

            # append to next input_ids and attn_mask
            next_input_ids = torch.cat([input_ids, next_tokens], dim=-1)
            attn_mask = torch.cat(
                [attn_mask, torch.ones((attn_mask.shape[0], 1), dtype=torch.long, device=torch_device)], dim=1,
            )

            # get two different outputs
            output_from_no_past = model(next_input_ids, attention_mask=attn_mask)[0]
            output_from_past = model(
                next_tokens, past_key_value_states=past_key_value_states, attention_mask=attn_mask
            )[0]

            # select random slice
            random_slice_idx = ids_tensor((1,), output_from_past.shape[-1]).item()
            output_from_no_past_slice = output_from_no_past[:, -1, random_slice_idx].detach()
            output_from_past_slice = output_from_past[:, 0, random_slice_idx].detach()

            # test that outputs are equal for slice
            self.parent.assertTrue(torch.allclose(output_from_past_slice, output_from_no_past_slice, atol=1e-3))

        def create_t5_and_check_t5_generate_with_past_key_value_states(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5ForConditionalGeneration(config=config)
            model.to(torch_device)
            model.eval()
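            # reseed before each generate call so sampling draws the same tokens;
            # the only difference between the two runs is use_cache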
            torch.manual_seed(0)
            output_without_past_cache = model.generate(
                input_ids[:1], num_beams=2, max_length=5, do_sample=True, use_cache=False
            )
            torch.manual_seed(0)
            output_with_past_cache = model.generate(input_ids[:1], num_beams=2, max_length=5, do_sample=True)
            self.parent.assertTrue(torch.all(output_with_past_cache == output_without_past_cache))

        def create_and_check_t5_model_fp16_forward(
            self, config, input_ids, decoder_input_ids, attention_mask, decoder_attention_mask, lm_labels,
        ):
            model = T5Model(config=config)
            model.to(torch_device)
            model.half()
            model.eval()
            output = model(input_ids, decoder_input_ids=input_ids, attention_mask=attention_mask)[0]
            self.parent.assertFalse(torch.isnan(output).any().item())

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                decoder_input_ids,
                attention_mask,
                decoder_attention_mask,
                lm_labels,
            ) = config_and_inputs

            inputs_dict = {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "decoder_input_ids": decoder_input_ids,
                "decoder_attention_mask": decoder_attention_mask,
                "use_cache": False,
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = T5ModelTest.T5ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=T5Config, d_model=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_shift_right(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_prepare_lm_labels_via_shift_left(*config_and_inputs)

    def test_t5_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model(*config_and_inputs)

    def test_with_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_with_lm_head(*config_and_inputs)

    def test_t5_decoder_model_past(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_past(*config_and_inputs)

    def test_t5_decoder_model_past_with_attn_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_decoder_model_attention_mask_past(*config_and_inputs)

    def test_t5_generate_with_past_key_value_states(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_t5_and_check_t5_generate_with_past_key_value_states(*config_and_inputs)

    @unittest.skipIf(torch_device == "cpu", "Can't do half precision")
    def test_t5_model_fp16_forward(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_t5_model_fp16_forward(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(T5_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = T5Model.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #13
0
class CanineModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        (
            CanineModel,
            CanineForMultipleChoice,
            CanineForQuestionAnswering,
            CanineForSequenceClassification,
            CanineForTokenClassification,
        )
        if is_torch_available()
        else ()
    )

    test_torchscript = False
    test_mismatched_shapes = False
    test_resize_embeddings = False
    test_pruning = False

    def setUp(self):
        self.model_tester = CanineModelTester(self)
        # we set has_text_modality to False as the config has no vocab_size attribute
        self.config_tester = ConfigTester(self, config_class=CanineConfig, has_text_modality=False, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(*config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(*config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(*config_and_inputs)

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states
            # expected_num_layers equals num_hidden_layers of the deep encoder + 1, + 2 for the first shallow encoder, + 2
            # for the final shallow encoder
            expected_num_layers = self.model_tester.num_hidden_layers + 1 + 2 + 2
            self.assertEqual(len(hidden_states), expected_num_layers)

            seq_length = self.model_tester.seq_length
            for i in range(expected_num_layers):
                if (i < 2) or ((expected_num_layers - i) < 3):
                    # the expected length of the hidden_states of the first and final shallow encoders
                    # is equal to the seq_length
                    self.assertListEqual(
                        list(hidden_states[i].shape[-2:]),
                        [seq_length, self.model_tester.hidden_size],
                    )
                else:
                    # the expected length of the hidden_states of the deep encoder needs to be
                    # adjusted for CANINE since the seq length is downsampled
                    self.assertListEqual(
                        list(hidden_states[i].shape[-2:]),
                        [seq_length // config.downsampling_rate, self.model_tester.hidden_size],
                    )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.return_dict = True

        seq_len = getattr(self.model_tester, "seq_length", None)

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            # we add + 2 due to the 2 shallow encoders
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers + 2)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.attentions
            # we add + 2 due to the 2 shallow encoders
            self.assertEqual(len(attentions), self.model_tester.num_hidden_layers + 2)

            self.assertListEqual(
                list(attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, seq_len, seq_len],
            )
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(**self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.attentions

            self.assertEqual(len(self_attentions), self.model_tester.num_hidden_layers + 2)
            self.assertListEqual(
                list(self_attentions[0].shape[-3:]),
                [self.model_tester.num_attention_heads, seq_len, seq_len],
            )

    def test_model_outputs_equivalence(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        def set_nan_tensor_to_zero(t):
            t[t != t] = 0  # NaN != NaN, so this zeroes out any NaN entries
            return t

        def check_equivalence(model, tuple_inputs, dict_inputs, additional_kwargs={}):
            with torch.no_grad():
                tuple_output = model(**tuple_inputs, return_dict=False, **additional_kwargs)
                dict_output = model(**dict_inputs, return_dict=True, **additional_kwargs).to_tuple()

                def recursive_check(tuple_object, dict_object):
                    if isinstance(tuple_object, (List, Tuple)):
                        for tuple_iterable_value, dict_iterable_value in zip(tuple_object, dict_object):
                            recursive_check(tuple_iterable_value, dict_iterable_value)
                    elif tuple_object is None:
                        return
                    else:
                        self.assertTrue(
                            torch.allclose(
                                set_nan_tensor_to_zero(tuple_object), set_nan_tensor_to_zero(dict_object), atol=1e-5
                            ),
                            msg=f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
                        )

                recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_hidden_states": True})

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            check_equivalence(model, tuple_inputs, dict_inputs, {"output_attentions": True})

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class, return_labels=True)
            check_equivalence(
                model, tuple_inputs, dict_inputs, {"output_hidden_states": True, "output_attentions": True}
            )

    def test_headmasking(self):
        if not self.test_head_masking:
            return

        global_rng.seed(42)
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        global_rng.seed()

        inputs_dict["output_attentions"] = True
        config.output_hidden_states = True
        configs_no_init = _config_zero_init(config)  # To be sure we have no Nan
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            model.to(torch_device)
            model.eval()

            # Prepare head_mask
            # Set require_grad after having prepared the tensor to avoid error (leaf variable has been moved into the graph interior)
            head_mask = torch.ones(
                self.model_tester.num_hidden_layers,
                self.model_tester.num_attention_heads,
                device=torch_device,
            )
            head_mask[0, 0] = 0
            head_mask[-1, :-1] = 0
            head_mask.requires_grad_(requires_grad=True)
            inputs = self._prepare_for_class(inputs_dict, model_class).copy()
            inputs["head_mask"] = head_mask

            outputs = model(**inputs, return_dict=True)

            # Test that we can get a gradient back for importance score computation
            output = sum(t.sum() for t in outputs[0])
            output = output.sum()
            output.backward()
            multihead_outputs = head_mask.grad

            self.assertIsNotNone(multihead_outputs)
            self.assertEqual(len(multihead_outputs), self.model_tester.num_hidden_layers)

            def check_attentions_validity(attentions):
                # Remove Nan
                for t in attentions:
                    self.assertLess(
                        torch.sum(torch.isnan(t)), t.numel() / 4
                    )  # Check we don't have more than 25% nans (arbitrary)
                attentions = [
                    t.masked_fill(torch.isnan(t), 0.0) for t in attentions
                ]  # remove them (the test is less complete)

                self.assertAlmostEqual(attentions[1][..., 0, :, :].flatten().sum().item(), 0.0)
                self.assertNotEqual(attentions[1][..., -1, :, :].flatten().sum().item(), 0.0)
                self.assertAlmostEqual(attentions[-2][..., -2, :, :].flatten().sum().item(), 0.0)
                self.assertNotEqual(attentions[-2][..., -1, :, :].flatten().sum().item(), 0.0)

            check_attentions_validity(outputs.attentions)

    @unittest.skip("CANINE does not have a get_input_embeddings() method.")
    def test_inputs_embeds(self):
        # CANINE does not use inputs_embeds
        pass

    @unittest.skip("CANINE does not have a get_input_embeddings() method.")
    def test_model_common_attributes(self):
        pass

    @slow
    def test_model_from_pretrained(self):
        for model_name in CANINE_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = CanineModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
Example #14
0
class LxmertModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        LxmertModel, LxmertForPreTraining,
        LxmertForQuestionAnswering) if is_torch_available() else ()

    test_head_masking = False
    test_pruning = False
    test_torchscript = False

    # overwrite function because qa models takes different input label shape
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = copy.deepcopy(inputs_dict)

        if return_labels:
            if model_class in get_values(MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                inputs_dict["labels"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
            elif model_class in get_values(MODEL_FOR_PRETRAINING_MAPPING):
                # special case for models like BERT that use multi-loss training for PreTraining
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = LxmertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=LxmertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_lxmert_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lxmert_model(*config_and_inputs)

    def test_lxmert_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lxmert_for_question_answering(
            *config_and_inputs)

    def test_lxmert_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_lxmert_for_pretraining(
            *config_and_inputs)

    def test_lxmert_question_answering_labels_resize(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.resize_lxmert_num_qa_labels(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in LXMERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = LxmertModel.from_pretrained(model_name)
            model.to(torch_device)
            self.assertIsNotNone(model)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        seq_len = getattr(self.model_tester, "seq_length", None)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length",
                                     seq_len)
        encoder_key_length = getattr(self.model_tester, "key_length",
                                     encoder_seq_length)
        chunk_length = getattr(self.model_tester, "chunk_length", None)
        if chunk_length is not None and hasattr(self.model_tester,
                                                "num_hashes"):
            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            language_attentions, vision_attentions, cross_encoder_attentions = (
                outputs[-3], outputs[-2], outputs[-1])

            self.assertEqual(len(language_attentions),
                             self.model_tester.num_hidden_layers["language"])
            self.assertEqual(len(vision_attentions),
                             self.model_tester.num_hidden_layers["vision"])
            self.assertEqual(
                len(cross_encoder_attentions),
                self.model_tester.num_hidden_layers["cross_encoder"])

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            language_attentions, vision_attentions, cross_encoder_attentions = (
                outputs[-3], outputs[-2], outputs[-1])
            self.assertEqual(len(language_attentions),
                             self.model_tester.num_hidden_layers["language"])
            self.assertEqual(len(vision_attentions),
                             self.model_tester.num_hidden_layers["vision"])
            self.assertEqual(
                len(cross_encoder_attentions),
                self.model_tester.num_hidden_layers["cross_encoder"])

            attentions = [
                language_attentions, vision_attentions,
                cross_encoder_attentions
            ]
            attention_shapes = [
                [
                    self.model_tester.num_attention_heads, encoder_seq_length,
                    encoder_key_length
                ],
                [
                    self.model_tester.num_attention_heads,
                    self.model_tester.num_visual_features,
                    self.model_tester.num_visual_features,
                ],
                [
                    self.model_tester.num_attention_heads, encoder_key_length,
                    self.model_tester.num_visual_features
                ],
            ]

            for attention, attention_shape in zip(attentions,
                                                  attention_shapes):
                self.assertListEqual(list(attention[0].shape[-3:]),
                                     attention_shape)
            out_len = len(outputs)

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            # 2 hidden states were added
            self.assertEqual(out_len + 2, len(outputs))

            language_attentions, vision_attentions, cross_encoder_attentions = (
                outputs[-3], outputs[-2], outputs[-1])
            self.assertEqual(len(language_attentions),
                             self.model_tester.num_hidden_layers["language"])
            self.assertEqual(len(vision_attentions),
                             self.model_tester.num_hidden_layers["vision"])
            self.assertEqual(
                len(cross_encoder_attentions),
                self.model_tester.num_hidden_layers["cross_encoder"])

            attentions = [
                language_attentions, vision_attentions,
                cross_encoder_attentions
            ]
            attention_shapes = [
                [
                    self.model_tester.num_attention_heads, encoder_seq_length,
                    encoder_key_length
                ],
                [
                    self.model_tester.num_attention_heads,
                    self.model_tester.num_visual_features,
                    self.model_tester.num_visual_features,
                ],
                [
                    self.model_tester.num_attention_heads, encoder_key_length,
                    self.model_tester.num_visual_features
                ],
            ]

            for attention, attention_shape in zip(attentions,
                                                  attention_shapes):
                self.assertListEqual(list(attention[0].shape[-3:]),
                                     attention_shape)

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            language_hidden_states, vision_hidden_states = outputs[
                -2], outputs[-1]

            self.assertEqual(
                len(language_hidden_states),
                self.model_tester.num_hidden_layers["language"] + 1)
            self.assertEqual(len(vision_hidden_states),
                             self.model_tester.num_hidden_layers["vision"] + 1)

            seq_length = self.model_tester.seq_length
            num_visual_features = self.model_tester.num_visual_features

            self.assertListEqual(
                list(language_hidden_states[0].shape[-2:]),
                [seq_length, self.model_tester.hidden_size],
            )
            self.assertListEqual(
                list(vision_hidden_states[0].shape[-2:]),
                [num_visual_features, self.model_tester.hidden_size],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_retain_grad_hidden_states_attentions(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.output_hidden_states = True
        config.output_attentions = True

        # no need to test all models as different heads yield the same functionality
        model_class = self.all_model_classes[0]
        model = model_class(config)
        model.to(torch_device)

        inputs = self._prepare_for_class(inputs_dict, model_class)

        outputs = model(**inputs)

        hidden_states_lang = outputs.language_hidden_states[0]
        attentions_lang = outputs.language_attentions[0]

        hidden_states_vision = outputs.vision_hidden_states[0]
        attentions_vision = outputs.vision_attentions[0]

        hidden_states_lang.retain_grad()
        attentions_lang.retain_grad()
        hidden_states_vision.retain_grad()
        attentions_vision.retain_grad()

        outputs.language_output.flatten()[0].backward(retain_graph=True)
        outputs.vision_output.flatten()[0].backward(retain_graph=True)

        self.assertIsNotNone(hidden_states_lang.grad)
        self.assertIsNotNone(attentions_lang.grad)
        self.assertIsNotNone(hidden_states_vision.grad)
        self.assertIsNotNone(attentions_vision.grad)

    def prepare_tf_inputs_from_pt_inputs(self, pt_inputs_dict):

        tf_inputs_dict = {}
        for key, value in pt_inputs_dict.items():
            # recursively convert nested containers; pass booleans through unchanged
            if isinstance(value, dict):
                tf_inputs_dict[key] = self.prepare_tf_inputs_from_pt_inputs(
                    value)
            elif isinstance(value, (list, tuple)):
                tf_inputs_dict[key] = tuple(
                    self.prepare_tf_inputs_from_pt_inputs(iter_value)
                    for iter_value in value)
            elif isinstance(value, bool):
                tf_inputs_dict[key] = value
            elif key == "input_values":
                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(),
                                                           dtype=tf.float32)
            elif key == "pixel_values":
                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(),
                                                           dtype=tf.float32)
            elif key == "input_features":
                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(),
                                                           dtype=tf.float32)
            # other general float inputs
            elif value.is_floating_point():
                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(),
                                                           dtype=tf.float32)
            else:
                tf_inputs_dict[key] = tf.convert_to_tensor(value.cpu().numpy(),
                                                           dtype=tf.int32)

        return tf_inputs_dict
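The helper above maps a PyTorch input dict to TensorFlow tensors, sending floating-point inputs to tf.float32 and integer inputs to tf.int32. A minimal standalone sketch of the same dtype rules, with hypothetical inputs and assuming both torch and tensorflow are installed:

import torch
import tensorflow as tf

pt_inputs = {
    "input_ids": torch.tensor([[101, 2009, 102]], dtype=torch.long),
    "attention_mask": torch.ones(1, 3, dtype=torch.long),
    "pixel_values": torch.rand(1, 3, 4, 4),
}

tf_inputs = {}
for key, value in pt_inputs.items():
    np_value = value.cpu().numpy()
    if value.is_floating_point():
        # float inputs (pixel_values, input_features, ...) become tf.float32
        tf_inputs[key] = tf.convert_to_tensor(np_value, dtype=tf.float32)
    else:
        # integer inputs (input_ids, attention_mask, ...) become tf.int32
        tf_inputs[key] = tf.convert_to_tensor(np_value, dtype=tf.int32)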
Example #15
0
class PoolFormerModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        PoolFormerModel,
        PoolFormerForImageClassification) if is_torch_available() else ()

    test_head_masking = False
    test_pruning = False
    test_resize_embeddings = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = PoolFormerModelTester(self)
        self.config_tester = PoolFormerConfigTester(
            self, config_class=PoolFormerConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    @unittest.skip("PoolFormer does not use inputs_embeds")
    def test_inputs_embeds(self):
        pass

    @unittest.skip(
        "PoolFormer does not have get_input_embeddings and get_output_embeddings methods"
    )
    def test_model_common_attributes(self):
        pass

    def test_retain_grad_hidden_states_attentions(self):
        # PoolFormer does not use attention, so only hidden state gradients are checked
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.output_hidden_states = True

        # no need to test all models as different heads yield the same functionality
        model_class = self.all_model_classes[0]
        model = model_class(config)
        model.to(torch_device)

        inputs = self._prepare_for_class(inputs_dict, model_class)

        outputs = model(**inputs)

        output = outputs[0]

        hidden_states = outputs.hidden_states[0]

        hidden_states.retain_grad()

        output.flatten()[0].backward(retain_graph=True)

        self.assertIsNotNone(hidden_states.grad)

    def test_model_outputs_equivalence(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        def set_nan_tensor_to_zero(t):
            t[t != t] = 0
            return t

        def check_equivalence(model,
                              tuple_inputs,
                              dict_inputs,
                              additional_kwargs={}):
            with torch.no_grad():
                tuple_output = model(**tuple_inputs,
                                     return_dict=False,
                                     **additional_kwargs)
                dict_output = model(**dict_inputs,
                                    return_dict=True,
                                    **additional_kwargs).to_tuple()

                def recursive_check(tuple_object, dict_object):
                    if isinstance(tuple_object, (List, Tuple)):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object, dict_object):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif isinstance(tuple_object, Dict):
                        for tuple_iterable_value, dict_iterable_value in zip(
                                tuple_object.values(), dict_object.values()):
                            recursive_check(tuple_iterable_value,
                                            dict_iterable_value)
                    elif tuple_object is None:
                        return
                    else:
                        self.assertTrue(
                            torch.allclose(
                                set_nan_tensor_to_zero(tuple_object),
                                set_nan_tensor_to_zero(dict_object),
                                atol=1e-5),
                            msg=
                            f"Tuple and dict output are not equal. Difference: {torch.max(torch.abs(tuple_object - dict_object))}. Tuple has `nan`: {torch.isnan(tuple_object).any()} and `inf`: {torch.isinf(tuple_object)}. Dict has `nan`: {torch.isnan(dict_object).any()} and `inf`: {torch.isinf(dict_object)}.",
                        )

                recursive_check(tuple_output, dict_output)

        for model_class in self.all_model_classes:
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict,
                                                   model_class,
                                                   return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict,
                                                  model_class,
                                                  return_labels=True)
            check_equivalence(model, tuple_inputs, dict_inputs)

            tuple_inputs = self._prepare_for_class(inputs_dict, model_class)
            dict_inputs = self._prepare_for_class(inputs_dict, model_class)
            check_equivalence(model, tuple_inputs, dict_inputs,
                              {"output_hidden_states": True})

            tuple_inputs = self._prepare_for_class(inputs_dict,
                                                   model_class,
                                                   return_labels=True)
            dict_inputs = self._prepare_for_class(inputs_dict,
                                                  model_class,
                                                  return_labels=True)
            check_equivalence(model, tuple_inputs, dict_inputs,
                              {"output_hidden_states": True})

    def test_forward_signature(self):
        config, _ = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            model = model_class(config)
            signature = inspect.signature(model.forward)
            # signature.parameters is an OrderedDict => so arg_names order is deterministic
            arg_names = [*signature.parameters.keys()]

            expected_arg_names = ["pixel_values"]
            self.assertListEqual(arg_names[:1], expected_arg_names)

    @unittest.skip("PoolFormer does not have attention")
    def test_attention_outputs(self):
        pass

    def test_hidden_states_output(self):
        def check_hidden_states_output(inputs_dict, config, model_class):
            model = model_class(config)
            model.to(torch_device)
            model.eval()

            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            hidden_states = outputs.hidden_states

            expected_num_layers = self.model_tester.num_encoder_blocks
            self.assertEqual(len(hidden_states), expected_num_layers)

            # verify the first hidden states (first block)
            self.assertListEqual(
                list(hidden_states[0].shape[-3:]),
                [
                    self.model_tester.hidden_sizes[0],
                    self.model_tester.image_size // 4,
                    self.model_tester.image_size // 4,
                ],
            )

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            inputs_dict["output_hidden_states"] = True
            check_hidden_states_output(inputs_dict, config, model_class)

            # check that output_hidden_states also work using config
            del inputs_dict["output_hidden_states"]
            config.output_hidden_states = True

            check_hidden_states_output(inputs_dict, config, model_class)

    def test_training(self):
        if not self.model_tester.is_training:
            return

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        for model_class in self.all_model_classes:
            if model_class in get_values(MODEL_MAPPING):
                continue
            model = model_class(config)
            model.to(torch_device)
            model.train()
            inputs = self._prepare_for_class(inputs_dict,
                                             model_class,
                                             return_labels=True)
            loss = model(**inputs).loss
            loss.backward()

    @slow
    def test_model_from_pretrained(self):
        for model_name in POOLFORMER_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = PoolFormerModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
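The test_model_outputs_equivalence method above compares a model's tuple output with its dict output after zeroing NaNs, so torch.allclose does not fail spuriously on NaN != NaN. A minimal sketch of that comparison on toy tensors rather than model outputs:

import torch

def set_nan_tensor_to_zero(t):
    # NaN != NaN, so this indexing selects and zeroes exactly the NaN entries
    t[t != t] = 0
    return t

a = torch.tensor([1.0, float("nan"), 3.0])
b = torch.tensor([1.0, float("nan"), 3.0])
assert torch.allclose(set_nan_tensor_to_zero(a.clone()),
                      set_nan_tensor_to_zero(b.clone()),
                      atol=1e-5)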
Example #16
0
class AlbertModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        AlbertModel,
        AlbertForPreTraining,
        AlbertForMaskedLM,
        AlbertForMultipleChoice,
        AlbertForSequenceClassification,
        AlbertForTokenClassification,
        AlbertForQuestionAnswering,
    ) if is_torch_available() else ())

    # special case for ForPreTraining model
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

        if return_labels:
            if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["sentence_order_label"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = AlbertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=AlbertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_pretraining(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in ALBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = AlbertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)
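The _prepare_for_class override above adds zero-valued label tensors for the AlbertForPreTraining case: per-token labels are 2-D (batch x sequence) while the sentence-order label is 1-D (batch). A standalone sketch with hypothetical sizes:

import torch

batch_size, seq_length = 13, 7   # hypothetical tester sizes
labels = torch.zeros((batch_size, seq_length), dtype=torch.long)
sentence_order_label = torch.zeros(batch_size, dtype=torch.long)

assert labels.shape == (batch_size, seq_length)
assert sentence_order_label.shape == (batch_size,)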
Example #17
0
class ConvBertModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        ConvBertModel,
        ConvBertForMaskedLM,
        ConvBertForMultipleChoice,
        ConvBertForQuestionAnswering,
        ConvBertForSequenceClassification,
        ConvBertForTokenClassification,
    ) if is_torch_available() else ())
    test_pruning = False
    test_head_masking = False

    def setUp(self):
        self.model_tester = ConvBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=ConvBertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in CONVBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = ConvBertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    def test_attention_outputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.return_dict = True

        seq_len = getattr(self.model_tester, "seq_length", None)
        decoder_seq_length = getattr(self.model_tester, "decoder_seq_length",
                                     seq_len)
        encoder_seq_length = getattr(self.model_tester, "encoder_seq_length",
                                     seq_len)
        decoder_key_length = getattr(self.model_tester, "decoder_key_length",
                                     decoder_seq_length)
        encoder_key_length = getattr(self.model_tester, "key_length",
                                     encoder_seq_length)
        chunk_length = getattr(self.model_tester, "chunk_length", None)
        if chunk_length is not None and hasattr(self.model_tester,
                                                "num_hashes"):
            encoder_seq_length = encoder_seq_length * self.model_tester.num_hashes

        for model_class in self.all_model_classes:
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = False
            config.return_dict = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            # check that output_attentions also work using config
            del inputs_dict["output_attentions"]
            config.output_attentions = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))
            attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions
            self.assertEqual(len(attentions),
                             self.model_tester.num_hidden_layers)

            if chunk_length is not None:
                self.assertListEqual(
                    list(attentions[0].shape[-4:]),
                    [
                        self.model_tester.num_attention_heads / 2,
                        encoder_seq_length, chunk_length, encoder_key_length
                    ],
                )
            else:
                self.assertListEqual(
                    list(attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads / 2,
                        encoder_seq_length, encoder_key_length
                    ],
                )
            out_len = len(outputs)

            if self.is_encoder_decoder:
                correct_outlen = 5

                # loss is at first position
                if "labels" in inputs_dict:
                    correct_outlen += 1  # loss is added to beginning
                # Question Answering model returns start_logits and end_logits
                if model_class in get_values(
                        MODEL_FOR_QUESTION_ANSWERING_MAPPING):
                    correct_outlen += 1  # start_logits and end_logits instead of only 1 output
                if "past_key_values" in outputs:
                    correct_outlen += 1  # past_key_values have been returned

                self.assertEqual(out_len, correct_outlen)

                # decoder attentions
                decoder_attentions = outputs.decoder_attentions
                self.assertIsInstance(decoder_attentions, (list, tuple))
                self.assertEqual(len(decoder_attentions),
                                 self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(decoder_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length, decoder_key_length
                    ],
                )

                # cross attentions
                cross_attentions = outputs.cross_attentions
                self.assertIsInstance(cross_attentions, (list, tuple))
                self.assertEqual(len(cross_attentions),
                                 self.model_tester.num_hidden_layers)
                self.assertListEqual(
                    list(cross_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads,
                        decoder_seq_length,
                        encoder_key_length,
                    ],
                )

            # Check attention is always last and order is fine
            inputs_dict["output_attentions"] = True
            inputs_dict["output_hidden_states"] = True
            model = model_class(config)
            model.to(torch_device)
            model.eval()
            with torch.no_grad():
                outputs = model(
                    **self._prepare_for_class(inputs_dict, model_class))

            if hasattr(self.model_tester, "num_hidden_states_types"):
                added_hidden_states = self.model_tester.num_hidden_states_types
            elif self.is_encoder_decoder:
                added_hidden_states = 2
            else:
                added_hidden_states = 1
            self.assertEqual(out_len + added_hidden_states, len(outputs))

            self_attentions = outputs.encoder_attentions if config.is_encoder_decoder else outputs.attentions

            self.assertEqual(len(self_attentions),
                             self.model_tester.num_hidden_layers)
            if chunk_length is not None:
                self.assertListEqual(
                    list(self_attentions[0].shape[-4:]),
                    [
                        self.model_tester.num_attention_heads / 2,
                        encoder_seq_length, chunk_length, encoder_key_length
                    ],
                )
            else:
                self.assertListEqual(
                    list(self_attentions[0].shape[-3:]),
                    [
                        self.model_tester.num_attention_heads / 2,
                        encoder_seq_length, encoder_key_length
                    ],
                )

    @slow
    @require_torch_gpu
    def test_torchscript_device_change(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        for model_class in self.all_model_classes:

            # ConvBertForMultipleChoice behaves incorrectly in JIT environments.
            if model_class == ConvBertForMultipleChoice:
                return

            config.torchscript = True
            model = model_class(config=config)

            inputs_dict = self._prepare_for_class(inputs_dict, model_class)
            traced_model = torch.jit.trace(
                model, (inputs_dict["input_ids"].to("cpu"),
                        inputs_dict["attention_mask"].to("cpu")))

            with tempfile.TemporaryDirectory() as tmp:
                torch.jit.save(traced_model,
                               os.path.join(tmp, "traced_model.pt"))
                loaded = torch.jit.load(os.path.join(tmp, "traced_model.pt"),
                                        map_location=torch_device)
                loaded(inputs_dict["input_ids"].to(torch_device),
                       inputs_dict["attention_mask"].to(torch_device))
Example #18
0
class QDQBertModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        QDQBertModel,
        QDQBertForMaskedLM,
        QDQBertForMultipleChoice,
        QDQBertForNextSentencePrediction,
        QDQBertForQuestionAnswering,
        QDQBertForSequenceClassification,
        QDQBertForTokenClassification,
        QDQBertLMHeadModel,
    ) if is_torch_available() else ())
    all_generative_model_classes = (
        QDQBertLMHeadModel, ) if is_torch_available() else ()

    def setUp(self):
        self.model_tester = QDQBertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=QDQBertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_as_decoder(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        ) = self.model_tester.prepare_config_and_inputs_for_decoder()

        input_mask = None

        self.model_tester.create_and_check_model_as_decoder(
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        )

    def test_for_causal_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_for_causal_lm(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_causal_lm_decoder(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_model_for_causal_lm_as_decoder(
            *config_and_inputs)

    def test_decoder_model_past_with_large_inputs(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_decoder_model_past_large_inputs(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_next_sequence_prediction(
            *config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in QDQBERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = QDQBertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)

    # Override
    def test_feed_forward_chunking(self):
        # feed forward chunking is not supported in QDQBert
        pass
Example #19
0
class LongformerModelTest(ModelTesterMixin, unittest.TestCase):
    test_pruning = False  # pruning is not supported
    test_torchscript = False

    all_model_classes = ((
        LongformerModel,
        LongformerForMaskedLM,
        LongformerForSequenceClassification,
        LongformerForQuestionAnswering,
        LongformerForTokenClassification,
        LongformerForMultipleChoice,
    ) if is_torch_available() else ())

    def setUp(self):
        self.model_tester = LongformerModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=LongformerConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_attention_mask_determinism(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_attention_mask_determinism(
            *config_and_inputs)

    def test_model_global_attention_mask(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model_with_global_attention_mask(
            *config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_question_answering(
        )
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_retain_grad_hidden_states_attentions(self):
        # longformer cannot keep gradients in attentions or hidden states
        return
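The Longformer test above skips the gradient-retention check. For reference, retain_grad on a non-leaf tensor is what the common test relies on; a generic PyTorch sketch (not Longformer-specific):

import torch

x = torch.randn(2, 3, requires_grad=True)
hidden = x * 2            # non-leaf tensor: its grad is normally discarded
hidden.retain_grad()      # ask autograd to keep hidden.grad after backward
hidden.sum().backward()
assert hidden.grad is not None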
Example #20
0
class XLMModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = ((
        XLMModel,
        XLMWithLMHeadModel,
        XLMForQuestionAnswering,
        XLMForSequenceClassification,
        XLMForQuestionAnsweringSimple,
    ) if is_torch_available() else ())
    all_generative_model_classes = (
        (XLMWithLMHeadModel, ) if is_torch_available() else ()
    )  # TODO (PVP): Check other models whether language generation is also applicable

    class XLMModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            is_training=True,
            use_input_lengths=True,
            use_token_type_ids=True,
            use_labels=True,
            gelu_activation=True,
            sinusoidal_embeddings=False,
            causal=False,
            asm=False,
            n_langs=2,
            vocab_size=99,
            n_special=0,
            hidden_size=32,
            num_hidden_layers=5,
            num_attention_heads=4,
            hidden_dropout_prob=0.1,
            attention_probs_dropout_prob=0.1,
            max_position_embeddings=512,
            type_vocab_size=16,
            type_sequence_label_size=2,
            initializer_range=0.02,
            num_labels=3,
            num_choices=4,
            summary_type="last",
            use_proj=True,
            scope=None,
            bos_token_id=0,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.is_training = is_training
            self.use_input_lengths = use_input_lengths
            self.use_token_type_ids = use_token_type_ids
            self.use_labels = use_labels
            self.gelu_activation = gelu_activation
            self.sinusoidal_embeddings = sinusoidal_embeddings
            self.asm = asm
            self.n_langs = n_langs
            self.vocab_size = vocab_size
            self.n_special = n_special
            self.summary_type = summary_type
            self.causal = causal
            self.use_proj = use_proj
            self.hidden_size = hidden_size
            self.num_hidden_layers = num_hidden_layers
            self.num_attention_heads = num_attention_heads
            self.hidden_dropout_prob = hidden_dropout_prob
            self.attention_probs_dropout_prob = attention_probs_dropout_prob
            self.max_position_embeddings = max_position_embeddings
            self.n_langs = n_langs
            self.type_sequence_label_size = type_sequence_label_size
            self.initializer_range = initializer_range
            self.summary_type = summary_type
            self.num_labels = num_labels
            self.num_choices = num_choices
            self.scope = scope
            self.bos_token_id = bos_token_id

        def prepare_config_and_inputs(self):
            input_ids = ids_tensor([self.batch_size, self.seq_length],
                                   self.vocab_size)
            input_mask = ids_tensor([self.batch_size, self.seq_length],
                                    2).float()

            input_lengths = None
            if self.use_input_lengths:
                input_lengths = (ids_tensor([self.batch_size], vocab_size=2) +
                                 self.seq_length - 2
                                 )  # small variation of seq_length

            token_type_ids = None
            if self.use_token_type_ids:
                token_type_ids = ids_tensor([self.batch_size, self.seq_length],
                                            self.n_langs)

            sequence_labels = None
            token_labels = None
            is_impossible_labels = None
            if self.use_labels:
                sequence_labels = ids_tensor([self.batch_size],
                                             self.type_sequence_label_size)
                token_labels = ids_tensor([self.batch_size, self.seq_length],
                                          self.num_labels)
                is_impossible_labels = ids_tensor([self.batch_size], 2).float()

            config = XLMConfig(
                vocab_size=self.vocab_size,
                n_special=self.n_special,
                emb_dim=self.hidden_size,
                n_layers=self.num_hidden_layers,
                n_heads=self.num_attention_heads,
                dropout=self.hidden_dropout_prob,
                attention_dropout=self.attention_probs_dropout_prob,
                gelu_activation=self.gelu_activation,
                sinusoidal_embeddings=self.sinusoidal_embeddings,
                asm=self.asm,
                causal=self.causal,
                n_langs=self.n_langs,
                max_position_embeddings=self.max_position_embeddings,
                initializer_range=self.initializer_range,
                summary_type=self.summary_type,
                use_proj=self.use_proj,
                bos_token_id=self.bos_token_id,
            )

            return (
                config,
                input_ids,
                token_type_ids,
                input_lengths,
                sequence_labels,
                token_labels,
                is_impossible_labels,
                input_mask,
            )

        def check_loss_output(self, result):
            self.parent.assertListEqual(list(result["loss"].size()), [])

        def create_and_check_xlm_model(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            model = XLMModel(config=config)
            model.to(torch_device)
            model.eval()
            outputs = model(input_ids,
                            lengths=input_lengths,
                            langs=token_type_ids)
            outputs = model(input_ids, langs=token_type_ids)
            outputs = model(input_ids)
            sequence_output = outputs[0]
            result = {
                "sequence_output": sequence_output,
            }
            self.parent.assertListEqual(
                list(result["sequence_output"].size()),
                [self.batch_size, self.seq_length, self.hidden_size])

        def create_and_check_xlm_lm_head(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            model = XLMWithLMHeadModel(config)
            model.to(torch_device)
            model.eval()

            loss, logits = model(input_ids,
                                 token_type_ids=token_type_ids,
                                 labels=token_labels)

            result = {
                "loss": loss,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()),
                [self.batch_size, self.seq_length, self.vocab_size])

        def create_and_check_xlm_simple_qa(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            model = XLMForQuestionAnsweringSimple(config)
            model.to(torch_device)
            model.eval()

            outputs = model(input_ids)

            outputs = model(input_ids,
                            start_positions=sequence_labels,
                            end_positions=sequence_labels)
            loss, start_logits, end_logits = outputs

            result = {
                "loss": loss,
                "start_logits": start_logits,
                "end_logits": end_logits,
            }
            self.parent.assertListEqual(list(result["start_logits"].size()),
                                        [self.batch_size, self.seq_length])
            self.parent.assertListEqual(list(result["end_logits"].size()),
                                        [self.batch_size, self.seq_length])
            self.check_loss_output(result)

        def create_and_check_xlm_qa(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            model = XLMForQuestionAnswering(config)
            model.to(torch_device)
            model.eval()

            outputs = model(input_ids)
            start_top_log_probs, start_top_index, end_top_log_probs, end_top_index, cls_logits = outputs

            outputs = model(
                input_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
                p_mask=input_mask,
            )

            outputs = model(
                input_ids,
                start_positions=sequence_labels,
                end_positions=sequence_labels,
                cls_index=sequence_labels,
                is_impossible=is_impossible_labels,
            )

            (total_loss, ) = outputs

            outputs = model(input_ids,
                            start_positions=sequence_labels,
                            end_positions=sequence_labels)

            (total_loss, ) = outputs

            result = {
                "loss": total_loss,
                "start_top_log_probs": start_top_log_probs,
                "start_top_index": start_top_index,
                "end_top_log_probs": end_top_log_probs,
                "end_top_index": end_top_index,
                "cls_logits": cls_logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["start_top_log_probs"].size()),
                [self.batch_size, model.config.start_n_top])
            self.parent.assertListEqual(
                list(result["start_top_index"].size()),
                [self.batch_size, model.config.start_n_top])
            self.parent.assertListEqual(
                list(result["end_top_log_probs"].size()),
                [
                    self.batch_size,
                    model.config.start_n_top * model.config.end_n_top
                ],
            )
            self.parent.assertListEqual(
                list(result["end_top_index"].size()),
                [
                    self.batch_size,
                    model.config.start_n_top * model.config.end_n_top
                ],
            )
            self.parent.assertListEqual(list(result["cls_logits"].size()),
                                        [self.batch_size])

        def create_and_check_xlm_sequence_classif(
            self,
            config,
            input_ids,
            token_type_ids,
            input_lengths,
            sequence_labels,
            token_labels,
            is_impossible_labels,
            input_mask,
        ):
            model = XLMForSequenceClassification(config)
            model.to(torch_device)
            model.eval()

            (logits, ) = model(input_ids)
            loss, logits = model(input_ids, labels=sequence_labels)

            result = {
                "loss": loss,
                "logits": logits,
            }

            self.parent.assertListEqual(list(result["loss"].size()), [])
            self.parent.assertListEqual(
                list(result["logits"].size()),
                [self.batch_size, self.type_sequence_label_size])

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (
                config,
                input_ids,
                token_type_ids,
                input_lengths,
                sequence_labels,
                token_labels,
                is_impossible_labels,
                input_mask,
            ) = config_and_inputs
            inputs_dict = {
                "input_ids": input_ids,
                "token_type_ids": token_type_ids,
                "lengths": input_lengths
            }
            return config, inputs_dict

    def setUp(self):
        self.model_tester = XLMModelTest.XLMModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=XLMConfig,
                                          emb_dim=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_xlm_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_model(*config_and_inputs)

    def test_xlm_lm_head(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_lm_head(*config_and_inputs)

    def test_xlm_simple_qa(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_simple_qa(*config_and_inputs)

    def test_xlm_qa(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_qa(*config_and_inputs)

    def test_xlm_sequence_classif(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_xlm_sequence_classif(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(XLM_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = XLMModel.from_pretrained(model_name, cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
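XLMModelTester.prepare_config_and_inputs above builds random integer tensors via ids_tensor. A small stand-in helper with the same role, using plain torch.randint (hypothetical helper, not the library's ids_tensor):

import torch

def random_ids(shape, vocab_size):
    # uniform random token ids in [0, vocab_size)
    return torch.randint(0, vocab_size, tuple(shape), dtype=torch.long)

input_ids = random_ids([13, 7], vocab_size=99)      # batch_size x seq_length
token_type_ids = random_ids([13, 7], vocab_size=2)  # e.g. langs with n_langs=2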
Example #21
0
class BARTModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (
        BartModel, BartForMaskedLM,
        BartForSequenceClassification) if is_torch_available() else ()
    is_encoder_decoder = True
    # TODO(SS): fix the below in a separate PR
    test_pruning = False
    test_torchscript = False
    test_head_masking = False
    test_resize_embeddings = False  # This requires inputs_dict['input_ids']

    def setUp(self):
        self.model_tester = ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BartConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_advanced_inputs(self):
        # (config, input_ids, token_type_ids, input_mask, *unused) = \
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        decoder_input_ids, decoder_attn_mask = _prepare_bart_decoder_inputs(
            config, inputs_dict["input_ids"])
        model = BartModel(config)
        model.to(torch_device)
        model.eval()
        # test init
        self.assertTrue(
            (model.encoder.embed_tokens.weight == model.shared.weight
             ).all().item())

        def _check_var(module):
            """Check that we initialized various parameters from N(0, config.init_std)."""
            self.assertAlmostEqual(
                torch.std(module.weight).item(), config.init_std, 2)

        _check_var(model.encoder.embed_tokens)
        _check_var(model.encoder.layers[0].self_attn.k_proj)
        _check_var(model.encoder.layers[0].fc1)
        _check_var(model.encoder.embed_positions)

        decoder_features_with_created_mask = model.forward(**inputs_dict)[0]
        decoder_features_with_passed_mask = model.forward(
            decoder_attention_mask=decoder_attn_mask,
            decoder_input_ids=decoder_input_ids,
            **inputs_dict)[0]
        _assert_tensors_equal(decoder_features_with_passed_mask,
                              decoder_features_with_created_mask)
        useless_mask = torch.zeros_like(decoder_attn_mask)
        decoder_features = model.forward(decoder_attention_mask=useless_mask,
                                         **inputs_dict)[0]
        self.assertTrue(isinstance(
            decoder_features, torch.Tensor))  # no hidden states or attentions
        self.assertEqual(decoder_features.size(),
                         (self.model_tester.batch_size,
                          self.model_tester.seq_length, config.d_model))
        if decoder_attn_mask.min().item() < -1e3:  # some tokens were masked
            self.assertFalse(
                (decoder_features_with_created_mask == decoder_features
                 ).all().item())

        # Test different encoder attention masks
        decoder_features_with_long_encoder_mask = model.forward(
            inputs_dict["input_ids"],
            attention_mask=inputs_dict["attention_mask"].long())[0]
        _assert_tensors_equal(decoder_features_with_long_encoder_mask,
                              decoder_features_with_created_mask)

    def test_save_load_strict(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        for model_class in self.all_model_classes:
            model = model_class(config)

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model2, info = model_class.from_pretrained(
                    tmpdirname, output_loading_info=True)
            self.assertEqual(info["missing_keys"], [])

    @unittest.skip("Passing inputs_embeds not implemented for Bart.")
    def test_inputs_embeds(self):
        pass
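test_save_load_strict above round-trips each model through save_pretrained / from_pretrained and asserts that no weights are missing on reload. A minimal sketch of that round trip, assuming the transformers library and using a small BERT config as a stand-in:

import tempfile
from transformers import BertConfig, BertModel

config = BertConfig(hidden_size=32, num_hidden_layers=2,
                    num_attention_heads=2, intermediate_size=64)
model = BertModel(config)

with tempfile.TemporaryDirectory() as tmpdirname:
    model.save_pretrained(tmpdirname)
    model2, info = BertModel.from_pretrained(tmpdirname,
                                             output_loading_info=True)

assert info["missing_keys"] == []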
Example #22
0
class FSMTModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (
        FSMTModel,
        FSMTForConditionalGeneration) if is_torch_available() else ()
    all_generative_model_classes = (
        FSMTForConditionalGeneration, ) if is_torch_available() else ()
    is_encoder_decoder = True
    # TODO(SS): fix the below in a separate PR
    test_pruning = False
    test_torchscript = True
    test_head_masking = False
    test_resize_embeddings = True  # This requires inputs_dict['input_ids']
    test_missing_keys = False  # because FSMTForConditionalGeneration and FSMTModel now have identical state_dict

    def setUp(self):
        self.model_tester = ModelTester(self)
        self.langs = ["en", "ru"]
        config = {
            "langs": self.langs,
            "src_vocab_size": 10,
            "tgt_vocab_size": 20,
        }
        # XXX: hack to appease all other models requiring `vocab_size`
        config["vocab_size"] = 99  # no such thing in FSMT
        self.config_tester = ConfigTester(self,
                                          config_class=FSMTConfig,
                                          **config)

    def test_config(self):
        self.config_tester.run_common_tests()

    # XXX: override test_model_common_attributes / different Embedding type
    def test_model_common_attributes(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

        for model_class in self.all_model_classes:
            model = model_class(config)
            self.assertIsInstance(model.get_input_embeddings(),
                                  torch.nn.Embedding)
            model.set_input_embeddings(torch.nn.Embedding(10, 10))
            x = model.get_output_embeddings()
            self.assertTrue(
                x is None or isinstance(x, torch.nn.modules.sparse.Embedding))

    def test_initialization_more(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        model = FSMTModel(config)
        model.to(torch_device)
        model.eval()

        # test init
        # self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())

        def _check_var(module):
            """Check that we initialized various parameters from N(0, config.init_std)."""
            self.assertAlmostEqual(
                torch.std(module.weight).item(), config.init_std, 2)

        _check_var(model.encoder.embed_tokens)
        _check_var(model.encoder.layers[0].self_attn.k_proj)
        _check_var(model.encoder.layers[0].fc1)
        # XXX: different std for fairseq version of SinusoidalPositionalEmbedding
        # self.assertAlmostEqual(torch.std(model.encoder.embed_positions.weights).item(), config.init_std, 2)

    def test_advanced_inputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.use_cache = False
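        # Force the last two source tokens to padding so the auto-generated
        # decoder attention mask actually masks some positions (verified by the
        # min-value check further below).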
        inputs_dict["input_ids"][:, -2:] = config.pad_token_id
        decoder_input_ids, decoder_attn_mask, causal_mask = _prepare_fsmt_decoder_inputs(
            config, inputs_dict["input_ids"])
        model = FSMTModel(config).to(torch_device).eval()

        decoder_features_with_created_mask = model(**inputs_dict)[0]
        decoder_features_with_passed_mask = model(
            decoder_attention_mask=invert_mask(decoder_attn_mask),
            decoder_input_ids=decoder_input_ids,
            **inputs_dict)[0]
        _assert_tensors_equal(decoder_features_with_passed_mask,
                              decoder_features_with_created_mask)
        useless_mask = torch.zeros_like(decoder_attn_mask)
        decoder_features = model(decoder_attention_mask=useless_mask,
                                 **inputs_dict)[0]
        self.assertTrue(isinstance(
            decoder_features, torch.Tensor))  # no hidden states or attentions
        self.assertEqual(
            decoder_features.size(),
            (self.model_tester.batch_size, self.model_tester.seq_length,
             config.tgt_vocab_size),
        )
        if decoder_attn_mask.min().item() < -1e3:  # some tokens were masked
            self.assertFalse(
                (decoder_features_with_created_mask == decoder_features
                 ).all().item())

        # Test different encoder attention masks
        decoder_features_with_long_encoder_mask = model(
            inputs_dict["input_ids"],
            attention_mask=inputs_dict["attention_mask"].long())[0]
        _assert_tensors_equal(decoder_features_with_long_encoder_mask,
                              decoder_features_with_created_mask)

    def test_save_load_strict(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        for model_class in self.all_model_classes:
            model = model_class(config)

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model2, info = model_class.from_pretrained(
                    tmpdirname, output_loading_info=True)
            self.assertEqual(info["missing_keys"], [])

    def test_save_load_no_save_keys(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        for model_class in self.all_model_classes:
            model = model_class(config)

            state_dict_no_save_keys = getattr(model, "state_dict_no_save_keys",
                                              None)
            if state_dict_no_save_keys is None:
                continue

            # check the keys are in the original state_dict
            for k in state_dict_no_save_keys:
                self.assertIn(k, model.state_dict())

            # check that certain keys didn't get saved with the model
            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                output_model_file = os.path.join(tmpdirname, WEIGHTS_NAME)
                state_dict_saved = torch.load(output_model_file)
                for k in state_dict_no_save_keys:
                    self.assertNotIn(k, state_dict_saved)

    @unittest.skip("can't be implemented for FSMT due to dual vocab.")
    def test_resize_tokens_embeddings(self):
        pass

    @unittest.skip("Passing inputs_embeds not implemented for FSMT.")
    def test_inputs_embeds(self):
        pass

    @unittest.skip("model weights aren't tied in FSMT.")
    def test_tie_model_weights(self):
        pass
Example #23
0
    def test_pt_tf_model_equivalence(self):
        if not is_torch_available():
            return

        import torch
        import transformers

        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        for model_class in self.all_model_classes:
            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True
            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions
            tf_model = transformers.load_pytorch_model_in_tf2_model(tf_model, pt_model, tf_inputs=inputs_dict)
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check that predictions on the first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
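            # TF tensors -> numpy -> torch; casting everything to torch.long is
            # safe here on the assumption that the common tester only produces
            # integer id tensors as inputs.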
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict, training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()

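            # Zero out any NaN positions in both outputs so stray NaNs do not
            # dominate the max-abs-diff comparison below.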
            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs_dict)
            self.assertLessEqual(max_diff, 2e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check that predictions on the first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch.long)) for name, key in inputs_dict.items()
            )
            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(inputs_dict)
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 2e-2)
class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (Wav2Vec2ForCTC, Wav2Vec2Model,
                         Wav2Vec2ForMaskedLM) if is_torch_available() else ()
    test_pruning = False
    test_headmasking = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = Wav2Vec2ModelTester(self,
                                                conv_stride=(3, 3, 3),
                                                feat_extract_norm="layer",
                                                do_stable_layer_norm=True)
        self.config_tester = ConfigTester(self,
                                          config_class=Wav2Vec2Config,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_batched_inference(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_batch_inference(*config_and_inputs)

    def test_ctc_loss_inference(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_ctc_loss(*config_and_inputs)

    def test_train(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.check_training(*config_and_inputs)

    # Wav2Vec2 has no inputs_embeds
    def test_inputs_embeds(self):
        pass

    # `input_ids` is renamed to `input_values`
    def test_forward_signature(self):
        pass

    # Wav2Vec2 cannot resize token embeddings
    # since it has no token embeddings
    def test_resize_tokens_embeddings(self):
        pass

    # Wav2Vec2 has no inputs_embeds
    # and thus the `get_input_embeddings` fn
    # is not implemented
    def test_model_common_attributes(self):
        pass

    def test_retain_grad_hidden_states_attentions(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )
        config.output_hidden_states = True
        config.output_attentions = True

        # no need to test all models as different heads yield the same functionality
        model_class = self.all_model_classes[0]
        model = model_class(config)
        model.to(torch_device)

        # set layer drop to 0 so no transformer layers are randomly skipped
        # and gradients reach every block
        model.config.layerdrop = 0.0

        input_values = inputs_dict["input_values"]

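        # Assume every example uses its full (unpadded) length, then map raw
        # sample counts to feature-extractor frame counts; the CTC labels are
        # sized from those frame counts.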
        input_lengths = torch.tensor(
            [input_values.shape[1] for _ in range(input_values.shape[0])],
            dtype=torch.long,
            device=torch_device)
        output_lengths = model._get_feat_extract_output_lengths(input_lengths)

        labels = ids_tensor((input_values.shape[0], output_lengths[0] - 2),
                            self.model_tester.vocab_size)
        inputs_dict["attention_mask"] = torch.ones_like(
            inputs_dict["attention_mask"])
        inputs_dict["labels"] = labels

        outputs = model(**inputs_dict)

        output = outputs[0]

        # Encoder-/Decoder-only models
        hidden_states = outputs.hidden_states[0]
        attentions = outputs.attentions[0]

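        # Hidden states and attentions are non-leaf tensors, so their gradients
        # must be explicitly retained before calling backward().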
        hidden_states.retain_grad()
        attentions.retain_grad()

        output.flatten()[0].backward(retain_graph=True)

        self.assertIsNotNone(hidden_states.grad)
        self.assertIsNotNone(attentions.grad)

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
        )

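        # With the config's initializer range zeroed out, trainable parameters
        # should have mean 0 or 1; conv weights and the masked-spec embedding
        # are only required to fall in [-1, 1] since they are not initialized
        # from that range.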
        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if "conv.weight" in name or "masked_spec_embed" in name:
                        self.assertTrue(
                            -1.0 <=
                            ((param.data.mean() * 1e9).round() / 1e9).item() <=
                            1.0,
                            msg=
                            f"Parameter {name} of model {model_class} seems not properly initialized",
                        )
                    else:
                        self.assertIn(
                            ((param.data.mean() * 1e9).round() / 1e9).item(),
                            [0.0, 1.0],
                            msg=
                            f"Parameter {name} of model {model_class} seems not properly initialized",
                        )

    # overwrite from test_modeling_common
    def _mock_init_weights(self, module):
        if hasattr(module, "weight") and module.weight is not None:
            module.weight.data.fill_(3)
        if hasattr(module, "weight_g") and module.weight is not None:
            module.weight_g.data.fill_(3)
        if hasattr(module, "bias") and module.bias is not None:
            module.bias.data.fill_(3)

    @slow
    def test_model_from_pretrained(self):
        model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
        self.assertIsNotNone(model)
class TransfoXLModelTest(ModelTesterMixin, unittest.TestCase):

    all_model_classes = (TransfoXLModel,
                         TransfoXLLMHeadModel) if is_torch_available() else ()
    all_generative_model_classes = (
        TransfoXLLMHeadModel, ) if is_torch_available() else ()
    test_pruning = False
    test_torchscript = False
    test_resize_embeddings = False

    class TransfoXLModelTester(object):
        def __init__(
            self,
            parent,
            batch_size=13,
            seq_length=7,
            mem_len=30,
            clamp_len=15,
            is_training=True,
            use_labels=True,
            vocab_size=99,
            cutoffs=[10, 50, 80],
            hidden_size=32,
            d_embed=32,
            num_attention_heads=4,
            d_head=8,
            d_inner=128,
            div_val=2,
            num_hidden_layers=5,
            scope=None,
            seed=1,
            eos_token_id=0,
        ):
            self.parent = parent
            self.batch_size = batch_size
            self.seq_length = seq_length
            self.mem_len = mem_len
            self.key_length = seq_length + mem_len
            self.clamp_len = clamp_len
            self.is_training = is_training
            self.use_labels = use_labels
            self.vocab_size = vocab_size
            self.cutoffs = cutoffs
            self.hidden_size = hidden_size
            self.d_embed = d_embed
            self.num_attention_heads = num_attention_heads
            self.d_head = d_head
            self.d_inner = d_inner
            self.div_val = div_val
            self.num_hidden_layers = num_hidden_layers
            self.scope = scope
            self.seed = seed
            self.eos_token_id = eos_token_id

        def prepare_config_and_inputs(self):
            input_ids_1 = ids_tensor([self.batch_size, self.seq_length],
                                     self.vocab_size)
            input_ids_2 = ids_tensor([self.batch_size, self.seq_length],
                                     self.vocab_size)

            lm_labels = None
            if self.use_labels:
                lm_labels = ids_tensor([self.batch_size, self.seq_length],
                                       self.vocab_size)

            config = TransfoXLConfig(
                vocab_size=self.vocab_size,
                mem_len=self.mem_len,
                clamp_len=self.clamp_len,
                cutoffs=self.cutoffs,
                d_model=self.hidden_size,
                d_embed=self.d_embed,
                n_head=self.num_attention_heads,
                d_head=self.d_head,
                d_inner=self.d_inner,
                div_val=self.div_val,
                n_layer=self.num_hidden_layers,
                eos_token_ids=self.eos_token_id,
            )

            return (config, input_ids_1, input_ids_2, lm_labels)

        def set_seed(self):
            random.seed(self.seed)
            torch.manual_seed(self.seed)

        def create_transfo_xl_model(self, config, input_ids_1, input_ids_2,
                                    lm_labels):
            model = TransfoXLModel(config)
            model.to(torch_device)
            model.eval()

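            # The second forward pass feeds back the memories (mems) returned
            # by the first, exercising Transformer-XL's segment-level recurrence.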
            hidden_states_1, mems_1 = model(input_ids_1)
            hidden_states_2, mems_2 = model(input_ids_2, mems_1)
            outputs = {
                "hidden_states_1": hidden_states_1,
                "mems_1": mems_1,
                "hidden_states_2": hidden_states_2,
                "mems_2": mems_2,
            }
            return outputs

        def check_transfo_xl_model_output(self, result):
            self.parent.assertListEqual(
                list(result["hidden_states_1"].size()),
                [self.batch_size, self.seq_length, self.hidden_size],
            )
            self.parent.assertListEqual(
                list(result["hidden_states_2"].size()),
                [self.batch_size, self.seq_length, self.hidden_size],
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers,
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers,
            )

        def create_transfo_xl_lm_head(self, config, input_ids_1, input_ids_2,
                                      lm_labels):
            model = TransfoXLLMHeadModel(config)
            model.to(torch_device)
            model.eval()

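            # Run the LM head with and without labels, then repeat on a second
            # segment that reuses the memories from the first pass.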
            lm_logits_1, mems_1 = model(input_ids_1)
            loss_1, _, mems_1 = model(input_ids_1, labels=lm_labels)
            lm_logits_2, mems_2 = model(input_ids_2, mems=mems_1)
            loss_2, _, mems_2 = model(input_ids_2,
                                      labels=lm_labels,
                                      mems=mems_1)

            outputs = {
                "loss_1": loss_1,
                "mems_1": mems_1,
                "lm_logits_1": lm_logits_1,
                "loss_2": loss_2,
                "mems_2": mems_2,
                "lm_logits_2": lm_logits_2,
            }
            return outputs

        def check_transfo_xl_lm_head_output(self, result):
            self.parent.assertListEqual(list(result["loss_1"].size()),
                                        [self.batch_size, self.seq_length])
            self.parent.assertListEqual(
                list(result["lm_logits_1"].size()),
                [self.batch_size, self.seq_length, self.vocab_size],
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_1"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers,
            )

            self.parent.assertListEqual(list(result["loss_2"].size()),
                                        [self.batch_size, self.seq_length])
            self.parent.assertListEqual(
                list(result["lm_logits_2"].size()),
                [self.batch_size, self.seq_length, self.vocab_size],
            )
            self.parent.assertListEqual(
                list(list(mem.size()) for mem in result["mems_2"]),
                [[self.mem_len, self.batch_size, self.hidden_size]] *
                self.num_hidden_layers,
            )

        def prepare_config_and_inputs_for_common(self):
            config_and_inputs = self.prepare_config_and_inputs()
            (config, input_ids_1, input_ids_2, lm_labels) = config_and_inputs
            inputs_dict = {"input_ids": input_ids_1}
            return config, inputs_dict

    def setUp(self):
        self.model_tester = TransfoXLModelTest.TransfoXLModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=TransfoXLConfig,
                                          d_embed=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_transfo_xl_model(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        output_result = self.model_tester.create_transfo_xl_model(
            *config_and_inputs)
        self.model_tester.check_transfo_xl_model_output(output_result)

    def test_transfo_xl_lm_head(self):
        self.model_tester.set_seed()
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        output_result = self.model_tester.create_transfo_xl_lm_head(
            *config_and_inputs)
        self.model_tester.check_transfo_xl_lm_head_output(output_result)

    @slow
    def test_model_from_pretrained(self):
        for model_name in list(
                TRANSFO_XL_PRETRAINED_MODEL_ARCHIVE_MAP.keys())[:1]:
            model = TransfoXLModel.from_pretrained(model_name,
                                                   cache_dir=CACHE_DIR)
            self.assertIsNotNone(model)
Example #26
0
class BARTModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (
        (BartModel, BartForConditionalGeneration, BartForSequenceClassification, BartForQuestionAnswering)
        if is_torch_available()
        else ()
    )
    all_generative_model_classes = (BartForConditionalGeneration,) if is_torch_available() else ()
    is_encoder_decoder = True
    # TODO(SS): fix the below in a separate PR
    test_pruning = False
    test_torchscript = True
    test_head_masking = False
    test_resize_embeddings = True  # This requires inputs_dict['input_ids']
    test_missing_keys = False  # because BartForConditionalGeneration and BartModel now have identical state_dict

    def setUp(self):
        self.model_tester = ModelTester(self)
        self.config_tester = ConfigTester(self, config_class=BartConfig)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_initialization_more(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        model = BartModel(config)
        model.to(torch_device)
        model.eval()
        # test init
        self.assertTrue((model.encoder.embed_tokens.weight == model.shared.weight).all().item())

        def _check_var(module):
            """Check that we initialized various parameters from N(0, config.init_std)."""
            self.assertAlmostEqual(torch.std(module.weight).item(), config.init_std, 2)

        _check_var(model.encoder.embed_tokens)
        _check_var(model.encoder.layers[0].self_attn.k_proj)
        _check_var(model.encoder.layers[0].fc1)
        _check_var(model.encoder.embed_positions)

    def test_advanced_inputs(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        config.use_cache = False
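        # Pad the last two source tokens so the default decoder attention mask
        # has something to mask (checked via the min-value test below).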
        inputs_dict["input_ids"][:, -2:] = config.pad_token_id
        decoder_input_ids, decoder_attn_mask, causal_mask = _prepare_bart_decoder_inputs(
            config, inputs_dict["input_ids"]
        )
        model = BartModel(config).to(torch_device).eval()

        decoder_features_with_created_mask = model(**inputs_dict)[0]
        decoder_features_with_passed_mask = model(
            decoder_attention_mask=invert_mask(decoder_attn_mask), decoder_input_ids=decoder_input_ids, **inputs_dict
        )[0]
        _assert_tensors_equal(decoder_features_with_passed_mask, decoder_features_with_created_mask)
        useless_mask = torch.zeros_like(decoder_attn_mask)
        decoder_features = model(decoder_attention_mask=useless_mask, **inputs_dict)[0]
        self.assertTrue(isinstance(decoder_features, torch.Tensor))  # no hidden states or attentions
        self.assertEqual(
            decoder_features.size(), (self.model_tester.batch_size, self.model_tester.seq_length, config.d_model)
        )
        if decoder_attn_mask.min().item() < -1e3:  # some tokens were masked
            self.assertFalse((decoder_features_with_created_mask == decoder_features).all().item())

        # Test different encoder attention masks
        decoder_features_with_long_encoder_mask = model(
            inputs_dict["input_ids"], attention_mask=inputs_dict["attention_mask"].long()
        )[0]
        _assert_tensors_equal(decoder_features_with_long_encoder_mask, decoder_features_with_created_mask)

    def test_save_load_strict(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()
        for model_class in self.all_model_classes:
            model = model_class(config)

            with tempfile.TemporaryDirectory() as tmpdirname:
                model.save_pretrained(tmpdirname)
                model2, info = model_class.from_pretrained(tmpdirname, output_loading_info=True)
            self.assertEqual(info["missing_keys"], [])

    @unittest.skip("Passing inputs_embeds not implemented for Bart.")
    def test_inputs_embeds(self):
        pass

    def test_tiny_model(self):
        model_name = "sshleifer/bart-tiny-random"
        tiny = AutoModel.from_pretrained(model_name)  # same vocab size
        tok = AutoTokenizer.from_pretrained(model_name)  # same tokenizer
        inputs_dict = tok.batch_encode_plus(["Hello my friends"], return_tensors="pt")

        with torch.no_grad():
            tiny(**inputs_dict)
Example #27
0
    def test_pt_tf_model_equivalence(self):
        from transformers import is_torch_available

        if not is_torch_available():
            return

        import torch

        import transformers

        for model_class in self.all_model_classes:
            config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common(
                return_obj_labels="PreTraining" in model_class.__name__
            )

            pt_model_class_name = model_class.__name__[2:]  # Skip the "TF" at the beginning
            pt_model_class = getattr(transformers, pt_model_class_name)

            config.output_hidden_states = True
            config.task_obj_predict = False

            tf_model = model_class(config)
            pt_model = pt_model_class(config)

            # Check we can load pt model in tf and vice-versa with model => model functions

            tf_model = transformers.load_pytorch_model_in_tf2_model(
                tf_model, pt_model, tf_inputs=self._prepare_for_class(inputs_dict, model_class)
            )
            pt_model = transformers.load_tf2_model_in_pytorch_model(pt_model, tf_model)

            # Check that predictions on the first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()

            # Delete obj labels as we want to compute the hidden states and not the loss

            if "obj_labels" in inputs_dict:
                del inputs_dict["obj_labels"]

            def torch_type(key):
                if key in ("visual_feats", "visual_pos"):
                    return torch.float32
                else:
                    return torch.long

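            # Recursively convert the (possibly nested) TF input dict to torch
            # tensors, keeping visual features as float32 and ids as long.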
            def recursive_numpy_convert(iterable):
                return_dict = {}
                for key, value in iterable.items():
                    if isinstance(value, dict):
                        return_dict[key] = recursive_numpy_convert(value)
                    else:
                        if isinstance(value, (list, tuple)):
                            return_dict[key] = (
                                torch.from_numpy(iter_value.numpy()).to(torch_type(key)) for iter_value in value
                            )
                        else:
                            return_dict[key] = torch.from_numpy(value.numpy()).to(torch_type(key))
                return return_dict

            pt_inputs_dict = recursive_numpy_convert(self._prepare_for_class(inputs_dict, model_class))

            # need to rename encoder-decoder "inputs" for PyTorch
            if "inputs" in pt_inputs_dict and self.is_encoder_decoder:
                pt_inputs_dict["input_ids"] = pt_inputs_dict.pop("inputs")

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class), training=False)
            tf_hidden_states = tfo[0].numpy()
            pt_hidden_states = pto[0].numpy()

            import numpy as np

            tf_nans = np.copy(np.isnan(tf_hidden_states))
            pt_nans = np.copy(np.isnan(pt_hidden_states))

            pt_hidden_states[tf_nans] = 0
            tf_hidden_states[tf_nans] = 0
            pt_hidden_states[pt_nans] = 0
            tf_hidden_states[pt_nans] = 0

            max_diff = np.amax(np.abs(tf_hidden_states - pt_hidden_states))
            # Debug info (remove when fixed)
            if max_diff >= 2e-2:
                print("===")
                print(model_class)
                print(config)
                print(inputs_dict)
                print(pt_inputs_dict)
            self.assertLessEqual(max_diff, 6e-2)

            # Check we can load pt model in tf and vice-versa with checkpoint => model functions
            with tempfile.TemporaryDirectory() as tmpdirname:
                import os

                pt_checkpoint_path = os.path.join(tmpdirname, "pt_model.bin")
                torch.save(pt_model.state_dict(), pt_checkpoint_path)
                tf_model = transformers.load_pytorch_checkpoint_in_tf2_model(tf_model, pt_checkpoint_path)

                tf_checkpoint_path = os.path.join(tmpdirname, "tf_model.h5")
                tf_model.save_weights(tf_checkpoint_path)
                pt_model = transformers.load_tf2_checkpoint_in_pytorch_model(pt_model, tf_checkpoint_path)

            # Check that predictions on the first output (logits/hidden-states) are close enough given low-level computational differences
            pt_model.eval()
            # Use the same per-key dtype rule as above so visual features keep
            # their float values instead of being truncated to integers first.
            pt_inputs_dict = dict(
                (name, torch.from_numpy(key.numpy()).to(torch_type(name)))
                for name, key in self._prepare_for_class(inputs_dict, model_class).items()
            )

            with torch.no_grad():
                pto = pt_model(**pt_inputs_dict)
            tfo = tf_model(self._prepare_for_class(inputs_dict, model_class))
            tfo = tfo[0].numpy()
            pto = pto[0].numpy()
            tf_nans = np.copy(np.isnan(tfo))
            pt_nans = np.copy(np.isnan(pto))

            pto[tf_nans] = 0
            tfo[tf_nans] = 0
            pto[pt_nans] = 0
            tfo[pt_nans] = 0

            max_diff = np.amax(np.abs(tfo - pto))
            self.assertLessEqual(max_diff, 6e-2)
Example #28
0
class Wav2Vec2RobustModelTest(ModelTesterMixin, unittest.TestCase):
    all_model_classes = (Wav2Vec2Model, Wav2Vec2ForMaskedLM, Wav2Vec2ForCTC) if is_torch_available() else ()
    test_pruning = False
    test_headmasking = False
    test_torchscript = False

    def setUp(self):
        self.model_tester = Wav2Vec2ModelTester(
            self, conv_stride=(3, 3, 3), feat_extract_norm="layer", do_stable_layer_norm=True
        )
        self.config_tester = ConfigTester(self, config_class=Wav2Vec2Config, hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    # Wav2Vec2 has no inputs_embeds
    def test_inputs_embeds(self):
        pass

    # `input_ids` is renamed to `input_values`
    def test_forward_signature(self):
        pass

    # Wav2Vec2 cannot resize token embeddings
    # since it has no token embeddings
    def test_resize_tokens_embeddings(self):
        pass

    # Wav2Vec2 has no inputs_embeds
    # and thus the `get_input_embeddings` fn
    # is not implemented
    def test_model_common_attributes(self):
        pass

    def test_initialization(self):
        config, inputs_dict = self.model_tester.prepare_config_and_inputs_for_common()

        configs_no_init = _config_zero_init(config)
        for model_class in self.all_model_classes:
            model = model_class(config=configs_no_init)
            for name, param in model.named_parameters():
                if param.requires_grad:
                    if "conv.weight" in name:
                        self.assertTrue(
                            -1.0 <= ((param.data.mean() * 1e9).round() / 1e9).item() <= 1.0,
                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
                        )
                    else:
                        self.assertIn(
                            ((param.data.mean() * 1e9).round() / 1e9).item(),
                            [0.0, 1.0],
                            msg="Parameter {} of model {} seems not properly initialized".format(name, model_class),
                        )

    @slow
    def test_model_from_pretrained(self):
        model = Wav2Vec2Model.from_pretrained("facebook/wav2vec2-base-960h")
        self.assertIsNotNone(model)
Example #29
0
# If checking the tensors placement
# tf.debugging.set_log_device_placement(True)

from typing import List
import timeit
from transformers import is_tf_available, is_torch_available
from time import time
import argparse
import csv

if is_tf_available():
    import tensorflow as tf
    from transformers import TFAutoModel

if is_torch_available():
    import torch
    from transformers import AutoModel

from transformers import AutoConfig, AutoTokenizer

input_text = """Bent over their instruments, three hundred Fertilizers were plunged, as 
the Director of Hatcheries and Conditioning entered the room, in the 



scarcely breathing silence, the absent-minded, soliloquizing hum or 
whistle, of absorbed concentration. A troop of newly arrived students, 
very young, pink and callow, followed nervously, rather abjectly, at the 
Director's heels. Each of them carried a notebook, in which, whenever 
the great man spoke, he desperately scribbled. Straight from the 
class BertModelTest(ModelTesterMixin, GenerationTesterMixin,
                    unittest.TestCase):

    all_model_classes = ((
        BertModel,
        BertLMHeadModel,
        BertForMaskedLM,
        BertForMultipleChoice,
        BertForNextSentencePrediction,
        BertForPreTraining,
        BertForQuestionAnswering,
        BertForSequenceClassification,
        BertForTokenClassification,
    ) if is_torch_available() else ())
    all_generative_model_classes = (
        BertLMHeadModel, ) if is_torch_available() else ()

    # special case for ForPreTraining model
    def _prepare_for_class(self,
                           inputs_dict,
                           model_class,
                           return_labels=False):
        inputs_dict = super()._prepare_for_class(inputs_dict,
                                                 model_class,
                                                 return_labels=return_labels)

        if return_labels:
            if model_class in MODEL_FOR_PRETRAINING_MAPPING.values():
                inputs_dict["labels"] = torch.zeros(
                    (self.model_tester.batch_size,
                     self.model_tester.seq_length),
                    dtype=torch.long,
                    device=torch_device)
                inputs_dict["next_sentence_label"] = torch.zeros(
                    self.model_tester.batch_size,
                    dtype=torch.long,
                    device=torch_device)
        return inputs_dict

    def setUp(self):
        self.model_tester = BertModelTester(self)
        self.config_tester = ConfigTester(self,
                                          config_class=BertConfig,
                                          hidden_size=37)

    def test_config(self):
        self.config_tester.run_common_tests()

    def test_model(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_various_embeddings(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        for type in ["absolute", "relative_key", "relative_key_query"]:
            config_and_inputs[0].position_embedding_type = type
            self.model_tester.create_and_check_model(*config_and_inputs)

    def test_model_as_decoder(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

    def test_model_as_decoder_with_default_input_mask(self):
        # This regression test was failing with PyTorch < 1.3
        (
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        ) = self.model_tester.prepare_config_and_inputs_for_decoder()

        input_mask = None

        self.model_tester.create_and_check_model_as_decoder(
            config,
            input_ids,
            token_type_ids,
            input_mask,
            sequence_labels,
            token_labels,
            choice_labels,
            encoder_hidden_states,
            encoder_attention_mask,
        )

    def test_for_causal_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_for_causal_lm(*config_and_inputs)

    def test_for_masked_lm(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_masked_lm(*config_and_inputs)

    def test_for_causal_lm_decoder(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder(
        )
        self.model_tester.create_and_check_model_for_causal_lm_as_decoder(
            *config_and_inputs)

    def test_for_multiple_choice(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_multiple_choice(
            *config_and_inputs)

    def test_for_next_sequence_prediction(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_next_sequence_prediction(
            *config_and_inputs)

    def test_for_pretraining(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_pretraining(*config_and_inputs)

    def test_for_question_answering(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_question_answering(
            *config_and_inputs)

    def test_for_sequence_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_sequence_classification(
            *config_and_inputs)

    def test_for_token_classification(self):
        config_and_inputs = self.model_tester.prepare_config_and_inputs()
        self.model_tester.create_and_check_for_token_classification(
            *config_and_inputs)

    @slow
    def test_model_from_pretrained(self):
        for model_name in BERT_PRETRAINED_MODEL_ARCHIVE_LIST[:1]:
            model = BertModel.from_pretrained(model_name)
            self.assertIsNotNone(model)