    def test_save_load_custom_head(self):
        model_name = "bert-base-uncased"
        model_config = AutoConfig.from_pretrained(
            model_name, custom_heads={"tag": CustomHead})
        model1 = AutoModelWithHeads.from_pretrained(model_name,
                                                    config=model_config)
        model2 = AutoModelWithHeads.from_pretrained(model_name,
                                                    config=model_config)
        config = {
            "head_type": "tag",
            "num_labels": 3,
            "layers": 2,
            "activation_function": "tanh"
        }
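        # This config dict is handed to the head class registered for head_type "tag"
        # (CustomHead above); the remaining keys are assumed to match its constructor.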
        model1.add_custom_head("custom_head", config)

        with tempfile.TemporaryDirectory() as temp_dir:
            model1.save_head(temp_dir, "custom_head")
            model2.load_head(temp_dir)

        model1.eval()
        model2.eval()

        in_data = ids_tensor((1, 128), 1000)
        output1 = model1(in_data)
        output2 = model2(in_data)
        self.assertEqual(output1[0].size(), output2[0].size())
        state1 = model1.config.prediction_heads["custom_head"].state_dict()
        state2 = model2.config.prediction_heads["custom_head"].state_dict()
        for ((k1, v1), (k2, v2)) in zip(state1.items(), state2.items()):
            self.assertTrue(torch.equal(v1, v2))
Example 2
    def test_load_full_model(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.add_classification_head("dummy", layers=1)

        true_config = model.get_prediction_heads_config()
        with tempfile.TemporaryDirectory() as temp_dir:
            # save
            model.save_pretrained(temp_dir)
            # reload
            model = AutoModelWithHeads.from_pretrained(temp_dir)
        self.assertIn("dummy", model.heads)
        self.assertDictEqual(true_config, model.get_prediction_heads_config())
    def test_loading_adapter_weights_without_prefix(self):
        model_base, model_with_head_base = create_twin_models(
            AutoModel, self.config)

        model_with_head = AutoModelWithHeads.from_config(
            model_with_head_base.config)
        setattr(model_with_head, model_with_head.base_model_prefix,
                model_with_head_base)

        model_base.add_adapter("dummy")

        with tempfile.TemporaryDirectory() as temp_dir:
            model_base.save_adapter(temp_dir, "dummy")

            loading_info = {}
            model_with_head.load_adapter(temp_dir, loading_info=loading_info)

        self.assertEqual(0, len(loading_info["missing_keys"]))
        self.assertEqual(0, len(loading_info["unexpected_keys"]))

        # check equal output
        input_ids = self.get_input_samples((1, 128),
                                           config=model_with_head.config)
        output1 = model_with_head(input_ids)
        output2 = model_base(input_ids)
        self.assertEqual(len(output1), len(output2))
        self.assertTrue(torch.equal(output1[0], output2[0]))
    def test_train_single_adapter(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name,
                                                  use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())

        # add two adapters: one will be trained and the other should be frozen
        model.add_adapter("mrpc")
        model.add_adapter("dummy")
        model.add_classification_head("mrpc")

        self.assertIn("mrpc", model.config.adapters.adapters)
        self.assertIn("dummy", model.config.adapters.adapters)

        # train the mrpc adapter -> it should be activated & unfrozen
        model.train_adapter("mrpc")
        self.assertEqual(set(["mrpc"]), model.active_adapters.flatten())

        # all weights of the adapter should be activated
        for k, v in filter_parameters(model, "adapters.mrpc.").items():
            self.assertTrue(v.requires_grad, k)
        # all weights of the adapter not used for training should be frozen
        for k, v in filter_parameters(model, "adapters.dummy.").items():
            self.assertFalse(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model,
                                      "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        # setup dataset
        data_args = GlueDataTrainingArguments(
            task_name="mrpc",
            data_dir="./tests/fixtures/tests_samples/MRPC",
            overwrite_cache=True)
        train_dataset = GlueDataset(data_args,
                                    tokenizer=tokenizer,
                                    mode="train")
        training_args = TrainingArguments(output_dir="./examples",
                                          do_train=True,
                                          learning_rate=0.1,
                                          max_steps=7,
                                          no_cuda=True)

        # train
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )
        trainer.train()

        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(),
                                        model.state_dict().items()):
            if "mrpc" in k1:
                self.assertFalse(torch.equal(v1, v2))
            else:
                self.assertTrue(torch.equal(v1, v2))
    def test_train_adapter_fusion(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name,
                                                  use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())
        self.add_head(model, "head")

        # add the adapters to be fused
        model.add_adapter("a")
        model.add_adapter("b")
        model.add_adapter("c")

        self.assertIn("a", model.config.adapters.adapters)
        self.assertIn("b", model.config.adapters.adapters)
        self.assertIn("c", model.config.adapters.adapters)

        # setup fusion
        adapter_setup = Fuse("a", "b", "c")
        model.add_adapter_fusion(adapter_setup)
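        # train_adapter_fusion freezes the base model and the fused adapters;
        # only the fusion layer remains trainable (verified below)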
        model.train_adapter_fusion(adapter_setup)
        model.set_active_adapters(adapter_setup)
        self.assertEqual(adapter_setup, model.active_adapters)

        # all weights of the adapters should be frozen (test for one)
        for k, v in filter_parameters(model, "adapters.a.").items():
            self.assertFalse(v.requires_grad, k)
        # all weights of the fusion layer should be activated
        for k, v in filter_parameters(model, "adapter_fusion_layer").items():
            self.assertTrue(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model,
                                      "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        # Since our config has a value matrix, make sure it is regularized.
        # We do this by patching the fusion regularization function.
        regularization_called = False
        orig_fusion_regularization_loss = model.base_model.get_fusion_regularization_loss

        def patched_fusion_reg_loss():
            nonlocal regularization_called
            regularization_called = True
            return orig_fusion_regularization_loss()

        model.base_model.get_fusion_regularization_loss = patched_fusion_reg_loss

        self.trainings_run(model, tokenizer)

        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(),
                                        model.state_dict().items()):
            if ("adapter_fusion_layer" in k1 or "classifier" in k1
                    or "classification_head" in k1 or "score" in k1
                    or "heads" in k1):
                self.assertFalse(torch.equal(v1, v2), k1)
            else:
                self.assertTrue(torch.equal(v1, v2), k1)
        self.assertTrue(regularization_called)
Example 6
    def test_parallel_inference_with_heads(self):
        model = AutoModelWithHeads.from_config(self.config())

        model.add_adapter("a")
        model.add_adapter("b")
        model.add_classification_head("a", num_labels=2)
        model.add_classification_head("b", num_labels=3)
        model.eval()

        inputs = {}
        inputs["attention_mask"] = torch.randint(0, 2, size=(2, 128))
        inputs["input_ids"] = self.get_input_samples((2, 128),
                                                     config=model.config)

        # for reference, pass through single adapters
        model.active_adapters = "a"
        model.active_head = "a"
        outputs_a = model(**inputs)
        model.active_adapters = "b"
        model.active_head = "b"
        outputs_b = model(**inputs)

        model.active_adapters = Parallel("a", "b")
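        # Parallel runs the same batch through both adapter/head pairs in a
        # single forward pass and returns one output per head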
        # active_adapters should set parallel heads too
        self.assertEqual(model.active_head, ["a", "b"])
        outputs = model(**inputs)

        self.assertEqual(len(outputs), 2)
        self.assertEqual(outputs[0][0].shape, (2, 2))
        self.assertEqual(outputs[1][0].shape, (2, 3))
        self.assertTrue(torch.allclose(outputs[0][0], outputs_a[0]))
        self.assertTrue(torch.allclose(outputs[1][0], outputs_b[0]))
    def test_loading_adapter_weights_without_prefix(self):
        if self.config_class not in MODEL_WITH_HEADS_MAPPING:
            self.skipTest("Does not support flex heads.")

        model_base, model_with_head_base = create_twin_models(
            self.model_class, self.config)

        model_with_head = AutoModelWithHeads.from_config(
            model_with_head_base.config)
        setattr(model_with_head, model_with_head.base_model_prefix,
                model_with_head_base)

        model_base.add_adapter("dummy")

        with tempfile.TemporaryDirectory() as temp_dir:
            model_base.save_adapter(temp_dir, "dummy")

            loading_info = {}
            model_with_head.load_adapter(temp_dir, loading_info=loading_info)

        self.assertEqual(0, len(loading_info["missing_keys"]))
        self.assertEqual(0, len(loading_info["unexpected_keys"]))

        # check equal output
        input_data = self.get_input_samples((1, 128),
                                            config=model_with_head.config)
        output1 = model_with_head(**input_data)
        output2 = model_base(**input_data)
        self.assertEqual(len(output1), len(output2))
        self.assertTrue(torch.equal(output1[0], output2[0]))
Example 8
    def _load_pipeline_instance(pipeline_class, adapter_id):
        adapter_info = get_adapter_info(adapter_id, source="hf")
        if adapter_info is None:
            raise ValueError(f"Adapter with id '{adapter_id}' not available.")

        tokenizer = AutoTokenizer.from_pretrained(adapter_info.model_name)
        model = AutoModelWithHeads.from_pretrained(adapter_info.model_name)
        model.load_adapter(adapter_id, source="hf", set_active=True)
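        # set_active=True makes the loaded adapter (and its bundled prediction
        # head, if any) the default for every forward pass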
        return pipeline_class(model=model, tokenizer=tokenizer)
    def run_test(self, static_model, input_shape=None, label_dict=None):
        flex_model = AutoModelWithHeads.from_pretrained(
            None, config=self.config(), state_dict=static_model.state_dict())
        static_model.eval()
        flex_model.eval()
        if (static_model.base_model.__class__ !=
                flex_model.base_model.__class__
                and not static_model.base_model == static_model):
            self.skipTest("Skipping as base model classes are different.")

        with tempfile.TemporaryDirectory() as temp_dir:
            static_model.save_head(temp_dir)

            loading_info = {}
            flex_model.load_head(temp_dir,
                                 load_as="test",
                                 loading_info=loading_info)

        self.assertEqual(
            0, len(loading_info["missing_keys"]),
            "Missing keys: {}".format(", ".join(loading_info["missing_keys"])))
        # Some of the weights don't need to be converted, so drop them from the check
        unexpected_keys = loading_info["unexpected_keys"]
        if static_model._keys_to_ignore_on_load_missing is not None:
            for pat in static_model._keys_to_ignore_on_load_missing:
                unexpected_keys = [
                    k for k in unexpected_keys if re.search(pat, k) is None
                ]
        # HACK for bert-based models
        if isinstance(static_model, BertPreTrainedModel):
            unexpected_keys = [
                k for k in unexpected_keys if "cls.predictions.bias" not in k
            ]
        elif isinstance(static_model, RobertaPreTrainedModel):
            unexpected_keys = [
                k for k in unexpected_keys if "lm_head.bias" not in k
            ]
        self.assertEqual(
            0, len(unexpected_keys),
            "Unexpected keys: {}".format(", ".join(unexpected_keys)))

        # adapter and head were loaded
        self.assertIn("test", flex_model.heads)

        # check equal output
        input_shape = input_shape or (self.batch_size, self.seq_length)
        in_data = self.get_input_samples(input_shape, config=flex_model.config)
        if label_dict:
            for k, v in label_dict.items():
                in_data[k] = v
        output1 = static_model(**in_data)
        output2 = flex_model(**in_data)
        self.assertTrue(torch.allclose(output1.loss, output2.loss))
        self.assertTrue(torch.allclose(
            output1[1],
            output2[1]))  # it's not called "logits" for all classes
Example 10
    def test_custom_head_from_model_config(self):
        model_name = "bert-base-uncased"
        model_config = AutoConfig.from_pretrained(model_name, custom_heads={"tag": CustomHead})
        model = AutoModelWithHeads.from_pretrained(model_name, config=model_config)
        config = {"head_type": "tag", "num_labels": 3, "layers": 2, "activation_function": "tanh"}
        model.add_custom_head("custom_head", config)
        model.eval()
        in_data = ids_tensor((1, 128), 1000)
        output1 = model(in_data)
        model.add_tagging_head("tagging_head", num_labels=3, layers=2)
        output2 = model(in_data)
        self.assertEqual(output1[0].size(), output2[0].size())
Example 11
    def test_delete_head(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        name = "test_head"
        self.add_head(model, name)
        self.assertTrue(name in model.heads)
        self.assertTrue(name in model.config.prediction_heads)
        self.assertEqual(name, model.active_head)

        model.delete_head(name)
        self.assertFalse(name in model.heads)
        self.assertFalse(name in model.config.prediction_heads)
        self.assertNotEqual(name, model.active_head)
Example 12
    def test_batch_split_head(self):
        if not hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class],
                       "add_classification_head"):
            self.skipTest("No classification head available")
        model = AutoModelWithHeads.from_config(self.config())
        model.add_classification_head("a")
        model.add_classification_head("b")
        model.active_head = BatchSplit("a", "b", batch_sizes=[1, 2])
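        # batch_sizes=[1, 2]: head "a" sees the first sample, head "b" the
        # remaining two, hence the output shapes asserted below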
        in_data = self.get_input_samples((3, 128), config=model.config)

        out = model(**in_data)
        self.assertEqual(2, len(out))
        self.assertEqual((1, 2), out[0][0].shape)
        self.assertEqual((2, 2), out[1][0].shape)
Example 13
    def test_model_with_heads_tagging_head_labels(self):
        model = AutoModelWithHeads.from_pretrained(self.model_name,
                                                   config=self.config)
        model.add_tagging_head("test_head",
                               num_labels=len(self.labels),
                               id2label=self.label_map)
        with TemporaryDirectory() as temp_dir:
            model.save_head(temp_dir, "test_head")
            model.load_head(temp_dir)
        # this is just loaded to test whether loading an adapter changes the label information
        model.load_adapter("sst-2", "text_task")

        self.assertEqual(self.labels, model.get_labels())
        self.assertDictEqual(self.label_map, model.get_labels_dict())
Example 14
    def test_batch_split_adapter_head(self):
        model = AutoModelWithHeads.from_config(self.config())
        self.add_head(model, "a")
        self.add_head(model, "b")
        model.add_adapter("a")
        model.add_adapter("b")
        model.add_adapter("c")
        model.set_active_adapters(
            BatchSplit(Stack("c", "a"), "b", batch_sizes=[2, 1]))
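        # the first two samples pass through the stacked adapters "c" then "a",
        # the third sample through adapter "b"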

        in_data = self.get_input_samples((3, 128), config=model.config)
        out = model(**in_data)

        self.assertEqual(2, len(out))
        self.assertTrue(isinstance(model.active_head, BatchSplit))
Example 15
    def test_model_with_heads_multiple_heads(self):
        model = AutoModelWithHeads.from_pretrained(self.model_name,
                                                   config=self.config)
        model.add_tagging_head("test_head",
                               num_labels=len(self.labels),
                               id2label=self.label_map)
        model.add_classification_head("second_head", num_labels=5)
        with TemporaryDirectory() as temp_dir:
            model.save_head(temp_dir + "/test_head", "test_head")
            model.load_head(temp_dir + "/test_head")
            model.save_head(temp_dir + "/second_head", "second_head")
            model.load_head(temp_dir + "/second_head")
        model.load_adapter("sst-2", "text_task")

        self.assertEqual(model.get_labels("test_head"), self.labels)
        self.assertEqual(model.get_labels_dict("test_head"), self.label_map)
    def test_general(self):
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        data_args = GlueDataTrainingArguments(
            task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
        )
        train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="train")

        model = AutoModelWithHeads.from_pretrained("bert-base-uncased")

        model.add_classification_head("task", num_labels=3)

        # add the task adapter plus an additional adapter that stays untrained
        model.add_adapter("task")
        model.add_adapter("additional_adapter")

        model.train_adapter("task")
        self.assertEqual("task", model.active_head)
        self.assertEqual(Stack("task"), model.active_adapters)
        with TemporaryDirectory() as tempdir:
            training_args = TrainingArguments(
                output_dir=tempdir,
                do_train=True,
                learning_rate=0.1,
                logging_steps=1,
                max_steps=1,
                save_steps=1,
                remove_unused_columns=False,
            )
            trainer = AdapterTrainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
            )

            trainer.train()

            # Check that adapters are actually saved but the full model is not
            files_dir_checkpoint = [file_or_dir for file_or_dir in os.listdir(os.path.join(tempdir, "checkpoint-1"))]
            self.assertTrue("task" in files_dir_checkpoint)
            self.assertTrue("additional_adapter" in files_dir_checkpoint)
            # Check that full model weights are not stored
            self.assertFalse("pytorch_model.bin" in files_dir_checkpoint)

            # remove_unused_columns should always be False in the AdapterTrainer
            self.assertFalse(trainer.args.remove_unused_columns)
            self.assertEqual("task", model.active_head)
            self.assertEqual(Stack("task"), model.active_adapters)
Example 17
    def test_invertible_adapter_with_head(self):
        if hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class],
                   "add_masked_lm_head"):
            lm_head = "masked_lm"
        elif hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class],
                     "add_causal_lm_head"):
            lm_head = "casual_lm"
        elif hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class],
                     "add_seq2seq_lm_head"):
            lm_head = "seq2seq_lm"
        else:
            self.skipTest("No masked or causel language model head")

        model = AutoModelWithHeads.from_config(self.config())
        model.add_adapter("test", config="pfeiffer+inv")
        if lm_head == "casual_lm":
            model.add_causal_lm_head("test")
        elif lm_head == "masked_lm":
            model.add_masked_lm_head("test")
        elif lm_head == "seq2seq_lm":
            model.add_seq2seq_lm_head("test")
        else:
            raise RuntimeError("{} is not a valid lm head".format(lm_head))
        model.set_active_adapters("test")

        # Set a hook before the invertible adapter to make sure it's actually called twice:
        # Once after the embedding layer and once in the prediction head.
        calls = 0

        def forward_pre_hook(module, input):
            nonlocal calls
            calls += 1

        inv_adapter = model.base_model.get_invertible_adapter()
        self.assertIsNotNone(inv_adapter)
        inv_adapter.register_forward_pre_hook(forward_pre_hook)

        in_data = self.get_input_samples((self.batch_size, self.seq_length),
                                         config=model.config)
        out = model(**in_data)

        self.assertEqual(
            (self.batch_size, self.seq_length, model.config.vocab_size),
            out[0].shape)
        self.assertEqual(2, calls)
Example 18
    def test_multiple_heads_label(self):
        model = AutoModelWithHeads.from_pretrained(self.model_name,
                                                   config=self.config)
        model.add_tagging_head("test_head",
                               num_labels=len(self.labels),
                               id2label=self.label_map)
        with TemporaryDirectory() as temp_dir:
            model.save_head(temp_dir, "test_head")
            model.load_head(temp_dir)
        # adapter loaded for testing whether it changes label information
        model.load_adapter("sst-2", "text_task")
        model.add_classification_head("classification_head")
        default_label, default_label_dict = get_default(2)

        self.assertEqual(model.get_labels("classification_head"),
                         default_label)
        self.assertEqual(model.get_labels_dict("classification_head"),
                         default_label_dict)
Example 19
    def test_parallel_inference_with_wrong_number_of_heads(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        model.add_adapter("a")
        model.add_adapter("b")
        self.add_head(model, "a", num_labels=2)

        inputs = self.get_input_samples((2, 128), config=model.config)

        model.active_adapters = Parallel("a", "b")
        model.active_head = ["a"]
        with self.assertRaises(ValueError):
            model(**inputs)

        model.active_head = "a"
        with self.assertRaises(ValueError):
            model(**inputs)
Example 20
    def test_parallel_training(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())

        model.add_adapter("mrpc1")
        model.add_adapter("mrpc2")
        self.add_head(model, "mrpc1", num_labels=2)
        self.add_head(model, "mrpc2", num_labels=3)
        model.active_adapters = Parallel("mrpc1", "mrpc2")
        model.train_adapter(Parallel("mrpc1", "mrpc2"))
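        # in a Parallel setup both adapters receive the full batch, so both are
        # trained (see the requires_grad checks below)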
        # model.eval()

        # all weights of the first adapter should be activated
        for k, v in filter_parameters(model, "adapters.mrpc1.").items():
            self.assertTrue(v.requires_grad, k)
        # the second adapter is part of the parallel setup, so its weights should be active as well
        for k, v in filter_parameters(model, "adapters.mrpc2.").items():
            self.assertTrue(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model, "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        train_dataset = self.dataset(tokenizer)
        training_args = TrainingArguments(
            output_dir="./examples", do_train=True, learning_rate=0.1, max_steps=10, no_cuda=True
        )

        # train
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )
        trainer.train()

        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(), model.state_dict().items()):
            if "mrpc" in k1:
                self.assertFalse(torch.equal(v1, v2), k1)
            else:
                self.assertTrue(torch.equal(v1, v2))
    def test_train_single_adapter(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name,
                                                  use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())

        # add two adapters: one will be trained and the other should be frozen
        model.add_adapter("mrpc")
        model.add_adapter("dummy")
        self.add_head(model, "mrpc")

        self.assertIn("mrpc", model.config.adapters.adapters)
        self.assertIn("dummy", model.config.adapters.adapters)

        # train the mrpc adapter -> it should be activated & unfrozen
        model.train_adapter("mrpc")
        self.assertEqual(set(["mrpc"]), model.active_adapters.flatten())

        # all weights of the adapter should be activated
        for k, v in filter_parameters(model, "adapters.mrpc.").items():
            self.assertTrue(v.requires_grad, k)
        # all weights of the adapter not used for training should be frozen
        for k, v in filter_parameters(model, "adapters.dummy.").items():
            self.assertFalse(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model,
                                      "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        self.trainings_run(model, tokenizer)

        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(),
                                        model.state_dict().items()):
            if "mrpc" in k1:
                self.assertFalse(torch.equal(v1, v2))
            else:
                self.assertTrue(torch.equal(v1, v2))
Example 22
    def test_reload_static_to_flex_head(self):
        if not hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class],
                       "add_classification_head"):
            self.skipTest("No classification head available")
        static_head_model = AutoModelForSequenceClassification.from_config(
            self.config())
        flex_head_model = AutoModelWithHeads.from_pretrained(
            None,
            config=self.config(),
            state_dict=static_head_model.state_dict())
        static_head_model.eval()
        flex_head_model.eval()

        static_head_model.add_adapter("test")

        with tempfile.TemporaryDirectory() as temp_dir:
            static_head_model.save_adapter(temp_dir, "test")

            loading_info = {}
            flex_head_model.load_adapter(temp_dir, loading_info=loading_info)

            # Load the adapter a second time to make sure our conversion script doesn't break anything
            flex_head_model.load_adapter(temp_dir, loading_info=loading_info)
        self.assertEqual(0, len(loading_info["missing_keys"]))
        self.assertEqual(0, len(loading_info["unexpected_keys"]))

        # adapter and head were loaded
        self.assertIn("test", flex_head_model.config.adapters)
        self.assertIn("test", flex_head_model.heads)

        # check equal output
        in_data = self.get_input_samples((1, 128),
                                         config=flex_head_model.config)
        output1 = static_head_model(**in_data, adapter_names=["test"])
        output2 = flex_head_model(**in_data,
                                  adapter_names=["test"],
                                  head="test")
        self.assertTrue(
            torch.all(torch.isclose(output1.logits, output2.logits)))
Example 23
    def test_batch_split_with_heads(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.add_adapter("a")
        model.add_adapter("b")
        self.add_head(model, "a", num_labels=2)
        self.add_head(model, "b", num_labels=3)
        model.eval()

        inputs = {"input_ids": self.get_input_samples((2, 128), config=model.config)["input_ids"]}
        if isinstance(model, T5ModelWithHeads):
            inputs["decoder_input_ids"] = inputs["input_ids"]

        # for reference, pass through single adapters
        model.active_adapters = "a"
        model.active_head = "a"
        outputs_a = model(**{k: v[:1] for k, v in inputs.items()})
        model.active_adapters = "b"
        model.active_head = "b"
        outputs_b = model(**{k: v[1:] for k, v in inputs.items()})

        model.set_active_adapters(BatchSplit("a", "b", batch_sizes=[1, 1]))
        output = model(**inputs)

        self.assertEqual(2, len(output))
        self.assertTrue(
            torch.allclose(
                output[0]["logits"],
                outputs_a["logits"],
                atol=1e-05,
            )
        )
        self.assertTrue(
            torch.allclose(
                output[1]["logits"],
                outputs_b["logits"],
                atol=1e-05,
            )
        )
Example 24
    def test_parallel_training_single_forward_pass(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        a1, a2 = self.create_twin_adapters(model, "a")
        b1, b2 = self.create_twin_adapters(model, "b")

        state_dict = model.state_dict()
        for k, v in state_dict.items():
            if a1 in k:
                self.assertTrue(torch.equal(v, state_dict[k.replace(a1, a2)]))
            if b1 in k:
                self.assertTrue(torch.equal(v, state_dict[k.replace(b1, b2)]))

        input_data = self.get_input_samples((3, 128), config=model.config)
        if isinstance(model, T5ModelWithHeads):
            input_data["labels"] = torch.randint(0, 2, (3, 128))
        else:
            input_data["labels"] = torch.randint(0, 2, (3, 1))

        outputs = []
        for adapter in [a1, b1]:
            model.active_head = adapter
            model.set_active_adapters(adapter)
            model.train_adapter(adapter)
            model.eval()
            outputs.append(model(**input_data))

        model.set_active_adapters(Parallel(a2, b2))
        model.train_adapter(Parallel(a2, b2))
        model.eval()

        parallel_outputs = model(**input_data)

        for out1, out2 in zip(outputs, parallel_outputs.head_outputs):
            self.assertTrue(torch.allclose(out1["loss"], out2["loss"]))
            self.assertTrue(torch.allclose(out1["logits"], out2["logits"], atol=1e-5))
Example 25
    def test_parallel_training_equivalent_to_single_adapters(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        a1, a2 = self.create_twin_adapters(model, "a")
        b1, b2 = self.create_twin_adapters(model, "b")

        dataset = []
        for i in range(3):
            input_data = self.get_input_samples((3, 128), config=model.config)
            if isinstance(model, T5ModelWithHeads):
                input_data["labels"] = torch.randint(0, 2, (3, 128))
            else:
                input_data["labels"] = torch.randint(0, 2, (3, 1))
            dataset.append(input_data)

        for adapter in [a1, b1]:
            model.active_head = adapter
            model.set_active_adapters(adapter)
            model.train_adapter(adapter)
            model.eval()

            model = self.train_model(model, dataset)

        model.set_active_adapters(Parallel(a2, b2))
        model.train_adapter(Parallel(a2, b2))
        model.eval()

        model = self.train_model(model, dataset)

        state_dict = model.state_dict()
        for k, v in state_dict.items():
            if a1 in k:
                self.assertTrue(torch.allclose(v, state_dict[k.replace(a1, a2)], atol=1e-5))
            if b1 in k:
                self.assertTrue(torch.allclose(v, state_dict[k.replace(b1, b2)], atol=1e-5))
    def test_batch_split_training(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name,
                                                  use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())

        model.add_adapter("mrpc1")
        model.add_adapter("mrpc2")
        self.add_head(model, "mrpc1")
        self.add_head(model, "mrpc2")
        adapter_setup = BatchSplit("mrpc1", "mrpc2", batch_sizes=[1, 1])
        model.active_adapters = adapter_setup
        model.train_adapter(adapter_setup)
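        # with BatchSplit, each adapter receives its slice of every batch, so
        # both adapters are trained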

        # all weights of the first adapter should be activated
        for k, v in filter_parameters(model, "adapters.mrpc1.").items():
            self.assertTrue(v.requires_grad, k)
        # the second adapter receives the other part of each batch, so its weights should be active as well
        for k, v in filter_parameters(model, "adapters.mrpc2.").items():
            self.assertTrue(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model,
                                      "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        self.trainings_run(model, tokenizer)

        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(),
                                        model.state_dict().items()):
            if "mrpc" in k1:
                self.assertFalse(torch.equal(v1, v2))
            else:
                self.assertTrue(torch.equal(v1, v2))
Example 27
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments,
                               TrainingArguments, MultiLingAdapterArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, adapter_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses(
        )

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome.")

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if is_main_process(training_args.local_rank) else logging.WARN,
    )

    # Log on each process the small summary:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the
    # sentences in columns called 'sentence1' and 'sentence2' if such column exists or the first two columns not named
    # label if at least two columns are provided.
    #
    # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this
    # single column. You can easily tweak this behavior (see below)
    #
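    # For instance, a CSV with columns "premise,hypothesis,label" is paired as
    # (premise, hypothesis), while one with a single "text" column is classified
    # on "text" alone.
    #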
    # In distributed training, the load_dataset function guarantee that only one local process can concurrently
    # download the dataset.
    if data_args.task_name is not None:
        # Downloading and loading a dataset from the hub.
        datasets = load_dataset("glue", data_args.task_name)
    elif data_args.train_file.endswith(".csv"):
        # Loading a dataset from local csv files
        datasets = load_dataset("csv",
                                data_files={
                                    "train": data_args.train_file,
                                    "validation": data_args.validation_file
                                })
    else:
        # Loading a dataset from local json files
        datasets = load_dataset("json",
                                data_files={
                                    "train": data_args.train_file,
                                    "validation": data_args.validation_file
                                })
    # See more about loading any type of standard or custom dataset at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Labels
    label_list = None
    if data_args.task_name is not None:
        is_regression = data_args.task_name == "stsb"
        if not is_regression:
            label_list = datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your needs.
        is_regression = datasets["train"].features["label"].dtype in [
            "float32", "float64"
        ]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
            label_list = datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
    )
    # We use the AutoModelWithHeads class here for better adapter support.
    model = AutoModelWithHeads.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )
    model.add_classification_head(
        data_args.task_name or "glue",
        num_labels=num_labels,
        id2label={i: v for i, v in enumerate(label_list)}
        if label_list is not None else None,
    )

    # Setup adapters
    if adapter_args.train_adapter:
        task_name = data_args.task_name or "glue"
        # check if adapter already exists, otherwise add it
        if task_name not in model.config.adapters:
            # resolve the adapter config
            adapter_config = AdapterConfig.load(
                adapter_args.adapter_config,
                non_linearity=adapter_args.adapter_non_linearity,
                reduction_factor=adapter_args.adapter_reduction_factor,
            )
            # load a pre-trained from Hub if specified
            if adapter_args.load_adapter:
                model.load_adapter(
                    adapter_args.load_adapter,
                    config=adapter_config,
                    load_as=task_name,
                )
            # otherwise, add a fresh adapter
            else:
                model.add_adapter(task_name, config=adapter_config)
        # optionally load a pre-trained language adapter
        if adapter_args.load_lang_adapter:
            # resolve the language adapter config
            lang_adapter_config = AdapterConfig.load(
                adapter_args.lang_adapter_config,
                non_linearity=adapter_args.lang_adapter_non_linearity,
                reduction_factor=adapter_args.lang_adapter_reduction_factor,
            )
            # load the language adapter from Hub
            lang_adapter_name = model.load_adapter(
                adapter_args.load_lang_adapter,
                config=lang_adapter_config,
                load_as=adapter_args.language,
            )
        else:
            lang_adapter_name = None
        # Freeze all model weights except those of this adapter
        model.train_adapter([task_name])
        # Set the adapters to be used in every forward pass
        if lang_adapter_name:
            model.set_active_adapters([lang_adapter_name, task_name])
        else:
            model.set_active_adapters([task_name])
    else:
        if adapter_args.load_adapter or adapter_args.load_lang_adapter:
            raise ValueError(
                "Adapters can only be loaded in adapters training mode."
                "Use --train_adapter to enable adapter training")

    # Preprocessing the datasets
    if data_args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
        non_label_column_names = [
            name for name in datasets["train"].column_names if name != "label"
        ]
        if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) >= 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = non_label_column_names[0], None

    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
        max_length = data_args.max_seq_length
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False
        max_length = None

    # Some models have set the order of the labels to use, so let's make sure we do use it.
    label_to_id = None
    if (model.config.label2id !=
            PretrainedConfig(num_labels=num_labels).label2id
            and data_args.task_name is not None and not is_regression):
        # Some have all caps in their config, some don't.
        label_name_to_id = {
            k.lower(): v
            for k, v in model.config.label2id.items()
        }
        if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
            label_to_id = {
                i: label_name_to_id[label_list[i]]
                for i in range(num_labels)
            }
        else:
            logger.warning(
                "Your model seems to have been trained with labels, but they don't match the dataset: "
                f"model labels: {list(sorted(label_name_to_id.keys()))}, dataset labels: {list(sorted(label_list))}."
                "\nIgnoring the model labels as a result."
            )
    elif data_args.task_name is None:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    def preprocess_function(examples):
        # Tokenize the texts
        args = ((examples[sentence1_key], ) if sentence2_key is None else
                (examples[sentence1_key], examples[sentence2_key]))
        result = tokenizer(*args,
                           padding=padding,
                           max_length=max_length,
                           truncation=True)

        # Map labels to IDs (not necessary for GLUE tasks)
        if label_to_id is not None and "label" in examples:
            result["label"] = [label_to_id[l] for l in examples["label"]]
        return result

    datasets = datasets.map(preprocess_function,
                            batched=True,
                            load_from_cache_file=not data_args.overwrite_cache)

    train_dataset = datasets["train"]
    eval_dataset = datasets["validation_matched" if data_args.task_name ==
                            "mnli" else "validation"]
    if data_args.task_name is not None:
        test_dataset = datasets["test_matched" if data_args.task_name ==
                                "mnli" else "test"]

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(
            f"Sample {index} of the training set: {train_dataset[index]}.")

    # Get the metric function
    if data_args.task_name is not None:
        metric = load_metric("glue", data_args.task_name)
    # TODO: When datasets metrics include regular accuracy, make an else here and remove special branch from
    # compute_metrics

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple with a
    # predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p: EvalPrediction):
        preds = p.predictions[0] if isinstance(p.predictions,
                                               tuple) else p.predictions
        preds = np.squeeze(preds) if is_regression else np.argmax(preds,
                                                                  axis=1)
        if data_args.task_name is not None:
            result = metric.compute(predictions=preds, references=p.label_ids)
            if len(result) > 1:
                result["combined_score"] = np.mean(list(
                    result.values())).item()
            return result
        elif is_regression:
            return {"mse": ((preds - p.label_ids)**2).mean().item()}
        else:
            return {
                "accuracy":
                (preds == p.label_ids).astype(np.float32).mean().item()
            }

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator
        if data_args.pad_to_max_length else None,
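        # When training adapters, checkpoint only the adapter weights instead
        # of the full model.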
        do_save_full_model=not adapter_args.train_adapter,
        do_save_adapters=adapter_args.train_adapter,
    )

    # Training
    if training_args.do_train:
        trainer.train(model_path=model_args.model_name_or_path
                      if os.path.isdir(model_args.model_name_or_path) else None)
        trainer.save_model()  # Saves the tokenizer too for easy upload

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            eval_datasets.append(datasets["validation_mismatched"])

        for eval_dataset, task in zip(eval_datasets, tasks):
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(training_args.output_dir,
                                            f"eval_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_eval_file, "w") as writer:
                    logger.info(f"***** Eval results {task} *****")
                    for key, value in eval_result.items():
                        logger.info(f"  {key} = {value}")
                        writer.write(f"{key} = {value}\n")

            eval_results.update(eval_result)

    if training_args.do_predict:
        logger.info("*** Test ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            test_datasets.append(datasets["test_mismatched"])

        for test_dataset, task in zip(test_datasets, tasks):
            # Removing the `label` column because it contains -1 and Trainer won't like that.
            test_dataset.remove_columns_("label")
            predictions = trainer.predict(
                test_dataset=test_dataset).predictions
            predictions = np.squeeze(
                predictions) if is_regression else np.argmax(predictions,
                                                             axis=1)

            output_test_file = os.path.join(training_args.output_dir,
                                            f"test_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_test_file, "w") as writer:
                    logger.info(f"***** Test results {task} *****")
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if is_regression:
                            writer.write(f"{index}\t{item:3.3f}\n")
                        else:
                            item = label_list[item]
                            writer.write(f"{index}\t{item}\n")
    return eval_results
    def test_reloading_prediction_head(self):
        tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
        data_args = GlueDataTrainingArguments(
            task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
        )
        train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="train")

        model = AutoModelWithHeads.from_pretrained("bert-base-uncased")

        model.add_classification_head("adapter", num_labels=3)
        model.add_classification_head("dummy", num_labels=2)

        # add the adapters to be fused
        model.add_adapter("adapter")
        model.add_adapter("additional_adapter")

        # setup fusion
        adapter_setup = Fuse("adapter", "additional_adapter")
        model.add_adapter_fusion(adapter_setup)
        model.train_adapter_fusion(adapter_setup)
        model.set_active_adapters(adapter_setup)
        self.assertEqual(adapter_setup, model.active_adapters)
        self.assertEqual("dummy", model.active_head)
        with TemporaryDirectory() as tempdir:
            training_args = TrainingArguments(
                output_dir=tempdir,
                do_train=True,
                learning_rate=0.1,
                logging_steps=1,
                max_steps=1,
                save_steps=1,
                remove_unused_columns=False,
            )
            trainer = AdapterTrainer(
                model=model,
                args=training_args,
                train_dataset=train_dataset,
            )

            trainer.train()
            # create second model that should resume the training of the first
            model_resume = AutoModelWithHeads.from_pretrained("bert-base-uncased")

            model_resume.add_classification_head("adapter", num_labels=3)
            model_resume.add_classification_head("dummy", num_labels=2)
            model_resume.add_adapter("adapter")
            model_resume.add_adapter("additional_adapter")
            # setup fusion
            adapter_setup = Fuse("adapter", "additional_adapter")
            model_resume.add_adapter_fusion(adapter_setup)
            model_resume.train_adapter_fusion(adapter_setup)
            model_resume.set_active_adapters(adapter_setup)
            trainer_resume = AdapterTrainer(
                model=model_resume,
                args=TrainingArguments(do_train=True, max_steps=1, output_dir=tempdir),
                train_dataset=train_dataset,
            )
            trainer_resume.train(resume_from_checkpoint=True)

            self.assertEqual("dummy", model.active_head)
            self.assertEqual(model.config.adapters.adapters, model_resume.config.adapters.adapters)

            for ((k1, v1), (k2, v2)) in zip(
                trainer.model.state_dict().items(), trainer_resume.model.state_dict().items()
            ):
                self.assertEqual(k1, k2)
                if "adapter" in k1 or "dummy" in k1:
                    self.assertTrue(torch.equal(v1, v2), k1)
Example 29
for split in ["train", "val", "test"]:
    d = {"text": dataset_dict[f"{split}_text"], "labels": dataset_dict[f"{split}_labels"]}
    if split == "val":
        split = "validation"  # name mismatch between the xlm-t dataset and the datasets library
    dataset[split] = Dataset.from_dict(d)

# --- MODEL ---

config = AutoConfig.from_pretrained(
    MODEL,
    num_labels=NUM_LABELS,
)
model = AutoModelWithHeads.from_pretrained(
    MODEL,
    config=config,
)

# Add a new adapter
adapter_name = f"adapter_{UNIQUE_NAME}" 
#adapter_name = f"xlm-t-sentiment"
model.add_adapter(adapter_name, AdapterType.text_task)
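# AdapterType.text_task marks this as a task adapter (positional argument used
# by older adapter-transformers versions)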

# Add a matching classification head
model.add_classification_head(
    adapter_name,
    num_labels=NUM_LABELS,
    id2label={0: "Neg", 1: "Neu", 2: "Pos"},
)

# Activate the adapter
Example 30
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.

    parser = HfArgumentParser((ModelArguments, DataTrainingArguments,
                               TrainingArguments, MultiLingAdapterArguments))

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, adapter_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1]))
    else:
        model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses(
        )

    if (os.path.exists(training_args.output_dir)
            and os.listdir(training_args.output_dir) and training_args.do_train
            and not training_args.overwrite_output_dir):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s -   %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO
        if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    try:
        num_labels = glue_tasks_num_labels[data_args.task_name]
        output_mode = glue_output_modes[data_args.task_name]
    except KeyError:
        raise ValueError("Task not found: %s" % (data_args.task_name))

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.

    config = AutoConfig.from_pretrained(
        model_args.config_name
        if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name
        if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    model = AutoModelWithHeads.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )
    model.add_classification_head(data_args.task_name, num_labels=num_labels)

    # Setup adapters
    if adapter_args.train_adapter:
        task_name = data_args.task_name
        # check if adapter already exists, otherwise add it
        if task_name not in model.config.adapters.adapter_list(
                AdapterType.text_task):
            # resolve the adapter config
            adapter_config = AdapterConfig.load(
                adapter_args.adapter_config,
                non_linearity=adapter_args.adapter_non_linearity,
                reduction_factor=adapter_args.adapter_reduction_factor,
            )
            # load a pre-trained adapter from the Hub if specified
            if adapter_args.load_adapter:
                model.load_adapter(
                    adapter_args.load_adapter,
                    AdapterType.text_task,
                    config=adapter_config,
                    load_as=task_name,
                )
            # otherwise, add a fresh adapter
            else:
                model.add_adapter(task_name,
                                  AdapterType.text_task,
                                  config=adapter_config)
        # optionally load a pre-trained language adapter
        if adapter_args.load_lang_adapter:
            # resolve the language adapter config
            lang_adapter_config = AdapterConfig.load(
                adapter_args.lang_adapter_config,
                non_linearity=adapter_args.lang_adapter_non_linearity,
                reduction_factor=adapter_args.lang_adapter_reduction_factor,
            )
            # load the language adapter from Hub
            lang_adapter_name = model.load_adapter(
                adapter_args.load_lang_adapter,
                AdapterType.text_lang,
                config=lang_adapter_config,
                load_as=adapter_args.language,
            )
        else:
            lang_adapter_name = None
        # Freeze all model weights except those of this adapter
        model.train_adapter([task_name])
        # Set the adapters to be used in every forward pass
        if lang_adapter_name:
            model.set_active_adapters([lang_adapter_name, task_name])
        else:
            model.set_active_adapters([task_name])
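        # Note: passing a list to set_active_adapters stacks the adapters, so the
        # language adapter is applied before the task adapter in each layer
        # (MAD-X-style setup).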

    # Get datasets
    train_dataset = GlueDataset(
        data_args, tokenizer=tokenizer) if training_args.do_train else None
    eval_dataset = GlueDataset(data_args, tokenizer=tokenizer,
                               mode="dev") if training_args.do_eval else None
    test_dataset = GlueDataset(
        data_args, tokenizer=tokenizer,
        mode="test") if training_args.do_predict else None

    def compute_metrics(p: EvalPrediction) -> Dict:
        if output_mode == "classification":
            preds = np.argmax(p.predictions, axis=1)
        elif output_mode == "regression":
            preds = np.squeeze(p.predictions)
        return glue_compute_metrics(data_args.task_name, preds, p.label_ids)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        do_save_full_model=not adapter_args.train_adapter,
        do_save_adapters=adapter_args.train_adapter,
    )
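    # do_save_adapters / do_save_full_model are adapter-transformers extensions of
    # the Trainer: when training adapters, checkpoints contain only the adapter
    # weights instead of the full model.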

    # Training
    if training_args.do_train:
        trainer.train(model_path=model_args.model_name_or_path
                      if os.path.isdir(model_args.model_name_or_path) else None)
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            eval_datasets.append(
                GlueDataset(mnli_mm_data_args, tokenizer=tokenizer,
                            mode="dev"))

        for eval_dataset in eval_datasets:
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(
                training_args.output_dir,
                f"eval_results_{eval_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_eval_file, "w") as writer:
                    logger.info("***** Eval results {} *****".format(
                        eval_dataset.args.task_name))
                    for key, value in eval_result.items():
                        logger.info("  %s = %s", key, value)
                        writer.write("%s = %s\n" % (key, value))

            eval_results.update(eval_result)

    if training_args.do_predict:
        logger.info("*** Test ***")
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args,
                                                    task_name="mnli-mm")
            test_datasets.append(
                GlueDataset(mnli_mm_data_args,
                            tokenizer=tokenizer,
                            mode="test"))

        for test_dataset in test_datasets:
            predictions = trainer.predict(
                test_dataset=test_dataset).predictions
            if output_mode == "classification":
                predictions = np.argmax(predictions, axis=1)

            output_test_file = os.path.join(
                training_args.output_dir,
                f"test_results_{test_dataset.args.task_name}.txt")
            if trainer.is_world_master():
                with open(output_test_file, "w") as writer:
                    logger.info("***** Test results {} *****".format(
                        test_dataset.args.task_name))
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if output_mode == "regression":
                            writer.write("%d\t%3.3f\n" % (index, item))
                        else:
                            item = test_dataset.get_labels()[item]
                            writer.write("%d\t%s\n" % (index, item))
    return eval_results
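

# Likely completion (the snippet ends with main()): the standard script entry point.
if __name__ == "__main__":
    main()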