Code example #1: parallel inference through four adapters on a single input
    def test_parallel(self):
        self.model.set_active_adapters(Parallel("a", "b", "c", "d"))

        inputs = {"input_ids": ids_tensor((1, 128), 1000)}
        logits = self.model(**inputs).logits
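        # Parallel replicates the batch once per adapter: one input row yields
        # four rows of logits (one per adapter), each over two labels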
        self.assertEqual(logits.shape, (4, 2))
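For orientation, here is a minimal usage sketch outside the test harness, assuming the adapter-transformers API exercised above (AutoModelWithHeads, Parallel); the checkpoint and adapter names are illustrative only:

# A minimal usage sketch, assuming the adapter-transformers API used by these
# tests; the checkpoint and adapter names below are illustrative, not taken
# from the test suite.
import torch
from transformers import AutoModelWithHeads
from transformers.adapters.composition import Parallel

model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
model.add_adapter("task_a")
model.add_adapter("task_b")
model.add_classification_head("task_a", num_labels=2)
model.add_classification_head("task_b", num_labels=3)

# Activating a Parallel block routes every input through both adapters;
# internally the batch is replicated, one copy per adapter/head.
model.active_adapters = Parallel("task_a", "task_b")
model.eval()

input_ids = torch.randint(0, model.config.vocab_size, (1, 16))
outputs = model(input_ids=input_ids)  # one head output per parallel adapter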
Code example #2: parallel inference with two adapters and their classification heads
    def test_parallel_inference_with_heads(self):
        model = AutoModelWithHeads.from_config(self.config())

        model.add_adapter("a")
        model.add_adapter("b")
        model.add_classification_head("a", num_labels=2)
        model.add_classification_head("b", num_labels=3)
        model.eval()

        inputs = {
            "input_ids": self.get_input_samples((2, 128), config=model.config),
            "attention_mask": torch.randint(0, 2, size=(2, 128)),
        }

        # for reference, pass through single adapters
        model.active_adapters = "a"
        model.active_head = "a"
        outputs_a = model(**inputs)
        model.active_adapters = "b"
        model.active_head = "b"
        outputs_b = model(**inputs)

        model.active_adapters = Parallel("a", "b")
        # active_adapters should set parallel heads too
        self.assertEqual(model.active_head, ["a", "b"])
        outputs = model(**inputs)

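        # parallel inference returns one output per head, and each must match
        # the corresponding single-adapter reference output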
        self.assertEqual(len(outputs), 2)
        self.assertEqual(outputs[0][0].shape, (2, 2))
        self.assertEqual(outputs[1][0].shape, (2, 3))
        self.assertTrue(torch.allclose(outputs[0][0], outputs_a[0]))
        self.assertTrue(torch.allclose(outputs[1][0], outputs_b[0]))
Code example #3: jointly training two adapters in a Parallel block
    def test_parallel_training(self):
        tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False)
        if tokenizer.pad_token is None:
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelWithHeads.from_config(self.config())

        model.add_adapter("mrpc1")
        model.add_adapter("mrpc2")
        self.add_head(model, "mrpc1", num_labels=2)
        self.add_head(model, "mrpc2", num_labels=3)
        model.active_adapters = Parallel("mrpc1", "mrpc2")
        model.train_adapter(Parallel("mrpc1", "mrpc2"))

        # all weights of the first adapter should be activated
        for k, v in filter_parameters(model, "adapters.mrpc1.").items():
            self.assertTrue(v.requires_grad, k)
        # the second adapter is also part of the Parallel block passed to
        # train_adapter(), so its weights should be activated as well
        for k, v in filter_parameters(model, "adapters.mrpc2.").items():
            self.assertTrue(v.requires_grad, k)
        # weights of the base model should be frozen (check on some examples)
        for k, v in filter_parameters(model, "encoder.layer.0.attention").items():
            self.assertFalse(v.requires_grad, k)

        state_dict_pre = copy.deepcopy(model.state_dict())

        train_dataset = self.dataset(tokenizer)
        training_args = TrainingArguments(
            output_dir="./examples", do_train=True, learning_rate=0.1, max_steps=10, no_cuda=True
        )

        # train
        trainer = Trainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )
        trainer.train()

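        # after training, only the adapter weights may have changed; all other
        # parameters must be untouched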
        for ((k1, v1), (k2, v2)) in zip(state_dict_pre.items(), model.state_dict().items()):
            if "mrpc" in k1:
                self.assertFalse(torch.equal(v1, v2), k1)
            else:
                self.assertTrue(torch.equal(v1, v2), k1)
Code example #4: a single parallel forward pass matches separate single-adapter passes
    def test_parallel_training_single_forward_pass(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        a1, a2 = self.create_twin_adapters(model, "a")
        b1, b2 = self.create_twin_adapters(model, "b")

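        # twin adapters are created with identical weights, so corresponding
        # state dict entries must be exactly equal before training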
        state_dict = model.state_dict()
        for k, v in state_dict.items():
            if a1 in k:
                self.assertTrue(torch.equal(v, state_dict[k.replace(a1, a2)]))
            if b1 in k:
                self.assertTrue(torch.equal(v, state_dict[k.replace(b1, b2)]))

        input_data = self.get_input_samples((3, 128), config=model.config)
        if isinstance(model, T5ModelWithHeads):
            input_data["labels"] = torch.randint(0, 2, (3, 128))
        else:
            input_data["labels"] = torch.randint(0, 2, (3, 1))

        outputs = []
        for adapter in [a1, b1]:
            model.active_head = adapter
            model.set_active_adapters(adapter)
            model.train_adapter(adapter)
            model.eval()
            outputs.append(model(**input_data))

        model.set_active_adapters(Parallel(a2, b2))
        model.train_adapter(Parallel(a2, b2))
        model.eval()

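        # a single forward pass through the Parallel block should reproduce
        # both single-adapter reference outputs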
        parallel_outputs = model(**input_data)

        for out1, out2 in zip(outputs, parallel_outputs.head_outputs):
            self.assertTrue(torch.allclose(out1["loss"], out2["loss"]))
            self.assertTrue(torch.allclose(out1["logits"], out2["logits"], atol=1e-5))
Code example #5: parallel training is equivalent to training each adapter separately
    def test_parallel_training_equivalent_to_single_adapters(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        a1, a2 = self.create_twin_adapters(model, "a")
        b1, b2 = self.create_twin_adapters(model, "b")

        dataset = []
        for i in range(3):
            input_data = self.get_input_samples((3, 128), config=model.config)
            if isinstance(model, T5ModelWithHeads):
                input_data["labels"] = torch.randint(0, 2, (3, 128))
            else:
                input_data["labels"] = torch.randint(0, 2, (3, 1))
            dataset.append(input_data)

        for adapter in [a1, b1]:
            model.active_head = adapter
            model.set_active_adapters(adapter)
            model.train_adapter(adapter)
            model.eval()

            model = self.train_model(model, dataset)

        model.set_active_adapters(Parallel(a2, b2))
        model.train_adapter(Parallel(a2, b2))
        model.eval()

        model = self.train_model(model, dataset)

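        # both twins of each pair saw the same data and training schedule, so
        # their weights should still match (up to numerical tolerance)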
        state_dict = model.state_dict()
        for k, v in state_dict.items():
            if a1 in k:
                self.assertTrue(torch.allclose(v, state_dict[k.replace(a1, a2)], atol=1e-5))
            if b1 in k:
                self.assertTrue(torch.allclose(v, state_dict[k.replace(b1, b2)], atol=1e-5))
Code example #6: parallel inference raises an error when the head count does not match
    def test_parallel_inference_with_wrong_number_of_heads(self):
        model = AutoModelWithHeads.from_config(self.config())
        model.eval()

        model.add_adapter("a")
        model.add_adapter("b")
        self.add_head(model, "a", num_labels=2)

        inputs = self.get_input_samples((2, 128), config=model.config)

        model.active_adapters = Parallel("a", "b")
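        # one active head for two parallel adapters: the head count mismatch
        # should raise a ValueError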
        model.active_head = ["a"]
        with self.assertRaises(ValueError):
            model(**inputs)

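        # a single (non-list) head is likewise rejected for two parallel adapters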
        model.active_head = "a"
        with self.assertRaises(ValueError):
            model(**inputs)