def test_parallel(self):
    """A parallel composition of four adapters fans one input row out to four output rows."""
    self.model.set_active_adapters(Parallel("a", "b", "c", "d"))

    batch = {"input_ids": ids_tensor((1, 128), 1000)}
    logits = self.model(**batch).logits
    # Batch of 1 replicated across 4 parallel adapters -> 4 rows of 2 logits each.
    self.assertEqual(logits.shape, (4, 2))
def test_parallel_inference_with_heads(self):
    """Parallel inference over two adapters/heads must match each adapter run individually."""
    model = AutoModelWithHeads.from_config(self.config())
    model.add_adapter("a")
    model.add_adapter("b")
    model.add_classification_head("a", num_labels=2)
    model.add_classification_head("b", num_labels=3)
    model.eval()

    inputs = {
        "attention_mask": torch.randint(0, 2, size=(2, 128)),
        "input_ids": self.get_input_samples((2, 128), config=model.config),
    }

    # Reference outputs: run each adapter with its own head, one at a time.
    model.active_adapters = "a"
    model.active_head = "a"
    ref_a = model(**inputs)
    model.active_adapters = "b"
    model.active_head = "b"
    ref_b = model(**inputs)

    model.active_adapters = Parallel("a", "b")
    # Activating parallel adapters should also activate the matching heads.
    self.assertEqual(model.active_head, ["a", "b"])

    parallel_out = model(**inputs)
    self.assertEqual(len(parallel_out), 2)
    self.assertEqual(parallel_out[0][0].shape, (2, 2))
    self.assertEqual(parallel_out[1][0].shape, (2, 3))
    self.assertTrue(torch.allclose(parallel_out[0][0], ref_a[0]))
    self.assertTrue(torch.allclose(parallel_out[1][0], ref_b[0]))
def test_parallel_training(self):
    """Training a Parallel setup must update both adapters and leave the base model frozen.

    Fixes: the original comment claimed mrpc2 "should be freezed" while the
    assertion (correctly) requires it to be trainable — under Parallel both
    adapters are trained. Also removes commented-out dead code and adds the
    missing failure-message argument to the final equality assertion.
    """
    tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelWithHeads.from_config(self.config())

    model.add_adapter("mrpc1")
    model.add_adapter("mrpc2")
    self.add_head(model, "mrpc1", num_labels=2)
    self.add_head(model, "mrpc2", num_labels=3)
    model.active_adapters = Parallel("mrpc1", "mrpc2")
    model.train_adapter(Parallel("mrpc1", "mrpc2"))

    # Both adapters are part of the parallel composition, so both must be trainable.
    for k, v in filter_parameters(model, "adapters.mrpc1.").items():
        self.assertTrue(v.requires_grad, k)
    for k, v in filter_parameters(model, "adapters.mrpc2.").items():
        self.assertTrue(v.requires_grad, k)
    # Base model weights should stay frozen (spot-check the first attention layer).
    for k, v in filter_parameters(model, "encoder.layer.0.attention").items():
        self.assertFalse(v.requires_grad, k)

    state_dict_pre = copy.deepcopy(model.state_dict())

    train_dataset = self.dataset(tokenizer)
    training_args = TrainingArguments(
        output_dir="./examples",
        do_train=True,
        learning_rate=0.1,
        max_steps=10,
        no_cuda=True,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )
    trainer.train()

    # After training, only adapter ("mrpc*") weights may have changed.
    for (k1, v1), (k2, v2) in zip(state_dict_pre.items(), model.state_dict().items()):
        if "mrpc" in k1:
            self.assertFalse(torch.equal(v1, v2), k1)
        else:
            self.assertTrue(torch.equal(v1, v2), k1)
def test_parallel_training_single_forward_pass(self):
    """One parallel forward pass must reproduce two sequential single-adapter passes."""
    model = AutoModelWithHeads.from_config(self.config())
    model.eval()

    a1, a2 = self.create_twin_adapters(model, "a")
    b1, b2 = self.create_twin_adapters(model, "b")

    # Sanity check: each pair of twin adapters starts from identical weights.
    state_dict = model.state_dict()
    for name, param in state_dict.items():
        if a1 in name:
            self.assertTrue(torch.equal(param, state_dict[name.replace(a1, a2)]))
        if b1 in name:
            self.assertTrue(torch.equal(param, state_dict[name.replace(b1, b2)]))

    input_data = self.get_input_samples((3, 128), config=model.config)
    # T5-style models expect sequence labels; classification heads expect one label per row.
    if isinstance(model, T5ModelWithHeads):
        input_data["labels"] = torch.randint(0, 2, (3, 128))
    else:
        input_data["labels"] = torch.randint(0, 2, (3, 1))

    # Reference: run each first twin on its own.
    single_outputs = []
    for adapter in [a1, b1]:
        model.active_head = adapter
        model.set_active_adapters(adapter)
        model.train_adapter(adapter)
        model.eval()
        single_outputs.append(model(**input_data))

    # Run the second twins together in one parallel forward pass.
    model.set_active_adapters(Parallel(a2, b2))
    model.train_adapter(Parallel(a2, b2))
    model.eval()
    parallel_outputs = model(**input_data)

    for single, parallel in zip(single_outputs, parallel_outputs.head_outputs):
        self.assertTrue(torch.allclose(single["loss"], parallel["loss"]))
        self.assertTrue(torch.allclose(single["logits"], parallel["logits"], atol=1e-5))
def test_parallel_training_equivalent_to_single_adapters(self):
    """Training twin adapters in parallel must yield the same weights as training them one by one."""
    model = AutoModelWithHeads.from_config(self.config())
    model.eval()

    a1, a2 = self.create_twin_adapters(model, "a")
    b1, b2 = self.create_twin_adapters(model, "b")

    # Build a small fixed dataset shared by both training regimes.
    dataset = []
    for _ in range(3):
        sample = self.get_input_samples((3, 128), config=model.config)
        if isinstance(model, T5ModelWithHeads):
            sample["labels"] = torch.randint(0, 2, (3, 128))
        else:
            sample["labels"] = torch.randint(0, 2, (3, 1))
        dataset.append(sample)

    # Regime 1: train the first twins one at a time.
    for adapter in [a1, b1]:
        model.active_head = adapter
        model.set_active_adapters(adapter)
        model.train_adapter(adapter)
        model.eval()
        model = self.train_model(model, dataset)

    # Regime 2: train the second twins jointly via parallel composition.
    model.set_active_adapters(Parallel(a2, b2))
    model.train_adapter(Parallel(a2, b2))
    model.eval()
    model = self.train_model(model, dataset)

    # Both regimes should end with (numerically) identical adapter weights.
    state_dict = model.state_dict()
    for name, param in state_dict.items():
        if a1 in name:
            self.assertTrue(torch.allclose(param, state_dict[name.replace(a1, a2)], atol=1e-5))
        if b1 in name:
            self.assertTrue(torch.allclose(param, state_dict[name.replace(b1, b2)], atol=1e-5))
def test_parallel_inference_with_wrong_number_of_heads(self):
    """A head count that doesn't match the parallel adapter setup must raise ValueError."""
    model = AutoModelWithHeads.from_config(self.config())
    model.eval()
    model.add_adapter("a")
    model.add_adapter("b")
    self.add_head(model, "a", num_labels=2)

    inputs = self.get_input_samples((2, 128), config=model.config)
    model.active_adapters = Parallel("a", "b")

    # Two parallel adapters but only one head, given as a list.
    model.active_head = ["a"]
    with self.assertRaises(ValueError):
        model(**inputs)

    # Same mismatch with the single head given as a plain string.
    model.active_head = "a"
    with self.assertRaises(ValueError):
        model(**inputs)