def test_save_load_custom_head(self):
    model_name = "bert-base-uncased"
    model_config = AutoConfig.from_pretrained(model_name, custom_heads={"tag": CustomHead})
    model1 = AutoModelWithHeads.from_pretrained(model_name, config=model_config)
    model2 = AutoModelWithHeads.from_pretrained(model_name, config=model_config)
    config = {"head_type": "tag", "num_labels": 3, "layers": 2, "activation_function": "tanh"}
    model1.add_custom_head("custom_head", config)

    with tempfile.TemporaryDirectory() as temp_dir:
        model1.save_head(temp_dir, "custom_head")
        model2.load_head(temp_dir)

    model1.eval()
    model2.eval()

    in_data = ids_tensor((1, 128), 1000)
    output1 = model1(in_data)
    output2 = model2(in_data)
    self.assertEqual(output1[0].size(), output2[0].size())

    state1 = model1.config.prediction_heads["custom_head"].state_dict()
    state2 = model2.config.prediction_heads["custom_head"].state_dict()
    for (k1, v1), (k2, v2) in zip(state1.items(), state2.items()):
        self.assertTrue(torch.equal(v1, v2))
def _load_pipeline_instance(pipeline_class, adapter_id):
    adapter_info = get_adapter_info(adapter_id, source="hf")
    if adapter_info is None:
        raise ValueError(f"Adapter with id '{adapter_id}' not available.")

    tokenizer = AutoTokenizer.from_pretrained(adapter_info.model_name)
    model = AutoModelWithHeads.from_pretrained(adapter_info.model_name)
    model.load_adapter(adapter_id, source="hf", set_active=True)

    return pipeline_class(model=model, tokenizer=tokenizer)
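# A minimal usage sketch for the helper above (not part of the original source).
# The adapter id is an assumption: substitute any adapter that actually exists
# on the Hugging Face Hub for the chosen pipeline class.
from transformers import TextClassificationPipeline

sentiment_pipeline = _load_pipeline_instance(TextClassificationPipeline, "AdapterHub/roberta-base-pf-sst2")
print(sentiment_pipeline("I really like this movie."))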
def run_test(self, static_model, input_shape=None, label_dict=None):
    flex_model = AutoModelWithHeads.from_pretrained(
        None, config=self.config(), state_dict=static_model.state_dict()
    )
    static_model.eval()
    flex_model.eval()
    if (
        static_model.base_model.__class__ != flex_model.base_model.__class__
        and not static_model.base_model == static_model
    ):
        self.skipTest("Skipping as base model classes are different.")

    with tempfile.TemporaryDirectory() as temp_dir:
        static_model.save_head(temp_dir)

        loading_info = {}
        flex_model.load_head(temp_dir, load_as="test", loading_info=loading_info)

    self.assertEqual(
        0,
        len(loading_info["missing_keys"]),
        "Missing keys: {}".format(", ".join(loading_info["missing_keys"])),
    )

    # Some weights are not expected to be converted, so remove them for the check
    unexpected_keys = loading_info["unexpected_keys"]
    if static_model._keys_to_ignore_on_load_missing is not None:
        for pat in static_model._keys_to_ignore_on_load_missing:
            unexpected_keys = [k for k in unexpected_keys if re.search(pat, k) is None]
    # HACK for BERT-based models
    if isinstance(static_model, BertPreTrainedModel):
        unexpected_keys = [k for k in unexpected_keys if "cls.predictions.bias" not in k]
    elif isinstance(static_model, RobertaPreTrainedModel):
        unexpected_keys = [k for k in unexpected_keys if "lm_head.bias" not in k]
    self.assertEqual(
        0, len(unexpected_keys), "Unexpected keys: {}".format(", ".join(unexpected_keys))
    )

    # the head was loaded
    self.assertIn("test", flex_model.heads)

    # check equal output
    input_shape = input_shape or (self.batch_size, self.seq_length)
    in_data = self.get_input_samples(input_shape, config=flex_model.config)
    if label_dict:
        for k, v in label_dict.items():
            in_data[k] = v
    output1 = static_model(**in_data)
    output2 = flex_model(**in_data)
    self.assertTrue(torch.allclose(output1.loss, output2.loss))
    # the second output is not called "logits" for all model classes
    self.assertTrue(torch.allclose(output1[1], output2[1]))
def test_custom_head_from_model_config(self):
    model_name = "bert-base-uncased"
    model_config = AutoConfig.from_pretrained(model_name, custom_heads={"tag": CustomHead})
    model = AutoModelWithHeads.from_pretrained(model_name, config=model_config)
    config = {"head_type": "tag", "num_labels": 3, "layers": 2, "activation_function": "tanh"}
    model.add_custom_head("custom_head", config)
    model.eval()
    in_data = ids_tensor((1, 128), 1000)
    output1 = model(in_data)
    model.add_tagging_head("tagging_head", num_labels=3, layers=2)
    output2 = model(in_data)
    self.assertEqual(output1[0].size(), output2[0].size())
def test_load_full_model(self):
    model = AutoModelWithHeads.from_config(self.config())
    model.add_classification_head("dummy", layers=1)

    true_config = model.get_prediction_heads_config()
    with tempfile.TemporaryDirectory() as temp_dir:
        # save
        model.save_pretrained(temp_dir)
        # reload
        model = AutoModelWithHeads.from_pretrained(temp_dir)
    self.assertIn("dummy", model.heads)
    self.assertDictEqual(true_config, model.get_prediction_heads_config())
def test_model_with_heads_tagging_head_labels(self):
    model = AutoModelWithHeads.from_pretrained(self.model_name, config=self.config)
    model.add_tagging_head("test_head", num_labels=len(self.labels), id2label=self.label_map)
    with TemporaryDirectory() as temp_dir:
        model.save_head(temp_dir, "test_head")
        model.load_head(temp_dir)
    # this adapter is loaded only to test whether loading an adapter changes the label information
    model.load_adapter("sst-2", "text_task")

    self.assertEqual(self.labels, model.get_labels())
    self.assertDictEqual(self.label_map, model.get_labels_dict())
def test_train_single_adapter(self):
    for model_name in self.model_names:
        with self.subTest(model_name=model_name):
            tokenizer = AutoTokenizer.from_pretrained(model_name)
            model = AutoModelWithHeads.from_pretrained(model_name)

            # add two adapters: one will be trained, the other should stay frozen
            model.add_adapter("mrpc", "text_task")
            model.add_adapter("dummy", "text_task")
            model.add_classification_head("mrpc")

            self.assertIn("mrpc", model.config.adapters.adapters)
            self.assertIn("dummy", model.config.adapters.adapters)

            # train the mrpc adapter -> should be activated & unfrozen
            model.train_adapter("mrpc")
            self.assertEqual([["mrpc"]], model.active_adapters)

            # all weights of the trained adapter should require gradients
            for k, v in filter_parameters(model, "text_task_adapters.mrpc").items():
                self.assertTrue(v.requires_grad, k)
            # all weights of the adapter not used for training should be frozen
            for k, v in filter_parameters(model, "text_task_adapters.dummy").items():
                self.assertFalse(v.requires_grad, k)
            # weights of the base model should be frozen (check on some examples)
            for k, v in filter_parameters(model, "encoder.layer.0.attention").items():
                self.assertFalse(v.requires_grad, k)

            state_dict_pre = copy.deepcopy(model.state_dict())

            # setup dataset
            data_args = GlueDataTrainingArguments(
                task_name="mrpc",
                data_dir="./tests/fixtures/tests_samples/MRPC",
                overwrite_cache=True,
            )
            train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="train")
            training_args = TrainingArguments(
                output_dir="./examples",
                do_train=True,
                learning_rate=0.1,
                max_steps=5,
                no_cuda=True,
            )

            # train
            trainer = Trainer(model=model, args=training_args, train_dataset=train_dataset)
            trainer.train()

            # only the weights of the trained adapter should have changed
            for (k1, v1), (k2, v2) in zip(state_dict_pre.items(), model.state_dict().items()):
                if "mrpc" in k1:
                    self.assertFalse(torch.equal(v1, v2))
                else:
                    self.assertTrue(torch.equal(v1, v2))
def test_model_with_heads_multiple_heads(self):
    model = AutoModelWithHeads.from_pretrained(self.model_name, config=self.config)
    model.add_tagging_head("test_head", num_labels=len(self.labels), id2label=self.label_map)
    model.add_classification_head("second_head", num_labels=5)
    with TemporaryDirectory() as temp_dir:
        model.save_head(temp_dir + "/test_head", "test_head")
        model.load_head(temp_dir + "/test_head")
        model.save_head(temp_dir + "/second_head", "second_head")
        model.load_head(temp_dir + "/second_head")
    model.load_adapter("sst-2", "text_task")

    self.assertEqual(model.get_labels("test_head"), self.labels)
    self.assertEqual(model.get_labels_dict("test_head"), self.label_map)
def test_general(self):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    data_args = GlueDataTrainingArguments(
        task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
    )
    train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="train")

    model = AutoModelWithHeads.from_pretrained("bert-base-uncased")
    model.add_classification_head("task", num_labels=3)

    # add two adapters; only "task" will be trained
    model.add_adapter("task")
    model.add_adapter("additional_adapter")

    model.train_adapter("task")
    self.assertEqual("task", model.active_head)
    self.assertEqual(Stack("task"), model.active_adapters)

    with TemporaryDirectory() as tempdir:
        training_args = TrainingArguments(
            output_dir=tempdir,
            do_train=True,
            learning_rate=0.1,
            logging_steps=1,
            max_steps=1,
            save_steps=1,
            remove_unused_columns=False,
        )
        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )

        trainer.train()

        # Check that the adapters are actually saved but the full model is not
        files_dir_checkpoint = os.listdir(os.path.join(tempdir, "checkpoint-1"))
        self.assertTrue("task" in files_dir_checkpoint)
        self.assertTrue("additional_adapter" in files_dir_checkpoint)
        # Check that full model weights are not stored
        self.assertFalse("pytorch_model.bin" in files_dir_checkpoint)

    # this should always be False in the AdapterTrainer
    self.assertFalse(trainer.args.remove_unused_columns)
    self.assertEqual("task", model.active_head)
    self.assertEqual(Stack("task"), model.active_adapters)
def test_multiple_heads_label(self):
    model = AutoModelWithHeads.from_pretrained(self.model_name, config=self.config)
    model.add_tagging_head("test_head", num_labels=len(self.labels), id2label=self.label_map)
    with TemporaryDirectory() as temp_dir:
        model.save_head(temp_dir, "test_head")
        model.load_head(temp_dir)
    # this adapter is loaded only to test whether it changes the label information
    model.load_adapter("sst-2", "text_task")
    model.add_classification_head("classification_head")
    default_label, default_label_dict = get_default(2)

    self.assertEqual(model.get_labels("classification_head"), default_label)
    self.assertEqual(model.get_labels_dict("classification_head"), default_label_dict)
def test_reload_static_to_flex_head(self):
    if not hasattr(MODEL_WITH_HEADS_MAPPING[self.config_class], "add_classification_head"):
        self.skipTest("No classification head available")
    static_head_model = AutoModelForSequenceClassification.from_config(self.config())
    flex_head_model = AutoModelWithHeads.from_pretrained(
        None, config=self.config(), state_dict=static_head_model.state_dict()
    )
    static_head_model.eval()
    flex_head_model.eval()

    static_head_model.add_adapter("test")

    with tempfile.TemporaryDirectory() as temp_dir:
        static_head_model.save_adapter(temp_dir, "test")

        loading_info = {}
        flex_head_model.load_adapter(temp_dir, loading_info=loading_info)

        # Load the adapter a second time to make sure our conversion script doesn't break anything
        flex_head_model.load_adapter(temp_dir, loading_info=loading_info)
    self.assertEqual(0, len(loading_info["missing_keys"]))
    self.assertEqual(0, len(loading_info["unexpected_keys"]))

    # adapter and head were loaded
    self.assertIn("test", flex_head_model.config.adapters)
    self.assertIn("test", flex_head_model.heads)

    # check equal output
    in_data = self.get_input_samples((1, 128), config=flex_head_model.config)
    output1 = static_head_model(**in_data, adapter_names=["test"])
    output2 = flex_head_model(**in_data, adapter_names=["test"], head="test")
    self.assertTrue(torch.all(torch.isclose(output1.logits, output2.logits)))
def _build_model(self):
    model = AutoModelWithHeads.from_pretrained(self._base_model_name)
    self._adapter_internal_name = model.load_adapter(
        self._adapter_name, "text_task", config=self._config
    )
    return model
def test_reloading_prediction_head(self):
    tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
    data_args = GlueDataTrainingArguments(
        task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
    )
    train_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="train")

    model = AutoModelWithHeads.from_pretrained("bert-base-uncased")

    model.add_classification_head("adapter", num_labels=3)
    model.add_classification_head("dummy", num_labels=2)

    # add the adapters to be fused
    model.add_adapter("adapter")
    model.add_adapter("additional_adapter")

    # setup fusion
    adapter_setup = Fuse("adapter", "additional_adapter")
    model.add_adapter_fusion(adapter_setup)
    model.train_adapter_fusion(adapter_setup)
    model.set_active_adapters(adapter_setup)
    self.assertEqual(adapter_setup, model.active_adapters)
    self.assertEqual("dummy", model.active_head)

    with TemporaryDirectory() as tempdir:
        training_args = TrainingArguments(
            output_dir=tempdir,
            do_train=True,
            learning_rate=0.1,
            logging_steps=1,
            max_steps=1,
            save_steps=1,
            remove_unused_columns=False,
        )
        trainer = AdapterTrainer(
            model=model,
            args=training_args,
            train_dataset=train_dataset,
        )

        trainer.train()

        # create a second model that should resume the training of the first
        model_resume = AutoModelWithHeads.from_pretrained("bert-base-uncased")
        model_resume.add_classification_head("adapter", num_labels=3)
        model_resume.add_classification_head("dummy", num_labels=2)
        model_resume.add_adapter("adapter")
        model_resume.add_adapter("additional_adapter")
        # setup fusion
        adapter_setup = Fuse("adapter", "additional_adapter")
        model_resume.add_adapter_fusion(adapter_setup)
        model_resume.train_adapter_fusion(adapter_setup)
        model_resume.set_active_adapters(adapter_setup)
        trainer_resume = AdapterTrainer(
            model=model_resume,
            args=TrainingArguments(do_train=True, max_steps=1, output_dir=tempdir),
            train_dataset=train_dataset,
        )
        trainer_resume.train(resume_from_checkpoint=True)

        self.assertEqual("dummy", model.active_head)
        self.assertEqual(model.config.adapters.adapters, model_resume.config.adapters.adapters)

        for (k1, v1), (k2, v2) in zip(
            trainer.model.state_dict().items(), trainer_resume.model.state_dict().items()
        ):
            self.assertEqual(k1, k2)
            if "adapter" in k1 or "dummy" in k1:
                self.assertTrue(torch.equal(v1, v2), k1)
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments, MultiLingAdapterArguments)
    )

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, adapter_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1])
        )
    else:
        model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses()

    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    try:
        num_labels = glue_tasks_num_labels[data_args.task_name]
        output_mode = glue_output_modes[data_args.task_name]
    except KeyError:
        raise ValueError("Task not found: %s" % (data_args.task_name))

    # Load pretrained model and tokenizer
    #
    # Distributed training:
    # The .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
    )
    model = AutoModelWithHeads.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )
    model.add_classification_head(data_args.task_name, num_labels=num_labels)

    # Setup adapters
    if adapter_args.train_adapter:
        task_name = data_args.task_name
        # check if adapter already exists, otherwise add it
        if task_name not in model.config.adapters.adapter_list(AdapterType.text_task):
            # resolve the adapter config
            adapter_config = AdapterConfig.load(
                adapter_args.adapter_config,
                non_linearity=adapter_args.adapter_non_linearity,
                reduction_factor=adapter_args.adapter_reduction_factor,
            )
            # load a pre-trained adapter from the Hub if specified
            if adapter_args.load_adapter:
                model.load_adapter(
                    adapter_args.load_adapter,
                    AdapterType.text_task,
                    config=adapter_config,
                    load_as=task_name,
                )
            # otherwise, add a fresh adapter
            else:
                model.add_adapter(task_name, AdapterType.text_task, config=adapter_config)
        # optionally load a pre-trained language adapter
        if adapter_args.load_lang_adapter:
            # resolve the language adapter config
            lang_adapter_config = AdapterConfig.load(
                adapter_args.lang_adapter_config,
                non_linearity=adapter_args.lang_adapter_non_linearity,
                reduction_factor=adapter_args.lang_adapter_reduction_factor,
            )
            # load the language adapter from the Hub
            lang_adapter_name = model.load_adapter(
                adapter_args.load_lang_adapter,
                AdapterType.text_lang,
                config=lang_adapter_config,
                load_as=adapter_args.language,
            )
        else:
            lang_adapter_name = None
        # Freeze all model weights except those of this adapter
        model.train_adapter([task_name])
        # Set the adapters to be used in every forward pass
        if lang_adapter_name:
            model.set_active_adapters([lang_adapter_name, task_name])
        else:
            model.set_active_adapters([task_name])

    # Get datasets
    train_dataset = GlueDataset(data_args, tokenizer=tokenizer) if training_args.do_train else None
    eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev") if training_args.do_eval else None
    test_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="test") if training_args.do_predict else None

    def compute_metrics(p: EvalPrediction) -> Dict:
        if output_mode == "classification":
            preds = np.argmax(p.predictions, axis=1)
        elif output_mode == "regression":
            preds = np.squeeze(p.predictions)
        return glue_compute_metrics(data_args.task_name, preds, p.label_ids)

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        do_save_full_model=not adapter_args.train_adapter,
        do_save_adapters=adapter_args.train_adapter,
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path
            if os.path.isdir(model_args.model_name_or_path)
            else None
        )
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_master():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args, task_name="mnli-mm")
            eval_datasets.append(GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="dev"))

        for eval_dataset in eval_datasets:
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(
                training_args.output_dir, f"eval_results_{eval_dataset.args.task_name}.txt"
            )
            if trainer.is_world_master():
                with open(output_eval_file, "w") as writer:
                    logger.info("***** Eval results {} *****".format(eval_dataset.args.task_name))
                    for key, value in eval_result.items():
                        logger.info("  %s = %s", key, value)
                        writer.write("%s = %s\n" % (key, value))

            eval_results.update(eval_result)

    if training_args.do_predict:
        logging.info("*** Test ***")
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            mnli_mm_data_args = dataclasses.replace(data_args, task_name="mnli-mm")
            test_datasets.append(GlueDataset(mnli_mm_data_args, tokenizer=tokenizer, mode="test"))

        for test_dataset in test_datasets:
            predictions = trainer.predict(test_dataset=test_dataset).predictions
            if output_mode == "classification":
                predictions = np.argmax(predictions, axis=1)

            output_test_file = os.path.join(
                training_args.output_dir, f"test_results_{test_dataset.args.task_name}.txt"
            )
            if trainer.is_world_master():
                with open(output_test_file, "w") as writer:
                    logger.info("***** Test results {} *****".format(test_dataset.args.task_name))
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if output_mode == "regression":
                            writer.write("%d\t%3.3f\n" % (index, item))
                        else:
                            item = test_dataset.get_labels()[item]
                            writer.write("%d\t%s\n" % (index, item))

    return eval_results
for split in ["train", "val", "test"]:
    d = {
        "text": dataset_dict[f"{split}_text"],
        "labels": dataset_dict[f"{split}_labels"],
    }
    if split == "val":
        split = "validation"  # name mismatch between the xlm-t dataset and the datasets library
    dataset[split] = Dataset.from_dict(d)

# --- MODEL ---
config = AutoConfig.from_pretrained(
    MODEL,
    num_labels=NUM_LABELS,
)
model = AutoModelWithHeads.from_pretrained(
    MODEL,
    config=config,
)

# Add a new adapter
adapter_name = f"adapter_{UNIQUE_NAME}"
# adapter_name = "xlm-t-sentiment"
model.add_adapter(adapter_name, AdapterType.text_task)

# Add a matching classification head
model.add_classification_head(
    adapter_name,
    num_labels=NUM_LABELS,
    id2label={0: "Neg", 1: "Neu", 2: "Pos"},
)

# Activate the adapter
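# The original snippet breaks off after the comment above. A plausible
# completion (an assumption, not part of the source): in adapter-transformers,
# train_adapter() both freezes the base model weights and activates the new
# adapter for training.
model.train_adapter(adapter_name)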
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, TrainingArguments, MultiLingAdapterArguments)
    )

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, adapter_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1])
        )
    else:
        model_args, data_args, training_args, adapter_args = parser.parse_args_into_dataclasses()

    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if is_main_process(training_args.local_rank) else logging.WARN,
    )

    # Log a small summary on each process:
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
    logger.info(f"Training/evaluation parameters {training_args}")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Get the datasets: you can either provide your own CSV/JSON training and evaluation files (see below)
    # or specify a GLUE benchmark task (the dataset will be downloaded automatically from the datasets Hub).
    #
    # For CSV/JSON files, this script will use as labels the column called 'label' and as pair of sentences the
    # sentences in columns called 'sentence1' and 'sentence2' if such columns exist, or the first two columns
    # not named 'label' if at least two columns are provided.
    #
    # If the CSVs/JSONs contain only one non-label column, the script does single sentence classification on this
    # single column. You can easily tweak this behavior (see below).
    #
    # In distributed training, the load_dataset function guarantees that only one local process can concurrently
    # download the dataset.
    if data_args.task_name is not None:
        # Downloading and loading a dataset from the hub.
        datasets = load_dataset("glue", data_args.task_name)
    elif data_args.train_file.endswith(".csv"):
        # Loading a dataset from local csv files
        datasets = load_dataset(
            "csv", data_files={"train": data_args.train_file, "validation": data_args.validation_file}
        )
    else:
        # Loading a dataset from local json files
        datasets = load_dataset(
            "json", data_files={"train": data_args.train_file, "validation": data_args.validation_file}
        )
    # See more about loading any type of standard or custom dataset at
    # https://huggingface.co/docs/datasets/loading_datasets.html.

    # Labels
    label_list = None
    if data_args.task_name is not None:
        is_regression = data_args.task_name == "stsb"
        if not is_regression:
            label_list = datasets["train"].features["label"].names
            num_labels = len(label_list)
        else:
            num_labels = 1
    else:
        # Trying to have good defaults here, don't hesitate to tweak to your needs.
        is_regression = datasets["train"].features["label"].dtype in ["float32", "float64"]
        if is_regression:
            num_labels = 1
        else:
            # A useful fast method:
            # https://huggingface.co/docs/datasets/package_reference/main_classes.html#datasets.Dataset.unique
            label_list = datasets["train"].unique("label")
            label_list.sort()  # Let's sort it for determinism
            num_labels = len(label_list)

    # Load pretrained model and tokenizer
    #
    # In distributed training, the .from_pretrained methods guarantee that only one local process can concurrently
    # download model & vocab.
    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
        cache_dir=model_args.cache_dir,
    )
    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast_tokenizer,
    )
    # We use the AutoModelWithHeads class here for better adapter support.
    model = AutoModelWithHeads.from_pretrained(
        model_args.model_name_or_path,
        from_tf=bool(".ckpt" in model_args.model_name_or_path),
        config=config,
        cache_dir=model_args.cache_dir,
    )
    model.add_classification_head(
        data_args.task_name or "glue",
        num_labels=num_labels,
        id2label={i: v for i, v in enumerate(label_list)} if num_labels > 0 else None,
    )

    # Setup adapters
    if adapter_args.train_adapter:
        task_name = data_args.task_name or "glue"
        # check if adapter already exists, otherwise add it
        if task_name not in model.config.adapters:
            # resolve the adapter config
            adapter_config = AdapterConfig.load(
                adapter_args.adapter_config,
                non_linearity=adapter_args.adapter_non_linearity,
                reduction_factor=adapter_args.adapter_reduction_factor,
            )
            # load a pre-trained adapter from the Hub if specified
            if adapter_args.load_adapter:
                model.load_adapter(
                    adapter_args.load_adapter,
                    config=adapter_config,
                    load_as=task_name,
                )
            # otherwise, add a fresh adapter
            else:
                model.add_adapter(task_name, config=adapter_config)
        # optionally load a pre-trained language adapter
        if adapter_args.load_lang_adapter:
            # resolve the language adapter config
            lang_adapter_config = AdapterConfig.load(
                adapter_args.lang_adapter_config,
                non_linearity=adapter_args.lang_adapter_non_linearity,
                reduction_factor=adapter_args.lang_adapter_reduction_factor,
            )
            # load the language adapter from the Hub
            lang_adapter_name = model.load_adapter(
                adapter_args.load_lang_adapter,
                config=lang_adapter_config,
                load_as=adapter_args.language,
            )
        else:
            lang_adapter_name = None
        # Freeze all model weights except those of this adapter
        model.train_adapter([task_name])
        # Set the adapters to be used in every forward pass
        if lang_adapter_name:
            model.set_active_adapters([lang_adapter_name, task_name])
        else:
            model.set_active_adapters([task_name])
    else:
        if adapter_args.load_adapter or adapter_args.load_lang_adapter:
            raise ValueError(
                "Adapters can only be loaded in adapter training mode. "
                "Use --train_adapter to enable adapter training."
            )

    # Preprocessing the datasets
    if data_args.task_name is not None:
        sentence1_key, sentence2_key = task_to_keys[data_args.task_name]
    else:
        # Again, we try to have some nice defaults but don't hesitate to tweak to your use case.
        non_label_column_names = [name for name in datasets["train"].column_names if name != "label"]
        if "sentence1" in non_label_column_names and "sentence2" in non_label_column_names:
            sentence1_key, sentence2_key = "sentence1", "sentence2"
        else:
            if len(non_label_column_names) >= 2:
                sentence1_key, sentence2_key = non_label_column_names[:2]
            else:
                sentence1_key, sentence2_key = non_label_column_names[0], None

    # Padding strategy
    if data_args.pad_to_max_length:
        padding = "max_length"
        max_length = data_args.max_seq_length
    else:
        # We will pad later, dynamically at batch creation, to the max sequence length in each batch
        padding = False
        max_length = None

    # Some models have set the order of the labels to use, so let's make sure we do use it.
    label_to_id = None
    if (
        model.config.label2id != PretrainedConfig(num_labels=num_labels).label2id
        and data_args.task_name is not None
        and not is_regression
    ):
        # Some have all caps in their config, some don't.
        label_name_to_id = {k.lower(): v for k, v in model.config.label2id.items()}
        if list(sorted(label_name_to_id.keys())) == list(sorted(label_list)):
            label_to_id = {i: label_name_to_id[label_list[i]] for i in range(num_labels)}
        else:
            logger.warning(
                "Your model seems to have been trained with labels, but they don't match the dataset: "
                f"model labels: {list(sorted(label_name_to_id.keys()))}, "
                f"dataset labels: {list(sorted(label_list))}."
                "\nIgnoring the model labels as a result."
            )
    elif data_args.task_name is None:
        label_to_id = {v: i for i, v in enumerate(label_list)}

    def preprocess_function(examples):
        # Tokenize the texts
        args = (
            (examples[sentence1_key],)
            if sentence2_key is None
            else (examples[sentence1_key], examples[sentence2_key])
        )
        result = tokenizer(*args, padding=padding, max_length=max_length, truncation=True)

        # Map labels to IDs (not necessary for GLUE tasks)
        if label_to_id is not None and "label" in examples:
            result["label"] = [label_to_id[l] for l in examples["label"]]
        return result

    datasets = datasets.map(
        preprocess_function, batched=True, load_from_cache_file=not data_args.overwrite_cache
    )

    train_dataset = datasets["train"]
    eval_dataset = datasets["validation_matched" if data_args.task_name == "mnli" else "validation"]
    if data_args.task_name is not None:
        test_dataset = datasets["test_matched" if data_args.task_name == "mnli" else "test"]

    # Log a few random samples from the training set:
    for index in random.sample(range(len(train_dataset)), 3):
        logger.info(f"Sample {index} of the training set: {train_dataset[index]}.")

    # Get the metric function
    if data_args.task_name is not None:
        metric = load_metric("glue", data_args.task_name)
    # TODO: When datasets metrics include regular accuracy, make an else here and remove special branch from
    # compute_metrics

    # You can define your custom compute_metrics function. It takes an `EvalPrediction` object (a namedtuple
    # with a predictions and label_ids field) and has to return a dictionary string to float.
    def compute_metrics(p: EvalPrediction):
        preds = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
        preds = np.squeeze(preds) if is_regression else np.argmax(preds, axis=1)
        if data_args.task_name is not None:
            result = metric.compute(predictions=preds, references=p.label_ids)
            if len(result) > 1:
                result["combined_score"] = np.mean(list(result.values())).item()
            return result
        elif is_regression:
            return {"mse": ((preds - p.label_ids) ** 2).mean().item()}
        else:
            return {"accuracy": (preds == p.label_ids).astype(np.float32).mean().item()}

    # Initialize our Trainer
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset if training_args.do_eval else None,
        compute_metrics=compute_metrics,
        tokenizer=tokenizer,
        # Data collator will default to DataCollatorWithPadding, so we change it if we already did the padding.
        data_collator=default_data_collator if data_args.pad_to_max_length else None,
        do_save_full_model=not adapter_args.train_adapter,
        do_save_adapters=adapter_args.train_adapter,
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path
            if os.path.isdir(model_args.model_name_or_path)
            else None
        )
        trainer.save_model()  # Saves the tokenizer too for easy upload

    # Evaluation
    eval_results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        eval_datasets = [eval_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            eval_datasets.append(datasets["validation_mismatched"])

        for eval_dataset, task in zip(eval_datasets, tasks):
            eval_result = trainer.evaluate(eval_dataset=eval_dataset)

            output_eval_file = os.path.join(training_args.output_dir, f"eval_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_eval_file, "w") as writer:
                    logger.info(f"***** Eval results {task} *****")
                    for key, value in eval_result.items():
                        logger.info(f"  {key} = {value}")
                        writer.write(f"{key} = {value}\n")

            eval_results.update(eval_result)

    if training_args.do_predict:
        logger.info("*** Test ***")

        # Loop to handle MNLI double evaluation (matched, mis-matched)
        tasks = [data_args.task_name]
        test_datasets = [test_dataset]
        if data_args.task_name == "mnli":
            tasks.append("mnli-mm")
            test_datasets.append(datasets["test_mismatched"])

        for test_dataset, task in zip(test_datasets, tasks):
            # Removing the `label` column because it contains -1 and Trainer won't like that.
            test_dataset.remove_columns_("label")
            predictions = trainer.predict(test_dataset=test_dataset).predictions
            predictions = np.squeeze(predictions) if is_regression else np.argmax(predictions, axis=1)

            output_test_file = os.path.join(training_args.output_dir, f"test_results_{task}.txt")
            if trainer.is_world_process_zero():
                with open(output_test_file, "w") as writer:
                    logger.info(f"***** Test results {task} *****")
                    writer.write("index\tprediction\n")
                    for index, item in enumerate(predictions):
                        if is_regression:
                            writer.write(f"{index}\t{item:3.3f}\n")
                        else:
                            item = label_list[item]
                            writer.write(f"{index}\t{item}\n")

    return eval_results
        data_dir=data_path,
        tokenizer=tokenizer,
        task=task_name,
        max_seq_length=max_seq_length,
        overwrite_cache=overwrite_tokenizer,
        mode=Split.test,
    )
    if training_args.do_eval
    else None
)

config = BertConfig.from_pretrained(
    model_name_or_path,
    num_labels=num_labels,
    finetuning_task=task_name,
    # cache_dir=cache_dir,
)
model = AutoModelWithHeads.from_pretrained(model_name_or_path, config=config)
model.add_multiple_choice_head("csqa", num_choices=5)

adapter_config = AdapterConfig.load(adapter_config_path)
model.load_adapter(adapter_name_or_path, "text_task", config=adapter_config)
# model.train_adapter([adapter_name_or_path])
model.set_active_adapters([[adapter_name_or_path]])


# Metric
def simple_accuracy(preds, labels):
    return (preds == labels).mean()


def compute_metrics(p: EvalPrediction):
    preds = np.argmax(p.predictions, axis=1)
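    # The original snippet is cut off after computing `preds`. A plausible
    # completion (an assumption, not part of the source), reusing the
    # simple_accuracy helper defined above:
    return {"acc": simple_accuracy(preds, p.label_ids)}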
def main():
    # See all possible arguments in src/transformers/training_args.py
    # or by passing the --help flag to this script.
    # We now keep distinct sets of args, for a cleaner separation of concerns.
    parser = HfArgumentParser(
        (ModelArguments, DataTrainingArguments, UDTrainingArguments, MultiLingAdapterArguments)
    )

    if len(sys.argv) == 2 and sys.argv[1].endswith(".json"):
        # If we pass only one argument to the script and it's the path to a json file,
        # let's parse it to get our arguments.
        model_args, data_args, training_args, adapter_args = parser.parse_json_file(
            json_file=os.path.abspath(sys.argv[1])
        )
    else:
        (
            model_args,
            data_args,
            training_args,
            adapter_args,
        ) = parser.parse_args_into_dataclasses()

    if (
        os.path.exists(training_args.output_dir)
        and os.listdir(training_args.output_dir)
        and training_args.do_train
        and not training_args.overwrite_output_dir
    ):
        raise ValueError(
            f"Output directory ({training_args.output_dir}) already exists and is not empty. "
            "Use --overwrite_output_dir to overcome."
        )

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if training_args.local_rank in [-1, 0] else logging.WARN,
    )
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        training_args.local_rank,
        training_args.device,
        training_args.n_gpu,
        bool(training_args.local_rank != -1),
        training_args.fp16,
    )
    logger.info("Training/evaluation parameters %s", training_args)

    # Set seed
    set_seed(training_args.seed)

    # Prepare for the UD dependency parsing task
    labels = UD_HEAD_LABELS
    label_map: Dict[int, str] = {i: label for i, label in enumerate(labels)}
    num_labels = len(labels)

    config = AutoConfig.from_pretrained(
        model_args.config_name if model_args.config_name else model_args.model_name_or_path,
        num_labels=num_labels,
        id2label=label_map,
        label2id={label: i for i, label in enumerate(labels)},
        cache_dir=model_args.cache_dir,
        pad_token_id=-1,
    )

    if model_args.is_japanese:
        assert model_args.mecab_dir is not None
        assert model_args.mecab_dic_dir is not None

    tokenizer = AutoTokenizer.from_pretrained(
        model_args.tokenizer_name if model_args.tokenizer_name else model_args.model_name_or_path,
        cache_dir=model_args.cache_dir,
        use_fast=model_args.use_fast,
        do_lower_case=model_args.do_lower_case,
        add_prefix_space=True,  # Used e.g. for RoBERTa
        mecab_kwargs={"mecab_option": f"-r {model_args.mecab_dir} -d {model_args.mecab_dic_dir}"}
        if model_args.is_japanese
        else None,
    )

    # The task name (with prefix)
    task_name = "ud_" + data_args.task_name
    language = adapter_args.language

    model = AutoModelWithHeads.from_pretrained(
        model_args.model_name_or_path,
        config=config,
        cache_dir=model_args.cache_dir,
    )
    model.add_dependency_parsing_head(
        task_name,
        num_labels=num_labels,
        id2label=label_map,
    )

    if model_args.leave_out_twelvth:
        logger.info("Leaving out layer 12")
        leave_out = [11]
    else:
        leave_out = []

    # Setup adapters
    if adapter_args.train_adapter:
        # check if adapter already exists, otherwise add it
        if task_name not in model.config.adapters:
            # resolve the adapter config
            adapter_config = AdapterConfig.load(
                adapter_args.adapter_config,
                non_linearity=adapter_args.adapter_non_linearity,
                reduction_factor=adapter_args.adapter_reduction_factor,
                leave_out=leave_out,
            )
            # load a pre-trained adapter from the Hub if specified
            if adapter_args.load_adapter:
                model.load_adapter(
                    adapter_args.load_adapter,
                    config=adapter_config,
                    load_as=task_name,
                    leave_out=leave_out,
                )
            # otherwise, add a fresh adapter
            else:
                model.add_adapter(task_name, config=adapter_config)
        # optionally load a pre-trained language adapter
        if adapter_args.load_lang_adapter:
            # resolve the language adapter config
            lang_adapter_config = AdapterConfig.load(
                adapter_args.lang_adapter_config,
                non_linearity=adapter_args.lang_adapter_non_linearity,
                reduction_factor=adapter_args.lang_adapter_reduction_factor,
                leave_out=leave_out,
            )
            # load the language adapter from the Hub
            lang_adapter_name = model.load_adapter(
                adapter_args.load_lang_adapter,
                config=lang_adapter_config,
                load_as=adapter_args.language,
                leave_out=leave_out,
            )
        else:
            lang_adapter_name = None
        # Freeze all model weights except those of this adapter
        model.train_adapter([task_name])
        # Set the adapters to be used in every forward pass
        if lang_adapter_name:
            model.set_active_adapters(ac.Stack(lang_adapter_name, task_name))
        else:
            model.set_active_adapters(task_name)
    else:
        if adapter_args.load_adapter or adapter_args.load_lang_adapter:
            raise ValueError(
                "Adapters can only be loaded in adapter training mode. "
                "Use --train_adapter to enable adapter training."
            )

    # Load and preprocess dataset
    dataset = load_dataset("universal_dependencies", data_args.task_name)
    dataset = preprocess_dataset(dataset, tokenizer, labels, data_args, pad_token_id=-1)

    # Initialize our Trainer
    # HACK: Set this attribute to False to prevent label columns from being deleted
    training_args.remove_unused_columns = False
    trainer_class = DependencyParsingAdapterTrainer if adapter_args.train_adapter else DependencyParsingTrainer
    trainer = trainer_class(
        model=model,
        args=training_args,
        train_dataset=dataset["train"],
        eval_dataset=dataset["validation"],
    )

    # Training
    if training_args.do_train:
        trainer.train(
            model_path=model_args.model_name_or_path
            if os.path.isdir(model_args.model_name_or_path)
            else None
        )
        trainer.save_model()
        # For convenience, we also re-save the tokenizer to the same directory,
        # so that you can share your model easily on huggingface.co/models =)
        if trainer.is_world_process_zero():
            tokenizer.save_pretrained(training_args.output_dir)

    # Evaluation
    results = {}
    if training_args.do_eval:
        logger.info("*** Evaluate ***")

        result = trainer.evaluate()

        output_eval_file = os.path.join(training_args.output_dir, "eval_results.txt")
        if trainer.is_world_process_zero():
            with open(output_eval_file, "w") as writer:
                logger.info("***** Eval results *****")
                for key, value in result.items():
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))

        results.update(result)

    # Predict
    if training_args.do_predict:
        logging.info("*** Test ***")

        if training_args.store_best_model:
            logger.info("Loading best model for predictions.")

            if adapter_args.train_adapter:
                if language:
                    lang_adapter_config = AdapterConfig.load(
                        config="pfeiffer", non_linearity="gelu", reduction_factor=2, leave_out=leave_out
                    )
                    model.load_adapter(
                        os.path.join(training_args.output_dir, "best_model", language)
                        if training_args.do_train
                        else adapter_args.load_lang_adapter,
                        config=lang_adapter_config,
                        load_as=language,
                        leave_out=leave_out,
                    )
                task_adapter_config = AdapterConfig.load(
                    config="pfeiffer", non_linearity="gelu", reduction_factor=16, leave_out=leave_out
                )
                model.load_adapter(
                    os.path.join(training_args.output_dir, "best_model", task_name)
                    if training_args.do_train
                    else adapter_args.load_adapter,
                    config=task_adapter_config,
                    load_as=task_name,
                    leave_out=leave_out,
                )
                if language:
                    model.set_active_adapters(ac.Stack(lang_adapter_name, task_name))
                else:
                    model.set_active_adapters(task_name)
                model.to(training_args.device)
            else:
                trainer.model = AutoModelWithHeads.from_pretrained(
                    os.path.join(training_args.output_dir, "best_model"),
                    from_tf=bool(".ckpt" in model_args.model_name_or_path),
                    config=config,
                    cache_dir=model_args.cache_dir,
                ).to(training_args.device)

        predictions, _, metrics = trainer.predict(dataset["test"])

        output_test_results_file = os.path.join(training_args.output_dir, "test_results.txt")
        if trainer.is_world_process_zero():
            with open(output_test_results_file, "w") as writer:
                for key, value in metrics.items():
                    logger.info("  %s = %s", key, value)
                    writer.write("%s = %s\n" % (key, value))

    return results