def test_training_arguments_are_left_untouched(self):
    """Training must not mutate the TrainingArguments the trainer was given."""
    trainer = get_regression_trainer()
    trainer.train()
    args = TrainingArguments("./regression")
    dict1, dict2 = args.to_dict(), trainer.args.to_dict()
    for key in dict1.keys():
        # Logging dir can be slightly different as it defaults to something
        # with the current time, so comparing it across two instances created
        # at different moments is flaky — skip it.
        if key != "logging_dir":
            self.assertEqual(dict1[key], dict2[key])
def test_training_arguments_are_left_untouched(self):
    """Verify that running training leaves the trainer's TrainingArguments unchanged."""
    trainer = get_regression_trainer()
    trainer.train()
    reference = TrainingArguments("./regression").to_dict()
    actual = trainer.args.to_dict()
    for key, expected in reference.items():
        # Logging dir can be slightly different as they default to something with the time.
        if key == "logging_dir":
            continue
        self.assertEqual(expected, actual[key])
def generate_training_args(args, inoculation_step):
    """Build a fully configured ``TrainingArguments`` for one inoculation step.

    Copies the relevant hyper-parameters from the parsed CLI ``args`` onto a
    fresh ``TrainingArguments``, derives a descriptive run name, round-trips
    the config through ``HfArgumentParser.parse_dict`` so the final object is
    built via the canonical parsing path, then sets the seed and configures
    logging for the run.

    Args:
        args: parsed command-line namespace carrying the training
            hyper-parameters read below.
        inoculation_step: identifier appended to ``args.output_dir`` so each
            step writes into its own sub-directory.

    Returns:
        The configured ``TrainingArguments`` instance.

    Raises:
        ValueError: if neither ``args.model_path`` nor ``args.model_type``
            is provided.
    """
    import datetime  # kept function-local, as in the original

    training_args = TrainingArguments("tmp_trainer")
    training_args.no_cuda = args.no_cuda
    training_args.seed = args.seed
    training_args.do_train = args.do_train
    training_args.do_eval = args.do_eval
    training_args.output_dir = os.path.join(args.output_dir, str(inoculation_step) + "-sample")
    training_args.evaluation_strategy = args.evaluation_strategy  # evaluation is done after each epoch
    training_args.metric_for_best_model = args.metric_for_best_model
    training_args.greater_is_better = args.greater_is_better
    training_args.logging_dir = args.logging_dir
    training_args.task_name = args.task_name
    training_args.learning_rate = args.learning_rate
    training_args.per_device_train_batch_size = args.per_device_train_batch_size
    training_args.per_device_eval_batch_size = args.per_device_eval_batch_size
    # this is the maximum num_train_epochs, we set this to be 100.
    training_args.num_train_epochs = args.num_train_epochs
    training_args.eval_steps = args.eval_steps
    training_args.logging_steps = args.logging_steps
    training_args.load_best_model_at_end = args.load_best_model_at_end
    if args.save_total_limit != -1:  # only set if it is specified
        training_args.save_total_limit = args.save_total_limit

    # Derive a descriptive, date-stamped run name for experiment tracking.
    now = datetime.datetime.now()
    date_time = "{}-{}".format(now.month, now.day)
    run_name = "{0}_{1}_{2}_{3}_mlen_{4}_lr_{5}_seed_{6}_metrics_{7}".format(
        args.run_name, args.task_name, args.model_type, date_time,
        args.max_seq_length, args.learning_rate, args.seed, args.metric_for_best_model
    )
    training_args.run_name = run_name

    # for PR: parse_dict rejects the private "_n_gpu" key, so rename it to
    # "n_gpu" before round-tripping through the parser.
    training_args_dict = training_args.to_dict()
    training_args_dict["n_gpu"] = training_args_dict.pop("_n_gpu")
    hf_parser = HfArgumentParser((TrainingArguments))
    training_args = hf_parser.parse_dict(training_args_dict)[0]

    if args.model_path == "":
        args.model_path = args.model_type
        if args.model_type == "":
            # Was `assert False` — asserts are stripped under `python -O`,
            # so raise an explicit error for this input-validation failure.
            raise ValueError("You have to provide one of model_path or model_type.")

    # Set seed before initializing model.
    set_seed(training_args.seed)

    # Setup logging
    logging.basicConfig(
        format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
        datefmt="%m/%d/%Y %H:%M:%S",
        level=logging.INFO if is_main_process(training_args.local_rank) else logging.WARN,
    )
    # Log on each process the small summary:
    # (separator added between the two halves; the original concatenation
    # produced "n_gpu: 1distributed training" with no space)
    logger.warning(
        f"Process rank: {training_args.local_rank}, device: {training_args.device}, n_gpu: {training_args.n_gpu}, "
        + f"distributed training: {bool(training_args.local_rank != -1)}, 16-bits training: {training_args.fp16}"
    )
    # Set the verbosity to info of the Transformers logger (on main process only):
    if is_main_process(training_args.local_rank):
        transformers.utils.logging.set_verbosity_info()
        transformers.utils.logging.enable_default_handler()
        transformers.utils.logging.enable_explicit_format()
    logger.info(f"Training/evaluation parameters {training_args}")
    return training_args
def test_training_arguments_are_left_untouched(self):
    """Training must not mutate the TrainingArguments the trainer was given."""
    trainer = get_regression_trainer()
    trainer.train()
    args = TrainingArguments("./regression")
    dict1, dict2 = args.to_dict(), trainer.args.to_dict()
    # Whole-dict equality is flaky: logging_dir defaults to a timestamped
    # path, so two instances created at different moments legitimately
    # differ on it. Compare key-by-key and skip it instead.
    for key in dict1.keys():
        if key != "logging_dir":
            self.assertEqual(dict1[key], dict2[key])
# NOTE(review): this span is the tail of a TrainingArguments(...) call whose
# opening (and earlier keyword arguments) is outside the visible chunk.
num_train_epochs=args.epochs,
evaluation_strategy="steps",  # evaluate every eval_steps rather than per epoch
eval_steps=EVAL_STEPS,
save_steps=EVAL_STEPS,  # checkpointing kept in lockstep with evaluation
logging_steps=100,
lr_scheduler_type="linear",
warmup_steps=args.warmup_steps,
learning_rate=args.learning_rate,
per_device_train_batch_size=args.batch_size,
per_device_eval_batch_size=args.batch_size,  # same batch size for train and eval
gradient_accumulation_steps=args.gradient_accumulation_steps,
weight_decay=0,  # weight decay disabled for this run
fp16=args.fp16,
seed=args.seed,
)
# Echo the resolved training configuration and persist it alongside the run
# outputs for reproducibility.
pprint(training_args.to_dict())
with open(Path(args.output_dir) / "run_parameters.txt", "w") as f:
    pprint(training_args.to_dict(), f)
# Build the Trainer and launch training. PrintExampleCallback is defined
# elsewhere in the project — presumably it prints sample outputs during
# training; confirm against its definition.
trainer = Trainer(
    model=model,
    tokenizer=tokenizer,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=valid_dataset,
    callbacks=[PrintExampleCallback],
)
trainer.train()
# Detect whether we are running inside an IPython/Jupyter session: if so,
# enable inline matplotlib and parse with an empty argv (notebooks supply no
# CLI arguments); otherwise parse the real command line.
try:
    get_ipython().run_line_magic('matplotlib', 'inline')
    args = parser.parse_args([])
except Exception:
    # Was a bare `except:`, which also swallowed SystemExit and
    # KeyboardInterrupt; Exception is the narrowest safe catch here since
    # get_ipython may be undefined (NameError) or the magic itself may fail.
    args = parser.parse_args()

# os.environ["WANDB_DISABLED"] = "false" if args.is_tensorboard else "true"
os.environ["TRANSFORMERS_CACHE"] = "../huggingface_cache/"
# if cache does not exist, create one (exist_ok avoids the exists()/makedirs
# check-then-act race of the original)
os.makedirs(os.environ["TRANSFORMERS_CACHE"], exist_ok=True)

training_args = TrainingArguments("tmp_trainer")
training_args.no_cuda = args.no_cuda
training_args.per_device_eval_batch_size = args.per_device_eval_batch_size
training_args.per_gpu_eval_batch_size = args.per_device_eval_batch_size

# parse_dict rejects the private "_n_gpu" key, so rename it to "n_gpu"
# before round-tripping through HfArgumentParser.
training_args_dict = training_args.to_dict()
training_args_dict["n_gpu"] = training_args_dict.pop("_n_gpu")
hf_parser = HfArgumentParser((TrainingArguments))
training_args = hf_parser.parse_dict(training_args_dict)[0]

TASK_CONFIG = {"classification": ("text", None)}

# Load pretrained model and tokenizer
NUM_LABELS = 3
MAX_SEQ_LEN = 128
config = AutoConfig.from_pretrained(
    args.model_type,
    num_labels=3,  # NOTE(review): hard-coded 3 duplicates NUM_LABELS above — consider unifying
    finetuning_task=args.task_name,
    cache_dir=args.cache_dir,
)