def main(config):
    os.environ["WANDB_WATCH"] = "False"  # To disable Huggingface logging
    auto_generated_dir = os.getcwd()
    log.info(f"Work dir: {auto_generated_dir}")
    os.chdir(hydra.utils.get_original_cwd())

    wandb_run = init_wandb(auto_generated_dir, config)

    args_train = TrainingArguments(output_dir=auto_generated_dir)
    args_train = update_config(args_train, config.training)

    args_data = DataTrainingArguments(task_name=config.data.task_name, data_dir=config.data.data_dir)
    args_data = update_config(args_data, config.data)

    train_eval_glue_model(config, args_train, args_data, auto_generated_dir)

def test_custom_optimizer(self):
    train_dataset = RegressionDataset()
    args = TrainingArguments("./regression")
    model = RegressionModel()
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1.0)
    trainer = Trainer(model, args, train_dataset=train_dataset, optimizers=(optimizer, lr_scheduler))
    trainer.train()

    self.assertTrue(torch.abs(trainer.model.a - 1.8950) < 1e-4)
    self.assertTrue(torch.abs(trainer.model.b - 2.5656) < 1e-4)
    self.assertEqual(trainer.optimizer.state_dict()["param_groups"][0]["lr"], 1.0)

def load_training_arguments():
    training_arguments = TrainingArguments(
        output_dir=recognizer_dir,
        group_by_length=True,
        per_device_train_batch_size=16,
        gradient_accumulation_steps=2,
        evaluation_strategy='steps',
        num_train_epochs=30,
        gradient_checkpointing=True,
        fp16=True,
        save_steps=400,
        eval_steps=400,
        logging_steps=400,
        learning_rate=3e-4,
        warmup_steps=500,
        save_total_limit=2,
        push_to_hub=False,
    )
    return training_arguments

def _get_training_args(dataclass_args, output_path):
    """
    :param dataclass_args: a dataclass of arguments for training
    :param output_path: a string path to a temporary directory
    :return: a TrainingArguments object
    """
    return TrainingArguments(
        output_dir=output_path,
        learning_rate=dataclass_args.learning_rate,
        weight_decay=dataclass_args.weight_decay,
        adam_beta1=dataclass_args.adam_beta1,
        adam_beta2=dataclass_args.adam_beta2,
        adam_epsilon=dataclass_args.adam_epsilon,
        max_grad_norm=dataclass_args.max_grad_norm,
        num_train_epochs=dataclass_args.num_train_epochs,
        report_to=["none"],
        per_device_train_batch_size=dataclass_args.batch_size,
    )

def test_model_init(self):
    train_dataset = RegressionDataset()
    args = TrainingArguments("./regression", learning_rate=0.1)
    trainer = Trainer(args=args, train_dataset=train_dataset, model_init=lambda: RegressionModel())
    trainer.train()
    self.check_trained_model(trainer.model)

    # Re-training should restart from scratch and thus lead to the same results.
    trainer.train()
    self.check_trained_model(trainer.model)

    # Re-training should restart from scratch, lead to the same results, and use the new seed.
    trainer.args.seed = 314
    trainer.train()
    self.check_trained_model(trainer.model, alternate_seed=True)

def get_regression_trainer(a=0, b=0, train_len=64, eval_len=64, **kwargs):
    train_dataset = RegressionDataset(length=train_len)
    eval_dataset = RegressionDataset(length=eval_len)
    model = RegressionModel(a, b)
    compute_metrics = kwargs.pop("compute_metrics", None)
    data_collator = kwargs.pop("data_collator", None)
    optimizers = kwargs.pop("optimizers", (None, None))
    args = TrainingArguments("./regression", **kwargs)
    return Trainer(
        model,
        args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        optimizers=optimizers,
    )

def test_trainer_eval_mrpc(self):
    MODEL_ID = "bert-base-cased-finetuned-mrpc"
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
    data_args = GlueDataTrainingArguments(
        task_name="mrpc", data_dir="./tests/fixtures/tests_samples/MRPC", overwrite_cache=True
    )
    eval_dataset = GlueDataset(data_args, tokenizer=tokenizer, mode="dev")

    training_args = TrainingArguments(output_dir="./examples", no_cuda=True)
    trainer = Trainer(model=model, args=training_args, eval_dataset=eval_dataset)
    result = trainer.evaluate()
    self.assertLess(result["eval_loss"], 0.2)

def train(self, num_train_epochs=10, learning_rate=1e-5, weight_decay=1e-2,
          per_device_train_batch_size=16, per_device_eval_batch_size=16):

    def compute_metrics(p):
        predictions, labels = p
        true_labels, true_predictions = self.process_pred_labels(predictions, labels)
        results = self.metric.compute(predictions=true_predictions, references=true_labels)
        return {
            "precision": results["overall_precision"],
            "recall": results["overall_recall"],
            "f1": results["overall_f1"],
            "accuracy": results["overall_accuracy"],
        }

    if self.pretrained:
        tokenized_datasets = self.datasets.map(self.tokenize_and_align_labels, batched=True)
        train_args = TrainingArguments(
            "{}/exp".format(self.classifier_dir),
            evaluation_strategy="epoch",
            learning_rate=learning_rate,
            per_device_train_batch_size=per_device_train_batch_size,
            per_device_eval_batch_size=per_device_eval_batch_size,
            num_train_epochs=num_train_epochs,
            weight_decay=weight_decay,
            load_best_model_at_end=True,
        )
        trainer = Trainer(
            self.model,
            train_args,
            train_dataset=tokenized_datasets["train"],
            eval_dataset=tokenized_datasets["validation"],
            data_collator=self.data_collator,
            tokenizer=self.tokenizer,
            compute_metrics=compute_metrics,
        )
        # fine-tune the model
        trainer.train()
        trainer.save_model(self.classifier_dir)
        print("Trainer is saved to ", self.classifier_dir)
    else:
        print("Classifier is disabled. No need to train!")

def train_model():
    set_seed(1)
    model_name = 'bert-base-uncased'
    max_length = 512
    tokenizer = BertTokenizerFast.from_pretrained(model_name, do_lower_case=True)

    (train_x, test_x, train_y, test_y), label_names = preprocess_dataset()
    train_encodings = tokenizer(train_x, truncation=True, padding=True, max_length=max_length)
    test_encodings = tokenizer(test_x, truncation=True, padding=True, max_length=max_length)
    train_dataset = TorchDataset(train_encodings, train_y)
    test_dataset = TorchDataset(test_encodings, test_y)

    model = BertForSequenceClassification.from_pretrained(model_name, num_labels=len(label_names))  # .to('cuda')

    training_arguments = TrainingArguments(
        output_dir=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'results'),
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=20,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'logs'),
        load_best_model_at_end=True,
        logging_steps=200,
        evaluation_strategy='steps',
    )

    trainer = Trainer(
        model=model,
        args=training_arguments,
        train_dataset=train_dataset,
        eval_dataset=test_dataset,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    trainer.evaluate()

    model_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'models', 'political_tweets_bert-base-uncased_3')
    model.save_pretrained(model_path)
    tokenizer.save_pretrained(model_path)

def main():
    args = parse_arguments()
    if args.input_model is None:
        model = GPT2LMHeadModel.from_pretrained("antoiloui/belgpt2")
    else:
        print('loading pre-trained model')
        model = GPT2LMHeadModel.from_pretrained(args.input_model)
    tokenizer = GPT2Tokenizer.from_pretrained("antoiloui/belgpt2")

    training_args = TrainingArguments(
        output_dir=args.output_dir + '_checkpoint',  # output directory
        num_train_epochs=3,                          # total number of training epochs
        per_device_train_batch_size=64,              # batch size per device during training
        warmup_steps=100,                            # number of warmup steps for the learning rate scheduler
        weight_decay=0.01,                           # strength of weight decay
        logging_dir='./logs_hyca',                   # directory for storing logs
        logging_steps=100,
    )

    special_tokens_dict = {
        'bos_token': '<BOS>',
        'eos_token': '<EOS>',
        'pad_token': '<PAD>',
    }
    tokenizer.add_special_tokens(special_tokens_dict)
    model.resize_token_embeddings(len(tokenizer))

    dataset = LineByLineTextDataset(tokenizer=tokenizer, file_path=args.input_file, block_size=32)
    data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
    )

    if args.input_model is not None:
        trainer.train(resume_from_checkpoint=args.input_model + '_checkpoint')
    else:
        trainer.train()
    model.save_pretrained(args.output_dir)

def train(X_train, y_train, y_column_name, model_name=None):
    eval_dataset = y_train[y_column_name]
    model_args = ModelArguments(model_name_or_path="distilbert-base-cased")
    global data_args
    data_args = DataTrainingArguments(task_name="mnli", data_dir="../../datasets/Newswire")
    num_labels = glue_tasks_num_labels[data_args.task_name]

    training_args = TrainingArguments(
        output_dir=model_name,
        overwrite_output_dir=True,
        do_train=True,
        do_eval=True,
        per_gpu_train_batch_size=32,
        per_gpu_eval_batch_size=128,
        num_train_epochs=1,
        logging_steps=500,
        logging_first_step=True,
        save_steps=1000,
        evaluate_during_training=True,
    )

    config = AutoConfig.from_pretrained(
        model_args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=data_args.task_name,
    )
    tokenizer = AutoTokenizer.from_pretrained(model_args.model_name_or_path)
    model = AutoModelForSequenceClassification.from_pretrained(
        model_args.model_name_or_path,
        config=config,
    )
    train_dataset = GlueDataset(data_args, tokenizer=tokenizer, limit_length=100_000)

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
    )
    trainer.train()

def get_trainer(train_dataset, collator, model):
    training_args = TrainingArguments(
        output_dir=f'output/bash',
        overwrite_output_dir=True,
        do_train=True,
        no_cuda=cfg('device') == 'cpu',
        num_train_epochs=cfg('epochs'),
        per_device_train_batch_size=cfg('batch_size'),
        gradient_accumulation_steps=cfg('grad_acc'),
        logging_steps=5,
        save_steps=0,
        seed=random.randint(0, 2**32 - 1),
    )
    trainer = MTrainer(
        model=model,
        args=training_args,
        data_collator=collator,
        train_dataset=train_dataset,
        prediction_loss_only=True,
    )
    return trainer

def get_regression_trainer(a=0, b=0, double_output=False, train_len=64, eval_len=64, **kwargs):
    label_names = kwargs.get("label_names", None)
    train_dataset = RegressionDataset(length=train_len, label_names=label_names)
    eval_dataset = RegressionDataset(length=eval_len, label_names=label_names)
    model = RegressionModel(a, b, double_output)
    compute_metrics = kwargs.pop("compute_metrics", None)
    data_collator = kwargs.pop("data_collator", None)
    optimizers = kwargs.pop("optimizers", (None, None))
    args = TrainingArguments("./regression", **kwargs)
    return Trainer(
        model,
        args,
        data_collator=data_collator,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        compute_metrics=compute_metrics,
        optimizers=optimizers,
    )

def get_train_args(lr=1e-4):
    train_root_path = Path('experiments/transformers/bert') / bert_model_size_type / tokenizer_type
    p = train_root_path / f'bert-{bert_model_size_type}-{tokenizer_type}-{data_source_name}-{vocab_size}-05-64'
    # p = train_root_path / f'bert-{bert_model_size_type}-{tokenizer_type}-{data_source_name}-{vocab_size}-05-128'
    # p = train_root_path / f'bert-{bert_model_size_type}-{tokenizer_type}-{data_source_name}-{vocab_size}-05'
    p.mkdir(parents=True, exist_ok=True)
    return TrainingArguments(
        output_dir=str(p),
        overwrite_output_dir=True,
        num_train_epochs=5,
        per_device_train_batch_size=48,
        gradient_accumulation_steps=5,
        save_total_limit=0,
        save_steps=0,
        learning_rate=lr,
        # fp16=True,
        dataloader_num_workers=8,
    )

def train_from_feedback(epochs=1):
    train_data = load_feedback()
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=epochs,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir='./logs',
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        compute_metrics=compute_metrics,
        train_dataset=train_data,
    )
    trainer.train()
    model.save_pretrained(SAVE_LOCATION)
    return "MODEL UPDATED"

def train_no_evaluate(self) -> None:
    """Train a BERT-based model on the training set, without evaluation."""
    assert self.train_dataset is not None, "train_file was not provided!"
    self.trainer = Trainer(
        model=self.model,
        args=TrainingArguments(
            do_train=True,
            output_dir=self.output_dir,
            overwrite_output_dir=True,
            num_train_epochs=self.num_train_epochs,
        ),
        train_dataset=self.train_dataset,
    )
    self.trainer.train(model_path=self.model_path)
    self.trainer.save_model()
    self.tokenizer.save_pretrained(self.trainer.args.output_dir)

def test_custom_optimizer(self):
    train_dataset = RegressionDataset()
    args = TrainingArguments("./regression")
    model = RegressionModel()
    optimizer = torch.optim.SGD(model.parameters(), lr=1.0)
    lr_scheduler = torch.optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=lambda x: 1.0)
    trainer = Trainer(model, args, train_dataset=train_dataset, optimizers=(optimizer, lr_scheduler))
    trainer.train()

    (a, b) = self.default_trained_model
    self.assertFalse(torch.allclose(trainer.model.a, a))
    self.assertFalse(torch.allclose(trainer.model.b, b))
    self.assertEqual(trainer.optimizer.state_dict()["param_groups"][0]["lr"], 1.0)

def test_parallel_training(self):
    tokenizer = AutoTokenizer.from_pretrained(self.tokenizer_name, use_fast=False)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    model = AutoModelWithHeads.from_config(self.config())

    model.add_adapter("mrpc1")
    model.add_adapter("mrpc2")
    self.add_head(model, "mrpc1", num_labels=2)
    self.add_head(model, "mrpc2", num_labels=3)
    model.active_adapters = Parallel("mrpc1", "mrpc2")
    model.train_adapter(Parallel("mrpc1", "mrpc2"))
    # model.eval()

    # all weights of the first adapter should be activated
    for k, v in filter_parameters(model, "adapters.mrpc1.").items():
        self.assertTrue(v.requires_grad, k)
    # all weights of the second adapter should be activated as well, since both are trained in parallel
    for k, v in filter_parameters(model, "adapters.mrpc2.").items():
        self.assertTrue(v.requires_grad, k)
    # weights of the base model should be frozen (check on some examples)
    for k, v in filter_parameters(model, "encoder.layer.0.attention").items():
        self.assertFalse(v.requires_grad, k)
    state_dict_pre = copy.deepcopy(model.state_dict())

    train_dataset = self.dataset(tokenizer)
    training_args = TrainingArguments(
        output_dir="./examples",
        do_train=True,
        learning_rate=0.1,
        max_steps=10,
        no_cuda=True,
    )

    # train
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )
    trainer.train()

    for (k1, v1), (k2, v2) in zip(state_dict_pre.items(), model.state_dict().items()):
        if "mrpc" in k1:
            self.assertFalse(torch.equal(v1, v2), k1)
        else:
            self.assertTrue(torch.equal(v1, v2))

def finetune(tag):
    """fine-tune gpt2 on the given caption dataset"""
    global tokenizer
    config = AutoConfig.from_pretrained('gpt2')
    model = AutoModelWithLMHead.from_pretrained('gpt2', config=config)
    block_size = tokenizer.max_len
    # https://github.com/huggingface/transformers/blob/448c467256332e4be8c122a159b482c1ef039b98/src/transformers/data/datasets/language_modeling.py
    try:
        train_dataset = TextDataset(
            tokenizer=tokenizer,
            file_path=f'./text/training_text/{tag}.txt',
            block_size=block_size,
            overwrite_cache=True,
        )
        data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)
        epochs = 8
        training_args = TrainingArguments(
            output_dir='logging/output',
            overwrite_output_dir=True,
            do_train=True,
            num_train_epochs=epochs,
            gradient_accumulation_steps=1,
            learning_rate=1e-4,
            per_gpu_train_batch_size=1,
            logging_steps=50,
            save_steps=0,
        )
        set_seed(training_args.seed)
        trainer = Trainer(
            model=model,
            args=training_args,
            data_collator=data_collator,
            train_dataset=train_dataset,
            prediction_loss_only=True,
        )
        with open(f'./logging/training_stats/training_{tag}.log', 'w') as log:
            sys.stdout = log
            trainer.train()
        sys.stdout = sys.__stdout__
        if not os.path.exists(f'./trained_models/{tag}/'):
            os.makedirs(f'./trained_models/{tag}/')
        # save the model
        model.save_pretrained(f'./trained_models/{tag}/')
        print('Done!')
    except AssertionError:
        print(f'The training text with the tag = {tag} does not exist. No model was trained!')

def trainings_run(self, model, tokenizer):
    # set up the dataset
    train_dataset = self.dataset(tokenizer)
    training_args = TrainingArguments(
        output_dir="./examples",
        do_train=True,
        learning_rate=0.1,
        max_steps=10,
        no_cuda=True,
        per_device_train_batch_size=2,
    )

    # train
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )
    trainer.train()

def model_trainer(args, test_dataset):
    # model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=4)
    model = RobertaForSequenceClassification.from_pretrained(
        args.model_path,  # e.g. 'anfs/bigdisc/rmya2/faiss_data/model_verdict_predictor/checkpoint-1500'
        num_labels=3,
        return_dict=True,
    )
    training_args = TrainingArguments(
        output_dir='./results',          # output directory
        per_device_eval_batch_size=32,   # batch size for evaluation
    )
    trainer = Trainer(
        model=model,                       # the instantiated 🤗 Transformers model to be trained
        args=training_args,                # training arguments, defined above
        eval_dataset=test_dataset,         # evaluation dataset
        compute_metrics=compute_metrics,
    )
    return trainer, model

def test_evaluation_with_keys_to_drop(self):
    config = GPT2Config(vocab_size=100, n_positions=128, n_ctx=128, n_embd=32, n_layer=3, n_head=4)
    tiny_gpt2 = GPT2LMHeadModel(config)
    x = torch.randint(0, 100, (128,))
    eval_dataset = RepeatDataset(x)
    args = TrainingArguments("./test")
    trainer = Trainer(tiny_gpt2, args, eval_dataset=eval_dataset)
    # By default the past_key_values are removed
    result = trainer.predict(eval_dataset)
    self.assertTrue(isinstance(result.predictions, np.ndarray))
    # We can still get them by setting ignore_keys to []
    result = trainer.predict(eval_dataset, ignore_keys=[])
    self.assertTrue(isinstance(result.predictions, tuple))
    self.assertEqual(len(result.predictions), 2)

def bert(training, testing_1, testing_2, fine_tune):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    testing_data_1 = np.load(testing_1)
    testing_data_2 = np.load(testing_2)
    testing_data = np.concatenate((testing_data_1, testing_data_2))
    training_data = np.load(training)
    model = BertModel.from_pretrained('bert-base-uncased', output_hidden_states=True)

    if fine_tune:
        # keep every other sample; first 80% for training, the rest for evaluation
        train_data = []
        eval_data = []
        for i in range(len(training_data)):
            if i % 2 == 0 and i < len(training_data) * 0.8:
                train_data.append(training_data[i])
            elif i % 2 == 0:
                eval_data.append(training_data[i])
        inputs = tokenizer(train_data, padding="max_length", truncation=True)
        training_args = TrainingArguments(output_dir=os.getcwd() + "\\data\\", do_eval=False)
        trainer = Trainer(model=model, args=training_args, train_dataset=inputs, eval_dataset=eval_data)
        trainer.train()

    output = []
    model.eval()
    for i in range(len(testing_data)):
        if i % 2 == 0:
            sentence = "[CLS] " + testing_data[i] + " [SEP]"
            tokenized_sentence = tokenizer.tokenize(sentence)
            indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_sentence)
            segments_ids = [1] * len(tokenized_sentence)
            tokens_tensor = torch.tensor([indexed_tokens])
            segments_tensors = torch.tensor([segments_ids])
            with torch.no_grad():
                outputs = model(tokens_tensor, segments_tensors)
                hidden_states = outputs[2]
            # stack the layer outputs and reorder to [tokens, layers, hidden]
            token_embeddings = torch.stack(hidden_states, dim=0)
            token_embeddings = torch.squeeze(token_embeddings, dim=1)
            token_embeddings = token_embeddings.permute(1, 0, 2)
            token_vecs_cat = []
            for token in token_embeddings:
                # concatenate the last four hidden layers for each token
                cat_vec = torch.cat((token[-1], token[-2], token[-3], token[-4]), dim=0)
                token_vecs_cat.append(cat_vec)
            for j, token_str in enumerate(tokenized_sentence):
                output.append(np.array(token_vecs_cat[j]))
    return output[:len(testing_data_1)], output[len(testing_data_1):]

def main(name):
    logging.info("Start of training")
    train_df = pd.read_json("train_processed.json")
    val_df = pd.read_json("val_processed.json")

    unique_tags = set(tag for label in train_df["label"].to_list() for tag in label)
    tag2id = {tag: id for id, tag in enumerate(unique_tags)}
    id2tag = {id: tag for tag, id in tag2id.items()}
    with open(f"tag2id_{name}.json", "w", encoding="utf-8") as f:
        json.dump({"tag2id": tag2id, "id2tag": id2tag}, f)

    model, tokenizer = get_model_and_tokenizer("xlm-roberta-base", len(unique_tags))
    train_dataset = AddressDataset(train_df, tag2id, tokenizer)
    val_dataset = AddressDataset(val_df, tag2id, tokenizer)
    compute_metrics = ComputeMetrics(id2tag).compute

    training_args = TrainingArguments(
        output_dir=f'./results_{name}',
        save_steps=1000,
        num_train_epochs=3,
        per_device_train_batch_size=64,
        per_device_eval_batch_size=64,
        warmup_steps=500,
        weight_decay=0.01,
        logging_dir=f"./logs_{name}",
        logging_steps=10,
        evaluation_strategy="steps",
        eval_steps=500,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics,
    )
    trainer.train()
    # trainer.evaluate()
    trainer.save_model(f"./model_{name}")

def main():
    tokenizer = BertTokenizer.from_pretrained('vocab/bert-base-chinese-vocab.txt')
    dataset = LineByLineTextDataset(
        tokenizer=tokenizer,
        file_path="data/dialogue_lined/multi-sents-further-pretrain/train_test_dialogues.txt",
        block_size=512,
    )
    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=True,
        mlm_probability=0.15,
    )
    training_args = TrainingArguments(
        output_dir="model/multi-sents-test-further-pretrained-bert",
        do_train=True,
        warmup_steps=int(100 * (len(dataset) / 32) * 0.1),
        # warmup_steps=10000,
        overwrite_output_dir=True,
        num_train_epochs=100,
        # max_steps=100000,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=4,
        save_steps=1000,
        logging_steps=10,
        weight_decay=0.01,
    )
    model = BertForMaskedLM.from_pretrained('bert-base-chinese')
    trainer = Trainer(
        model=model,
        args=training_args,
        data_collator=data_collator,
        train_dataset=dataset,
        prediction_loss_only=True,
    )
    trainer.train()
    trainer.save_model('model/multi-sents-test-further-pretrained-bert')
    return

def train_MLM(vocf, outmodel, data_df):
    bs = 8
    # tokenizer = BertWordPieceTokenizer(vocf)  # input vocab.txt
    ttk = BertTokenizer.from_pretrained(vocf)  # input vocab.txt
    fvoc = open(vocf)
    vlen = len(fvoc.readlines())
    fvoc.close()

    config = RobertaConfig(vocab_size=vlen, max_position_embeddings=12, num_attention_heads=12,
                           num_hidden_layers=6, type_vocab_size=1, hidden_size=768)
    model = RobertaForMaskedLM(config=config)
    model.num_parameters()

    dataset = tokDataset(data_df, ttk)
    # Data = DataLoader(dataset, batch_size=bs, shuffle=True, drop_last=False, num_workers=0, collate_fn=collate_fn)
    # data_collator = DataCollatorForLanguageModeling(
    #     tokenizer=ttk, mlm=True, mlm_probability=0.15
    # )
    data_collator = collate_fn(
        tokenizer=ttk, mlm=True, mlm_probability=0.15
    )

    training_args = TrainingArguments(
        output_dir=outmodel,  # embedding model path
        overwrite_output_dir=True,
        num_train_epochs=2,
        per_device_train_batch_size=bs,
        save_steps=10_000,
        save_total_limit=2,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=dataset,
        data_collator=data_collator,
        prediction_loss_only=True,
    )
    trainer.train()
    trainer.save_model(outmodel)
    print('LM training done.')

def __init__(self, opts, project_path='./'):
    self.project_path = project_path
    self.model_name = opts.model
    self.training_args = TrainingArguments(
        output_dir='./check_points',                 # output directory
        num_train_epochs=opts.epoch,                 # total number of training epochs
        per_device_train_batch_size=opts.train_bs,   # batch size per device during training
        warmup_steps=opts.warmup_steps,              # number of warmup steps for the learning rate scheduler
        weight_decay=opts.weight_decay,              # strength of weight decay
        logging_dir='./logs',                        # directory for storing logs
        logging_steps=1000,
        learning_rate=opts.lr,
        evaluation_strategy='no',
        save_steps=1500,
    )
    print_info('load model')
    self.load_model()
    print_info('load data')
    self.load_data()

def train_function(train_dataset, eval_dataset=None, **config):
    model_config = AutoConfig.from_pretrained(model_checkpoint)
    model = AutoModelForCausalLM.from_config(model_config)
    training_args = TrainingArguments(
        f"{model_checkpoint}-wikitext2",
        evaluation_strategy="epoch",
        num_train_epochs=config.get("epochs", 3),
        learning_rate=2e-5,
        weight_decay=0.01,
        disable_tqdm=True,
        no_cuda=True,
        save_strategy=config.get("save_strategy", "no"),
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    return trainer

def __init__(self, model, train_dataset=None, eval_dataset=None, **kwargs):
    """Initialization method.

    Args:
        model (PreTrainedModel): Pre-trained model.
        train_dataset (Dataset): Training dataset.
        eval_dataset (Dataset): Evaluation dataset.

    """
    logger.debug('Creating runner ...')

    # Defines the arguments
    args = TrainingArguments(output_dir='./results', logging_dir='./logs', **kwargs)

    # Overrides its parent class with inputted arguments
    super(Runner, self).__init__(model, args,
                                 train_dataset=train_dataset,
                                 eval_dataset=eval_dataset,
                                 compute_metrics=compute_metrics)

    logger.debug('Runner created.')

def load_training_arguments(experiment_name=vocab_dir):
    if not os.path.isdir(experiment_name):
        os.mkdir(experiment_name)
    training_args = TrainingArguments(
        output_dir=experiment_name,
        group_by_length=True,
        per_device_train_batch_size=30,
        gradient_accumulation_steps=2,
        evaluation_strategy="steps",
        num_train_epochs=100,
        gradient_checkpointing=True,
        fp16=True,
        save_steps=1000,
        eval_steps=1000,
        logging_steps=50,
        learning_rate=3e-4,
        warmup_steps=500,
        save_total_limit=6,
        push_to_hub=False,
    )
    return training_args