# "vanilla" run: baseline roberta-large configuration
from sklearn.metrics import accuracy_score
from simpletransformers.classification import ClassificationArgs, ClassificationModel

model_args = ClassificationArgs()
model_args.evaluate_during_training_steps = -1
model_args.learning_rate = 0.00003173
model_args.manual_seed = 4
model_args.max_seq_length = 256
model_args.multiprocessing_chunksize = 5000
model_args.no_cache = True
model_args.no_save = True
model_args.num_train_epochs = 40
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.train_batch_size = 16
model_args.gradient_accumulation_steps = 2
model_args.train_custom_parameters_only = False
model_args.labels_list = ["not_entailment", "entailment"]
model_args.output_dir = "vanilla_output"
model_args.best_model_dir = "vanilla_output/best_model"
model_args.wandb_project = "RTE - Hyperparameter Optimization"
model_args.wandb_kwargs = {"name": "vanilla"}

# Create a TransformerModel
model = ClassificationModel("roberta", "roberta-large", use_cuda=True, args=model_args)

# Train the model
model.train_model(
    train_df,
    eval_df=eval_df,
    accuracy=lambda truth, predictions: accuracy_score(
        truth, [round(p) for p in predictions]
    ),
)
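Once training finishes, the held-out split can be scored with the same accuracy metric. A minimal sketch, assuming eval_df is the RTE validation DataFrame used above; eval_model is the standard Simple Transformers evaluation entry point and returns overall results, per-example model outputs, and the misclassified rows:

# Evaluate the trained model on the validation split
result, model_outputs, wrong_predictions = model.eval_model(
    eval_df,
    accuracy=lambda truth, predictions: accuracy_score(
        truth, [round(p) for p in predictions]
    ),
)
print(result)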
# "best-params" run: apply the tuned hyperparameters from the sweep
model_args.manual_seed = 4
model_args.max_seq_length = 256
model_args.multiprocessing_chunksize = 5000
model_args.no_cache = True
# model_args.no_save = True
model_args.num_train_epochs = 10
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.train_batch_size = 16
model_args.gradient_accumulation_steps = 2
model_args.train_custom_parameters_only = False
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.labels_list = ["not_entailment", "entailment"]
model_args.output_dir = "tuned_output"
model_args.best_model_dir = "tuned_output/best_model"
model_args.wandb_project = "RTE - Hyperparameter Optimization"
model_args.wandb_kwargs = {"name": "best-params"}

layer_params = []
param_groups = []
cleaned_args = {}

# Sort the tuned hyperparameters into layer-wise learning rates,
# explicit parameter groups, and regular model args
for key, value in best_params.items():
    if key.startswith("layer_"):
        # Keys like "layer_0-6" assign one learning rate to a range of layers
        layer_keys = key.split("_")[-1]
        start_layer = int(layer_keys.split("-")[0])
        end_layer = int(layer_keys.split("-")[-1])
        for layer_key in range(start_layer, end_layer):
            layer_params.append({
                "layer": layer_key,
                # assumed completion: pair each layer in the range with the tuned rate
                "lr": value,
            })
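The loop above only collects the grouped values; they still have to be wired back into the model args. In Simple Transformers, layer-wise learning rates go into custom_layer_parameters and named parameter groups into custom_parameter_groups, while the remaining swept values are ordinary model args. A minimal sketch of that wiring, assuming the elif/else branches of the loop fill param_groups and cleaned_args analogously:

# Apply the tuned values to the model args
model_args.custom_layer_parameters = layer_params
model_args.custom_parameter_groups = param_groups
model_args.update_from_dict(cleaned_args)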
# "augmented-default" run: default-style hyperparameters
model_args.evaluate_during_training_steps = -1
model_args.save_eval_checkpoints = False
model_args.save_model_every_epoch = False
model_args.learning_rate = 1e-5
model_args.manual_seed = 4
model_args.max_seq_length = 256
model_args.multiprocessing_chunksize = 5000
model_args.no_cache = True
model_args.num_train_epochs = 3
model_args.overwrite_output_dir = True
model_args.reprocess_input_data = True
model_args.train_batch_size = 16
model_args.gradient_accumulation_steps = 2
model_args.labels_list = ["not_entailment", "entailment"]
model_args.output_dir = "default_output"
model_args.best_model_dir = "default_output/best_model"
model_args.wandb_project = "RTE - Hyperparameter Optimization"
model_args.wandb_kwargs = {"name": "augmented-default"}

# Create a TransformerModel
model = ClassificationModel("roberta", "roberta-large", use_cuda=True, args=model_args)

# Train the model
model.train_model(
    train_df,
    eval_df=eval_df,
    accuracy=lambda truth, predictions: accuracy_score(
        truth, [round(p) for p in predictions]
    ),
)
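After training, the saved checkpoint can be reloaded for inference. A sketch, assuming evaluate_during_training was enabled earlier in the script so that a best model was written to default_output/best_model; the example premise/hypothesis pair is hypothetical, and for sentence-pair tasks predict takes inputs as [text_a, text_b] lists:

# Reload the best checkpoint for inference
best_model = ClassificationModel("roberta", "default_output/best_model", use_cuda=True)

# Sentence-pair inputs are passed as [premise, hypothesis] pairs
predictions, raw_outputs = best_model.predict(
    [["A dog is sleeping on the porch.", "An animal is resting."]]
)
print(predictions)  # e.g. ["entailment"]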