model_args.train_batch_size = wandb_config["train_batch_size"] model_args.wandb_project = "transformer-aes" model_args.wandb_kwargs = { "name": "{}-{}".format(wandb_config["model"], wandb_config["samples"]) } model_args.learning_rate = wandb_config["lr"] model_args.model = wandb_config["model"] model_args.samples = wandb_config["samples"] # model_args.max_seq_length = wandb_config["max_seq_length"] model_args.regression = True model_args.no_save = True model_args.overwrite_output_dir = True model_args.logging_steps = 1 model_args.evaluate_during_training = True model_args.evaluate_during_training_verbose = True model_args.evaluate_during_training_steps = np.ceil( (wandb_config["samples"] / wandb_config["train_batch_size"]) * 10) model_args.use_eval_cached_features = True model = ClassificationModel(wandb_config["model"], wandb_config["save"], num_labels=1, args=model_args) model.train_model(train_df, eval_df=eval_df, mse=sklearn.metrics.mean_squared_error, mae=sklearn.metrics.mean_absolute_error, r2=sklearn.metrics.r2_score, max=sklearn.metrics.max_error) result, model_outputs, wrong_predictions = model.eval_model(
# Register the hyperparameter sweep with wandb, then configure logging so
# that the transformers library only reports warnings and above.
sweep_id = wandb.sweep(sweep_config, project="RTE - Hyperparameter Optimization")

logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Preparing train data
train_df = load_rte_data_file("data/train.jsonl")
eval_df = pd.read_json("data/eval_df", lines=True, orient="records")
test_df = pd.read_json("data/test_df", lines=True, orient="records")

# Static (non-swept) training arguments, applied in insertion order so the
# resulting ClassificationArgs state matches a plain assignment sequence.
model_args = ClassificationArgs()
_static_args = {
    "eval_batch_size": 8,
    "evaluate_during_training": True,
    "evaluate_during_training_silent": False,
    "evaluate_during_training_steps": 1000,
    "learning_rate": 4e-4,
    "manual_seed": 4,
    "max_seq_length": 256,
    "multiprocessing_chunksize": 5000,
    "no_cache": True,
    "no_save": True,
    "num_train_epochs": 10,
    "overwrite_output_dir": True,
    "reprocess_input_data": True,
    "train_batch_size": 16,
    "gradient_accumulation_steps": 2,
    "train_custom_parameters_only": False,
    "labels_list": ["not_entailment", "entailment"],
    "wandb_project": "RTE - Hyperparameter Optimization",
}
for _name, _value in _static_args.items():
    setattr(model_args, _name, _value)