# Example #1
# Sweep-run configuration: copy the hyperparameters chosen by the wandb
# sweep agent into the simpletransformers argument object so each run is
# trained (and named in the wandb UI) according to its sampled config.
model_args.train_batch_size = wandb_config["train_batch_size"]
model_args.wandb_project = "transformer-aes"
model_args.wandb_kwargs = {
    "name": "{}-{}".format(wandb_config["model"], wandb_config["samples"])
}
model_args.learning_rate = wandb_config["lr"]
model_args.model = wandb_config["model"]
model_args.samples = wandb_config["samples"]
# model_args.max_seq_length = wandb_config["max_seq_length"]
model_args.regression = True                # single-output regression head
model_args.no_save = True                   # sweep runs: skip checkpointing
model_args.overwrite_output_dir = True
model_args.logging_steps = 1
model_args.evaluate_during_training = True
model_args.evaluate_during_training_verbose = True
# samples / batch_size = optimizer steps per epoch; evaluate every ~10
# epochs' worth of steps.  np.ceil returns a float, but this value is a
# step count compared against an integer step counter, so cast to int
# explicitly to keep the step arithmetic integral.
model_args.evaluate_during_training_steps = int(np.ceil(
    (wandb_config["samples"] / wandb_config["train_batch_size"]) * 10))
model_args.use_eval_cached_features = True

# Build the transformer regression model (one output label) from the sweep's
# chosen architecture and checkpoint path, then fine-tune it.
model = ClassificationModel(
    wandb_config["model"],
    wandb_config["save"],
    num_labels=1,
    args=model_args,
)

# Extra keyword arguments to train_model are evaluated as metrics on the
# eval set during training; collect them once and splat them into the call.
regression_metrics = {
    "mse": sklearn.metrics.mean_squared_error,
    "mae": sklearn.metrics.mean_absolute_error,
    "r2": sklearn.metrics.r2_score,
    "max": sklearn.metrics.max_error,
}
model.train_model(train_df, eval_df=eval_df, **regression_metrics)

result, model_outputs, wrong_predictions = model.eval_model(
# Register the hyperparameter sweep with Weights & Biases; the returned id
# is what a wandb agent uses to pick up runs for this sweep.
sweep_id = wandb.sweep(sweep_config, project="RTE - Hyperparameter Optimization")

# Root logger at INFO, but quiet the chatty transformers internals down to
# warnings so training progress stays readable.
logging.basicConfig(level=logging.INFO)
transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# Load the RTE splits: train from the raw jsonl file, eval/test from
# pre-serialized record-per-line JSON dumps.
# NOTE(review): "data/eval_df" / "data/test_df" have no file extension —
# presumably jsonl dumps written elsewhere; confirm against the writer.
train_df = load_rte_data_file("data/train.jsonl")
eval_df = pd.read_json("data/eval_df", lines=True, orient="records")
test_df = pd.read_json("data/test_df", lines=True, orient="records")

model_args = ClassificationArgs()

# Fixed hyperparameters and bookkeeping for the RTE fine-tuning runs,
# applied onto the default ClassificationArgs in one pass.
_arg_overrides = {
    "eval_batch_size": 8,
    "evaluate_during_training": True,
    "evaluate_during_training_silent": False,
    "evaluate_during_training_steps": 1000,
    "learning_rate": 4e-4,
    "manual_seed": 4,
    "max_seq_length": 256,
    "multiprocessing_chunksize": 5000,
    "no_cache": True,
    "no_save": True,
    "num_train_epochs": 10,
    "overwrite_output_dir": True,
    "reprocess_input_data": True,
    "train_batch_size": 16,
    "gradient_accumulation_steps": 2,
    "train_custom_parameters_only": False,
    "labels_list": ["not_entailment", "entailment"],
    "wandb_project": "RTE - Hyperparameter Optimization",
}
for _name, _value in _arg_overrides.items():
    setattr(model_args, _name, _value)