def _run_experiment_and_save(run_experiment, params, batcher, prepare):
    encoder, losses = run_experiment(params)

    # Save encoder.
    outputmodelname = construct_model_name(params.outputmodelname, params)
    torch.save(encoder, os.path.join(params.outputdir, outputmodelname + '.encoder'))

    # Write training and validation loss to a csv file.
    with open(os.path.join(params.outputdir, outputmodelname + "_losses.csv"), 'w') as loss_csv:
        loss_csv.write("train_loss,val_loss\n")
        for train_loss, val_loss in losses:
            loss_csv.write(",".join([str(train_loss), str(val_loss)]) + "\n")

    scores = {}
    # Compute scores on downstream tasks.
    if params.downstream_eval and params.downstream_eval != "none":
        downstream_scores = _evaluate_downstream_and_probing_tasks(
            encoder, params, batcher, prepare)
        # From each downstream task, only select the scores we care about.
        to_be_saved_scores = {}
        for score_name in downstream_scores:
            to_be_saved_scores[score_name] = _get_score_for_name(
                downstream_scores, score_name)
        scores.update(to_be_saved_scores)

    # Compute word embedding score.
    if params.word_embedding_eval:
        output_path = _save_embeddings_to_word2vec(encoder, outputmodelname, params)

    # Save results to csv.
    if params.output_file:
        write_to_csv(scores, params)
    return scores
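# Illustration (not part of the original module): a minimal sketch of how the artifacts
# written above could be reloaded for inspection. It mirrors the naming scheme used in
# _run_experiment_and_save; `outputdir` and `outputmodelname` are placeholder arguments.
def _load_saved_experiment(outputdir, outputmodelname):
    import csv
    import os
    import torch

    # Reload the serialized encoder module.
    encoder = torch.load(os.path.join(outputdir, outputmodelname + '.encoder'))
    # Read back the per-epoch (train_loss, val_loss) pairs written above.
    losses = []
    with open(os.path.join(outputdir, outputmodelname + "_losses.csv")) as loss_csv:
        for row in csv.DictReader(loss_csv):
            losses.append((float(row["train_loss"]), float(row["val_loss"])))
    return encoder, losses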
def main(args):
    # Set seeds for reproducibility.
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    args.data_to_n_classes = {
        'SNLI': 3, 'SNLIHard': 3, 'MNLIMatched': 3, 'MNLIMismatched': 3, 'JOCI': 3,
        'SICK-E': 3, 'AddOneRTE': 2, 'DPR': 2, 'FNPLUS': 2, 'SciTail': 2, 'SPR': 2,
        'MPE': 3, 'QQP': 2, 'GLUEDiagnostic': 3
    }

    # Build the vocabulary from all datasets.
    word_vec = get_vocab(args)
    shared_nli_net = torch.load(args.model).eval().cuda()

    eval_accs = {}
    eval_accs["test"] = evaluate(args, shared_nli_net, args.test_path,
                                 args.data_to_n_classes[args.test_data], word_vec, split="test")[0]
    eval_accs["dev"] = evaluate(args, shared_nli_net, args.test_path,
                                args.data_to_n_classes[args.test_data], word_vec, split="dev")[0]
    write_to_csv(eval_accs, args, args.outputfile)
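# Illustration (the repository's write_to_csv helper is defined elsewhere and its exact
# signature may differ): a minimal sketch of the kind of helper the call above assumes,
# appending one row of scores per run to a shared CSV file, keyed by the evaluated dataset.
def write_scores_row(scores, args, outputfile):
    import csv
    import os

    fieldnames = ["test_data"] + sorted(scores.keys())
    write_header = not os.path.exists(outputfile)
    with open(outputfile, "a", newline="") as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        if write_header:
            writer.writeheader()
        writer.writerow({"test_data": args.test_data, **scores})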
            params.lrshrink, optimizer.param_groups[0]['lr']))
            if optimizer.param_groups[0]['lr'] < params.minlr and params.use_early_stopping:
                stop_training = True
        if 'adam' in params.optimizer and params.use_early_stopping:
            # Early stopping (at 2nd decrease in accuracy).
            stop_training = adam_stop
            adam_stop = True
    return eval_acc


"""
Train model on Natural Language Inference task
"""
epoch = 1
while not stop_training and epoch <= params.n_epochs:
    train_acc = trainepoch(epoch)
    eval_acc = evaluate(epoch, 'valid')
    epoch += 1

# Run the best model on the test set.
debias_net = torch.load(os.path.join(params.outputdir, params.outputmodelname))

scores = {}
print('\nTEST : Epoch {0}'.format(epoch))
scores['NLI_val'] = evaluate(1e6, 'valid', True)
scores['NLI_test'] = evaluate(0, 'test', True)
write_to_csv(scores, params, params.outputfile)
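# Illustration (a standalone restatement, not the original evaluate() hook): the validation
# logic above shrinks the SGD learning rate when dev accuracy stops improving and flags early
# stopping once the rate falls below `minlr`; for Adam it stops at the second consecutive drop.
# The default values below are placeholders. A condensed, self-contained version of the pattern:
def maybe_shrink_lr_and_stop(optimizer, improved, optimizer_name,
                             lrshrink=5.0, minlr=1e-5, adam_stop=False):
    stop_training = False
    if not improved:
        if 'sgd' in optimizer_name:
            # Divide the learning rate and stop once it becomes negligible.
            optimizer.param_groups[0]['lr'] /= lrshrink
            if optimizer.param_groups[0]['lr'] < minlr:
                stop_training = True
        if 'adam' in optimizer_name:
            # Stop on the second decrease in validation accuracy.
            stop_training = adam_stop
            adam_stop = True
    return stop_training, adam_stop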
def main():
    parser = get_parser()
    args = parser.parse_args()

    if os.path.exists(args.output_dir) and os.listdir(args.output_dir) \
            and args.do_train and not args.overwrite_output_dir:
        raise ValueError(
            "Output directory ({}) already exists and is not empty. Use --overwrite_output_dir to overcome."
            .format(args.output_dir))

    # Add all variations of HANS automatically.
    if "HANS" in args.eval_task_names:
        hans_variations = ["HANS-const", "HANS-lex", "HANS-sub"]
        for variation in hans_variations:
            if variation not in args.eval_task_names:
                args.eval_task_names.append(variation)

    # Setup CUDA, GPU & distributed training.
    device = torch.device("cuda")
    args.device = device

    # All of these tasks use the NliProcessor.
    args.actual_task_names = actual_task_names

    # By default we evaluate on the task itself.
    if len(args.eval_task_names) == 0:
        args.eval_task_names = [args.task_name]
    if "all" in args.eval_task_names:
        args.eval_task_names = args.eval_task_names + nli_task_names + ["snli", "mnli"]
        args.eval_task_names.remove("all")
    print(args.eval_task_names)

    # Setup logging.
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger.warning(
        "Process rank: %s, device: %s, n_gpu: %s, distributed training: %s, 16-bits training: %s",
        -1, device, 1, bool(False), args.fp16)

    # Set seed.
    set_seed(args)

    # Prepare the GLUE task.
    args.task_name = args.task_name.lower()
    if args.task_name.startswith("fever"):
        processor = processors["fever"]()
    elif args.task_name in nli_task_names:
        processor = processors["nli"](task_to_data_dir[args.task_name])
    elif args.task_name in ["mnli"]:
        processor = processors["mnli"](hans=args.hans)
    elif args.task_name.startswith("HANS"):
        processor = processors["hans"]()
    elif args.task_name in args.actual_task_names:
        processor = processors[args.task_name]()
    else:
        raise ValueError("Task not found: %s" % (args.task_name))

    label_list = processor.get_labels()
    num_labels = len(label_list)

    args.model_type = args.model_type.lower()
    config_class, model_class, tokenizer_class = MODEL_CLASSES[args.model_type]
    config = config_class.from_pretrained(
        args.config_name if args.config_name else args.model_name_or_path,
        num_labels=num_labels,
        finetuning_task=args.task_name)
    tokenizer = tokenizer_class.from_pretrained(
        args.tokenizer_name if args.tokenizer_name else args.model_name_or_path,
        do_lower_case=args.do_lower_case)

    # Adds rubi parameters here.
    config.rubi = args.rubi
    config.hans = args.hans
    config.nonlinear_h_classifier = args.nonlinear_h_classifier
    config.hypothesis_only = args.hypothesis_only
    config.lambda_h = args.lambda_h
    config.focal_loss = args.focal_loss
    config.poe_loss = args.poe_loss
    config.similarity = args.similarity
    config.gamma_focal = args.gamma_focal
    config.weighted_bias_only = args.weighted_bias_only
    config.length_features = args.length_features
    config.hans_features = args.hans_features
    config.hans_only = args.hans_only
    config.ensemble_training = args.ensemble_training
    config.aggregate_ensemble = args.aggregate_ensemble
    config.poe_alpha = args.poe_alpha
    config.hidden_dropout_prob = args.hidden_dropout_prob

    model = model_class.from_pretrained(
        args.model_name_or_path,
        from_tf=bool('.ckpt' in args.model_name_or_path),
        config=config)
    model.to(args.device)

    logger.info("Training/evaluation parameters %s", args)
    print(model.dropout)  # Test dropout. TODO @mikimn: Remove

    # Training
    if args.do_train:
        train_dataset, _, _ = load_and_cache_examples(args, args.task_name, tokenizer, evaluate=False)
        global_step, tr_loss = train(args, train_dataset, model, tokenizer)
        logger.info(" global_step = %s, average loss = %s", global_step, tr_loss)

        # Saving best-practices: if you use default names for the model,
        # you can reload it using from_pretrained().
        # Create the output directory if needed.
        if not os.path.exists(args.output_dir):
            os.makedirs(args.output_dir)
        logger.info("Saving model checkpoint to %s", args.output_dir)

        # Save a trained model, configuration and tokenizer using `save_pretrained()`.
        # They can then be reloaded using `from_pretrained()`.
        model_to_save = model.module if hasattr(model, 'module') else model  # Take care of distributed/parallel training
        model_to_save.save_pretrained(args.output_dir)
        tokenizer.save_pretrained(args.output_dir)

        # Good practice: save your training arguments together with the trained model.
        torch.save(args, os.path.join(args.output_dir, 'training_args.bin'))
        print("model is saved in ", os.path.join(args.output_dir, 'training_args.bin'))

        # Load the trained model and vocabulary that you have fine-tuned.
        model = model_class.from_pretrained(args.output_dir)
        model.to(args.device)

    # Evaluation
    results = {}
    if args.do_eval:
        result, _ = do_evaluate(args, args.output_dir, tokenizer, model, config)
        for r in result:
            results.update(r)

    # Save the results.
    print(results)
    if args.outputfile is not None:
        write_to_csv(results, args, args.outputfile)
    return results
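# Illustration (the actual loss wiring lives in the model classes selected via MODEL_CLASSES;
# this is only a generic sketch of the kind of objectives that flags such as poe_loss,
# focal_loss, poe_alpha and gamma_focal configure): combining main-model logits with a
# bias-only (e.g. hypothesis-only) model, either as a product of experts or by focal-style
# reweighting of the cross-entropy. Argument names and defaults are placeholders.
def debiased_losses(logits, bias_logits, labels, poe_alpha=1.0, gamma_focal=2.0):
    import torch.nn.functional as F

    # Product of experts: sum the log-probabilities of the two models, renormalize, apply CE.
    poe_log_probs = F.log_softmax(logits, dim=-1) + poe_alpha * F.log_softmax(bias_logits, dim=-1)
    poe_loss = F.cross_entropy(poe_log_probs, labels)

    # Focal-style reweighting: down-weight examples the bias model already classifies confidently.
    bias_probs = F.softmax(bias_logits, dim=-1).gather(1, labels.unsqueeze(1)).squeeze(1)
    ce = F.cross_entropy(logits, labels, reduction='none')
    focal_loss = ((1.0 - bias_probs) ** gamma_focal * ce).mean()
    return poe_loss, focal_loss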