def main():
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = get_args()
    print_args(args)
    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_output_dir(args)
    initialization.save_args(args)

    classifier = simple_classifier(n_classes=args.n_classes, n_hidden=args.fc_dim)
    classifier = classifier.to(device)
    optimizer = SGD(classifier.parameters(), lr=0.001, momentum=0.9)
    runner = Runner(classifier=classifier,
                    optimizer=optimizer,
                    device=device,
                    rparams=RunnerParameters(
                        num_train_epochs=args.num_train_epochs,
                        train_batch_size=args.train_batch_size,
                        eval_batch_size=args.eval_batch_size,
                    ))

    # load the cached datasets
    train_dataset = torch.load(os.path.join(args.data_dir, "train.dataset"))
    eval_dataset = torch.load(os.path.join(args.data_dir, "dev.dataset"))
    if args.mnli:
        mm_eval_dataset = torch.load(os.path.join(args.data_dir, "mm_dev.dataset"))
    else:
        mm_eval_dataset = None

    # run training and validation
    to_save = runner.run_train_val(
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
        mm_eval_set=mm_eval_dataset,
    )

    # save training state to output dir
    torch.save(to_save, os.path.join(args.output_dir, "training.info"))
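
# For reference, a minimal sketch of what `simple_classifier` above might look
# like: an MLP head over precomputed sentence features. This is an assumption
# for illustration, not the repo's actual definition; in particular `n_inputs`
# (the feature width of the cached *.dataset tensors) is hypothetical.
import torch.nn as nn

def simple_classifier_sketch(n_classes, n_hidden, n_inputs=512):
    # n_inputs must match the feature dim of the tensors loaded in main()
    return nn.Sequential(
        nn.Linear(n_inputs, n_hidden),
        nn.ReLU(),
        nn.Linear(n_hidden, n_classes),
    )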
def main():
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = get_args()
    print_args(args)

    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_train_batch_size(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)

    use_cuda = not args.no_cuda
    verbose = args.verbose

    # model config
    config = {
        'dropout_prob': args.dropout_prob,
        'n_classes': args.n_classes,
        'fc_dim': args.fc_dim,
        'enc_dim': 512,  # USE v4 produces 512-dim sentence embeddings
    }

    # load the encoder and the classifier head
    print("loading Universal Sentence Encoder...")
    USE = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
    classifier = SimpleClassifier(config)
    classifier = classifier.cuda() if use_cuda else classifier

    # get train examples
    train_examples = task.get_train_examples()

    # calculate t_total
    t_total = initialization.get_opt_train_steps(len(train_examples), args)

    # build optimizer
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

    # create running parameters
    r_params = RunnerParameters(
        local_rank=args.local_rank,
        n_gpu=n_gpu,
        learning_rate=5e-5,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        t_total=t_total,
        warmup_proportion=args.warmup_proportion,
        num_train_epochs=args.num_train_epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_size=args.eval_batch_size,
        verbose=verbose)

    # create runner class for training and evaluation tasks
    runner = GlueTaskClassifierRunner(
        encoder_model=USE,
        classifier_model=classifier,
        optimizer=optimizer,
        label_list=task.get_labels(),
        device=device,
        rparams=r_params)

    if args.do_train:
        runner.run_train_classifier(train_examples)

    if args.do_val:
        val_examples = task.get_dev_examples()
        results = runner.run_val(val_examples, task_name=task.name, verbose=verbose)
        df = pd.DataFrame(results["logits"])
        df.to_csv(os.path.join(args.output_dir, "val_preds.csv"), header=False, index=False)
        metrics_str = json.dumps(
            {"loss": results["loss"], "metrics": results["metrics"]},
            indent=2)
        print(metrics_str)
        with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
            f.write(metrics_str)

        # HACK for MNLI-mismatched
        if task.name == "mnli":
            mm_val_examples = MnliMismatchedProcessor().get_dev_examples(task.data_dir)
            mm_results = runner.run_val(mm_val_examples, task_name=task.name, verbose=verbose)
            df = pd.DataFrame(mm_results["logits"])
            df.to_csv(os.path.join(args.output_dir, "mm_val_preds.csv"), header=False, index=False)
            combined_metrics = {}
            for k, v in results["metrics"].items():
                combined_metrics[k] = v
            for k, v in mm_results["metrics"].items():
                combined_metrics["mm-" + k] = v
            combined_metrics_str = json.dumps(
                {"loss": results["loss"], "metrics": combined_metrics},
                indent=2)
            print(combined_metrics_str)
            # overwrite val_metrics.json with the combined matched + mismatched metrics
            with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
                f.write(combined_metrics_str)
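
# The hub module loaded above is callable on a batch of raw strings and
# returns one 512-dim embedding per sentence (which is why the config pins
# 'enc_dim' to 512). A standalone usage sketch of that API:
import tensorflow_hub as hub

embed = hub.load("https://tfhub.dev/google/universal-sentence-encoder/4")
vectors = embed(["The premise sentence.", "The hypothesis sentence."])
print(vectors.shape)  # (2, 512)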
def main():
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
                        datefmt='%m/%d/%Y %H:%M:%S',
                        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = get_args()
    print_args(args)
    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_output_dir(args)
    initialization.save_args(args)

    classifier = simple_classifier(n_classes=args.n_classes, n_hidden=args.fc_dim)
    classifier = classifier.to(device)
    optimizer = SGD(classifier.parameters(), lr=0.001, momentum=0.9)
    runner = InfersentRunner(classifier=classifier,
                             optimizer=optimizer,
                             device=device,
                             rparams=RunnerParameters(
                                 num_train_epochs=args.num_train_epochs,
                                 train_batch_size=args.train_batch_size,
                                 eval_batch_size=args.eval_batch_size,
                             ))

    # load the cached dataset shards
    train_datasets = []
    for i in range(TRAIN_SET_NUM_MAP[args.task_name]):
        train_dataset = torch.load(os.path.join(args.data_dir, "train-{}.dataset".format(i)))
        train_datasets.append(train_dataset)
    eval_dataset = torch.load(os.path.join(args.data_dir, "dev-0.dataset"))
    if args.mnli:
        mm_eval_dataset = torch.load(os.path.join(args.data_dir, "mm_dev-0.dataset"))
    else:
        mm_eval_dataset = None

    # run training and validation, keeping a state dict per epoch
    eval_info, state_dicts = runner.run_train_val_with_state_dict_returned(
        train_dataset=train_datasets,
        eval_dataset=eval_dataset,
        mm_eval_set=mm_eval_dataset,
    )

    # save training state to output dir
    torch.save(eval_info, os.path.join(args.output_dir, "training.info"))

    # find the highest validation accuracy, load that model's state dict,
    # and then run prediction on the test set
    val_acc = []
    mm_val_acc = []
    if args.mnli:
        for item in eval_info:
            val_acc.append(item[0]['accuracy'])
            mm_val_acc.append(item[1]['accuracy'])
        idx = val_acc.index(max(val_acc))
        print("highest accuracy on validation is: {}, index = {}.\n"
              "mismatched accuracy is: {}\n"
              "Loading state dict and running on the test set...".format(val_acc[idx], idx, mm_val_acc[idx]))
    else:
        for item in eval_info:
            val_acc.append(item['accuracy'])
        idx = val_acc.index(max(val_acc))
        print("highest accuracy on validation is: {}, index = {}.\n"
              "Loading state dict and running on the test set...".format(val_acc[idx], idx))
    torch.save(state_dicts[idx], os.path.join(args.output_dir, "state.p"))

    test_datasets = []
    for i in range(TEST_SET_NUM_MAP[args.task_name]):
        test_dataset = torch.load(os.path.join(args.data_dir, "test-{}.dataset".format(i)))
        test_datasets.append(test_dataset)

    runner.classifier.load_state_dict(torch.load(os.path.join(args.output_dir, "state.p")))
    logits = runner.run_test(test_datasets)
    df = pd.DataFrame(logits)
    df.to_csv(os.path.join(args.output_dir, "test_preds.csv"), header=False, index=False)

    # HACK for MNLI-mismatched
    if args.mnli:
        mm_test_dataset = torch.load(os.path.join(args.data_dir, "mm_test-0.dataset"))
        logits = runner.run_test([mm_test_dataset])
        df = pd.DataFrame(logits)
        df.to_csv(os.path.join(args.output_dir, "mm_test_preds.csv"), header=False, index=False)
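
# TRAIN_SET_NUM_MAP and TEST_SET_NUM_MAP above are not defined in this file;
# presumably they are plain dicts from task name to the number of cached
# dataset shards on disk. A hypothetical sketch -- the task names and shard
# counts here are illustrative, not the repo's actual values:
TRAIN_SET_NUM_MAP = {"mnli": 4, "qqp": 4, "sst": 1, "rte": 1}
TEST_SET_NUM_MAP = {"mnli": 1, "qqp": 1, "sst": 1, "rte": 1}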
def main():
    logging.basicConfig(
        format='%(asctime)s - %(levelname)s - %(name)s - %(message)s',
        datefmt='%m/%d/%Y %H:%M:%S',
        level=logging.INFO)
    logger = logging.getLogger(__name__)

    args = get_args()
    print_args(args)

    device, n_gpu = initialization.init_cuda_from_args(args, logger=logger)
    initialization.init_seed(args, n_gpu=n_gpu, logger=logger)
    initialization.init_train_batch_size(args)
    initialization.init_output_dir(args)
    initialization.save_args(args)
    task = get_task(args.task_name, args.data_dir)

    use_cuda = not args.no_cuda
    verbose = args.verbose

    # model config
    config = {
        'word_emb_dim': args.word_emb_dim,
        'enc_lstm_dim': args.enc_lstm_dim,
        'n_enc_layers': args.n_enc_layers,
        'dpout_model': args.dpout_model,
        'dpout_fc': args.dpout_fc,
        'fc_dim': args.fc_dim,
        'bsize': args.batch_size,
        'n_classes': args.n_classes,
        'pool_type': args.pool_type,
        'nonlinear_fc': args.nonlinear_fc,
        'use_cuda': use_cuda,
        'version': args.model_version,
        'dropout_prob': args.dropout_prob,
    }

    # load the pretrained InferSent encoder and its word vectors
    if verbose:
        print('loading model...')
    model = InferSent(config)
    model.load_state_dict(torch.load(args.model_path))
    model = model.cuda() if use_cuda else model
    model.set_w2v_path(args.word_emb_path)
    model.build_vocab_k_words(K=args.k_freq_words, verbose=verbose)

    # load classifier head
    classifier = SimpleClassifier(config)
    classifier = classifier.cuda() if use_cuda else classifier

    # get train examples
    train_examples = task.get_train_examples()

    # calculate t_total
    t_total = initialization.get_opt_train_steps(len(train_examples), args)

    # build optimizer
    optimizer = optim.SGD(classifier.parameters(), lr=0.001, momentum=0.9)

    # create running parameters
    r_params = RunnerParameters(
        local_rank=args.local_rank,
        n_gpu=n_gpu,
        learning_rate=5e-5,
        gradient_accumulation_steps=args.gradient_accumulation_steps,
        t_total=t_total,
        warmup_proportion=args.warmup_proportion,
        num_train_epochs=args.num_train_epochs,
        train_batch_size=args.train_batch_size,
        eval_batch_size=args.eval_batch_size,
        verbose=verbose)

    # create runner class for training and evaluation tasks
    runner = GlueTaskClassifierRunner(
        encoder_model=model,
        classifier_model=classifier,
        optimizer=optimizer,
        label_list=task.get_labels(),
        device=device,
        rparams=r_params)

    if args.do_train:
        runner.run_train_classifier(train_examples)

    if args.do_val:
        val_examples = task.get_dev_examples()
        results = runner.run_val(val_examples, task_name=task.name, verbose=verbose)
        df = pd.DataFrame(results["logits"])
        df.to_csv(os.path.join(args.output_dir, "val_preds.csv"), header=False, index=False)
        metrics_str = json.dumps(
            {"loss": results["loss"], "metrics": results["metrics"]},
            indent=2)
        print(metrics_str)
        with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
            f.write(metrics_str)

        # HACK for MNLI-mismatched
        if task.name == "mnli":
            mm_val_examples = MnliMismatchedProcessor().get_dev_examples(task.data_dir)
            mm_results = runner.run_val(mm_val_examples, task_name=task.name, verbose=verbose)
            df = pd.DataFrame(mm_results["logits"])
            df.to_csv(os.path.join(args.output_dir, "mm_val_preds.csv"), header=False, index=False)
            combined_metrics = {}
            for k, v in results["metrics"].items():
                combined_metrics[k] = v
            for k, v in mm_results["metrics"].items():
                combined_metrics["mm-" + k] = v
            combined_metrics_str = json.dumps(
                {"loss": results["loss"], "metrics": combined_metrics},
                indent=2)
            print(combined_metrics_str)
            # overwrite val_metrics.json with the combined matched + mismatched metrics
            with open(os.path.join(args.output_dir, "val_metrics.json"), "w") as f:
                f.write(combined_metrics_str)
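
# InferSent's encode() API (from facebookresearch/InferSent) is what the
# runner above relies on: after set_w2v_path() and build_vocab_k_words(),
# the encoder maps raw sentences to fixed-size numpy vectors. A small usage
# sketch; `encode_sentences` is a hypothetical helper, not part of this repo:
def encode_sentences(model, sentences):
    # returns an array of shape (len(sentences), 2 * enc_lstm_dim),
    # e.g. (n, 4096) for the released InferSent models
    return model.encode(sentences, bsize=64, tokenize=True, verbose=False)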