def main(argv):
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-d', '--device', default='gpu', choices=['gpu', 'cpu'],
                        help='device to train the model with. Options: cpu or gpu. Default: gpu')
    parser.add_argument('-p', '--pretrainingslist',
                        default=["checkpoints/tmc_ndq_roberta_IR_e2.pth",
                                 "checkpoints/tmc_ndq_roberta_NSP_e2.pth",
                                 "checkpoints/tmc_ndq_roberta_NN_e3.pth"],
                        help='list of paths of the pretrained models. There must be exactly three. '
                             'Default: checkpoints/tmc_ndq_roberta_IR_e2.pth, '
                             'checkpoints/tmc_ndq_roberta_NSP_e2.pth, '
                             'checkpoints/tmc_ndq_roberta_NN_e3.pth')
    parser.add_argument('-x', '--maxlen', default=180, type=int, help='max sequence length. Default: 180')
    parser.add_argument('-b', '--batchsize', default=32, type=int, help='size of the batches. Default: 32')
    args = parser.parse_args()
    print(args)

    # Load the three solver-specific checkpoints (IR, NSP, NN).
    models = [torch.load(args.pretrainingslist[0]),
              torch.load(args.pretrainingslist[1]),
              torch.load(args.pretrainingslist[2])]
    retrieval_solvers = ["IR", "NSP", "NN"]
    tokenizer = RobertaTokenizer.from_pretrained('roberta-large')
    max_len = args.maxlen
    batch_size = args.batchsize
    dataset_name = "ndq"

    # Collect each solver's predictions; note that the *_train variables hold the
    # validation split, which is later used to fit the ensemble.
    feats_train = []
    feats_test = []
    for model, retrieval_solver in zip(models, retrieval_solvers):
        if args.device == "gpu":
            device = torch.device("cuda")
            model.cuda()
        if args.device == "cpu":
            device = torch.device("cpu")
            model.cpu()
        model.eval()
        print("\n")
        print(retrieval_solver)

        print("val")
        raw_data_train = get_data_ndq(dataset_name, "val", retrieval_solver, tokenizer, max_len)
        train_dataloader = process_data_ndq(raw_data_train, batch_size, "val")
        feats_train.append(validation_ndq(model, train_dataloader, device))
        labels_train = raw_data_train[-1]

        print("test")
        raw_data_test = get_data_ndq(dataset_name, "test", retrieval_solver, tokenizer, max_len)
        test_dataloader = process_data_ndq(raw_data_test, batch_size, "test")
        feats_test.append(validation_ndq(model, test_dataloader, device))
        labels_test = raw_data_test[-1]

    # Ensemble the solvers: fit on one split, report on the other, then swap.
    upper_bound_train = get_upper_bound(feats_train, labels_train)
    res = ensembler(feats_train, feats_test, labels_train, labels_test)
    print("\nFINAL RESULTS:")
    print("TEST SET: ")
    print(res)
    res = ensembler(feats_test, feats_train, labels_test, labels_train)
    print("VALIDATION SET: ")
    print(res)
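# The ensembler() call above fuses the three solvers' outputs. As a purely
# illustrative, hypothetical sketch (NOT the repo's actual ensembler), late
# fusion of this kind can be done by concatenating each solver's per-question
# choice probabilities and fitting a simple classifier on the validation split:
import numpy as np
from sklearn.linear_model import LogisticRegression

def ensembler_sketch(feats_fit, feats_eval, labels_fit, labels_eval):
    # Assumption: each element of feats_* is an (n_questions, n_choices) array
    # of softmax scores returned by one solver (IR, NSP, NN).
    X_fit = np.concatenate([np.asarray(f) for f in feats_fit], axis=1)
    X_eval = np.concatenate([np.asarray(f) for f in feats_eval], axis=1)
    clf = LogisticRegression(max_iter=1000).fit(X_fit, labels_fit)
    # Accuracy of the fused prediction on the held-out split.
    return clf.score(X_eval, labels_eval)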
def main(argv):
    parser = argparse.ArgumentParser(description='')
    required = parser.add_argument_group('required arguments')
    required.add_argument('-r', '--retrieval', choices=['IR', 'NSP', 'NN'], required=True,
                          help='retrieval solver for the contexts. Options: IR, NSP or NN')
    parser.add_argument('-t', '--dataset', default='ndq', choices=['ndq', 'dq'],
                        help='dataset to train the model with. Options: ndq or dq. Default: ndq')
    parser.add_argument('-d', '--device', default='gpu', choices=['gpu', 'cpu'],
                        help='device to train the model with. Options: cpu or gpu. Default: gpu')
    parser.add_argument('-p', '--pretrainings', default="checkpoints/pretrainings_e4.pth",
                        help='path to the pretrainings model. If empty, the model will be '
                             'RobertaForMultipleChoice with roberta-large weights. '
                             'Default: checkpoints/pretrainings_e4.pth')
    parser.add_argument('-b', '--batchsize', default=1, type=int, help='size of the batches. Default: 1')
    parser.add_argument('-x', '--maxlen', default=180, type=int, help='max sequence length. Default: 180')
    parser.add_argument('-l', '--lr', default=1e-5, type=float, help='learning rate. Default: 1e-5')
    parser.add_argument('-e', '--epochs', default=4, type=int, help='number of epochs. Default: 4')
    parser.add_argument('-s', '--save', default=False, action='store_true',
                        help='save model at the end of the training')
    args = parser.parse_args()
    print(args)

    # Start from the given pretrained checkpoint, otherwise from roberta-large.
    if args.pretrainings == "":
        model = RobertaForMultipleChoice.from_pretrained("roberta-large")
    else:
        model = torch.load(args.pretrainings)
    tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

    if args.device == "gpu":
        device = torch.device("cuda")
        model.cuda()
    if args.device == "cpu":
        device = torch.device("cpu")
        model.cpu()
    model.zero_grad()

    batch_size = args.batchsize
    max_len = args.maxlen
    lr = args.lr
    epochs = args.epochs
    retrieval_solver = args.retrieval
    save_model = args.save
    dataset_name = args.dataset

    raw_data_train = get_data_ndq(dataset_name, "train", retrieval_solver, tokenizer, max_len)
    raw_data_val = get_data_ndq(dataset_name, "val", retrieval_solver, tokenizer, max_len)
    train_dataloader = process_data_ndq(raw_data_train, batch_size, "train")
    val_dataloader = process_data_ndq(raw_data_val, batch_size, "val")

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    training_ndq(model, train_dataloader, val_dataloader, optimizer, scheduler, epochs,
                 retrieval_solver, device, save_model, dataset_name)
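# Illustration of the schedule configured above: with num_warmup_steps=0 the
# learning rate decays linearly from lr down to 0 over num_training_steps
# optimizer steps (here total_steps = len(train_dataloader) * epochs). Toy
# standalone sketch with a dummy parameter and 10 steps; numbers are
# illustrative only, not taken from an actual run.
import torch
from transformers import get_linear_schedule_with_warmup

toy_param = torch.nn.Parameter(torch.zeros(1))
toy_optimizer = torch.optim.AdamW([toy_param], lr=1e-5, eps=1e-8)
toy_scheduler = get_linear_schedule_with_warmup(toy_optimizer, num_warmup_steps=0, num_training_steps=10)
for step in range(10):
    toy_optimizer.step()                        # in training this follows loss.backward()
    toy_scheduler.step()
    print(step, toy_scheduler.get_last_lr())    # 9e-06, 8e-06, ..., 0.0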
def main(argv):
    parser = argparse.ArgumentParser(description='')
    parser.add_argument('-d', '--device', default='gpu', choices=['gpu', 'cpu'],
                        help='device to train the model with. Options: cpu or gpu. Default: gpu')
    parser.add_argument('-p', '--pretrainings', default='../checkpoints/RACE_e1.pth',
                        help='path to the pretrainings model. Default: ../checkpoints/RACE_e1.pth')
    parser.add_argument('-b', '--batchsize', default=1, type=int, help='size of the batches. Default: 1')
    parser.add_argument('-x', '--maxlen', default=256, type=int, help='max sequence length. Default: 256')
    parser.add_argument('-l', '--lr', default=1e-5, type=float, help='learning rate. Default: 1e-5')
    parser.add_argument('-e', '--epochs', default=4, type=int, help='number of epochs. Default: 4')
    parser.add_argument('-s', '--save', default=False, action='store_true',
                        help='save model at the end of the training')
    args = parser.parse_args()
    print(args)

    if args.pretrainings == "":
        model = RobertaForMultipleChoice.from_pretrained("roberta-large")
    else:
        model = torch.load(args.pretrainings)
    tokenizer = RobertaTokenizer.from_pretrained('roberta-large')

    if args.device == "gpu":
        device = torch.device("cuda")
        model.cuda()
    if args.device == "cpu":
        device = torch.device("cpu")
        model.cpu()
    model.zero_grad()

    batch_size = args.batchsize
    max_len = args.maxlen
    dataset_name = "pretrainings"
    lr = args.lr
    epochs = args.epochs
    save_model = args.save

    raw_data_train = get_data_pretrainings(dataset_name, "train", tokenizer, max_len)
    raw_data_val = get_data_pretrainings(dataset_name, "val", tokenizer, max_len)
    train_dataloader = process_data_ndq(raw_data_train, batch_size, "train")
    val_dataloader = process_data_ndq(raw_data_val, batch_size, "val")

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    training_ndq(model, train_dataloader, val_dataloader, optimizer, scheduler, epochs,
                 device, save_model, dataset_name)
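# For reference, the input layout RobertaForMultipleChoice expects: tensors of
# shape (batch_size, num_choices, seq_len), one encoded (prompt, choice) pair
# per candidate answer, with labels holding the gold answer index. This uses
# the real Hugging Face API, but the prompt and choice strings below are made
# up purely for illustration.
import torch
from transformers import RobertaTokenizer, RobertaForMultipleChoice

mc_tokenizer = RobertaTokenizer.from_pretrained("roberta-large")
mc_model = RobertaForMultipleChoice.from_pretrained("roberta-large")

prompt = "The water cycle moves water through the environment. What drives evaporation?"
choices = ["the sun", "the moon", "gravity", "wind"]
enc = mc_tokenizer([prompt] * len(choices), choices,
                   padding=True, truncation=True, max_length=256, return_tensors="pt")
batch = {k: v.unsqueeze(0) for k, v in enc.items()}    # -> (1, 4, seq_len)
out = mc_model(**batch, labels=torch.tensor([0]))      # gold answer index 0
print(out.loss.item(), out.logits.shape)               # scalar loss, torch.Size([1, 4])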