def get_scores(args, split='test'):
    """Compute softmax answer scores for one data split and cache them on disk.

    The scores are written to
    ``<args.data_dir>/PriorScores/priorscores_answers_<split>.pckl`` through
    ``utils.save_obj``. If that file already exists the function returns
    immediately without loading the model.

    Args:
        args: Namespace providing ``data_dir``, ``bert_model``,
            ``do_lower_case``, ``device``, ``max_seq_length``,
            ``eval_batch_size`` and the CSV file name for the requested split
            (``csvtrain``, ``csvval`` or ``csvtest``).
        split: One of ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        None. The result is the file written to disk.

    Raises:
        ValueError: If ``split`` is not one of the supported names.

    NOTE(review): ``outdir`` is not defined inside this function; it is
    presumably a module-level name pointing at the fine-tuned model directory.
    Confirm against the rest of the file.
    """
    csv_attr_by_split = {'train': 'csvtrain', 'val': 'csvval', 'test': 'csvtest'}
    if split not in csv_attr_by_split:
        raise ValueError(
            "Unknown split {!r}; expected one of {}".format(
                split, sorted(csv_attr_by_split)))

    input_file = os.path.join(args.data_dir, getattr(args, csv_attr_by_split[split]))
    filescores = os.path.join(
        args.data_dir, 'PriorScores/priorscores_answers_%s.pckl' % split)

    # Scores are cached: nothing to do when they were already computed.
    if os.path.exists(filescores):
        return

    # Load Model
    num_choices = 4
    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)
    output_model_file = os.path.join(outdir, WEIGHTS_NAME)
    output_config_file = os.path.join(outdir, CONFIG_NAME)
    config = BertConfig(output_config_file)
    model = BertForMultipleChoice(config, num_choices=num_choices)
    # Deserialize on CPU first so a checkpoint saved from a GPU run can still
    # be loaded on a CPU-only host; the model is moved to the target device
    # right after.
    model.load_state_dict(torch.load(output_model_file, map_location='cpu'))
    model.to(args.device)
    n_gpu = torch.cuda.device_count()
    logger.info("device: {} n_gpu: {}".format(args.device, n_gpu))
    if n_gpu > 1:
        model = torch.nn.DataParallel(model)

    # Data
    eval_examples = read_samples(input_file)
    eval_features = convert_to_features(eval_examples, tokenizer, args.max_seq_length)
    all_input_ids = torch.tensor(select_field(eval_features, 'input_ids'), dtype=torch.long)
    all_input_mask = torch.tensor(select_field(eval_features, 'input_mask'), dtype=torch.long)
    all_segment_ids = torch.tensor(select_field(eval_features, 'segment_ids'), dtype=torch.long)
    all_labels = torch.tensor([example.label for example in eval_examples], dtype=torch.long)
    eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_labels)
    # Sequential sampling keeps the score rows in the same order as the
    # examples read from the CSV file.
    eval_sampler = SequentialSampler(eval_data)
    eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                 batch_size=args.eval_batch_size)

    # Run prediction
    logger.info("***** Compute prior scores *****")
    logger.info("Num examples = %d", len(eval_examples))
    logger.info("Batch size = %d", args.eval_batch_size)
    model.eval()

    batch_scores = []
    for input_ids, input_mask, segment_ids, _ in tqdm(eval_dataloader, desc="Iteration"):
        input_ids = input_ids.to(args.device)
        input_mask = input_mask.to(args.device)
        segment_ids = segment_ids.to(args.device)
        with torch.no_grad():
            logits = model(input_ids, segment_ids, input_mask)
            # Normalize over the answer-choice axis (the last one).
            probabilities = nn.functional.softmax(logits, dim=-1)
        batch_scores.append(probabilities.detach().cpu().numpy())

    # Concatenate once at the end instead of re-allocating on every batch.
    if batch_scores:
        scores = np.concatenate(batch_scores, axis=0)
    else:
        scores = np.empty((0, num_choices), dtype=np.float32)

    os.makedirs(os.path.dirname(filescores), exist_ok=True)
    utils.save_obj(scores, filescores)
    logger.info('Prior scores for %s saved into %s', split, filescores)
def _build_arg_parser():
    """Build the command-line parser used by ``main``."""
    parser = argparse.ArgumentParser()

    ## Required parameters (every one has a default, so argparse enforces none)
    parser.add_argument(
        "--data_dir",
        default='./data',
        type=str,
        help="The input data dir. Should contain the .csv files (or other data files) for the task.")
    parser.add_argument(
        "--bert_model",
        default="bert-base-uncased",
        type=str,
        help="Bert pre-trained model selected in the list: bert-base-uncased, "
        "bert-large-uncased, bert-base-cased, bert-large-cased, bert-base-multilingual-uncased, "
        "bert-base-multilingual-cased, bert-base-chinese.")
    parser.add_argument(
        "--output_dir",
        default='./save/cache/',
        type=str,
        help="The output directory where the model checkpoints will be written.")

    ## Other parameters
    parser.add_argument(
        "--max_seq_length",
        default=100,
        type=int,
        help="The maximum total input sequence length after WordPiece tokenization. \n"
        "Sequences longer than this will be truncated, and sequences shorter \n"
        "than this will be padded.")
    parser.add_argument("--do_train",
                        default=False,
                        action='store_true',
                        help="Whether to run training.")
    # NOTE: with default=True this flag is always on; it is kept unchanged so
    # existing command lines behave the same.
    parser.add_argument("--do_eval",
                        default=True,
                        action='store_true',
                        help="Whether to run eval on the dev set.")
    parser.add_argument(
        "--do_lower_case",
        action='store_true',
        help="Set this flag if you are using an uncased model.")
    parser.add_argument("--train_batch_size",
                        default=1,
                        type=int,
                        help="Total batch size for training.")
    parser.add_argument("--eval_batch_size",
                        default=1,
                        type=int,
                        help="Total batch size for eval.")
    parser.add_argument("--learning_rate",
                        default=5e-5,
                        type=float,
                        help="The initial learning rate for Adam.")
    parser.add_argument("--num_train_epochs",
                        default=3.0,
                        type=float,
                        help="Total number of training epochs to perform.")
    parser.add_argument(
        "--warmup_proportion",
        default=0.1,
        type=float,
        help="Proportion of training to perform linear learning rate warmup for. "
        "E.g., 0.1 = 10%% of training.")
    parser.add_argument("--no_cuda",
                        action='store_true',
                        help="Whether not to use CUDA when available")
    parser.add_argument("--local_rank",
                        type=int,
                        default=-1,
                        help="local_rank for distributed training on gpus")
    parser.add_argument('--seed',
                        type=int,
                        default=42,
                        help="random seed for initialization")
    parser.add_argument(
        '--gradient_accumulation_steps',
        type=int,
        default=1,
        help="Number of updates steps to accumulate before performing a backward/update pass.")
    parser.add_argument(
        '--fp16',
        action='store_true',
        help="Whether to use 16-bit float precision instead of 32-bit")
    parser.add_argument(
        '--loss_scale',
        type=float,
        default=0,
        help="Loss scaling to improve fp16 numeric stability. Only used when fp16 set to True.\n"
        "0 (default value): dynamic loss scaling.\n"
        "Positive power of 2: static loss scaling value.\n")
    return parser


def main():
    """Evaluate a three-checkpoint BERT multiple-choice ensemble on ``val.csv``.

    Parses the command line, loads three ``BertForMultipleChoice`` checkpoints
    that share the config found in ``--output_dir``, sums their logits for
    every validation batch, and writes the resulting loss and accuracy to
    ``<output_dir>/eval_results.txt``.

    Raises:
        ValueError: If ``--gradient_accumulation_steps`` is below 1, if
            neither training nor evaluation is requested, or if the
            validation set yields no batches.
    """
    args = _build_arg_parser().parse_args()

    # Honour --no_cuda and fall back to CPU when no GPU is present instead of
    # unconditionally requesting a CUDA device.
    use_cuda = torch.cuda.is_available() and not args.no_cuda
    device = torch.device("cuda" if use_cuda else "cpu")
    n_gpu = torch.cuda.device_count() if use_cuda else 0

    logger.info(
        "device: {} n_gpu: {}, distributed training: {}, 16-bits training: {}".
        format(device, n_gpu, bool(args.local_rank != -1), args.fp16))

    if args.gradient_accumulation_steps < 1:
        raise ValueError(
            "Invalid gradient_accumulation_steps parameter: {}, should be >= 1"
            .format(args.gradient_accumulation_steps))

    args.train_batch_size = args.train_batch_size // args.gradient_accumulation_steps

    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    if n_gpu > 0:
        torch.cuda.manual_seed_all(args.seed)

    if not args.do_train and not args.do_eval:
        raise ValueError(
            "At least one of `do_train` or `do_eval` must be True.")

    tokenizer = BertTokenizer.from_pretrained(args.bert_model,
                                              do_lower_case=args.do_lower_case)

    # Ensemble members: three fine-tuned checkpoints sharing one config.
    checkpoint_paths = (
        "./mrpcckpt/pytorch_model.bin",
        "./mnlickpt/pytorch_model.bin",
        "./sstckpt/pytorch_model.bin",
    )
    output_config_file = os.path.join(args.output_dir, CONFIG_NAME)
    config = BertConfig(output_config_file)
    ensemble = []
    for checkpoint_path in checkpoint_paths:
        member = BertForMultipleChoice(config, num_choices=4)
        # Deserialize on CPU so GPU-saved checkpoints also load on CPU hosts.
        member.load_state_dict(torch.load(checkpoint_path, map_location='cpu'))
        member.to(device)
        member.eval()
        ensemble.append(member)

    # NOTE(review): no process group is initialised in this function, so the
    # `torch.distributed.get_rank()` branch presumably only works when the
    # caller has set up torch.distributed beforehand - confirm.
    if args.do_eval and (args.local_rank == -1 or torch.distributed.get_rank() == 0):
        eval_examples = read_swag_examples(os.path.join(args.data_dir, 'val.csv'),
                                           is_training=True)
        eval_features = convert_examples_to_features(eval_examples, tokenizer,
                                                     args.max_seq_length, True)
        logger.info("***** Running evaluation *****")
        logger.info(" Num examples = %d", len(eval_examples))
        logger.info(" Batch size = %d", args.eval_batch_size)
        all_input_ids = torch.tensor(select_field(eval_features, 'input_ids'), dtype=torch.long)
        all_input_mask = torch.tensor(select_field(eval_features, 'input_mask'), dtype=torch.long)
        all_segment_ids = torch.tensor(select_field(eval_features, 'segment_ids'), dtype=torch.long)
        all_label = torch.tensor([f.label for f in eval_features], dtype=torch.long)
        eval_data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label)
        # Run prediction for full data
        eval_sampler = SequentialSampler(eval_data)
        eval_dataloader = DataLoader(eval_data, sampler=eval_sampler,
                                     batch_size=args.eval_batch_size)

        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        for evalcount, batch in enumerate(tqdm(eval_dataloader, desc="Evaluating")):
            input_ids, input_mask, segment_ids, label_ids = (t.to(device) for t in batch)

            with torch.no_grad():
                # The ensemble prediction is the sum of the members' logits.
                logits = torch.stack([
                    member(input_ids, segment_ids, input_mask)
                    for member in ensemble
                ]).sum(dim=0)
                # Report the loss of the ensemble itself, so that eval_loss
                # and eval_accuracy describe the same predictions.
                tmp_eval_loss = torch.nn.functional.cross_entropy(logits, label_ids)

            logits = logits.detach().cpu().numpy()
            label_ids = label_ids.to('cpu').numpy()
            tmp_eval_accuracy = accuracy(logits, label_ids)

            # Print the batch index whenever `accuracy` reports exactly 1.
            if tmp_eval_accuracy == 1:
                print(evalcount)

            eval_loss += tmp_eval_loss.mean().item()
            eval_accuracy += tmp_eval_accuracy

            nb_eval_examples += input_ids.size(0)
            nb_eval_steps += 1

        if nb_eval_steps == 0:
            raise ValueError(
                "No evaluation examples found in {}".format(
                    os.path.join(args.data_dir, 'val.csv')))

        eval_loss = eval_loss / nb_eval_steps
        eval_accuracy = eval_accuracy / nb_eval_examples

        result = {
            'eval_loss': eval_loss,
            'eval_accuracy': eval_accuracy,
        }

        output_eval_file = os.path.join(args.output_dir, "eval_results.txt")
        with open(output_eval_file, "w") as writer:
            logger.info("***** Eval results *****")
            for key in sorted(result.keys()):
                logger.info(" %s = %s", key, str(result[key]))
                writer.write("%s = %s\n" % (key, str(result[key])))