        if topic_counter >= 0:
            new_data["data"].append(topic_dict)
        elif topic_counter >= -1 * dev_topic_num:
            new_dev_data["data"].append(topic_dict)
        elif topic_counter >= -1 * (dev_topic_num + test_topic_num):
            new_test_data["data"].append(topic_dict)
        else:
            break
        topic_counter -= 1

    logger.info(f"Saving new data to {train_file}")
    save(filename=train_file, obj=new_data)
    logger.info(f"Saving new dev data to {dev_file}")
    save(filename=dev_file, obj=new_dev_data)
    logger.info(f"Saving new test data to {test_file}")
    save(filename=test_file, obj=new_test_data)


if __name__ == "__main__":
    args = get_exp2_data_gen_args()
    log = get_logger(log_dir=args.logging_dir, name="data-gen")
    toy_transformer(in_file=args.raw_train_data,
                    train_file=args.train_data_src,
                    dev_file=args.dev_data_src,
                    test_file=args.test_data_src,
                    train_topic_num=args.train_topic_num,
                    dev_topic_num=args.dev_topic_num,
                    test_topic_num=args.test_topic_num,
                    logger=log)
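# `save` is defined elsewhere in this repo and is only called above. A minimal
# sketch of the behavior the calls appear to assume (JSON serialization); the
# keyword signature matches the usage above, but the body is an assumption:
import json

def save(filename, obj):
    """Serialize `obj` to `filename` as JSON (assumed behavior)."""
    with open(filename, "w") as fh:
        json.dump(obj, fh)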
def main(args):
    # setting up logging
    logger = get_logger(log_dir=args.logging_dir, name="exp3_evaluation")

    # grabbing GPU
    gpu_ids = []
    if torch.cuda.is_available():
        gpu_ids += [gpu_id for gpu_id in range(torch.cuda.device_count())]
        device = torch.device(f'cuda:{gpu_ids[0]}')
        torch.cuda.set_device(device)
    else:
        device = torch.device('cpu')
    logger.info(f"Using device type: {device}")

    # getting word embeddings
    with open(args.word_emb_file, 'r') as fh:
        word_vectors = np.array(json.load(fh))
    word_vectors = torch.from_numpy(word_vectors)

    # loading in the model
    model = classifier(args=args, word_vectors=word_vectors)
    model = nn.DataParallel(model, gpu_ids)
    ckpt_dict = torch.load("./checkpoints/train/exp3_train-34/best.pth.tar",
                           map_location=device)
    model.load_state_dict(ckpt_dict['model_state'])
    model.eval()

    dataset = qcd(data_path=args.dev_feature_file,
                  num_categories=args.num_categories)
    loader = data.DataLoader(dataset,
                             shuffle=True,
                             batch_size=args.batch_size,
                             collate_fn=collate_fn)

    # loading eval_file
    with open(args.dev_eval_file, 'r') as fh:
        gold_dict = json.load(fh)

    all_predicted_indexes = {}
    with torch.no_grad():
        for qw_idxs, ids, topic_ids, lengths in loader:
            # Tensor.to returns a new tensor, so the result must be assigned
            qw_idxs = qw_idxs.to(device)
            ids = ids.to(device)
            topic_ids = topic_ids.to(device)
            lengths = lengths.to(device)
            batch_size = qw_idxs.size(0)
            if batch_size != args.batch_size:
                logger.info('Did not process because did not meet batch_size threshold')
                continue

            res = model(qw_idxs, lengths)
            predicted_indexes = {
                int(idx): int(torch.argmax(i))
                for i, idx in zip(res, ids)
            }
            all_predicted_indexes.update(predicted_indexes)

    logger.info(f"Was able to predict {len(all_predicted_indexes)}/{len(gold_dict)} total examples.")

    correct = 0
    total_eval = 0
    for i, pred in all_predicted_indexes.items():
        if i in gold_dict:
            if pred == gold_dict[i]:
                correct += 1
            total_eval += 1
    logger.info(f"Got {correct}/{total_eval} correct")
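# `collate_fn` and the `qcd` dataset are imported from elsewhere. Based on how
# batches are unpacked above -- (qw_idxs, ids, topic_ids, lengths) -- the
# collate function presumably pads variable-length question index sequences to
# a common length. A hypothetical sketch, assuming each dataset example is a
# (question_idxs, id, topic_id) tuple; the names and layout are assumptions:
import torch

def collate_fn_sketch(examples):
    """Pad question word indices to the longest question in the batch."""
    qw_idxs, ids, topic_ids = zip(*examples)
    lengths = torch.tensor([len(q) for q in qw_idxs])
    padded = torch.zeros(len(qw_idxs), int(lengths.max()), dtype=torch.int64)
    for i, q in enumerate(qw_idxs):
        padded[i, :len(q)] = torch.as_tensor(q, dtype=torch.int64)
    return padded, torch.tensor(ids), torch.tensor(topic_ids), lengths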
def main(args):
    # Set up logging and devices
    name = "train_exp2"
    args.save_dir = util.get_save_dir(args.logging_dir, name, training=True)
    log = get_logger(args.save_dir, name)
    tbx = SummaryWriter(args.save_dir)
    device, gpu_ids = util.get_available_devices()
    log.info(f"Args: {dumps(vars(args), indent=4, sort_keys=True)}")
    args.batch_size *= max(1, len(gpu_ids))

    # Set random seed
    log.info(f"Using random seed {args.random_seed}...")
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)

    # Get embeddings
    log.info(f"Loading embeddings from {args.word_emb_file}...")
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info("Building model...")
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, gpu_ids)
    if args.load_path:
        log.info(f"Loading checkpoint from {args.load_path}...")
        model, step = util.load_model(model, args.load_path, gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.learning_rate,
                               weight_decay=args.learning_rate_decay)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    scheduler = sched.ReduceLROnPlateau(optimizer=optimizer,
                                        mode="min",
                                        factor=0.1,
                                        patience=2,
                                        verbose=True,
                                        cooldown=0,
                                        min_lr=0.0005)

    steps_till_eval = args.eval_steps
    for epoch in range(args.num_epochs):
        log.info(f"Starting epoch {epoch}...")
        for i in range(args.num_train_chunks):
            # Get data loader
            train_rec_file = f"{args.train_record_file_exp2}_{i}.npz"
            log.info(f'Building dataset from {train_rec_file} ...')
            train_dataset = SQuAD(train_rec_file,
                                  args.exp2_train_topic_contexts,
                                  use_v2=True)
            train_loader = data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers,
                                           collate_fn=collate_fn)

            # Train
            log.info('Training...')
            # torch.set_num_threads(7)
            with torch.enable_grad(), tqdm(total=len(train_loader.dataset)) as progress_bar:
                for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                    # Setup for forward
                    cw_idxs = cw_idxs.to(device)
                    qw_idxs = qw_idxs.to(device)
                    batch_size = qw_idxs.size(0)
                    optimizer.zero_grad()

                    # Forward
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                    y1, y2 = y1.to(device), y2.to(device)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                    loss_val = loss.item()

                    # Backward
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                    optimizer.step()
                    # ReduceLROnPlateau expects a metric to monitor, not a step count
                    scheduler.step(loss_val)
                    ema(model, step // batch_size)

                    # Log info
                    step += batch_size
                    progress_bar.update(batch_size)
                    progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                    tbx.add_scalar('train/NLL', loss_val, step)
                    tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                    steps_till_eval -= batch_size
                    if steps_till_eval <= 0:
                        steps_till_eval = args.eval_steps

                        # Evaluate and save checkpoint
                        log.info(f"Evaluating at step {step}...")
                        ema.assign(model)
                        # Accumulators must live outside the chunk loop, or each
                        # chunk wipes out the previous chunk's results
                        all_pred_dicts = {}
                        all_results = OrderedDict()
                        for j in range(args.num_dev_chunks):
                            # Get data loader
                            dev_rec_file = f"{args.dev_record_file_exp2}_{j}.npz"
                            log.info(f'Building evaluating dataset from {dev_rec_file} ...')
                            dev_dataset = SQuAD(dev_rec_file,
                                                args.exp2_dev_topic_contexts,
                                                use_v2=True)
                            dev_loader = data.DataLoader(dev_dataset,
                                                         batch_size=args.batch_size,
                                                         shuffle=True,
                                                         num_workers=args.num_workers,
                                                         collate_fn=collate_fn)
                            results, pred_dict = evaluate(model, dev_loader, device,
                                                          args.dev_eval_file,
                                                          args.max_ans_len,
                                                          use_squad_v2=True)
                            all_results.update(results)
                            all_pred_dicts.update(pred_dict)

                            del dev_dataset
                            del dev_loader
                            del results
                            del pred_dict
                            torch.cuda.empty_cache()

                        saver.save(step, model, all_results[args.metric_name], device)
                        ema.resume(model)

                        # Log to console
                        results_str = ', '.join(f'{k}: {v:05.2f}'
                                                for k, v in all_results.items())
                        log.info(f"Dev {results_str}")

                        # Log to TensorBoard
                        log.info('Visualizing in TensorBoard...')
                        for k, v in all_results.items():
                            tbx.add_scalar(f"dev/{k}", v, step)
                        util.visualize(tbx,
                                       pred_dict=all_pred_dicts,
                                       eval_path=args.dev_eval_file,
                                       step=step,
                                       split='dev',
                                       num_visuals=args.num_visuals)
                        torch.cuda.empty_cache()

            del train_dataset
            del train_loader
            torch.cuda.empty_cache()
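# Both training scripts rely on the same three-part EMA contract: update the
# shadow weights every step (`ema(model, ...)`), swap them in for evaluation
# (`ema.assign`), and restore the raw weights afterwards (`ema.resume`). A
# minimal sketch of that contract; not necessarily util.EMA's exact code:
class EMASketch:
    """Exponential moving average of model parameters."""

    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {}
        self.original = {}
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data.clone()

    def __call__(self, model, num_updates):
        # Ramp the decay up early so the average warms up quickly
        decay = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = \
                    (1.0 - decay) * param.data + decay * self.shadow[name]

    def assign(self, model):
        # Swap the averaged weights in (e.g., for evaluation)
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.original[name] = param.data.clone()
                param.data = self.shadow[name]

    def resume(self, model):
        # Restore the raw training weights
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data = self.original[name]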
def main(args):
    exp_name = "exp3_train"
    # setting up logging
    log = get_logger(args.logging_dir, exp_name)
    # setting a save directory
    save_dir = get_save_dir("./checkpoints", exp_name, training=True, id_max=200)
    # setting up tensorboard
    tbx = SummaryWriter(save_dir)
    # setting up saver
    saver = CheckpointSaver(save_dir=save_dir,
                            max_checkpoints=args.max_checkpoints,
                            metric_name="BCELoss",
                            log=log)

    # setting the random seed
    log.info(f"Using random seed {args.random_seed}...")
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)

    # grabbing a gpu if it is available
    gpu_ids = []
    if torch.cuda.is_available():
        gpu_ids += [gpu_id for gpu_id in range(torch.cuda.device_count())]
        device = torch.device(f'cuda:{gpu_ids[0]}')
        torch.cuda.set_device(device)
    else:
        device = torch.device('cpu')
    log.info(f"Using device type: {device}")

    # getting word embeddings
    with open(args.word_emb_file, 'r') as fh:
        word_vectors = np.array(json.load(fh))
    word_vectors = torch.from_numpy(word_vectors)

    # setting up the datasets
    train_dataset = qcd(data_path=args.train_feature_file,
                        num_categories=args.num_categories)
    train_loader = data.DataLoader(train_dataset,
                                   shuffle=True,
                                   batch_size=args.batch_size,
                                   collate_fn=collate_fn)
    dev_dataset = qcd(data_path=args.dev_feature_file,
                      num_categories=args.num_categories)
    dev_loader = data.DataLoader(dev_dataset,
                                 shuffle=False,
                                 batch_size=args.batch_size,
                                 collate_fn=collate_fn)

    # setting up the model
    model = classifier(args=args, word_vectors=word_vectors)
    model = nn.DataParallel(model, gpu_ids)
    model.to(device)
    model.train()
    ema = EMA(model, args.ema_decay)

    # optimizer = optim.Adadelta(model.parameters(), args.learning_rate,
    #                            weight_decay=args.learning_rate_decay)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    optimizer = optim.SGD(model.parameters(), lr=args.learning_rate)

    step = 0
    steps_till_eval = args.eval_steps
    log.info(f"Vars: {json.dumps(vars(args), indent=4, sort_keys=True)}")

    for epoch in range(args.num_epochs):
        log.info(f"Starting epoch {epoch+1}")
        with torch.enable_grad(), tqdm(total=len(train_loader.dataset)) as progress_bar:
            for qw_idxs, ids, topic_ids, lengths in train_loader:
                qw_idxs = qw_idxs.to(device)
                batch_size = qw_idxs.size(0)
                if batch_size != args.batch_size:
                    log.info('Did not process because did not meet batch_size threshold')
                    continue
                topic_ids = topic_ids.to(device)
                lengths = lengths.to(device)
                optimizer.zero_grad()

                # targets = [torch.zeros(args.num_categories) for _ in topic_ids]
                # targets = torch.stack(targets).to(device)
                # for tid, t in zip(topic_ids, targets):
                #     t[tid] = 1
                res = model(qw_idxs, lengths)

                # nn.CrossEntropyLoss, nn.BCELoss, and nn.BCEWithLogitsLoss were
                # also candidates; NLL over log-softmax is equivalent to
                # cross-entropy on logits and works with integer class targets,
                # which is why it is used here (see the note after this script).
                loss_output = F.nll_loss(F.log_softmax(res, dim=1), topic_ids)
                loss_output.backward()
                loss_val = loss_output.item()
                optimizer.step()
                # scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                step += batch_size
                steps_till_eval -= batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(NLL=loss_val, Epoch=epoch + 1)

                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps
                    log.info(f"Evaluating at step: {step}")
                    ema.assign(model)
                    perc_correct, vis_examples, avg_loss = evaluate(
                        model, dev_loader, device, args.dev_eval_file)
                    log.info(
                        f"Out-of-sample NLL loss: {avg_loss} at step {step} in "
                        f"epoch {epoch+1}, resulting in {perc_correct} percent correct")
                    tbx.add_scalar("NLL Loss", loss_val, step)
                    tbx.add_scalar("Percent Accuracy", perc_correct, step)
                    for i, example in enumerate(vis_examples):
                        tbl_fmt = (f'- **Question:** {example["question"]}\n'
                                   f'- **Topic ID:** {example["answer"]}\n'
                                   f'- **Prediction:** {example["prediction"]}')
                        tbx.add_text(tag=f'{i}_of_{len(vis_examples)}',
                                     text_string=tbl_fmt,
                                     global_step=step)
                    saver.save(model=model,
                               step=step,
                               epoch=epoch,
                               metric_val=loss_val,
                               device=device)
                    ema.resume(model)
                    model.to(device)
                    model.train()
                    log.info(f"Resuming training on device {device}")
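# A note on the loss above: F.nll_loss(F.log_softmax(res, dim=1), topic_ids)
# is mathematically identical to cross-entropy on raw logits with integer
# class targets, which is why it works here. nn.BCELoss/nn.BCEWithLogitsLoss
# instead expect float (e.g., one-hot) targets and treat each category as an
# independent binary decision, so they fail when handed class indices.
# A quick self-contained equivalence check:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 10)           # batch of 4 questions, 10 topics
labels = torch.randint(0, 10, (4,))   # integer topic ids
nll = F.nll_loss(F.log_softmax(logits, dim=1), labels)
ce = F.cross_entropy(logits, labels)
assert torch.allclose(nll, ce)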
"answer_start"] answer_dict["text"] = answer["text"] qas_dict["answers"].append(answer_dict) paragraph["qas"].append(qas_dict) topic_dict["paragraphs"].append(paragraph) new_data["data"].append(topic_dict) logger.info(f"Processed {counter} question, answer pairs") logger.info(f"Saving to {out_file}") save(filename=out_file, obj=new_data) if __name__ == "__main__": args = get_exp1_transform_args() logger = get_logger(log_dir=args.logging_dir, name="exp_1 data transformer") # standard sanity check to run every time c, b = get_new_context("test", [ "test1", "test2", "test3", "test4", "test5", "test6", "test7", "test8", "test9" ]) test_val = "test" == c[b:b + 4] if test_val != True: raise ValueError('The get_new_context function is not working') if args.datasplit == "train" or args.datasplit == "all": exp_1_transformer(args.train_data_src, args.train_data_exp1, logger) if args.datasplit == "dev" or args.datasplit == "all": exp_1_transformer(args.dev_data_src, args.dev_data_exp1, logger) if args.datasplit == "test" or args.datasplit == "all":
                              out_file=args.dev_record_file_exp1,
                              word2idx_dict=word2idx_dict,
                              char2idx_dict=char2idx_dict,
                              is_test=False)
    save(args.dev_meta_file, dev_meta)
    save(args.dev_eval_file, dev_eval)
    del dev_meta
    del dev_eval

    # test_examples, test_eval = process_file(filename=args.test_data_exp1,
    #                                         data_type="test",
    #                                         word_counter=word_counter,
    #                                         char_counter=char_counter,
    #                                         logger=logger)
    # test_meta = build_features(args=args, examples=test_examples, data_type="test",
    #                            out_file=args.test_record_file_exp1,
    #                            word2idx_dict=word2idx_dict,
    #                            char2idx_dict=char2idx_dict, is_test=True)
    # save(args.test_meta_file, test_meta)
    # save(args.test_eval_file, test_eval)

    save(args.word2idx_file, word2idx_dict)
    save(args.char2idx_file, char2idx_dict)


if __name__ == '__main__':
    nlp = spacy.blank("en")
    args = get_exp1_setup_args()
    logger = get_logger(log_dir=args.logging_dir, name="exp1_setup")
    pre_process(args=args, logger=logger)
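# build_features (earlier in this file) looks tokens up in word2idx_dict when
# writing the .npz records. SQuAD preprocessors in this family commonly reserve
# index 0 for padding and 1 for OOV; a hedged sketch of that lookup (the helper
# name and exact casing-fallback order are assumptions):
def word_to_idx_sketch(word, word2idx_dict):
    """Try a few casings before falling back to the OOV index."""
    for w in (word, word.lower(), word.capitalize(), word.upper()):
        if w in word2idx_dict:
            return word2idx_dict[w]
    return 1  # assumed OOV index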
def main(args):
    args.save_dir = util.get_save_dir(args.save_dir, "exp1_training", training=False)
    log = get_logger(args.logging_dir, "exp1_training")
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    dataset = SQuAD(args.test_record_file, True)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.datasplit} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}   # Predictions for submission
    with open(args.test_eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len, True)

            # Log info
            progress_bar.update(batch_size)
            # Not using the unlabeled test set
            # if args.split != 'test':
            #     # No labels for the test set, so NLL would be invalid
            #     progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict,
                                                      ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      True)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for test set, since it does not come with labels)
    results = util.eval_dicts(gold_dict, pred_dict, True)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    # Log to console
    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
    log.info(f'{args.datasplit} {results_str}')

    # Log to TensorBoard
    tbx = SummaryWriter(args.save_dir)
    util.visualize(tbx,
                   pred_dict=pred_dict,
                   eval_path=args.test_eval_file,
                   step=0,
                   split=args.datasplit,
                   num_visuals=args.num_visuals)
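# util.discretize converts the start/end probability vectors into concrete
# answer spans. The contract assumed by the call above: choose (start, end)
# with end >= start and span length below max_ans_len, maximizing
# p_start * p_end (the trailing True flags no-answer handling, omitted here).
# A minimal sketch of that span search:
import torch

def discretize_sketch(p_start, p_end, max_len):
    """p_start, p_end: (batch, seq_len) probabilities; returns best spans."""
    joint = p_start.unsqueeze(2) * p_end.unsqueeze(1)  # (batch, L, L)
    joint = torch.triu(joint)                          # keep end >= start
    joint = torch.tril(joint, diagonal=max_len - 1)    # keep end - start < max_len
    starts = joint.max(dim=2)[0].argmax(dim=1)         # best start per example
    ends = joint.max(dim=1)[0].argmax(dim=1)           # best end per example
    return starts, ends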