def train_entry(config):
    from models import BiDAF

    with open(config.word_emb_file, "rb") as fh:
        word_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.char_emb_file, "rb") as fh:
        char_mat = np.array(json.load(fh), dtype=np.float32)
    with open(config.dev_eval_file, "r") as fh:
        dev_eval_file = json.load(fh)

    print("Building model...")
    train_dataset = get_loader(config.train_record_file, config.batch_size)
    dev_dataset = get_loader(config.dev_record_file, config.batch_size)

    c_vocab_size, c_emb_size = char_mat.shape
    model = BiDAF(word_mat,
                  w_embedding_size=300,
                  c_embeding_size=c_emb_size,  # [sic] keyword spelled this way in models.BiDAF
                  c_vocab_size=c_vocab_size,
                  hidden_size=100,
                  drop_prob=0.2).to(device)

    if config.pretrained:
        print("Loading pre-trained model...")
        state_dict = torch.load(config.save_path, map_location="cpu")
        model.load_state_dict(state_dict)

    ema = EMA(config.decay)
    for name, param in model.named_parameters():
        if param.requires_grad:
            ema.register(name, param.data)

    parameters = filter(lambda param: param.requires_grad, model.parameters())
    optimizer = optim.Adadelta(lr=0.5, params=parameters)

    best_f1 = 0
    best_em = 0
    patience = 0
    for epoch in range(config.num_epoch):  # renamed from `iter`, which shadows the builtin
        train(model, optimizer, train_dataset, dev_dataset, dev_eval_file, epoch, ema)
        ema.assign(model)
        metrics = test(model, dev_dataset, dev_eval_file, (epoch + 1) * len(train_dataset))
        dev_f1 = metrics["f1"]
        dev_em = metrics["exact_match"]
        if dev_f1 < best_f1 and dev_em < best_em:
            patience += 1
            if patience > config.early_stop:
                break
        else:
            patience = 0
            best_f1 = max(best_f1, dev_f1)
            best_em = max(best_em, dev_em)
            fn = os.path.join(config.save_dir,
                              "model_{}_{:.2f}_{:.2f}.pt".format(epoch, best_f1, best_em))
            torch.save(model.state_dict(), fn)
        ema.resume(model)
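# The EMA helper used above (register/assign/resume) is not shown in this
# section. A minimal sketch of what it is assumed to do, consistent with the
# update rule quoted in a comment further below
# (new_average = (1.0 - decay) * param.data + decay * self.shadow[name]);
# the real class in the repo may differ in detail.
class EMA:
    """Exponential moving average of model parameters (sketch)."""

    def __init__(self, decay):
        self.decay = decay
        self.shadow = {}  # EMA copies of the parameters
        self.backup = {}  # training weights stashed during evaluation

    def register(self, name, val):
        self.shadow[name] = val.clone()

    def __call__(self, model, num_updates):
        # Update the shadow weights after each optimizer step.
        for name, param in model.named_parameters():
            if param.requires_grad:
                new_avg = (1.0 - self.decay) * param.data + self.decay * self.shadow[name]
                self.shadow[name] = new_avg.clone()

    def assign(self, model):
        # Swap the EMA weights in for evaluation.
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.backup[name] = param.data
                param.data = self.shadow[name]

    def resume(self, model):
        # Restore the training weights after evaluation.
        for name, param in model.named_parameters():
            if param.requires_grad:
                param.data = self.backup[name]
        self.backup = {}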
def main(args):
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Prepare BiDAF model (must already be trained)
    log.info('Building BiDAF model (should be pretrained)')
    bidaf_model = BiDAF(word_vectors=word_vectors,  # TODO: these word vectors shouldn't matter,
                        hidden_size=args.hidden_size)  # since they will be loaded in during load_model?
    # drop_prob=args.drop_prob)  # no drop probability since we are not training
    # bidaf_model = nn.DataParallel(bidaf_model, args.gpu_ids)
    # log.info(f'Loading checkpoint from {args.load_path}...')
    # bidaf_model = util.load_model(bidaf_model, args.load_path, args.gpu_ids,
    #                               return_step=False)  # don't need step since we aren't training
    # bidaf_model = bidaf_model.to(device)
    bidaf_model.eval()  # eval mode only (vs. train)

    # Set up the Paraphraser model
    # ema = util.EMA(bidaf_model, args.ema_decay)

    # Get saver
    # saver = util.CheckpointSaver(args.save_dir,
    #                              max_checkpoints=args.max_checkpoints,
    #                              metric_name=args.metric_name,
    #                              maximize_metric=args.maximize_metric,
    #                              log=log)

    # Get optimizer and scheduler
    # optimizer = optim.Adadelta(model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    a = np.array([[5, 0, 3, 0],
                  [4, 0, 1, 0]])
    c_idx = torch.from_numpy(a).long()
    pp(c_idx[c_idx.nonzero()])
def predict(args, cw_idxs, qn_idxs):
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info('Loading checkpoint from {}...'.format(args.load_path))
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    y_pred = model(cw_idxs, qn_idxs)
    return y_pred
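# A hypothetical smoke test for predict(). `get_args()` is assumed to be the
# repo's argument parser (not shown here), index 0 is assumed to be the pad
# id, and a BiDAF forward pass is assumed to return a pair of
# log-probability tensors over context positions.
if __name__ == '__main__':
    args = get_args()  # hypothetical: the repo's argument parser
    cw_idxs = torch.randint(1, 1000, (2, 50))  # dummy context word indices
    qn_idxs = torch.randint(1, 1000, (2, 10))  # dummy question word indices
    log_p1, log_p2 = predict(args, cw_idxs, qn_idxs)
    print(log_p1.shape, log_p2.shape)  # expect (2, 50) each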
def get_model(word_vectors, char_vectors, log, args):
    if args.model_name == "BiDAF":
        model = BiDAF_char(word_vectors=word_vectors,
                           char_vectors=char_vectors,
                           char_len=args.char_len,
                           hidden_size=args.hidden_size,
                           drop_prob=args.drop_prob)
    elif args.model_name == "BiDAF_nochar":
        model = BiDAF(word_vectors=word_vectors,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif args.model_name == "KnowGQA":
        model = KnowGQA(word_vectors=word_vectors,
                        char_vectors=char_vectors,
                        char_len=args.char_len,
                        hidden_size=args.hidden_size,
                        h=args.h,
                        drop_prob=args.drop_prob)
    elif args.model_name == "QANet":
        model = QANet(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      char_len=args.char_len,
                      hidden_size=args.hidden_size,
                      h=args.h,
                      drop_prob=args.drop_prob)
    else:
        raise ValueError("Model name doesn't exist.")

    model = nn.DataParallel(model, args.gpu_ids)
    '''
    for p in model.parameters():
        if p.dim() > 1:
            nn.init.xavier_uniform_(p)
    '''
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    return model, step
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    ch_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  ch_vectors=ch_vectors,
                  hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}   # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            cc_idxs = cc_idxs.to(device)
            qc_idxs = qc_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict,
                                                      ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for the test set, which has no labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        results_list = [('NLL', nll_meter.avg),
                        ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
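# util.discretize above turns the start/end probability distributions into
# concrete span predictions. A loop-based sketch of the assumed behavior:
# pick the (start, end) pair maximizing p_start[i] * p_end[j] subject to
# 0 <= j - i < max_len. The course utility is vectorized and also handles
# the SQuAD 2.0 no-answer case (index 0), which is omitted here.
import torch

def discretize_sketch(p_start, p_end, max_len=15):
    batch_size, c_len = p_start.size()
    starts = torch.zeros(batch_size, dtype=torch.long)
    ends = torch.zeros(batch_size, dtype=torch.long)
    for b in range(batch_size):
        best = -1.0
        for i in range(c_len):
            # Only consider spans no longer than max_len tokens.
            for j in range(i, min(i + max_len, c_len)):
                score = (p_start[b, i] * p_end[b, j]).item()
                if score > best:
                    best, starts[b], ends[b] = score, i, j
    return starts, ends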
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)  # writes event files in the log dir for TensorBoard
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))  # with one GPU: batch_size=64; adjust as needed

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:  # default=None
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)  # ema_decay = 0.999
    # EMA core: new_average = (1.0 - decay) * param.data + decay * self.shadow[name]

    # Get saver (metric_name: NLL, EM, or F1)
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,  # max_checkpoints = 5
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)  # lr: default=0.5; l2_wd: default=0
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)  # train_record_file = './data/train.npz'
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,  # 64
                                   shuffle=True,  # sampler = RandomSampler(dataset); batch_sampler = BatchSampler(sampler, batch_size, drop_last)
                                   num_workers=args.num_workers,  # 4
                                   collate_fn=collate_fn)  # merges a list of samples to form a mini-batch
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)  # dev_record_file = './data/dev.npz'
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)  # merges examples of different lengths by padding to the batch maximum
    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps  # 50000
    epoch = step // len(train_dataset)  # e.g. len(train_dataset) = 7 -> epoch = 0
    while epoch != args.num_epochs:  # 30
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)  # 64
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)  # max_grad_norm: default=5.0
                optimizer.step()  # perform one optimizer step
                scheduler.step(step // batch_size)  # train: step=0
                ema(model, step // batch_size)  # def __call__(self, model, num_updates)

                # Log info
                step += batch_size  # step starts at 0; batch_size = 64
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)  # add scalar data to the summary
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps  # 50000

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,  # './data/dev_eval.json'
                                                  args.max_ans_len,    # 15
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
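# The collate_fn passed to the DataLoaders above is described as merging
# examples of different lengths by padding to the batch maximum. A sketch of
# that assumed behavior for the 1-D word-index tensors; the repo's version
# also pads the 2-D character tensors (cc_idxs, qc_idxs) analogously.
import torch

def collate_fn_sketch(examples, pad_idx=0):
    def merge_1d(tensors):
        # Pad each 1-D index tensor to the batch maximum, then stack.
        lengths = [t.size(0) for t in tensors]
        padded = torch.full((len(tensors), max(lengths)), pad_idx, dtype=torch.long)
        for i, t in enumerate(tensors):
            padded[i, :t.size(0)] = t
        return padded

    cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids = zip(*examples)
    # Char tensors omitted here for brevity; scalars are collected into 1-D tensors.
    return (merge_1d(cw_idxs),
            merge_1d(qw_idxs),
            torch.tensor([int(v) for v in y1]),
            torch.tensor([int(v) for v in y2]),
            torch.tensor([int(v) for v in ids]))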
def main(args):
    # Set up logging and devices
    name = "train_exp2"
    args.save_dir = util.get_save_dir(args.logging_dir, name, training=True)
    log = get_logger(args.save_dir, name)
    tbx = SummaryWriter(args.save_dir)
    device, gpu_ids = util.get_available_devices()
    log.info(f"Args: {dumps(vars(args), indent=4, sort_keys=True)}")
    args.batch_size *= max(1, len(gpu_ids))

    # Set random seed
    log.info(f"Using random seed {args.random_seed}...")
    random.seed(args.random_seed)
    np.random.seed(args.random_seed)
    torch.manual_seed(args.random_seed)
    torch.cuda.manual_seed_all(args.random_seed)

    # Get embeddings
    log.info(f"Loading embeddings from {args.word_emb_file}...")
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info("Building model...")
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, gpu_ids)
    if args.load_path:
        log.info(f"Loading checkpoint from {args.load_path}...")
        model, step = util.load_model(model, args.load_path, gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.learning_rate,
                               weight_decay=args.learning_rate_decay)
    # scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR
    scheduler = sched.ReduceLROnPlateau(optimizer=optimizer,
                                        mode="min",
                                        factor=0.1,
                                        patience=2,
                                        verbose=True,
                                        cooldown=0,  # a comma was missing here in the original
                                        min_lr=0.0005)

    for epoch in range(args.num_epochs):
        log.info(f"Starting epoch {epoch}...")
        for i in range(args.num_train_chunks):
            # Get data loader
            train_rec_file = f"{args.train_record_file_exp2}_{i}.npz"
            log.info(f'Building dataset from {train_rec_file} ...')
            train_dataset = SQuAD(train_rec_file,
                                  args.exp2_train_topic_contexts,
                                  use_v2=True)
            train_loader = data.DataLoader(train_dataset,
                                           batch_size=args.batch_size,
                                           shuffle=True,
                                           num_workers=args.num_workers,
                                           collate_fn=collate_fn)

            # Train
            log.info('Training...')
            steps_till_eval = args.eval_steps
            # torch.set_num_threads(7)
            with torch.enable_grad(), tqdm(total=len(train_loader.dataset)) as progress_bar:
                for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                    # Setup for forward
                    cw_idxs = cw_idxs.to(device)
                    qw_idxs = qw_idxs.to(device)
                    batch_size = qw_idxs.size(0)
                    optimizer.zero_grad()

                    # Forward
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                    y1, y2 = y1.to(device), y2.to(device)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                    loss_val = loss.item()

                    # Backward
                    loss.backward()
                    nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                    optimizer.step()
                    # Note: ReduceLROnPlateau.step() expects a monitored metric,
                    # not a step count -- see the sketch after this function.
                    scheduler.step(step // batch_size)
                    ema(model, step // batch_size)

                    # Log info
                    step += batch_size
                    progress_bar.update(batch_size)
                    progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                    tbx.add_scalar('train/NLL', loss_val, step)
                    tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                    steps_till_eval -= batch_size
                    if steps_till_eval <= 0:
                        steps_till_eval = args.eval_steps

                        # Evaluate and save checkpoint
                        log.info(f"Evaluating at step {step}...")
                        ema.assign(model)
                        # Accumulators moved out of the chunk loop so results
                        # from all dev chunks are kept, not just the last one.
                        all_pred_dicts = {}
                        all_results = OrderedDict()
                        for j in range(args.num_dev_chunks):
                            # Get data loader
                            dev_rec_file = f"{args.dev_record_file_exp2}_{j}.npz"
                            log.info(f'Building evaluating dataset from {dev_rec_file} ...')
                            dev_dataset = SQuAD(dev_rec_file,
                                                args.exp2_dev_topic_contexts,
                                                use_v2=True)
                            dev_loader = data.DataLoader(dev_dataset,
                                                         batch_size=args.batch_size,
                                                         shuffle=True,
                                                         num_workers=args.num_workers,
                                                         collate_fn=collate_fn)
                            results, pred_dict = evaluate(model, dev_loader, device,
                                                          args.dev_eval_file,
                                                          args.max_ans_len,
                                                          use_squad_v2=True)
                            all_results.update(results)
                            all_pred_dicts.update(pred_dict)

                            del dev_dataset
                            del dev_loader
                            del results
                            del pred_dict
                            torch.cuda.empty_cache()
                        saver.save(step, model, all_results[args.metric_name], device)
                        ema.resume(model)

                        # Log to console
                        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in all_results.items())
                        log.info(f"Dev {results_str}")

                        # Log to TensorBoard
                        log.info('Visualizing in TensorBoard...')
                        for k, v in all_results.items():
                            tbx.add_scalar(f"dev/{k}", v, step)
                        util.visualize(tbx,
                                       pred_dict=all_pred_dicts,
                                       eval_path=args.dev_eval_file,
                                       step=step,
                                       split='dev',
                                       num_visuals=args.num_visuals)
                        torch.cuda.empty_cache()
            del train_dataset
            del train_loader
            torch.cuda.empty_cache()
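# As noted in the function above, ReduceLROnPlateau tracks a metric: passing
# step // batch_size makes the ever-growing counter look like a worsening
# "min"-mode metric, so the LR decays on a fixed cadence. A runnable sketch of
# the conventional pattern, stepping on the dev NLL instead:
import torch
from torch import optim
from torch.optim import lr_scheduler as sched

params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = optim.Adadelta(params, lr=0.5)
scheduler = sched.ReduceLROnPlateau(optimizer, mode="min", factor=0.1,
                                    patience=2, cooldown=0, min_lr=0.0005)

for dev_nll in [3.2, 3.0, 3.1, 3.1, 3.1]:  # pretend dev NLL per evaluation
    scheduler.step(dev_nll)  # pass the monitored metric, not step // batch_size
    print(optimizer.param_groups[0]['lr'])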
def main(data, flags):
    # Set up logging and devices
    log_dir = data.logging_dir
    log = util.get_logger(log_dir, "toy")
    tbx = SummaryWriter(data.logging_dir)
    device, data.gpu_ids = util.get_available_devices()
    log.info('Config: {}'.format(dumps(vars(data), indent=4, sort_keys=True)))
    data.batch_size *= max(1, len(data.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(data.random_seed))
    random.seed(data.random_seed)
    np.random.seed(data.random_seed)
    torch.manual_seed(data.random_seed)
    torch.cuda.manual_seed_all(data.random_seed)

    if flags[1] == "toy":
        word_emb_file = data.toy_word_emb_file
        training_data = data.toy_record_file_exp3
        test_data = data.dev_record_file_exp3
        eval_file = data.toy_eval_exp3
    elif flags[1] == "train":
        word_emb_file = data.word_emb_file
        training_data = data.train_record_file_exp3
        test_data = data.dev_record_file_exp3
        eval_file = data.train_eval_exp3
    elif flags[1] == "dev":
        word_emb_file = data.word_emb_file
        training_data = data.dev_record_file_exp3
        test_data = data.toy_record_file_exp3
        eval_file = data.dev_eval_exp3

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=data.hidden_size,
                  drop_prob=data.drop_prob)
    model = nn.DataParallel(model, data.gpu_ids)
    if data.load_path:
        log.info('Loading checkpoint from {}...'.format(data.load_path))
        model, step = util.load_model(model, data.load_path, data.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, data.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(data.logging_dir,
                                 max_checkpoints=10,
                                 metric_name=data.metric_name,
                                 maximize_metric=data.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), data.learning_rate,
                               weight_decay=data.learning_weight_decay)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    # np.load(data.toy_record_file_exp3)
    train_dataset = SQuAD3(training_data, use_v2=True)
    train_loader = torchdata.DataLoader(train_dataset,
                                        batch_size=data.batch_size,
                                        shuffle=True,
                                        num_workers=data.num_workers,
                                        collate_fn=collate_fn)
    test_dataset = SQuAD3(test_data, use_v2=True)
    test_loader = torchdata.DataLoader(test_dataset,
                                       batch_size=data.batch_size,
                                       shuffle=False,
                                       num_workers=data.num_workers,
                                       collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = data.eval_steps
    epoch = step // len(test_dataset)
    while epoch != data.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward (with debug logging of input sizes)
                log.info("cw_idxs length: {}".format(len(cw_idxs)))
                log.info("qw_idxs length: {}".format(len(qw_idxs)))
                log.info("cw_idxs size: {}".format(sys.getsizeof(cw_idxs)))
                log.info("qw_idxs size: {}".format(sys.getsizeof(qw_idxs)))
                log.info("cw_idxs shape: {}".format(cw_idxs.shape))
                log.info("qw_idxs shape: {}".format(qw_idxs.shape))
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), data.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('toy/NLL', loss_val, step)
                tbx.add_scalar('toy/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = data.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, test_loader, device,
                                                  eval_path=eval_file,
                                                  max_len=sys.maxsize,
                                                  use_squad_v2=True)
                    saver.save(step, model, results[data.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=data.num_visuals)
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))
    max_len = 10

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    ch_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  ch_vectors=ch_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward: log_p holds one start-position distribution per
                # candidate answer length, shape (batch_size, c_len, max_len)
                log_p = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                # print("ckpt 1")
                ans_lens = y2 - y1
                loss = 0
                for i in range(max_len):
                    # mask is 1 where the gold answer length equals i; other
                    # examples are pointed at index 0 by the y1 * mask product
                    mask = ((torch.ones_like(y1) * i) == ans_lens).type(torch.cuda.LongTensor)
                    y = y1 * mask
                    loss += F.nll_loss(log_p[:, :, i], y)
                # print("ckpt 2")
                loss_val = loss.item()
                # print("ckpt 3")

                # Backward
                loss.backward()
                # print("ckpt 4")
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                # print("ckpt 5")
                optimizer.step()
                # print("ckpt 6")
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)
                # print("ckpt 7")

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                if step % (50 * batch_size) == 0:
                    print(loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                # print("ckpt 8")
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed, so every experiment generates the same data
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    print(word_vectors.size())
    print(char_vectors.size())

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader. Each example is a tuple of:
    #   context_idxs (context_len,): indices of the words in the context
    #   context_char_idxs (context_len, max_word_len): indices of the characters in the context
    #   question_idxs (question_len,): indices of the words in the question
    #   question_char_idxs (question_len, max_word_len): indices of the characters in the question
    #   y1: answer start index (-1 if no answer)
    #   y2: answer end index (-1 if no answer)
    #   id: ID of the example
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                # cw_idxs: word indices in the context; cc_idxs: character indices in the context
                # qw_idxs: word indices in the query;   qc_idxs: character indices in the query
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                # L(theta) = -1/N * sum(log P1(y_i,1) + log P2(y_i,2))
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
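# The evaluate() helper called from the training loops is not shown in this
# section. A hedged sketch reconstructed from its call sites (arguments:
# model, data_loader, device, eval_file, max_len, use_squad_v2; returns: a
# metrics dict and an {id: answer_text} dict); util.discretize,
# util.convert_tokens, and util.eval_dicts are assumed from the repo.
import json
from collections import OrderedDict

import torch
import torch.nn.functional as F

def evaluate_sketch(model, data_loader, device, eval_file, max_len, use_squad_v2):
    nll_meter = util.AverageMeter()
    pred_dict = {}
    with open(eval_file, 'r') as fh:
        gold_dict = json.load(fh)
    model.eval()
    with torch.no_grad():
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            cw_idxs, qw_idxs = cw_idxs.to(device), qw_idxs.to(device)
            log_p1, log_p2 = model(cw_idxs, qw_idxs)  # baseline signature assumed
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), cw_idxs.size(0))
            # Convert distributions to span predictions, then to answer text
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, max_len, use_squad_v2)
            idx2pred, _ = util.convert_tokens(gold_dict, ids.tolist(),
                                              starts.tolist(), ends.tolist(),
                                              use_squad_v2)
            pred_dict.update(idx2pred)
    model.train()
    results = util.eval_dicts(gold_dict, pred_dict, use_squad_v2)
    results = OrderedDict([('NLL', nll_meter.avg),
                           ('F1', results['F1']),
                           ('EM', results['EM'])])
    return results, pred_dict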
def main(args):
    args.save_dir = util.get_save_dir(args.save_dir, "exp1_training", training=False)
    log = get_logger(args.logging_dir, "exp1_training")
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info(f'Loading checkpoint from {args.load_path}...')
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)  # was `c.load_path`; `c` is undefined here
    model = model.to(device)
    model.eval()

    # Get data loader
    log.info('Building dataset...')
    dataset = SQuAD(args.test_record_file, True)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.datasplit} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}   # Predictions for submission
    with open(args.test_eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len, True)  # was `c.max_ans_len`

            # Log info
            progress_bar.update(batch_size)
            # Not using the unlabeled test set
            # if args.split != 'test':
            #     # No labels for the test set, so NLL would be invalid
            #     progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict,
                                                      ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      True)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (this split has labels, unlike the hidden test set)
    results = util.eval_dicts(gold_dict, pred_dict, True)
    results_list = [('NLL', nll_meter.avg),
                    ('F1', results['F1']),
                    ('EM', results['EM'])]
    results_list.append(('AvNA', results['AvNA']))
    results = OrderedDict(results_list)

    # Log to console
    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
    log.info(f'{args.datasplit} {results_str}')

    # Log to TensorBoard
    tbx = SummaryWriter(args.save_dir)  # was `c.save_dir`
    util.visualize(tbx,
                   pred_dict=pred_dict,
                   eval_path=args.test_eval_file,
                   step=0,
                   split=args.datasplit,
                   num_visuals=args.num_visuals)
def main(args):
    # Load TF-IDF scorer from pickle
    scorer = TFIDF([])
    scorer.get_from_pickle()

    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)['{}_record_file'.format(args.split)]
    dataset = SQuAD(record_file, args.use_squad_v2)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  char_vocab_size=1376,
                  hidden_size=args.hidden_size)
    model = nn.DataParallel(model, gpu_ids)
    log.info('Loading checkpoint from {}...'.format(args.load_path))
    model = util.load_model(model, args.load_path, gpu_ids, return_step=False)
    model = model.to(device)
    model.eval()

    # Evaluate
    log.info('Evaluating on {} split...'.format(args.split))
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}   # Predictions for submission
    eval_file = vars(args)['{}_eval_file'.format(args.split)]
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            # Forward
            log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
            y1, y2 = y1.to(device), y2.to(device)
            loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
            nll_meter.update(loss.item(), batch_size)

            # Get F1 and EM scores
            p1, p2 = log_p1.exp(), log_p2.exp()
            starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict,
                                                      ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    if args.use_tfidf:
        # Apply TF-IDF filtering to pred_dict: prune predicted answers whose
        # normalized additive IDF score falls below the threshold
        tf_idf_threshold = 2
        tf_idf_common_threshold = 1
        for key, value in pred_dict.items():
            if value != "":
                tf_idf_score = scorer.normalized_additive_idf_ignore_common_words(
                    value, threshold_frequency=tf_idf_common_threshold)
                if tf_idf_score < tf_idf_threshold:
                    pred_dict[key] = ''
                    # print("pred_dict: {}, pruned".format(tf_idf_score))
                else:
                    pass
                    # print("pred_dict: {}, kept".format(tf_idf_score))

    # Log results (except for the test set, which has no labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        results_list = [('NLL', nll_meter.avg),
                        ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items())
        log.info('{} {}'.format(args.split.title(), results_str))

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info('Writing submission file to {}...'.format(sub_path))
    with open(sub_path, 'w') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    '''
    model = QANet(word_vectors, args.hidden_size, args.char_embed_size,
                  args.word_from_char_size, args.dropout_main,
                  args.embed_encoder_num_convs, args.embed_encoder_conv_kernel_size,
                  args.embed_encoder_num_heads, args.embed_encoder_num_blocks,
                  args.model_encoder_num_convs, args.model_encoder_conv_kernel_size,
                  args.model_encoder_num_heads, args.model_encoder_num_blocks)
    '''
    char_vectors = util.torch_from_json(args.char_emb_file)
    model = BiDAF(word_vectors=word_vectors,
                  char_vectors=char_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler.
    # Increase LR from 0 to args.lr over num_warmup_steps steps, then keep a constant LR.
    optimizer = optim.Adam(model.parameters(), args.lr,
                           betas=(0.8, 0.999), eps=1e-07,
                           weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer,
                               lambda s: math.log(1 + s) / math.log(args.num_warmup_steps)
                               if s < args.num_warmup_steps else 1)

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                cc_idxs = cc_idxs.to(device)
                qc_idxs = qc_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    # NEW: load the tag embeddings
    pos_vectors = util.torch_from_json(args.pos_emb_file)
    ner_vectors = util.torch_from_json(args.ner_emb_file)

    # Add loss
    if 'loss' in args.name:
        distance_criterion = DistanceFromAnswerLoss(coefficient=.5,
                                                    device=device,
                                                    normalization=True,
                                                    penalization_type='quadratic',
                                                    reduction='mean')

    # Choose model
    log.info('Building model {}...'.format(args.name))
    if 'baseline' in args.name:
        model = BiDAF(word_vectors=word_vectors,
                      hidden_size=args.hidden_size,
                      drop_prob=args.drop_prob)
    elif args.name == 'BiDAF_char':
        model = BiDAF_char(word_vectors=word_vectors,
                           char_vectors=char_vectors,
                           hidden_size=args.hidden_size,
                           drop_prob=args.drop_prob)
    elif args.name in ('BiDAF_tag', 'BiDAF_tag_loss'):
        model = BiDAF_tag(word_vectors=word_vectors,
                          char_vectors=char_vectors,
                          pos_vectors=pos_vectors,
                          ner_vectors=ner_vectors,
                          hidden_size=args.hidden_size,
                          drop_prob=args.drop_prob)
    elif args.name in ('BiDAF_tag_unfrozen', 'BiDAF_tag_unfrozen_loss'):
        model = BiDAF_tag(word_vectors=word_vectors,
                          char_vectors=char_vectors,
                          pos_vectors=pos_vectors,
                          ner_vectors=ner_vectors,
                          hidden_size=args.hidden_size,
                          drop_prob=args.drop_prob,
                          freeze_tag=False)
    elif args.name == 'BiDAF_tag_ext':
        model = BiDAF_tag_ext(word_vectors=word_vectors,
                              char_vectors=char_vectors,
                              pos_vectors=pos_vectors,
                              ner_vectors=ner_vectors,
                              hidden_size=args.hidden_size,
                              drop_prob=args.drop_prob)
    elif args.name == 'BiDAF_tag_ext_unfrozen':
        model = BiDAF_tag_ext(word_vectors=word_vectors,
                              char_vectors=char_vectors,
                              pos_vectors=pos_vectors,
                              ner_vectors=ner_vectors,
                              hidden_size=args.hidden_size,
                              drop_prob=args.drop_prob,
                              freeze_tag=False)
    elif args.name == 'coattn':
        model = CoattentionModel(hidden_dim=args.hidden_size,
                                 embedding_matrix=word_vectors,
                                 train_word_embeddings=False,
                                 dropout=0.35,
                                 pooling_size=16,
                                 number_of_iters=4,
                                 number_of_layers=2,
                                 device=device)
    else:
        raise NameError('No model named ' + args.name)

    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn,
                                   drop_last=True)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn,
                                 drop_last=True)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, cpos_idxs, cner_idxs, cw_ems, cw_tfs, \
                    qw_idxs, qc_idxs, qpos_idxs, qner_idxs, qw_ems, qw_tfs, \
                    y1, y2, ids in train_loader:  # NEW: tag features in the batch
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                if 'baseline' in args.name:
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                elif args.name == 'BiDAF_char':
                    # Additional setup for forward
                    cc_idxs = cc_idxs.to(device)
                    qc_idxs = qc_idxs.to(device)
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs)
                elif args.name in ('BiDAF_tag', 'BiDAF_tag_unfrozen',
                                   'BiDAF_tag_loss', 'BiDAF_tag_unfrozen_loss'):
                    # Additional setup for forward
                    cc_idxs = cc_idxs.to(device)
                    cpos_idxs = cpos_idxs.to(device)
                    cner_idxs = cner_idxs.to(device)
                    qc_idxs = qc_idxs.to(device)
                    qpos_idxs = qpos_idxs.to(device)
                    qner_idxs = qner_idxs.to(device)
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs,
                                           cpos_idxs, qpos_idxs, cner_idxs, qner_idxs)
                elif args.name in ('BiDAF_tag_ext', 'BiDAF_tag_ext_unfrozen'):
                    # Additional setup for forward
                    cc_idxs = cc_idxs.to(device)
                    cpos_idxs = cpos_idxs.to(device)
                    cner_idxs = cner_idxs.to(device)
                    cw_ems = cw_ems.to(device)
                    cw_tfs = cw_tfs.to(device)
                    qc_idxs = qc_idxs.to(device)
                    qpos_idxs = qpos_idxs.to(device)
                    qner_idxs = qner_idxs.to(device)
                    qw_ems = qw_ems.to(device)
                    qw_tfs = qw_tfs.to(device)
                    log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs,
                                           cpos_idxs, qpos_idxs, cner_idxs, qner_idxs,
                                           cw_ems, qw_ems, cw_tfs, qw_tfs)
                elif args.name == 'coattn':
                    max_c_len = cw_idxs.size(1)
                    max_q_len = qw_idxs.size(1)
                    # Effective (unpadded) lengths: index of the first pad
                    # token, or the full width if there is no padding
                    c_len = []
                    q_len = []
                    for i in range(cw_idxs.size(0)):
                        if len((cw_idxs[i] == 0).nonzero()) != 0:
                            c_len_i = (cw_idxs[i] == 0).nonzero()[0].item()
                        else:
                            c_len_i = cw_idxs.size(1)
                        if len((qw_idxs[i] == 0).nonzero()) != 0:
                            q_len_i = (qw_idxs[i] == 0).nonzero()[0].item()
                        else:
                            q_len_i = qw_idxs.size(1)
                        c_len.append(int(c_len_i))
                        q_len.append(int(q_len_i))
                    c_len = torch.Tensor(c_len).int()
                    q_len = torch.Tensor(q_len).int()
                    num_examples = int(cw_idxs.size(0) / len(args.gpu_ids))
                    log_p1, log_p2 = model(max_c_len, max_q_len, cw_idxs, qw_idxs,
                                           c_len, q_len, num_examples, True, True)
                else:
                    raise NameError('No model named ' + args.name)

                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                # Add distance penalization
                if 'loss' in args.name:
                    loss += distance_criterion(log_p1, y1) + distance_criterion(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2,
                                                  args.name,
                                                  args.gpu_ids)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) log = util.get_logger(args.save_dir, args.name) log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # Get model log.info('Building model...') nbr_model = 0 if (args.load_path_baseline): model_baseline = Baseline(word_vectors=word_vectors, hidden_size=100) model_baseline = nn.DataParallel(model_baseline, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_baseline}...') model_baseline = util.load_model(model_baseline, args.load_path_baseline, gpu_ids, return_step=False) model_baseline = model_baseline.to(device) model_baseline.eval() nll_meter_baseline = util.AverageMeter() nbr_model += 1 save_prob_baseline_start = [] save_prob_baseline_end = [] if (args.load_path_bidaf): model_bidaf = BiDAF(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size) model_bidaf = nn.DataParallel(model_bidaf, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_bidaf}...') model_bidaf = util.load_model(model_bidaf, args.load_path_bidaf, gpu_ids, return_step=False) model_bidaf = model_bidaf.to(device) model_bidaf.eval() nll_meter_bidaf = util.AverageMeter() nbr_model += 1 save_prob_bidaf_start = [] save_prob_bidaf_end = [] if (args.load_path_bidaf_fusion): model_bidaf_fu = BiDAF_fus(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size) model_bidaf_fu = nn.DataParallel(model_bidaf_fu, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_bidaf_fusion}...') model_bidaf_fu = util.load_model(model_bidaf_fu, args.load_path_bidaf_fusion, gpu_ids, return_step=False) model_bidaf_fu = model_bidaf_fu.to(device) model_bidaf_fu.eval() nll_meter_bidaf_fu = util.AverageMeter() nbr_model += 1 save_prob_bidaf_fu_start = [] save_prob_bidaf_fu_end = [] if (args.load_path_qanet): model_qanet = QANet(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet = nn.DataParallel(model_qanet, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet}...') model_qanet = util.load_model(model_qanet, args.load_path_qanet, gpu_ids, return_step=False) model_qanet = model_qanet.to(device) model_qanet.eval() nll_meter_qanet = util.AverageMeter() nbr_model += 1 save_prob_qanet_start = [] save_prob_qanet_end = [] if (args.load_path_qanet_old): model_qanet_old = QANet_old(word_vectors=word_vectors, char_vectors=char_vectors, device=device, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks) model_qanet_old = nn.DataParallel(model_qanet_old, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_old}...') model_qanet_old = util.load_model(model_qanet_old, args.load_path_qanet_old, gpu_ids, return_step=False) model_qanet_old = model_qanet_old.to(device) model_qanet_old.eval() nll_meter_qanet_old = 
util.AverageMeter() nbr_model += 1 save_prob_qanet_old_start = [] save_prob_qanet_old_end = [] if (args.load_path_qanet_inde): model_qanet_inde = QANet_independant_encoder( word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet_inde = nn.DataParallel(model_qanet_inde, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_inde}...') model_qanet_inde = util.load_model(model_qanet_inde, args.load_path_qanet_inde, gpu_ids, return_step=False) model_qanet_inde = model_qanet_inde.to(device) model_qanet_inde.eval() nll_meter_qanet_inde = util.AverageMeter() nbr_model += 1 save_prob_qanet_inde_start = [] save_prob_qanet_inde_end = [] if (args.load_path_qanet_s_e): model_qanet_s_e = QANet_S_E(word_vectors=word_vectors, char_vectors=char_vectors, char_emb_dim=args.char_emb_dim, hidden_size=args.hidden_size, n_heads=args.n_heads, n_conv_emb_enc=args.n_conv_emb, n_conv_mod_enc=args.n_conv_mod, n_emb_enc_blocks=args.n_emb_blocks, n_mod_enc_blocks=args.n_mod_blocks, divisor_dim_kqv=args.divisor_dim_kqv) model_qanet_s_e = nn.DataParallel(model_qanet_s_e, gpu_ids) log.info(f'Loading checkpoint from {args.load_path_qanet_s_e}...') model_qanet_s_e = util.load_model(model_qanet_s_e, args.load_path_qanet_s_e, gpu_ids, return_step=False) model_qanet_s_e = model_qanet_s_e.to(device) model_qanet_s_e.eval() nll_meter_qanet_s_e = util.AverageMeter() nbr_model += 1 save_prob_qanet_s_e_start = [] save_prob_qanet_s_e_end = [] # Get data loader log.info('Building dataset...') record_file = vars(args)[f'{args.split}_record_file'] dataset = SQuAD(record_file, args.use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Evaluate log.info(f'Evaluating on {args.split} split...') pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)[f'{args.split}_eval_file'] with open(eval_file, 'r') as fh: gold_dict = json_load(fh) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) batch_size = cw_idxs.size(0) y1, y2 = y1.to(device), y2.to(device) l_p1, l_p2 = [], [] # Forward if (args.load_path_baseline): log_p1_baseline, log_p2_baseline = model_baseline( cw_idxs, cc_idxs) loss_baseline = F.nll_loss(log_p1_baseline, y1) + F.nll_loss( log_p2_baseline, y2) nll_meter_baseline.update(loss_baseline.item(), batch_size) l_p1 += [log_p1_baseline.exp()] l_p2 += [log_p2_baseline.exp()] if (args.save_probabilities): save_prob_baseline_start += [ log_p1_baseline.exp().detach().cpu().numpy() ] save_prob_baseline_end += [ log_p2_baseline.exp().detach().cpu().numpy() ] if (args.load_path_qanet): log_p1_qanet, log_p2_qanet = model_qanet( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet = F.nll_loss(log_p1_qanet, y1) + F.nll_loss( log_p2_qanet, y2) nll_meter_qanet.update(loss_qanet.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet.exp()] l_p2 += [log_p2_qanet.exp()] if (args.save_probabilities): save_prob_qanet_start += [ log_p1_qanet.exp().detach().cpu().numpy() ] save_prob_qanet_end += [ 
log_p2_qanet.exp().detach().cpu().numpy() ] if (args.load_path_qanet_old): log_p1_qanet_old, log_p2_qanet_old = model_qanet_old( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_old = F.nll_loss(log_p1_qanet_old, y1) + F.nll_loss( log_p2_qanet_old, y2) nll_meter_qanet_old.update(loss_qanet_old.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_old.exp()] l_p2 += [log_p2_qanet_old.exp()] if (args.save_probabilities): save_prob_qanet_old_start += [ log_p1_qanet_old.exp().detach().cpu().numpy() ] save_prob_qanet_old_end += [ log_p2_qanet_old.exp().detach().cpu().numpy() ] if (args.load_path_qanet_inde): log_p1_qanet_inde, log_p2_qanet_inde = model_qanet_inde( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_inde = F.nll_loss( log_p1_qanet_inde, y1) + F.nll_loss(log_p2_qanet_inde, y2) nll_meter_qanet_inde.update(loss_qanet_inde.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_inde.exp()] l_p2 += [log_p2_qanet_inde.exp()] if (args.save_probabilities): save_prob_qanet_inde_start += [ log_p1_qanet_inde.exp().detach().cpu().numpy() ] save_prob_qanet_inde_end += [ log_p2_qanet_inde.exp().detach().cpu().numpy() ] if (args.load_path_qanet_s_e): log_p1_qanet_s_e, log_p2_qanet_s_e = model_qanet_s_e( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_qanet_s_e = F.nll_loss(log_p1_qanet_s_e, y1) + F.nll_loss( log_p2_qanet_s_e, y2) nll_meter_qanet_s_e.update(loss_qanet_s_e.item(), batch_size) # Get F1 and EM scores l_p1 += [log_p1_qanet_s_e.exp()] l_p2 += [log_p2_qanet_s_e.exp()] if (args.save_probabilities): save_prob_qanet_s_e_start += [ log_p1_qanet_s_e.exp().detach().cpu().numpy() ] save_prob_qanet_s_e_end += [ log_p2_qanet_s_e.exp().detach().cpu().numpy() ] if (args.load_path_bidaf): log_p1_bidaf, log_p2_bidaf = model_bidaf( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_bidaf = F.nll_loss(log_p1_bidaf, y1) + F.nll_loss( log_p2_bidaf, y2) nll_meter_bidaf.update(loss_bidaf.item(), batch_size) l_p1 += [log_p1_bidaf.exp()] l_p2 += [log_p2_bidaf.exp()] if (args.save_probabilities): save_prob_bidaf_start += [ log_p1_bidaf.exp().detach().cpu().numpy() ] save_prob_bidaf_end += [ log_p2_bidaf.exp().detach().cpu().numpy() ] if (args.load_path_bidaf_fusion): log_p1_bidaf_fu, log_p2_bidaf_fu = model_bidaf_fu( cw_idxs, cc_idxs, qw_idxs, qc_idxs) loss_bidaf_fu = F.nll_loss(log_p1_bidaf_fu, y1) + F.nll_loss( log_p2_bidaf_fu, y2) nll_meter_bidaf_fu.update(loss_bidaf_fu.item(), batch_size) l_p1 += [log_p1_bidaf_fu.exp()] l_p2 += [log_p2_bidaf_fu.exp()] if (args.save_probabilities): save_prob_bidaf_fu_start += [ log_p1_bidaf_fu.exp().detach().cpu().numpy() ] save_prob_bidaf_fu_end += [ log_p2_bidaf_fu.exp().detach().cpu().numpy() ] p1, p2 = l_p1[0], l_p2[0] for i in range(1, nbr_model): p1 += l_p1[i] p2 += l_p2[i] p1 /= nbr_model p2 /= nbr_model starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != 'test': # No labels for the test set, so NLL would be invalid if (args.load_path_qanet): progress_bar.set_postfix(NLL=nll_meter_qanet.avg) elif (args.load_path_bidaf): progress_bar.set_postfix(NLL=nll_meter_bidaf.avg) elif (args.load_path_bidaf_fusion): progress_bar.set_postfix(NLL=nll_meter_bidaf_fu.avg) elif (args.load_path_qanet_old): progress_bar.set_postfix(NLL=nll_meter_qanet_old.avg) elif (args.load_path_qanet_inde): progress_bar.set_postfix(NLL=nll_meter_qanet_inde.avg) elif (args.load_path_qanet_s_e): progress_bar.set_postfix(NLL=nll_meter_qanet_s_e.avg) else: progress_bar.set_postfix(NLL=nll_meter_baseline.avg) idx2pred, 
uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) if (args.save_probabilities): if (args.load_path_baseline): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_baseline_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_baseline_end, fp) if (args.load_path_bidaf): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_end, fp) if (args.load_path_bidaf_fusion): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_fu_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_bidaf_fu_end, fp) if (args.load_path_qanet): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_end, fp) if (args.load_path_qanet_old): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_old_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_old_end, fp) if (args.load_path_qanet_inde): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_inde_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_inde_end, fp) if (args.load_path_qanet_s_e): with open(args.save_dir + "/probs_start", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_s_e_start, fp) with open(args.save_dir + "/probs_end", "wb") as fp: #Pickling pickle.dump(save_prob_qanet_s_e_end, fp) # Log results (except for test set, since it does not come with labels) if args.split != 'test': results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2) if (args.load_path_qanet): meter_avg = nll_meter_qanet.avg elif (args.load_path_bidaf): meter_avg = nll_meter_bidaf.avg elif (args.load_path_bidaf_fusion): meter_avg = nll_meter_bidaf_fu.avg elif (args.load_path_qanet_inde): meter_avg = nll_meter_qanet_inde.avg elif (args.load_path_qanet_s_e): meter_avg = nll_meter_qanet_s_e.avg elif (args.load_path_qanet_old): meter_avg = nll_meter_qanet_old.avg else: meter_avg = nll_meter_baseline.avg results_list = [('NLL', meter_avg), ('F1', results['F1']), ('EM', results['EM'])] if args.use_squad_v2: results_list.append(('AvNA', results['AvNA'])) results = OrderedDict(results_list) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'{args.split.title()} {results_str}') # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize(tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals) # Write submission file sub_path = join(args.save_dir, args.split + '_' + args.sub_file) log.info(f'Writing submission file to {sub_path}...') with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(sub_dict): csv_writer.writerow([uuid, sub_dict[uuid]])
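# The loop above accumulates each loaded model's start/end probabilities in
# l_p1 / l_p2 and averages them with a running sum divided by nbr_model. A
# minimal, self-contained sketch of that uniform-ensembling step (the tensor
# shapes and the torch import are the only assumptions; this is not the
# project's util code):
import torch

def average_probabilities(l_p1, l_p2):
    """Average per-model start/end distributions (lists of (batch, c_len) tensors)."""
    assert len(l_p1) == len(l_p2) and len(l_p1) > 0
    p1 = torch.stack(l_p1).mean(dim=0)  # equivalent to summing then dividing by nbr_model
    p2 = torch.stack(l_p2).mean(dim=0)
    return p1, p2

# Toy example with two "models" over a batch of 1 and a context of 4 tokens:
p1_a = torch.tensor([[0.7, 0.1, 0.1, 0.1]])
p1_b = torch.tensor([[0.1, 0.7, 0.1, 0.1]])
p1, _ = average_probabilities([p1_a, p1_b], [p1_a, p1_b])
print(p1)  # tensor([[0.4000, 0.4000, 0.1000, 0.1000]])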
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info(f'Using random seed {args.seed}...') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vec = util.torch_from_json(args.char_emb_file) # Get model log.info('Building model...') if args.name == 'baseline': model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) elif args.name == 'charembeddings': model = BiDAFChar(word_vectors=word_vectors, char_vec=char_vec, word_len=16, hidden_size=args.hidden_size, drop_prob=args.drop_prob) elif args.name == 'charembeddings2': model = BiDAFChar2(word_vectors=word_vectors, char_vec=char_vec, word_len=16, hidden_size=args.hidden_size, drop_prob=args.drop_prob) elif args.name == 'qanet': model = QANet(word_vectors=word_vectors, char_vec=char_vec, word_len=16, emb_size=args.hidden_size, drop_prob=args.drop_prob, enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train, DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos, total_prob=args.total_drop, final_prob=args.final_prob) elif args.name == 'qanet2': model = QANet2(word_vectors=word_vectors, char_vec=char_vec, word_len=16, emb_size=args.hidden_size, drop_prob=args.drop_prob, enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train, DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos, rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob, freeze=args.freeze_emb) elif args.name == 'qanet3': model = QANet3(word_vectors=word_vectors, char_vec=char_vec, word_len=16, emb_size=args.hidden_size, drop_prob=args.drop_prob, enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train, DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos, rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob, freeze=args.freeze_emb) elif args.name == 'qanet4': model = QANet4(word_vectors=word_vectors, char_vec=char_vec, word_len=16, emb_size=args.hidden_size, drop_prob=args.drop_prob, enc_size=args.enc_size, n_head=args.n_head, LN_train=args.ln_train, DP_residual=args.dp_res, mask_pos=args.mask_pos, two_pos=args.two_pos, rel=args.rel_att, total_prob=args.total_drop, final_prob=args.final_prob, freeze=args.freeze_emb) else: raise ValueError('Wrong model name') model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info(f'Loading checkpoint from {args.load_path}...') model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler if args.name == 'qanet': optimizer = optim.Adam(model.parameters(), args.lr, betas=(0.8, 0.999), weight_decay=3 * 1e-7, eps=1e-7) scheduler = warmup(optimizer, 1, 2000) elif args.opt == 'adam': if args.grad_cent: optimizer = AdamWGC(model.parameters(), args.lr, betas=(0.9, 0.999), 
weight_decay=3 * 1e-7, eps=1e-7, use_gc=True) else: optimizer = AdamW(model.parameters(), args.lr, betas=(0.8, 0.999), weight_decay=3 * 1e-7, eps=1e-7) scheduler = warmup(optimizer, 1, 2000) elif args.opt == 'adadelta': optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=3 * 1e-7) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) # Constant LR elif args.opt == 'sgd': optimizer = optim.SGD(model.parameters(), args.lr, weight_decay=3 * 1e-7) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) # Constant LR # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) i = 0 while epoch != args.num_epochs: epoch += 1 log.info(f'Starting epoch {epoch}...') with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward log_p1, log_p2 = model(cw_idxs, cc_idxs, qw_idxs, qc_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() i += 1 loss /= args.acc_step # Backward loss.backward() if i % args.acc_step == 0: nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(i // (args.acc_step)) ema(model, i // (args.acc_step)) optimizer.zero_grad() # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0 and i % args.acc_step == 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info(f'Evaluating at step {step}...') ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'Dev {results_str}') # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar(f'dev/{k}', v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
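# The loop above trains with gradient accumulation: the loss is divided by
# args.acc_step, and clipping / optimizer.step() / EMA only run every acc_step
# mini-batches, emulating a batch acc_step times larger. The bare pattern as a
# sketch, with placeholder `model`, `optimizer`, and `batches` (a model that
# emits log-probabilities is assumed so F.nll_loss applies):
import torch.nn as nn
import torch.nn.functional as F

def accumulate_gradients(model, optimizer, batches, acc_step, max_grad_norm=5.0):
    optimizer.zero_grad()
    for i, (x, y) in enumerate(batches, start=1):
        loss = F.nll_loss(model(x), y)
        (loss / acc_step).backward()      # scale so the summed gradients average out
        if i % acc_step == 0:             # update weights once per acc_step batches
            nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
            optimizer.step()
            optimizer.zero_grad()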
def main(args): save_dir = os.path.join("./save", time.strftime("%m%d%H%M%S")) if not os.path.exists(save_dir): os.makedirs(save_dir) tokenizer = BertTokenizer.from_pretrained("bert-base-uncased") if args.all_data: data_loader = get_ext_data_loader(tokenizer, "./data/train/", shuffle=True, args=args) else: data_loader, _, _ = get_data_loader(tokenizer, "./data/train-v1.1.json", shuffle=True, args=args) vocab_size = len(tokenizer.vocab) if args.bidaf: print("train bidaf") model = BiDAF(embedding_size=args.embedding_size, vocab_size=vocab_size, hidden_size=args.hidden_size, drop_prob=args.dropout) else: ntokens = len(tokenizer.vocab) model = QANet(ntokens, embedding=args.embedding, embedding_size=args.embedding_size, hidden_size=args.hidden_size, num_head=args.num_head) if args.load_model: state_dict = torch.load(args.model_path, map_location="cpu") model.load_state_dict(state_dict) print("load pre-trained model") device = torch.device("cuda") model = model.to(device) model.train() ema = EMA(model, args.decay) base_lr = 1 parameters = filter(lambda param: param.requires_grad, model.parameters()) optimizer = optim.Adam(lr=base_lr, betas=(0.9, 0.999), eps=1e-7, weight_decay=5e-8, params=parameters) cr = args.lr / math.log2(args.lr_warm_up_num) scheduler = optim.lr_scheduler.LambdaLR( optimizer, lr_lambda=lambda ee: cr * math.log2(ee + 1) if ee < args.lr_warm_up_num else args.lr) step = 0 num_batches = len(data_loader) avg_loss = 0 best_f1 = 0 for epoch in range(1, args.num_epochs + 1): step += 1 start = time.time() model.train() for i, batch in enumerate(data_loader, start=1): c_ids, q_ids, start_positions, end_positions = batch c_len = torch.sum(torch.sign(c_ids), 1) max_c_len = torch.max(c_len) c_ids = c_ids[:, :max_c_len].to(device) q_len = torch.sum(torch.sign(q_ids), 1) max_q_len = torch.max(q_len) q_ids = q_ids[:, :max_q_len].to(device) start_positions = start_positions.to(device) end_positions = end_positions.to(device) optimizer.zero_grad() loss = model(c_ids, q_ids, start_positions=start_positions, end_positions=end_positions) loss.backward() avg_loss = cal_running_avg_loss(loss.item(), avg_loss) nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step) ema(model, step // args.batch_size) batch_size = c_ids.size(0) step += batch_size msg = "{}/{} {} - ETA : {} - qa_loss: {:.4f}" \ .format(i, num_batches, progress_bar(i, num_batches), eta(start, i, num_batches), avg_loss) print(msg, end="\r") if not args.debug: metric_dict = eval_qa(args, model) f1 = metric_dict["f1"] em = metric_dict["exact_match"] print("epoch: {}, final loss: {:.4f}, F1:{:.2f}, EM:{:.2f}".format( epoch, avg_loss, f1, em)) if args.bidaf: model_name = "bidaf" else: model_name = "qanet" if f1 > best_f1: best_f1 = f1 state_dict = model.state_dict() save_file = "{}_{:.2f}_{:.2f}".format(model_name, f1, em) path = os.path.join(save_dir, save_file) torch.save(state_dict, path)
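# The LambdaLR above implements a log2 warm-up: with base_lr = 1 the lambda's
# return value is the effective learning rate, rising from 0 to args.lr over
# lr_warm_up_num steps and staying constant afterwards. The same curve in
# isolation (lr=0.001 and warm_up=1000 are illustrative values only):
import math

def log_warmup(step, lr=0.001, warm_up=1000):
    cr = lr / math.log2(warm_up)
    return cr * math.log2(step + 1) if step < warm_up else lr

for s in (0, 10, 100, 999, 1000, 5000):
    print(s, round(log_warmup(s), 6))
# 0 at step 0, exactly lr by the end of warm-up, flat from then on.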
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) #log = util.get_logger(args.save_dir, args.name) #log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings print('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Get model print('Building model...') model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size) model = nn.DataParallel(model, gpu_ids) print(f'Loading checkpoint from {args.load_path}...') model = util.load_model(model, args.load_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader print('Building dataset...') #record_file = vars(args)[f'{args.split}_record_file'] dataset = SQuAD("./data/my_test.npz", args.use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Evaluate print(f'Evaluating on {args.split} split...') nll_meter = util.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission #eval_file = vars(args)[f'{args.split}_eval_file'] with open("./data/my_test_eval.json", 'r') as fh: gold_dict = json_load(fh) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: print("viewing the dataset") print(cw_idxs, cc_idxs, qw_idxs, qc_idxs) # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) #if args.split != 'test': # No labels for the test set, so NLL would be invalid #progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) print("my evaluation ....") for el in pred_dict: print(el, pred_dict[el]) for el in sub_dict: print(el, sub_dict[el])
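# util.discretize, used above, converts the start/end distributions into
# concrete (start, end) index pairs. A minimal sketch of one standard way to do
# it, assuming p1 and p2 are (batch, c_len) probability tensors: form the outer
# product p1[i] * p2[j], zero out spans with j < i or length >= max_len, then
# argmax. (This mirrors the starter-code helper in spirit; it is not the exact source.)
import torch

def discretize_sketch(p1, p2, max_len=15):
    c_len = p1.size(1)
    joint = p1.unsqueeze(2) * p2.unsqueeze(1)    # [b, i, j] = p1[b, i] * p2[b, j]
    legal = torch.triu(torch.ones(c_len, c_len)) \
          - torch.triu(torch.ones(c_len, c_len), diagonal=max_len)
    joint = joint * legal                         # keep only 0 <= j - i < max_len
    flat = joint.view(joint.size(0), -1).argmax(dim=1)
    return flat // c_len, flat % c_len            # starts, ends

starts, ends = discretize_sketch(torch.tensor([[0.1, 0.6, 0.2, 0.1]]),
                                 torch.tensor([[0.1, 0.1, 0.7, 0.1]]))
print(starts.item(), ends.item())  # 1 2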
def main(args): # Set up logging args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) log = util.get_logger(args.save_dir, args.name) log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Get model log.info('Building model...') if args.model == 'bidaf': model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size) elif args.model == 'bidafextra': model = BiDAFExtra(word_vectors=word_vectors, args=args) elif args.model == 'fusionnet': model = FusionNet(word_vectors=word_vectors, args=args) model = nn.DataParallel(model, gpu_ids) log.info(f'Loading checkpoint from {args.load_path}...') model = util.load_model(model, args.load_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader log.info('Building dataset...') record_file = vars(args)[f'{args.split}_record_file'] dataset = SQuAD(record_file, args) data_loader = data.DataLoader(dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # print("*"*80) # print(len(dataset.question_idxs)) # for question_idx in dataset.question_idxs: # print(question_idx) # print("*" * 80) # print(self.question_idxs[question_idx]) # self.question_idxs[idx] # print("data_loader: ",data_loader) # Evaluate log.info(f'Evaluating on {args.split} split...') nll_meter = util.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)[f'{args.split}_eval_file'] with open(eval_file, 'r') as fh: gold_dict = json_load(fh) # create statistics # print("*"*80) # print(len(gold_dict)) # print(gold_dict['1']['question']) count_questions_type = defaultdict(lambda: 0) audit_trail_from_question_type = defaultdict(lambda: []) list_of_interrogative_pronouns = [ "what", "whose", "why", "which", "where", "when", "how", "who", "whom" ] for index in range(1, len(gold_dict) + 1): # keys in the eval file are 1-based strings, so run through the last question as well # transform the question to lower case to simplify the analysis, thus losing the benefit of the capital letters # possibly indicating the position of the interrogative pronoun in the sentence.
question_lower_case = gold_dict[str(index)]['question'].lower() list_question_lower_case_with_punctuation = question_lower_case.translate( {ord(i): " " for i in "'"}).split() question_lower_case = [] # reset to a list before collecting the cleaned words (appending to the string above would fail) for item in list_question_lower_case_with_punctuation: question_lower_case.append( item.translate({ord(i): "" for i in ",.<>!@£$%^&*()_-+=?"})) # defining a variable for the first word first_word_question_lower_case = question_lower_case[0] # defining a variable for the second word second_word_question_lower_case = question_lower_case[1] # defining a variable for the first and second words combined combined_first_and_second_words = first_word_question_lower_case + " " + second_word_question_lower_case # print statements for debugging can go here # Analyzing the sentence if first_word_question_lower_case in list_of_interrogative_pronouns: count_questions_type[first_word_question_lower_case] += 1 audit_trail_from_question_type[ first_word_question_lower_case].append(str(index)) # compound question starting with "in" elif first_word_question_lower_case == "in": if second_word_question_lower_case in list_of_interrogative_pronouns and second_word_question_lower_case != "whose": count_questions_type[combined_first_and_second_words] += 1 audit_trail_from_question_type[ combined_first_and_second_words].append(str(index)) else: pronoun = find_first_interrogative_pronoun( list_of_interrogative_pronouns, question_lower_case) count_questions_type[pronoun] += 1 audit_trail_from_question_type[pronoun].append(str(index)) # compound question starting with "by" elif first_word_question_lower_case == "by": if second_word_question_lower_case in list_of_interrogative_pronouns \ and second_word_question_lower_case != "whom" \ and second_word_question_lower_case != "which" \ and second_word_question_lower_case != "when" \ and second_word_question_lower_case != "how": count_questions_type[combined_first_and_second_words] += 1 audit_trail_from_question_type[ combined_first_and_second_words].append(str(index)) else: pronoun = find_first_interrogative_pronoun( list_of_interrogative_pronouns, question_lower_case) count_questions_type[pronoun] += 1 audit_trail_from_question_type[pronoun].append(str(index)) else: pronoun = find_first_interrogative_pronoun( list_of_interrogative_pronouns, question_lower_case) #if pronoun =="": # print(">>", question_lower_case) # print("@@@", gold_dict[str(index)]['question']) count_questions_type[pronoun] += 1 audit_trail_from_question_type[pronoun].append(str(index)) # if pronoun =="": # print(">>", question_lower_case.split()) # print() #if first_word_question_lower_case == "if": # print(">>", question_lower_case.split()) # print(count_questions_type) # if gold_dict[str(index)]['question'].lower().split()[0] == "in": # print(gold_dict[str(index)]['question']) reverse_dict_by_value = OrderedDict( sorted(count_questions_type.items(), key=lambda x: x[1])) # print(count_questions_type) total_questions = sum(count_questions_type.values()) # print(reverse_dict) #for k, v in reverse_dict_by_value.items(): # print( "%s: %s and in percentage: %s" % (k, v, 100*v/total_questions)) #print(audit_trail_from_question_type) # exit() with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, cw_pos, cw_ner, cw_freq, cqw_extra, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward if args.model == 'bidaf': log_p1, log_p2 = model(cw_idxs, qw_idxs) else: log_p1, log_p2 = model(cw_idxs,
qw_idxs, cw_pos, cw_ner, cw_freq, cqw_extra) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != 'test': # No labels for the test set, so NLL would be invalid progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) # Log results (except for test set, since it does not come with labels) if args.split != 'test': results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2) # Printing information for questions without interrogative pronouns """" print("len(gold_dict): ", len(gold_dict)) print("len(pred_dict): ", len(pred_dict)) print("Is gold_dict.keys() identical to pred_dict.keys(): ", gold_dict.keys()==pred_dict.keys()) if gold_dict.keys()!=pred_dict.keys(): for key in gold_dict.keys(): if key not in pred_dict.keys(): print("key ", key, " missing in pred_dict.keys(") """ results_list = [('NLL', nll_meter.avg), ('F1', results['F1']), ('EM', results['EM'])] if args.use_squad_v2: results_list.append(('AvNA', results['AvNA'])) results = OrderedDict(results_list) # Computing the F1 score for each type of question # # audit_trail_from_question_type[pronoun].append(str(index)) # create a list of the types of questions by extracting the keys from the dict audit_trail_from_question_type types_of_questions = list(audit_trail_from_question_type.keys()) gold_dict_per_type_of_questions = defaultdict(lambda: []) pred_dict_per_type_of_questions = {} gold_dict_per_type_of_questions_start = {} pred_dict_per_type_of_questions_start = {} gold_dict_per_type_of_questions_middle = {} pred_dict_per_type_of_questions_middle = {} gold_dict_per_type_of_questions_end = {} pred_dict_per_type_of_questions_end = {} for type_of_questions in types_of_questions: #gold_pred = {key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions]} #lst_pred = {key: value for key, value in pred_dict.items() if key in audit_trail_from_question_type[type_of_questions]} # Create two dictionnaries for each type of sentence for gold_dict_per_type_of_questions and pred_dict_per_type_of_questions gold_dict_per_type_of_questions[type_of_questions] = { key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } pred_dict_per_type_of_questions[type_of_questions] = { key: value for key, value in pred_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } # print(type_of_questions," F1 score: ", util.eval_dicts(gold_dict_per_type_of_questions[type_of_questions], pred_dict_per_type_of_questions[type_of_questions], args.use_squad_v2)['F1']) gold_dict_per_type_of_questions_start[type_of_questions] = { key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } pred_dict_per_type_of_questions_start[type_of_questions] = { key: value for key, value in pred_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } gold_dict_per_type_of_questions_middle[type_of_questions] = { key: value for key, value in gold_dict.items() if key in 
audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } pred_dict_per_type_of_questions_middle[type_of_questions] = { key: value for key, value in pred_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } gold_dict_per_type_of_questions_end[type_of_questions] = { key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } pred_dict_per_type_of_questions_end[type_of_questions] = { key: value for key, value in pred_dict.items() if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys() } for key, value in gold_dict.items(): #if key in audit_trail_from_question_type[type_of_questions] and key in pred_dict.keys(): if key in audit_trail_from_question_type[ type_of_questions] and type_of_questions != "" and key in pred_dict_per_type_of_questions[ type_of_questions]: """ print("type_of_questions: ",type_of_questions) print("key: ", key) print("question: ", value["question"]) sub_index = value["question"].lower().find(type_of_questions) print("sub_index: ",sub_index) test_fc = value["question"].lower().find(type_of_questions) print("present type of the var: ",type(test_fc)) #print("question: ", value["question"][str(key)]) print("length of the question: ", len(value["question"])) print('Position of the interrogative pronoun in the question:', ) """ # Create two dictionnaries for each type of sentence based at the start of the sentence if value["question"].lower().find( type_of_questions) == 1 or value["question"].lower( ).find(type_of_questions) == 0: #print("BEGINNING") if type_of_questions != "": try: del gold_dict_per_type_of_questions_middle[ type_of_questions][key] except KeyError: pass try: del pred_dict_per_type_of_questions_middle[ type_of_questions][key] except KeyError: pass try: del gold_dict_per_type_of_questions_end[ type_of_questions][key] except KeyError: pass try: del pred_dict_per_type_of_questions_end[ type_of_questions][key] except KeyError: pass #pred_dict_per_type_of_questions_start[type_of_questions] = {key: pred_dict[key] for key in # gold_dict_per_type_of_questions_start[ # type_of_questions].keys()} elif value["question"].lower( ).find(type_of_questions) >= len( value["question"]) - len(type_of_questions) - 5: #print("END") if type_of_questions != "": try: del gold_dict_per_type_of_questions_middle[ type_of_questions][key] except KeyError: pass try: del pred_dict_per_type_of_questions_middle[ type_of_questions][key] except KeyError: pass try: del gold_dict_per_type_of_questions_start[ type_of_questions][key] except KeyError: pass try: del pred_dict_per_type_of_questions_start[ type_of_questions][key] except KeyError: pass #print("type_of_questions: ",type_of_questions) #sub_index = value["question"].lower().find(type_of_questions) #print("sub_index: ", sub_index) #print("len(value['question']) - len(type_of_questions) - 2: ", len(value["question"])-len(type_of_questions)-2) #start_string = len(value["question"])-len(type_of_questions)-6 #end_string = len(value["question"])-1 #print("extract at the end: ", value["question"][start_string:end_string]) else: #print("MIDDLE") if type_of_questions != "": try: del gold_dict_per_type_of_questions_start[ type_of_questions][key] except KeyError: pass try: del pred_dict_per_type_of_questions_start[ type_of_questions][key] except KeyError: pass try: del gold_dict_per_type_of_questions_end[ type_of_questions][key] except KeyError: pass try: del 
pred_dict_per_type_of_questions_end[ type_of_questions][key] except KeyError: pass pass """ if type_of_questions != "": gold_dict_per_type_of_questions_start[type_of_questions] = {key: value for key, value in gold_dict.items() if (key in audit_trail_from_question_type[type_of_questions] \ and (value["question"].lower().find(type_of_questions) <= 1) \ and key in pred_dict_per_type_of_questions[type_of_questions]) } """ """ for key in gold_dict_per_type_of_questions_start[type_of_questions].keys(): print("key:: ", key ) print("type(key):: ", type(key) ) print("pred_dict[,key,] : ", pred_dict[key]) print("@@@@@@@@@@@@@@@@@@@@@@@@") pred_dict_per_type_of_questions_start[type_of_questions] = {key: pred_dict[key] for key in gold_dict_per_type_of_questions_start[type_of_questions].keys()} #pred_dict_per_type_of_questions_start[type_of_questions] = {key: value for key, value in pred_dict.items() if key in list(gold_dict_per_type_of_questions_start[type_of_questions].keys()) } # Create two dictionnaries for each type of sentence based at the end of the sentence gold_dict_per_type_of_questions_end[type_of_questions] = {key: value for key, value in gold_dict.items() if key in audit_trail_from_question_type[type_of_questions] \ and value["question"].lower().find(type_of_questions) >= len(value["question"])-len(type_of_questions)-2 \ and key in pred_dict_per_type_of_questions[type_of_questions]} pred_dict_per_type_of_questions_end[type_of_questions] = {key: pred_dict[key] for key in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())} #print("*"*80) # Create two dictionnaries for each type of sentence based at the middle of the sentencecount_questions_type gold_dict_per_type_of_questions_middle[type_of_questions] = {key: value for key, value in gold_dict.items() if key not in list(gold_dict_per_type_of_questions_start[type_of_questions].keys()) \ and key not in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())} pred_dict_per_type_of_questions_middle[type_of_questions] = {key: pred_dict[key] for key in list(gold_dict_per_type_of_questions_end[type_of_questions].keys())} else: gold_dict_per_type_of_questions_start[""] = gold_dict_per_type_of_questions[""] pred_dict_per_type_of_questions_start[""] = pred_dict_per_type_of_questions[""] gold_dict_per_type_of_questions_end[""] = gold_dict_per_type_of_questions[""] pred_dict_per_type_of_questions_end[""] = pred_dict_per_type_of_questions[""] gold_dict_per_type_of_questions_middle[""] = gold_dict_per_type_of_questions[""] pred_dict_per_type_of_questions_middle[""] = pred_dict_per_type_of_questions[""] """ positions_in_question = ["beginning", "middle", "end"] # print(type_of_questions," F1 score: ", util.eval_dicts(gold_dict_per_type_of_questions[type_of_questions], pred_dict_per_type_of_questions[type_of_questions], args.use_squad_v2)['F1']) list_beginning = [ util.eval_dicts( gold_dict_per_type_of_questions_start[type_of_questions], pred_dict_per_type_of_questions_start[type_of_questions], args.use_squad_v2)['F1'] for type_of_questions in types_of_questions ] list_middle = [ util.eval_dicts( gold_dict_per_type_of_questions_middle[type_of_questions], pred_dict_per_type_of_questions_middle[type_of_questions], args.use_squad_v2)['F1'] for type_of_questions in types_of_questions ] list_end = [ util.eval_dicts( gold_dict_per_type_of_questions_end[type_of_questions], pred_dict_per_type_of_questions_end[type_of_questions], args.use_squad_v2)['F1'] for type_of_questions in types_of_questions ] #for type_of_questions in 
types_of_questions: # print("gold_dict_per_type_of_questions_start[type_of_questions]: ",gold_dict_per_type_of_questions_start[type_of_questions]) # print("pred_dict_per_type_of_questions[type_of_questions]: ",pred_dict_per_type_of_questions[type_of_questions]) F1 = np.array([list_beginning, list_middle, list_end]) m, n = F1.shape value_to_ignore = [] for i in range(m): for j in range(n): if F1[i, j] == "NA" or F1[i, j] == 0: value_to_ignore.append((i, j)) print("value to ignore: ", value_to_ignore) #F1 = np.array([[0, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0], # [0, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0], # [0, 0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0]]) data_label = copy.deepcopy(F1) for row in data_label: for column_idx in range(len(row)): if row[column_idx] == "NA": row[column_idx] = "" # print question without interrogative pronoun required for the second part of the analysis: for key, value in gold_dict.items(): if key in audit_trail_from_question_type[ ""] and key in pred_dict.keys(): print("question: ", gold_dict_per_type_of_questions['']) print("golden answers: ", ) print("prediction: ", pred_dict[key]) print() fig, ax = plt.subplots() types_of_questions[types_of_questions.index( "")] = "Implicit question without interrogative pronoun" im, cbar = heatmap(F1, positions_in_question, types_of_questions, ax=ax, \ cmap="YlGn", cbarlabel="F1 scores") texts = annotate_heatmap(im, data=data_label, valfmt="{x:.1f}", ignore=value_to_ignore) fig.tight_layout() plt.show() # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'{args.split.title()} {results_str}') # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize(tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals) # Write submission file sub_path = join(args.save_dir, args.split + '_' + args.sub_file) log.info(f'Writing submission file to {sub_path}...') with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh: csv_writer = csv.writer(csv_fh, delimiter=',') csv_writer.writerow(['Id', 'Predicted']) for uuid in sorted(sub_dict): csv_writer.writerow([uuid, sub_dict[uuid]])
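# The question-type bookkeeping above can be condensed into one helper:
# classify a question by its leading interrogative pronoun, handle the "in x" /
# "by x" compounds, and fall back to scanning the whole sentence ("" marks an
# implicit question with no pronoun). A simplified sketch (it drops the
# per-pronoun exclusions the full code applies to "in" and "by"):
from collections import Counter

PRONOUNS = ["what", "whose", "why", "which", "where", "when", "how", "who", "whom"]

def question_type(question):
    words = question.lower().replace("'", " ").split()
    if words and words[0] in PRONOUNS:
        return words[0]
    if len(words) > 1 and words[0] in ("in", "by") and words[1] in PRONOUNS:
        return f"{words[0]} {words[1]}"   # e.g. "in which", "by whom"
    return next((w for w in words if w in PRONOUNS), "")

print(Counter(question_type(q) for q in [
    "What is BiDAF?", "In which year was it built?", "Name the capital of France.",
]))  # Counter({'what': 1, 'in which': 1, '': 1})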
def test_model(questions, context, use_squad_v2=True, model_path="../save/training-02/best.pth.tar"): # Set up logging #args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False) #log = util.get_logger(args.save_dir, args.name) #log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') #args = get_test_args() device, gpu_ids = util.get_available_devices() batch_size = 64 * max(1, len(gpu_ids)) # Get embeddings #print('Loading embeddings...') word_vectors = util.torch_from_json('../data/word_emb.json') # Get model #print('Building model...') model = BiDAF(word_vectors=word_vectors, hidden_size=100) model = nn.DataParallel(model, gpu_ids) #model_path = "../save/training-02/best.pth.tar" #print(f'Loading checkpoint from {args.load_path}...') model = util.load_model(model, model_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader #print('Building dataset...') #record_file = vars(args)[f'{args.split}_record_file'] # my code starts here # this is a simple approach to dealing with user data # depending on your approach to building the interface you can change this code # you also have to check the function "process_file" in the setup.py file processed_questions = [] for index, question in enumerate(questions): processed_question = { "question": question, "id": index, "answers": [{ "answer_start": 0, "text": "never mind" }] } processed_questions.append(processed_question) source = {"paragraphs": [{"qas": processed_questions, "context": context}]} word_counter, char_counter = Counter(), Counter() with open("../data/word2idx.json", "r") as f1: word2idx_dict = json.load(f1) with open("../data/char2idx.json", "r") as f2: char2idx_dict = json.load(f2) my_test_examples, my_test_eval = process_file(source, "my_test", word_counter, char_counter) npz = build_features(my_test_examples, "my_test", word2idx_dict, char2idx_dict, is_test=True) # my code ends here dataset = SQuAD(npz, use_squad_v2) data_loader = data.DataLoader(dataset, batch_size=batch_size, shuffle=False, num_workers=4, collate_fn=collate_fn) # Evaluate #print(f'Evaluating on {args.split} split...') nll_meter = util.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission #eval_file = vars(args)[f'{args.split}_eval_file'] gold_dict = my_test_eval #print("gold_dict", gold_dict) #print("data_loader", data_loader) with torch.no_grad(), \ tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, 15, use_squad_v2) print("starts ", starts, " ends ", ends) # Log info progress_bar.update(batch_size) #if args.split != 'test': # No labels for the test set, so NLL would be invalid #progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens(gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), use_squad_v2) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) #print("my evaluation ....") #for el in pred_dict: #print(el, pred_dict[el]) #for el in sub_dict: #print(el, sub_dict[el]) return pred_dict
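# How test_model above is meant to be called: plain strings in, an
# {id: answer_text} dict out. This usage sketch assumes the ../data files and a
# trained checkpoint exist at the paths the function defaults to; the context
# and questions below are made up.
if __name__ == "__main__":
    context = ("The Space Needle is an observation tower in Seattle. "
               "It was built for the 1962 World's Fair.")
    questions = ["Where is the Space Needle?", "Why was it built?"]
    answers = test_model(questions, context)
    for qid, text in answers.items():
        print(qid, "->", text)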
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True))) # Comment out to only use 1 GPU on nv12 args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info('Using random seed {}...'.format(args.seed)) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Get model log.info('Building model...') model = None max_context_len, max_question_len = args.para_limit, args.ques_limit if (args.model_type == "bidaf" or args.model_type == "bert-bidaf"): model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) elif (args.model_type == "dcn" or args.model_type == "bert-dcn"): model = DCN(word_vectors=word_vectors, hidden_size=args.hidden_size, max_context_len=max_context_len, max_question_len=max_question_len, drop_prob=args.drop_prob) elif (args.model_type == "bert-basic"): model = BERT(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) if model is None: raise ValueError('Model is unassigned. Please ensure --model_type \ chooses between {bidaf, bert-bidaf, dcn, bert-dcn, bert-basic} ') model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info('Loading checkpoint from {}...'.format(args.load_path)) model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) 
# Constant LR # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) count_skip = 0 while epoch != args.num_epochs: epoch += 1 log.info('Starting epoch {}...'.format(epoch)) with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: batch_size = cw_idxs.size(0) count_skip += 1 if (args.skip_examples == True and (count_skip % 5 == 1 or count_skip % 5 == 2 or count_skip % 5 == 3 or count_skip % 5 == 4)): step += batch_size progress_bar.update(batch_size) steps_till_eval -= batch_size continue # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) optimizer.zero_grad() ## Additions for BERT ## max_context_len, max_question_len = args.para_limit, args.ques_limit if "bert" in args.model_type: bert_train_embeddings = get_embeddings( "train", ids, args.para_limit, args.ques_limit) else: bert_train_embeddings = None # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs, bert_train_embeddings, \ max_context_len, max_question_len, device) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info('Evaluating at step {}...'.format(step)) ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2, args) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('Dev {}'.format(results_str)) # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar('dev/{}'.format(k), v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
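# The skip_examples branch above keeps one mini-batch in five (those where
# count_skip % 5 == 0) and only advances the progress bookkeeping for the rest.
# The four-way "or" is equivalent to a single modulo test, which this small
# check confirms:
def should_skip(count_skip):
    return count_skip % 5 in (1, 2, 3, 4)   # the condition as written above

assert all(should_skip(n) == (n % 5 != 0) for n in range(1, 101))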
def main(args): # Set up logging and devices (unchanged from train.py) args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) # train only, not in test device, args.gpu_ids = util.get_available_devices() # todo(small): should this be args (compare test_para) log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') args.batch_size *= max(1, len(args.gpu_ids)) # args.py: default size is 64 # Set random seed (unchanged) - train only log.info(f'Using random seed {args.seed}...') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Prepare BiDAF model (must already be trained) log.info('Building BiDAF model (should be pretrained)') bidaf_model = BiDAF(word_vectors=word_vectors, # todo: these word vectors shouldn't matter? hidden_size=args.hidden_size) # since they will be loaded in during load_model? #drop_prob=args.drop_prob) # no drop probability since we are not training bidaf_model = nn.DataParallel(bidaf_model, args.gpu_ids) if args.short_test: args.hidden_size = 5 elif not args.load_path: log.info("Trying to train the paraphraser without a BiDAF model. " "First train BiDAF and then specify the load path. Exiting") exit(1) else: log.info(f'Loading checkpoint from {args.load_path}...') bidaf_model = util.load_model(bidaf_model, args.load_path, args.gpu_ids, return_step=False) # don't need step since we aren't training bidaf_model = bidaf_model.to(device) bidaf_model.eval() # we eval only (vs train) # todo: Setup the Paraphraser model paraphaser_model = Paraphraser(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) # Get data loader log.info('Building dataset...') # New for paraphrase: squad_paraphrase has extra fields train_dataset = SQuAD_paraphrase(args.train_record_file, args.use_squad_v2) # train.npz (from setup.py, build_features()) train_loader = data.DataLoader(train_dataset, # this dataloader used for all epoch iteration batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn_para) dev_dataset = SQuAD_paraphrase(args.dev_record_file, args.use_squad_v2) # dev.npz (same as above) dev_loader = data.DataLoader(dev_dataset, # dev.npz used in evaluate() fcn batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn_para) # todo: this is just for looking at the paraphrases idx2word_dict = load(args.idx2word_file) # Get saver (needed by the checkpointing in the loop below) saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler (needed by the update step in the loop below) ema = util.EMA(paraphaser_model, args.ema_decay) optimizer = optim.Adadelta(paraphaser_model.parameters(), args.lr, weight_decay=args.l2_wd) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) # Constant LR # Train step = 0 log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info(f'Starting epoch {epoch}...') with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, cphr_idxs, qphr_idxs, qphr_types, ids in train_loader: # Setup for forward # note that cc_idxs, qc_idxs (character indices) are not used!
cw_idxs = cw_idxs.to(device) # todo: what does this actually do? qw_idxs = qw_idxs.to(device) cphr_idxs = cphr_idxs.to(device) qphr_idxs = qphr_idxs.to(device) qphr_types = qphr_types.to(device) batch_size = cw_idxs.size(0) # if args.short_test: # print(f'batch size: {batch_size}') # for i, type in enumerate(cphr_idxs[0]): # print(f'type: {i}') # pp(type) # for x in (qphr_idxs[0], qphr_types[0]): # pp(x) # return paraphrased = paraphaser_model(qphr_idxs, qphr_types, cphr_idxs) for idx, p in enumerate(paraphrased): # enumerate over batch_size non_zeros = p[p.nonzero()].squeeze() #paraphrased[idx] = non_zeros sentence_as_list = [idx2word_dict[str(w.item())] for w in non_zeros] pp(" ".join(sentence_as_list)) #pp([idx2word_dict[w] for w in non_zeros]) if args.short_test: return optimizer.zero_grad() # Forward (through the pretrained QA model; the original code referenced an undefined `model` here) log_p1, log_p2 = bidaf_model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(paraphaser_model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) # // is floor division ema(paraphaser_model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info(f'Evaluating at step {step}...') ema.assign(paraphaser_model) results, pred_dict = evaluate(bidaf_model, dev_loader, device, # call eval with dev_loader args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, paraphaser_model, results[args.metric_name], device) ema.resume(paraphaser_model) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'Dev {results_str}') # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar(f'dev/{k}', v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
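# The paraphrase-inspection snippet above strips the padding zeros out of an id
# tensor and maps what is left through idx2word_dict. The same step as a small
# helper, assuming a {str(id): word} mapping like the one loaded from
# args.idx2word_file (the toy vocabulary below is made up):
import torch

def ids_to_sentence(ids, idx2word):
    """Turn a 1-D tensor of word ids into a string, dropping 0 (padding)."""
    return " ".join(idx2word[str(i)] for i in ids.tolist() if i != 0)

idx2word = {"1": "what", "2": "is", "3": "bidaf"}
print(ids_to_sentence(torch.tensor([1, 2, 3, 0, 0]), idx2word))  # what is bidaf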
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True))) args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info('Using random seed {}...'.format(args.seed)) random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) char_vectors = util.torch_from_json(args.char_emb_file) # ###################################### # tokenizer = BertTokenizer.from_pretrained('bert-large-uncased', do_lower_case=True) # train_examples = None # train_examples = read_squad_examples( # input_file=args.train_file, is_training=True, version_2_with_negative=args.version_2_with_negative) # train_features = convert_examples_to_features( # examples=train_examples, # tokenizer=tokenizer, # max_seq_length=args.max_seq_length, # doc_stride=args.doc_stride, # max_query_length=args.max_query_length, # is_training=True) # if args.local_rank == -1 or torch.distributed.get_rank() == 0: # logger.info(" Saving train features into cached file %s", cached_train_features_file) # with open(cached_train_features_file, "wb") as writer: # pickle.dump(train_features, writer) # all_input_ids = torch.tensor([f.input_ids for f in train_features], dtype=torch.long) # x = all_input_ids ########################################### # Get model log.info('Building model...') model = BiDAF(word_vectors=word_vectors, char_vectors=char_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info('Loading checkpoint from {}...'.format(args.load_path)) model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) 
# Constant LR # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info('Starting epoch {}...'.format(epoch)) with torch.enable_grad(), \ tqdm(total=len(train_loader.dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # added_flag cc_idxs = cc_idxs.to(device) qc_idxs = qc_idxs.to(device) optimizer.zero_grad() # Forward # log_p1, log_p2 = model(cw_idxs, qw_idxs) log_p1, log_p2 = model(cw_idxs, qw_idxs, cc_idxs, qc_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size progress_bar.update(batch_size) progress_bar.set_postfix(epoch=epoch, NLL=loss_val) tbx.add_scalar('train/NLL', loss_val, step) tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step) steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info('Evaluating at step {}...'.format(step)) ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join('{}: {:05.2f}'.format(k, v) for k, v in results.items()) log.info('Dev {}'.format(results_str)) # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar('dev/{}'.format(k), v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
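# Every training loop in these scripts uses the same objective: the sum of two
# NLL terms, one over the answer's start index and one over its end index, with
# the model emitting log-probabilities over context positions. A tiny worked
# example on fake tensors:
import torch
import torch.nn.functional as F

log_p1 = torch.log_softmax(torch.randn(2, 5), dim=-1)  # fake start scores: batch=2, c_len=5
log_p2 = torch.log_softmax(torch.randn(2, 5), dim=-1)  # fake end scores
y1 = torch.tensor([0, 3])                              # gold start positions
y2 = torch.tensor([2, 4])                              # gold end positions
loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
print(loss.item())  # equals -mean_i log_p1[i, y1[i]] - mean_i log_p2[i, y2[i]]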
def test(args): # Set up logging log = util.get_logger(args.save_dir, args.name) log.info(f"Args: {dumps(vars(args), indent=4, sort_keys=True)}") device, gpu_ids = util.get_available_devices() args.batch_size *= max(1, len(gpu_ids)) # Get embeddings log.info("Loading embeddings...") word_vectors = util.torch_from_json(args.word_emb_file) # Get model log.info("Building model...") model = BiDAF( word_vectors=word_vectors, hidden_size=args.hidden_size, use_glove=args.use_glove, ) model = nn.DataParallel(model, gpu_ids) log.info(f"Loading checkpoint from {args.load_path}...") model = util.load_model(model, args.load_path, gpu_ids, return_step=False) model = model.to(device) model.eval() # Get data loader log.info("Building dataset...") record_file = vars(args)[f"{args.split}_record_file"] dataset = SQuAD(record_file, args.use_squad_v2) data_loader = data.DataLoader( dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn, ) # Evaluate log.info(f"Evaluating on {args.split} split...") nll_meter = stats.AverageMeter() pred_dict = {} # Predictions for TensorBoard sub_dict = {} # Predictions for submission eval_file = vars(args)[f"{args.split}_eval_file"] with open(eval_file, "r") as fh: gold_dict = json_load(fh) with torch.no_grad(), tqdm(total=len(dataset)) as progress_bar: for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in data_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) nll_meter.update(loss.item(), batch_size) # Get F1 and EM scores p1, p2 = log_p1.exp(), log_p2.exp() starts, ends = util.discretize(p1, p2, args.max_ans_len, args.use_squad_v2) # Log info progress_bar.update(batch_size) if args.split != "test": # No labels for the test set, so NLL would be invalid progress_bar.set_postfix(NLL=nll_meter.avg) idx2pred, uuid2pred = util.convert_tokens( gold_dict, ids.tolist(), starts.tolist(), ends.tolist(), args.use_squad_v2, ) pred_dict.update(idx2pred) sub_dict.update(uuid2pred) # Log results (except for test set, since it does not come with labels) if args.split != "test": results = {"NLL": nll_meter.avg} results.update(eval.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)) # Log to console results_str = ", ".join(f"{k}: {v:05.2f}" for k, v in results.items()) log.info(f"{args.split.title()} {results_str}") # Log to TensorBoard tbx = SummaryWriter(args.save_dir) util.visualize( tbx, pred_dict=pred_dict, eval_path=eval_file, step=0, split=args.split, num_visuals=args.num_visuals, ) # Write submission file if args.split == "dev": sub_path = join(args.save_dir, "val" + "_" + args.sub_file) else: sub_path = join(args.save_dir, args.split + "_" + args.sub_file) log.info(f"Writing submission file to {sub_path}...") eval.write_submission(sub_path, sub_dict)
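# test() above delegates the CSV output to eval.write_submission. Based on the
# inline submission-writing code in the other scripts here, a compatible
# implementation would look roughly like this (the actual helper may differ):
import csv

def write_submission(sub_path, sub_dict):
    with open(sub_path, "w", newline="", encoding="utf-8") as csv_fh:
        writer = csv.writer(csv_fh, delimiter=",")
        writer.writerow(["Id", "Predicted"])
        for uuid in sorted(sub_dict):
            writer.writerow([uuid, sub_dict[uuid]])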
def main(args): # Set up logging and devices args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True) log = util.get_logger(args.save_dir, args.name) tbx = SummaryWriter(args.save_dir) device, args.gpu_ids = util.get_available_devices() log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}') args.batch_size *= max(1, len(args.gpu_ids)) # Set random seed log.info(f'Using random seed {args.seed}...') random.seed(args.seed) np.random.seed(args.seed) torch.manual_seed(args.seed) torch.cuda.manual_seed_all(args.seed) # Get embeddings log.info('Loading embeddings...') word_vectors = util.torch_from_json(args.word_emb_file) # Get model log.info('Building model...') model = BiDAF(word_vectors=word_vectors, hidden_size=args.hidden_size, drop_prob=args.drop_prob) model = nn.DataParallel(model, args.gpu_ids) if args.load_path: log.info(f'Loading checkpoint from {args.load_path}...') model, step = util.load_model(model, args.load_path, args.gpu_ids) else: step = 0 model = model.to(device) model.train() ema = util.EMA(model, args.ema_decay) # Get saver saver = util.CheckpointSaver(args.save_dir, max_checkpoints=args.max_checkpoints, metric_name=args.metric_name, maximize_metric=args.maximize_metric, log=log) # Get optimizer and scheduler optimizer = optim.Adadelta(model.parameters(), args.lr, weight_decay=args.l2_wd) scheduler = sched.LambdaLR(optimizer, lambda s: 1.) # Constant LR # Get data loader log.info('Building dataset...') train_dataset = SQuAD(args.train_record_file, args.use_squad_v2) train_loader = data.DataLoader(train_dataset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers, collate_fn=collate_fn) dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2) dev_loader = data.DataLoader(dev_dataset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers, collate_fn=collate_fn) # Train log.info('Training...') steps_till_eval = args.eval_steps epoch = step // len(train_dataset) while epoch != args.num_epochs: epoch += 1 log.info(f'Starting epoch {epoch}...') for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader: # Setup for forward cw_idxs = cw_idxs.to(device) qw_idxs = qw_idxs.to(device) batch_size = cw_idxs.size(0) optimizer.zero_grad() # Forward log_p1, log_p2 = model(cw_idxs, qw_idxs) y1, y2 = y1.to(device), y2.to(device) loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2) loss_val = loss.item() # Backward loss.backward() nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm) optimizer.step() scheduler.step(step // batch_size) ema(model, step // batch_size) # Log info step += batch_size if step % 1000 == 0 and step > 0: log.info(f'Step {step}: training loss {loss_val}...') steps_till_eval -= batch_size if steps_till_eval <= 0: steps_till_eval = args.eval_steps # Evaluate and save checkpoint log.info(f'Evaluating at step {step}...') ema.assign(model) results, pred_dict = evaluate(model, dev_loader, device, args.dev_eval_file, args.max_ans_len, args.use_squad_v2) saver.save(step, model, results[args.metric_name], device) ema.resume(model) # Log to console results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items()) log.info(f'Dev {results_str}') # Log to TensorBoard log.info('Visualizing in TensorBoard...') for k, v in results.items(): tbx.add_scalar(f'dev/{k}', v, step) util.visualize(tbx, pred_dict=pred_dict, eval_path=args.dev_eval_file, step=step, split='dev', num_visuals=args.num_visuals)
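# The ema(...) / ema.assign(...) / ema.resume(...) calls above keep an
# exponential moving average of the weights and swap it in only for evaluation.
# A minimal sketch of a class with the same call pattern as util.EMA (the decay
# ramp (1 + n) / (10 + n) is a common choice, assumed here):
class EMASketch:
    def __init__(self, model, decay):
        self.decay = decay
        self.shadow = {n: p.data.clone() for n, p in model.named_parameters()
                       if p.requires_grad}
        self.backup = {}

    def __call__(self, model, num_updates):
        d = min(self.decay, (1.0 + num_updates) / (10.0 + num_updates))
        for n, p in model.named_parameters():
            if p.requires_grad:
                self.shadow[n] = (1.0 - d) * p.data + d * self.shadow[n]

    def assign(self, model):
        """Swap the averaged weights in before evaluating."""
        for n, p in model.named_parameters():
            if p.requires_grad:
                self.backup[n] = p.data
                p.data = self.shadow[n]

    def resume(self, model):
        """Restore the live training weights afterwards."""
        for n, p in model.named_parameters():
            if p.requires_grad:
                p.data = self.backup[n]
        self.backup = {}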
def train(args):
    # Set up logging and devices
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f"Args: {dumps(vars(args), indent=4, sort_keys=True)}")
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info(f"Using random seed {args.seed}...")
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info("Loading embeddings...")
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info("Building model...")
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob,
                  use_glove=args.use_glove)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info(f"Loading checkpoint from {args.load_path}...")
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = stats.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adam(model.parameters(), args.lr,
                           weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.0)  # Constant LR

    # Get data loader
    log.info("Building dataset...")
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info("Training...")
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    scaler = amp.GradScaler()
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f"Starting epoch {epoch}...")
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward (mixed precision)
                with amp.autocast():
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                    y1, y2 = y1.to(device), y2.to(device)
                    loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward: unscale before clipping so the norm is computed
                # on true-scale gradients
                scaler.scale(loss).backward()
                scaler.unscale_(optimizer)
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                scaler.step(optimizer)
                scaler.update()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar("train/NLL", loss_val, step)
                tbx.add_scalar("train/LR", optimizer.param_groups[0]["lr"], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f"Evaluating at step {step}...")
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ", ".join(f"{k}: {v:05.2f}"
                                            for k, v in results.items())
                    log.info(f"Dev {results_str}")

                    # Log to TensorBoard
                    log.info("Visualizing in TensorBoard...")
                    for k, v in results.items():
                        tbx.add_scalar(f"dev/{k}", v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split="dev",
                                   num_visuals=args.num_visuals)
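# The mixed-precision step in train() follows the standard torch.cuda.amp
# recipe: forward under autocast, scaled backward, unscale_ before gradient
# clipping, then scaler.step/update. A minimal sketch of that pattern in
# isolation (amp_step and loss_fn are generic illustrative names):
import torch
from torch.cuda import amp

def amp_step(model, optimizer, scaler, loss_fn, batch, max_grad_norm=5.0):
    optimizer.zero_grad()
    with amp.autocast():
        loss = loss_fn(model, batch)   # forward pass runs in mixed precision
    scaler.scale(loss).backward()      # backward on the scaled loss
    scaler.unscale_(optimizer)         # gradients back to true scale
    torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
    scaler.step(optimizer)             # skips the step on inf/nan gradients
    scaler.update()                    # adapts the loss scale for next step
    return loss.item()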
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    # TODO: check whether all of these embeddings are actually needed
    word_vectors = util.torch_from_json(args.word_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(word_vectors=word_vectors,
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
def main(args):
    # Set up logging and devices
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info('Args: {}'.format(dumps(vars(args), indent=4, sort_keys=True)))
    args.batch_size *= max(1, len(args.gpu_ids))

    # Set random seed
    log.info('Using random seed {}...'.format(args.seed))
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)

    # Get model
    log.info('Building model...')
    model = BiDAF(vectors=(word_vectors, char_vectors),
                  hidden_size=args.hidden_size,
                  drop_prob=args.drop_prob,
                  p_sdd=args.p_sdd,
                  char_limit=args.char_limit,
                  use_transformer=args.use_transformer,
                  inter_size=args.inter_size,
                  heads=args.heads,
                  c2w_size=args.c2w_size,
                  enc_blocks=args.enc_blocks,
                  enc_convs=args.enc_convs,
                  mod_blocks=args.mod_blocks,
                  mod_convs=args.mod_convs,
                  use_GRU=args.use_GRU)
    model = nn.DataParallel(model, args.gpu_ids)
    if args.load_path:
        log.info('Loading checkpoint from {}...'.format(args.load_path))
        model, step = util.load_model(model, args.load_path,
                                      args.gpu_ids)  # uses the saved step num
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    # optimizer = optim.Adadelta(model.parameters(), args.lr,
    #                            weight_decay=args.l2_wd)
    # The scheduler MULTIPLIES the base LR, it does NOT replace it
    optimizer = optim.Adam(model.parameters(), 1., betas=(.9, .98),
                           eps=1e-9, weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(
        optimizer,
        lambda s: 0.001 * math.log(s + 1) / math.log(1000 - 1)
        if s < 1000 else 0.001)  # Chute (must use math.log, else TypeError)
    # scheduler = sched.LambdaLR(optimizer,
    #                            lambda s: (args.hidden_size ** (-.5)) *
    #                            min((s + 1e-9) ** (-.5),
    #                                s * (4000 ** (-1.5))))  # Vaswani et al. 2017

    # Get data loader
    log.info('Building dataset...')
    train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
    train_loader = data.DataLoader(train_dataset,
                                   batch_size=args.batch_size,
                                   shuffle=True,
                                   num_workers=args.num_workers,
                                   collate_fn=collate_fn)
    dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
    dev_loader = data.DataLoader(dev_dataset,
                                 batch_size=args.batch_size,
                                 shuffle=False,
                                 num_workers=args.num_workers,
                                 collate_fn=collate_fn)

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    while epoch != args.num_epochs:
        epoch += 1
        log.info('Starting epoch {}...'.format(epoch))
        with torch.enable_grad(), \
                tqdm(total=len(train_loader.dataset)) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # Setup for forward
                optimizer.zero_grad()
                batch_size = cw_idxs.size(0)
                cc_idxs = cc_idxs.to(device)  # (batch, c_limit, char_limit)
                qc_idxs = qc_idxs.to(device)
                cw_idxs = cw_idxs.to(device)  # (batch, c_limit)
                qw_idxs = qw_idxs.to(device)
                c_idxs, q_idxs = (cw_idxs, cc_idxs), (qw_idxs, qc_idxs)

                # Forward
                log_p1, log_p2 = model(c_idxs, q_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                # By default LambdaLR schedules per epoch; pass step as "epoch"
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)

                # Log info
                step += batch_size  # counts examples; "step" usually counts
                                    # (mini-)batches elsewhere
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)

                steps_till_eval -= batch_size
                if steps_till_eval <= 0:
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info('Evaluating at step {}...'.format(step))
                    ema.assign(model)
                    results, pred_dict = evaluate(model, dev_loader, device,
                                                  args.dev_eval_file,
                                                  args.max_ans_len,
                                                  args.use_squad_v2)
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    results_str = ', '.join('{}: {:05.2f}'.format(k, v)
                                            for k, v in results.items())
                    log.info('Dev {}'.format(results_str))

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar('dev/{}'.format(k), v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)
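# Since the Adam base LR above is 1.0, the LambdaLR multiplier IS the
# effective learning rate: a logarithmic warmup to 0.001 over the first
# 1000 scheduler steps, constant afterwards. A quick sanity check of the
# multiplier values (lr_multiplier is an illustrative name; 1000 and 0.001
# are the constants used in the code):
import math

def lr_multiplier(s, warmup=1000, peak=0.001):
    if s < warmup:
        return peak * math.log(s + 1) / math.log(warmup - 1)
    return peak

for s in (0, 10, 100, 999, 5000):
    print(s, lr_multiplier(s))
# s=0 gives 0.0 (log(1) = 0); the factor rises to ~0.001 at s=999
# and stays constant from there on.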
def main(args):
    # Set up logging
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=False)
    log = util.get_logger(args.save_dir, args.name)
    device, gpu_ids = util.get_available_devices()
    args.batch_size *= max(1, len(gpu_ids))
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')

    # Get embeddings
    log.info('Loading embeddings...')
    word_vectors = util.torch_from_json(args.word_emb_file)

    models = {}
    if args.use_ensemble:
        total_models = 0
        for model_name in ['bidaf', 'bidafextra', 'fusionnet']:
            models_list = []
            for model_file in glob.glob(
                    f'{args.load_path}/{model_name}-*/{args.ensemble_models}'):
                # Get model
                log.info('Building model...')
                if model_name == 'bidaf':
                    model = BiDAF(word_vectors=word_vectors,
                                  hidden_size=args.hidden_size)
                elif model_name == 'bidafextra':
                    model = BiDAFExtra(word_vectors=word_vectors, args=args)
                elif model_name == 'fusionnet':
                    model = FusionNet(word_vectors=word_vectors, args=args)
                model = nn.DataParallel(model, gpu_ids)
                log.info(f'Loading checkpoint from {model_file}...')
                model = util.load_model(model, model_file, gpu_ids,
                                        return_step=False)
                # Load each model on CPU (have plenty of RAM ...)
                model = model.cpu()
                model.eval()
                models_list.append(model)
            models[model_name] = models_list
            total_models += len(models_list)
        log.info(f'Using an ensemble of {total_models} models')
    else:
        device, gpu_ids = util.get_available_devices()

        # Get model
        log.info('Building model...')
        if args.model == 'bidaf':
            model = BiDAF(word_vectors=word_vectors,
                          hidden_size=args.hidden_size)
        elif args.model == 'bidafextra':
            model = BiDAFExtra(word_vectors=word_vectors, args=args)
        elif args.model == 'fusionnet':
            model = FusionNet(word_vectors=word_vectors, args=args)
        model = nn.DataParallel(model, gpu_ids)
        log.info(f'Loading checkpoint from {args.load_path}...')
        model = util.load_model(model, args.load_path, gpu_ids,
                                return_step=False)
        model = model.to(device)
        model.eval()
        models[args.model] = [model]

    # Get data loader
    log.info('Building dataset...')
    record_file = vars(args)[f'{args.split}_record_file']
    dataset = SQuAD(record_file, args)
    data_loader = data.DataLoader(dataset,
                                  batch_size=args.batch_size,
                                  shuffle=False,
                                  num_workers=args.num_workers,
                                  collate_fn=collate_fn)

    # Evaluate
    log.info(f'Evaluating on {args.split} split...')
    nll_meter = util.AverageMeter()
    pred_dict = {}  # Predictions for TensorBoard
    sub_dict = {}   # Predictions for submission
    eval_file = vars(args)[f'{args.split}_eval_file']
    with open(eval_file, 'r') as fh:
        gold_dict = json_load(fh)
    with torch.no_grad(), \
            tqdm(total=len(dataset)) as progress_bar:
        for cw_idxs, cc_idxs, qw_idxs, qc_idxs, cw_pos, cw_ner, cw_freq, \
                cqw_extra, y1, y2, ids in data_loader:
            # Setup for forward
            cw_idxs = cw_idxs.to(device)
            qw_idxs = qw_idxs.to(device)
            batch_size = cw_idxs.size(0)

            p1s = []
            p2s = []
            for model_name in models:
                for model in models[model_name]:
                    # Move model to GPU to evaluate
                    model = model.to(device)

                    # Forward
                    if model_name == 'bidaf':
                        log_p1, log_p2 = model(cw_idxs, qw_idxs)
                    else:
                        log_p1, log_p2 = model(cw_idxs, qw_idxs, cw_pos,
                                               cw_ner, cw_freq, cqw_extra)
                    log_p1, log_p2 = log_p1.cpu(), log_p2.cpu()

                    if not args.use_ensemble:
                        y1, y2 = y1.to(device), y2.to(device)
                        log_p1, log_p2 = log_p1.to(device), log_p2.to(device)
                        loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                        nll_meter.update(loss.item(), batch_size)

                    # Move model back to CPU to release GPU memory
                    model = model.cpu()

                    # Collect per-model probabilities for ensembling
                    p1 = log_p1.exp().unsqueeze(-1).cpu()
                    p2 = log_p2.exp().unsqueeze(-1).cpu()
                    p1s.append(p1)
                    p2s.append(p2)

            # Per token, keep the highest start/end probability across models
            best_ps = torch.max(
                torch.cat([torch.cat(p1s, -1).unsqueeze(-1),
                           torch.cat(p2s, -1).unsqueeze(-1)], -1),
                -2)[0]
            p1, p2 = best_ps[:, :, 0], best_ps[:, :, 1]
            starts, ends = util.discretize(p1, p2, args.max_ans_len,
                                           args.use_squad_v2)

            # Log info
            progress_bar.update(batch_size)
            if args.split != 'test':
                # No labels for the test set, so NLL would be invalid
                progress_bar.set_postfix(NLL=nll_meter.avg)

            idx2pred, uuid2pred = util.convert_tokens(gold_dict,
                                                      ids.tolist(),
                                                      starts.tolist(),
                                                      ends.tolist(),
                                                      args.use_squad_v2)
            pred_dict.update(idx2pred)
            sub_dict.update(uuid2pred)

    # Log results (except for test set, since it does not come with labels)
    if args.split != 'test':
        results = util.eval_dicts(gold_dict, pred_dict, args.use_squad_v2)
        results_list = [('NLL', nll_meter.avg),
                        ('F1', results['F1']),
                        ('EM', results['EM'])]
        if args.use_squad_v2:
            results_list.append(('AvNA', results['AvNA']))
        results = OrderedDict(results_list)

        # Log to console
        results_str = ', '.join(f'{k}: {v:05.2f}' for k, v in results.items())
        log.info(f'{args.split.title()} {results_str}')

        # Log to TensorBoard
        tbx = SummaryWriter(args.save_dir)
        util.visualize(tbx,
                       pred_dict=pred_dict,
                       eval_path=eval_file,
                       step=0,
                       split=args.split,
                       num_visuals=args.num_visuals)

    # Write submission file
    sub_path = join(args.save_dir, args.split + '_' + args.sub_file)
    log.info(f'Writing submission file to {sub_path}...')
    with open(sub_path, 'w', newline='', encoding='utf-8') as csv_fh:
        csv_writer = csv.writer(csv_fh, delimiter=',')
        csv_writer.writerow(['Id', 'Predicted'])
        for uuid in sorted(sub_dict):
            csv_writer.writerow([uuid, sub_dict[uuid]])
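# The ensemble combination above keeps, per token, the maximum start and end
# probability across models before discretizing. A toy illustration of the
# same tensor gymnastics (two models, batch of 1, context length 4; the
# values are made up):
import torch

p1s = [torch.tensor([[0.1, 0.6, 0.2, 0.1]]),
       torch.tensor([[0.3, 0.4, 0.2, 0.1]])]
p2s = [torch.tensor([[0.1, 0.1, 0.7, 0.1]]),
       torch.tensor([[0.1, 0.2, 0.5, 0.2]])]

# Shape bookkeeping: (batch, c_len, n_models, 2) after stacking
stack = torch.cat(
    [torch.cat([p.unsqueeze(-1) for p in p1s], -1).unsqueeze(-1),
     torch.cat([p.unsqueeze(-1) for p in p2s], -1).unsqueeze(-1)], -1)
best = torch.max(stack, dim=-2)[0]   # max over the model dimension
p1, p2 = best[..., 0], best[..., 1]  # each (batch, c_len)
print(p1)  # per-token max of the two start distributions: [0.3, 0.6, 0.2, 0.1]
print(p2)  # per-token max of the two end distributions:   [0.1, 0.2, 0.7, 0.2]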
def main(args):
    # Set up logging and devices
    startime = datetime.now()
    args.save_dir = util.get_save_dir(args.save_dir, args.name, training=True)
    log = util.get_logger(args.save_dir, args.name)
    time_log = args.log_time
    if time_log > 0:
        log.info(f'Start training at: {startime.strftime("%H:%M:%S")}')
    tbx = SummaryWriter(args.save_dir)
    device, args.gpu_ids = util.get_available_devices()
    log.info(f'Args: {dumps(vars(args), indent=4, sort_keys=True)}')
    args.batch_size *= max(1, len(args.gpu_ids))
    model_type = args.model

    # Set random seed
    log.info(f'Using random seed {args.seed}...')
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    # check this
    # useCharEmbeddings = args.model == 'BiDAFplus'

    # Get embeddings
    log.info('Loading embeddings...')
    print(f'{args.word_emb_file}')
    word_vectors = util.torch_from_json(args.word_emb_file)
    char_vectors = util.torch_from_json(args.char_emb_file)
    if time_log > 0:
        log.info(f'Loaded embeddings: {(datetime.now() - startime).seconds}')

    # Get model
    log.info('Building model...')
    if model_type == 'BiDAFplus':
        model = BiDAFplus(word_vectors=word_vectors,
                          char_vectors=char_vectors,
                          hidden_size=args.hidden_size,
                          params=get_params(model_type, args.params))
    elif model_type == 'BiDAFbase':
        model = BiDAFbase(word_vectors=word_vectors,
                          hidden_size=args.hidden_size,
                          drop_prob=args.drop_prob)
    elif model_type == "Transformer":
        model = TransformerModel(word_vectors=word_vectors,
                                 char_vectors=char_vectors,
                                 params=get_params(model_type, args.params))
    elif model_type == 'BiDAF':
        model = BiDAF(word_vectors=word_vectors,
                      char_vectors=char_vectors,
                      hidden_size=args.hidden_size,
                      params=get_params(model_type, args.params))
    model = nn.DataParallel(model, args.gpu_ids)
    if time_log > 0:
        log.info(f'Built model: {(datetime.now() - startime).seconds}')
    if args.load_path:
        log.info(f'Loading checkpoint from {args.load_path}...')
        model, step = util.load_model(model, args.load_path, args.gpu_ids)
    else:
        step = 0
    model = model.to(device)
    model.train()
    ema = util.EMA(model, args.ema_decay)

    # Get saver
    saver = util.CheckpointSaver(args.save_dir,
                                 max_checkpoints=args.max_checkpoints,
                                 metric_name=args.metric_name,
                                 maximize_metric=args.maximize_metric,
                                 log=log)

    # Get optimizer and scheduler
    optimizer = optim.Adadelta(model.parameters(), args.lr,
                               weight_decay=args.l2_wd)
    scheduler = sched.LambdaLR(optimizer, lambda s: 1.)  # Constant LR

    # Get data loader
    log.info('Building dataset...')
    if args.mode != 'quick_eval':
        train_dataset = SQuAD(args.train_record_file, args.use_squad_v2)
        train_loader = data.DataLoader(train_dataset,
                                       batch_size=args.batch_size,
                                       shuffle=True,
                                       num_workers=args.num_workers,
                                       collate_fn=collate_fn)
        dev_dataset = SQuAD(args.dev_record_file, args.use_squad_v2)
        dev_loader = data.DataLoader(dev_dataset,
                                     batch_size=args.batch_size,
                                     shuffle=False,
                                     num_workers=args.num_workers,
                                     collate_fn=collate_fn)
    else:
        # quick_eval mode: reuse one cached batch to smoke-test the loop
        loaded_data = quick_eval_data_loader()
        train_loader = [loaded_data for _ in range(5)]
        dev_loader = [quick_eval_data_loader(dev=True)]
        train_dataset = train_loader
        dev_dataset = dev_loader
    log.info('Built dataset: {}:{}'.format(
        *divmod((datetime.now() - startime).seconds, 60)))

    # Train
    log.info('Training...')
    steps_till_eval = args.eval_steps
    epoch = step // len(train_dataset)
    if time_log > 0:
        traintime = datetime.now()
    total_iterations = 0
    while epoch != args.num_epochs:
        epoch += 1
        log.info(f'Starting epoch {epoch}...')
        if time_log > 0:
            epochtime = datetime.now()
        if args.mode != 'quick_eval':
            progress_len = len(train_loader.dataset)
        else:
            progress_len = len(train_loader)
        with torch.enable_grad(), \
                tqdm(total=progress_len) as progress_bar:
            for cw_idxs, cc_idxs, qw_idxs, qc_idxs, y1, y2, ids in train_loader:
                # quick_eval_data_saver(cw_idxs, cc_idxs, qw_idxs, qc_idxs,
                #                       y1, y2, ids)
                if time_log > 0:
                    itertime = datetime.now()

                # Setup for forward
                cw_idxs = cw_idxs.to(device)
                qw_idxs = qw_idxs.to(device)
                batch_size = cw_idxs.size(0)
                optimizer.zero_grad()

                # Forward
                if model_type == 'BiDAF' or model_type == "Transformer":
                    cc_idxs = cc_idxs.to(device)
                    qc_idxs = qc_idxs.to(device)
                    log_p1, log_p2 = model(cc_idxs, qc_idxs, cw_idxs, qw_idxs)
                elif model_type == 'BiDAFbase':
                    log_p1, log_p2 = model(cw_idxs, qw_idxs)
                y1, y2 = y1.to(device), y2.to(device)
                loss = F.nll_loss(log_p1, y1) + F.nll_loss(log_p2, y2)
                loss_val = loss.item()
                if time_log > 2:
                    forwardtime = datetime.now()
                    log.info('Forward time {}:{}'.format(
                        *divmod((forwardtime - itertime).seconds, 60)))

                # Backward
                loss.backward()
                nn.utils.clip_grad_norm_(model.parameters(), args.max_grad_norm)
                optimizer.step()
                scheduler.step(step // batch_size)
                ema(model, step // batch_size)
                if time_log > 2:
                    backwardtime = datetime.now()
                    log.info('Backward time {}:{}'.format(
                        *divmod((backwardtime - forwardtime).seconds, 60)))

                # Log info
                step += batch_size
                progress_bar.update(batch_size)
                progress_bar.set_postfix(epoch=epoch, NLL=loss_val)
                tbx.add_scalar('train/NLL', loss_val, step)
                tbx.add_scalar('train/LR', optimizer.param_groups[0]['lr'], step)
                if time_log > 0:
                    enditertime = datetime.now()
                    # log.info('Iteration {} {}:{}'.format(
                    #     total_iterations,
                    #     *divmod((enditertime - itertime).seconds, 60)))

                steps_till_eval -= batch_size
                if steps_till_eval <= 0 or args.mode == 'quick_eval':
                    steps_till_eval = args.eval_steps

                    # Evaluate and save checkpoint
                    log.info(f'Evaluating at step {step}...')
                    ema.assign(model)
                    results, pred_dict = evaluate(
                        model, dev_loader, device, args.dev_eval_file,
                        args.max_ans_len, args.use_squad_v2, model_type,
                        quick_eval=args.mode == 'quick_eval')
                    saver.save(step, model, results[args.metric_name], device)
                    ema.resume(model)

                    # Log to console
                    if time_log > 1:
                        log.info('Eval time {}:{}'.format(
                            *divmod((datetime.now() - enditertime).seconds, 60)))
                    results_str = ', '.join(f'{k}: {v:05.2f}'
                                            for k, v in results.items())
                    log.info(f'Dev {results_str}')

                    # Log to TensorBoard
                    log.info('Visualizing in TensorBoard...')
                    for k, v in results.items():
                        tbx.add_scalar(f'dev/{k}', v, step)
                    util.visualize(tbx,
                                   pred_dict=pred_dict,
                                   eval_path=args.dev_eval_file,
                                   step=step,
                                   split='dev',
                                   num_visuals=args.num_visuals)

                total_iterations += 1
                if ((time_log == 2) and (total_iterations % 10 == 0)) or \
                        ((time_log == 1) and (total_iterations % 100 == 0)):
                    log.info('Mean iteration time {}:{}'.format(*divmod(
                        (enditertime - traintime).seconds / total_iterations,
                        60)))
        if time_log > 0:
            endepochtime = datetime.now()
            log.info('Epoch time {}:{}'.format(
                *divmod((endepochtime - epochtime).seconds, 60)))
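# The "'{}:{}'.format(*divmod(seconds, 60))" pattern used throughout the
# timing code renders elapsed time as minutes:seconds. A small sketch of a
# reusable helper (elapsed_mm_ss is a hypothetical name, not part of the
# project's util module); note it also zero-pads the seconds, which the
# inline pattern above does not:
from datetime import datetime

def elapsed_mm_ss(since):
    minutes, seconds = divmod((datetime.now() - since).seconds, 60)
    return f'{minutes}:{seconds:02d}'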