def main():
    log.info('[program starts.]')
    train, dev, dev_y, embedding, opt = load_data(vars(args))
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(os.path.join(model_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        # synchronize random seed
        for i in range(checkpoint['epoch']):
            random.shuffle(list(range(len(train))))
        if args.reduce_lr:
            lr_decay(model.optimizer, lr_decay=args.reduce_lr)
    else:
        model = DocReaderModel(opt, embedding)
        epoch_0 = 1

    if args.cuda:
        model.cuda()

    if args.resume:
        batches = BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda)
        predictions = []
        for batch in batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)
        log.info("[dev EM: {} F1: {}]".format(em, f1))
        best_val_score = f1
    else:
        best_val_score = 0.0

    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(train, batch_size=args.batch_size, gpu=args.cuda)
        start = datetime.now()
        for i, batch in enumerate(batches):
            model.update(batch)
            if i % args.log_per_updates == 0:
                log.info('updates[{0:6}] train loss[{1:.5f}] remaining[{2}]'.format(
                    model.updates, model.train_loss.avg,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
        # eval
        if epoch % args.eval_per_epoch == 0:
            batches = BatchGen(dev, batch_size=1, evaluation=True, gpu=args.cuda)
            predictions = []
            for batch in batches:
                predictions.extend(model.predict(batch))
            em, f1 = score(predictions, dev_y)
            log.warning("dev EM: {} F1: {}".format(em, f1))
        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            model_file = os.path.join(model_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
            model.save(model_file, epoch)
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(model_file, os.path.join(model_dir, 'best_model.pt'))
                log.info('[new best model saved.]')
def main():
    log.info('[program starts.]')
    train, dev, dev_y, embedding, opt = load_data(vars(args))
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(os.path.join(model_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        # synchronize random seed
        for i in range(checkpoint['epoch']):
            random.shuffle(list(range(len(train))))
        if args.reduce_lr:
            lr_decay(model.optimizer, lr_decay=args.reduce_lr)
    else:
        model = DocReaderModel(opt, embedding)
        epoch_0 = 1

    if args.cuda:
        model.cuda()

    if args.resume:
        batches = BatchGen(dev, batch_size=1, evaluation=True, gpu=args.cuda)
        predictions = []
        for batch in batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)
        log.info("[dev EM: {} F1: {}]".format(em, f1))
        best_val_score = f1
    else:
        best_val_score = 0.0

    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(train, batch_size=args.batch_size, gpu=args.cuda)
        start = datetime.now()
        for i, batch in enumerate(batches):
            model.update(batch)
            if i % args.log_per_updates == 0:
                # log.info('updates[{0:6}] train loss[{1:.5f}] remaining[{2}]'.format(
                #     model.updates, model.train_loss.avg,
                #     str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
                log.info('epoch [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}] lr[{4:.4f}]'.format(
                    epoch, model.updates, model.train_loss.avg,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0],
                    model.optimizer.state_dict()['param_groups'][0]['lr']))
        # eval
        if epoch % args.eval_per_epoch == 0:
            batches = BatchGen(dev, batch_size=1, evaluation=True, gpu=args.cuda)
            predictions = []
            for batch in batches:
                predictions.extend(model.predict(batch))
            em, f1 = score(predictions, dev_y)
            log.warning("dev EM: {} F1: {}".format(em, f1))
        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            model_file = os.path.join(model_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
            model.save(model_file, epoch)
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(model_file, os.path.join(model_dir, 'best_model.pt'))
                log.info('[new best model saved.]')
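# The variants above call an `lr_decay` helper that is not defined in this
# section. A minimal sketch of what it presumably does, based on the inline
# version in a later variant that multiplies each parameter group's learning
# rate by the decay factor; the exact signature and return value here are
# assumptions, not taken from the original helper.
def lr_decay(optimizer, lr_decay):
    # scale the learning rate of every parameter group in place
    for param_group in optimizer.param_groups:
        param_group['lr'] *= lr_decay
    return optimizer

# usage, mirroring the call sites above:
# lr_decay(model.optimizer, lr_decay=args.reduce_lr)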
def main():
    from args import args
    # parser = argparse.ArgumentParser()
    # parser.add_argument('--model', required=True)
    # parser.add_argument('--train', required=True)
    # parser.add_argument('--dev', required=True)
    # args.load_model_dir = parser.parse_args().model
    # args.ent_train_dir = parser.parse_args().train
    # args.ent_dev_dir = parser.parse_args().dev
    args.load_model_dir = '/scratch0/shifeng/rawr/drqa/original.pt'
    args.ent_train_dir = 'results/20180217T172242.135276/train.pkl'
    args.ent_dev_dir = 'pkls/original.rawr.dev.pkl'
    args.other_train_dir = 'results/targeted_train_all.pkl'
    out_dir = prepare_output_dir(args, '/scratch0/shifeng/rawr/drqa/')

    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    fh = logging.FileHandler(os.path.join(out_dir, 'output.log'))
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(fmt='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    log.addHandler(fh)
    log.addHandler(ch)
    log.info('===== {} ====='.format(out_dir))

    with open(os.path.join(out_dir, 'args.pkl'), 'wb') as f:
        pickle.dump(args, f)

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.cuda:
        torch.cuda.manual_seed(args.seed)

    log.info('loading regular data from {}'.format(args.data_file))
    train_reg, dev_reg, dev_y, embedding, opt = load_data(args)
    log.info('{} regular training examples'.format(len(train_reg)))
    log.info('{} regular dev examples'.format(len(dev_reg)))
    # log.info(opt)

    ''' load data for regularization '''
    log.info('loading entropy training data from {}'.format(args.ent_train_dir))
    with open(args.ent_train_dir, 'rb') as f:
        train_ent = pickle.load(f)
        if isinstance(train_ent, dict) and 'reduced' in train_ent:
            train_ent = train_ent['reduced']
        if isinstance(train_ent[0][0], list):
            train_ent = list(itertools.chain(*train_ent))

    log.info('loading targeted training data from {}'.format(args.other_train_dir))
    with open(args.other_train_dir, 'rb') as f:
        other_train_ent = pickle.load(f)
        if isinstance(other_train_ent, dict) and 'reduced' in other_train_ent:
            other_train_ent = other_train_ent['reduced']
        if isinstance(other_train_ent[0][0], list):
            other_train_ent = list(itertools.chain(*other_train_ent))
    train_ent += other_train_ent

    if args.filter_long > 0:
        train_ent = [x for x in train_ent if len(x[5]) < args.filter_long]

    log.info('loading entropy dev data from {}'.format(args.ent_dev_dir))
    with open(args.ent_dev_dir, 'rb') as f:
        dev_ent = pickle.load(f)['reduced']
        if isinstance(dev_ent[0], list):
            # dev_ent = list(itertools.chain(*dev_ent))
            dev_ent = [x[0] for x in dev_ent]
    # if args.filter_long > 0:
    #     dev_ent = [x for x in dev_ent if len(x[5]) > args.filter_long]
    log.info('{} entropy training examples'.format(len(train_ent)))
    log.info('{} entropy dev examples'.format(len(dev_ent)))

    log.info('loading model from {}'.format(args.load_model_dir))
    checkpoint = torch.load(args.load_model_dir)
    # opt = checkpoint['config']
    state_dict = checkpoint['state_dict']
    model = DocReaderModel(vars(opt), embedding, state_dict)
    model.cuda()

    ''' initial evaluation '''
    dev_reg_batches = BatchGen(
        dev_reg, batch_size=args.batch_size,
        pos_size=args.pos_size, ner_size=args.ner_size,
        evaluation=True, gpu=args.cuda)
    dev_ent_batches = BatchGen(
        dev_ent, batch_size=args.batch_size,
        pos_size=args.pos_size, ner_size=args.ner_size,
        evaluation=True, gpu=args.cuda)
    predictions = []
    for batch in dev_reg_batches:
        predictions.extend(model.predict(batch))
    em, f1 = score(predictions, dev_y)
    ents, predictions_r = [], []
    for batch in dev_ent_batches:
        p, _, ss, se, _, _ = model.predict(batch, get_all=True)
        ss = ss.cpu().numpy()
        se = se.cpu().numpy()
        ents.append(scipy.stats.entropy(ss.T).sum() +
                    scipy.stats.entropy(se.T).sum())
        predictions_r.extend(p)
    ent = sum(ents) / len(ents)
    em_r, f1_r = score(predictions_r, dev_y)
    log.info("[dev EM: {:.5f} F1: {:.5f} Ent: {:.5f}]".format(em, f1, ent))
    log.info("[dev EMR: {:.5f} F1R: {:.5f}]".format(em_r, f1_r))
    best_f1_score = f1

    ''' interleaved training '''
    train_ent_batches = BatchGen(
        train_ent, batch_size=args.batch_size,
        pos_size=args.pos_size, ner_size=args.ner_size, gpu=args.cuda)
    len_train_ent_batches = len(train_ent_batches)
    train_ent_batches = iter(train_ent_batches)
    n_reg = 0
    n_ent = 0
    for epoch in range(args.epochs):
        log.warning('Epoch {}'.format(epoch))
        train_reg_batches = BatchGen(
            train_reg, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size, gpu=args.cuda)
        start = datetime.now()

        for i_reg, reg_batch in enumerate(train_reg_batches):
            model.update(reg_batch)
            n_reg += 1
            if n_reg > args.start_ent:
                if i_reg % args.n_reg_per_ent == 0:
                    for j in range(args.n_ent_per_reg):
                        try:
                            model.update_entropy(next(train_ent_batches), gamma=args.gamma)
                            n_ent += 1
                        except StopIteration:
                            n_ent = 0
                            train_ent_batches = iter(BatchGen(
                                train_ent, batch_size=args.batch_size,
                                pos_size=args.pos_size, ner_size=args.ner_size,
                                gpu=args.cuda))
            if n_reg % args.n_report == 0:
                log.info('epoch [{:2}] batch [{}, {}] loss[{:.5f}] entropy[{:.5f}]'.format(
                    epoch, i_reg, n_ent, model.train_loss.avg,
                    -model.entropy_loss.avg / args.gamma))

        # if n_reg % args.n_eval == 0:
        dev_reg_batches = BatchGen(
            dev_reg, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size,
            evaluation=True, gpu=args.cuda)
        dev_ent_batches = BatchGen(
            dev_ent, batch_size=args.batch_size,
            pos_size=args.pos_size, ner_size=args.ner_size,
            evaluation=True, gpu=args.cuda)

        ''' regular evaluation '''
        predictions = []
        for batch in dev_reg_batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)

        ''' entropy evaluation '''
        ents, predictions_r = [], []
        for batch in dev_ent_batches:
            p, _, ss, se, _, _ = model.predict(batch, get_all=True)
            ss = ss.cpu().numpy()
            se = se.cpu().numpy()
            ents.append(scipy.stats.entropy(ss.T).sum() +
                        scipy.stats.entropy(se.T).sum())
            predictions_r.extend(p)
        ent = sum(ents) / len(ents)
        em_r, f1_r = score(predictions_r, dev_y)
        log.info("dev EM: {:.5f} F1: {:.5f} Ent: {:.5f}".format(em, f1, ent))
        log.info("[dev EMR: {:.5f} F1R: {:.5f}]".format(em_r, f1_r))

        ''' save best model '''
        if f1 > best_f1_score:
            best_f1_score = f1
            model_file = os.path.join(out_dir, 'best_model.pt')
            model.save(model_file, epoch)
            log.info('[save best model F1: {:.5f}]'.format(best_f1_score))

        ''' save models '''
        model_file = os.path.join(out_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
        model.save(model_file, epoch)
        log.info("[save model {}]".format(model_file))
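# scipy.stats.entropy, applied to a 2-D array, computes the entropy of each
# column. The entropy evaluation above therefore transposes the per-batch
# start/end score matrices (presumably shaped [batch, positions]) so that each
# column becomes one example's distribution over positions, then sums the
# per-example entropies. A small self-contained illustration of that step;
# the toy probabilities below are made up.
import numpy as np
import scipy.stats

ss = np.array([[0.7, 0.2, 0.1],    # start-position distribution, example 1
               [0.4, 0.3, 0.3]])   # start-position distribution, example 2
per_example_entropy = scipy.stats.entropy(ss.T)  # one entropy value per row of ss
batch_entropy = per_example_entropy.sum()        # the quantity appended to `ents` above
print(per_example_entropy, batch_entropy)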
def main():
    from args import conf

    parser = argparse.ArgumentParser()
    parser.add_argument('--resume', default=False)
    parser.add_argument('--resume-options', default=False)
    args = parser.parse_args()

    # set random seed
    random.seed(conf.seed)
    torch.manual_seed(conf.seed)
    if conf.cuda:
        torch.cuda.manual_seed(conf.seed)

    # setup logger
    log = logging.getLogger(__name__)
    log.setLevel(logging.DEBUG)
    fh = logging.FileHandler('main.log')
    fh.setLevel(logging.DEBUG)
    ch = logging.StreamHandler(sys.stdout)
    ch.setLevel(logging.INFO)
    formatter = logging.Formatter(
        fmt='%(asctime)s %(message)s', datefmt='%m/%d/%Y %I:%M:%S')
    fh.setFormatter(formatter)
    ch.setFormatter(formatter)
    log.addHandler(fh)
    log.addHandler(ch)

    train, dev, dev_y, embedding, conf = load_data(conf)
    log.info(conf)
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(args.resume)
        if args.resume_options:
            conf = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(vars(conf), embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        # synchronize random seed
        for i in range(checkpoint['epoch']):
            random.shuffle(list(range(len(train))))
        if conf.reduce_lr:
            for param_group in model.optimizer.param_groups:
                param_group['lr'] *= conf.lr_decay
            log.info('[learning rate reduced by {}]'.format(conf.lr_decay))
    else:
        model = DocReaderModel(vars(conf), embedding)
        epoch_0 = 1

    if conf.cuda:
        model.cuda()

    if args.resume:
        batches = BatchGen(
            dev, batch_size=conf.batch_size,
            pos_size=conf.pos_size, ner_size=conf.ner_size,
            gpu=conf.cuda, evaluation=True)
        predictions = []
        for batch in batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)
        log.info("[dev EM: {} F1: {}]".format(em, f1))
        best_val_score = f1
    else:
        best_val_score = 0.0

    for epoch in range(epoch_0, epoch_0 + conf.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(
            train, batch_size=conf.batch_size,
            pos_size=conf.pos_size, ner_size=conf.ner_size, gpu=conf.cuda)
        start = datetime.now()
        for i, batch in enumerate(batches):
            model.update(batch)
            if i % conf.log_per_updates == 0:
                log.info('epoch [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]'.format(
                    epoch, model.updates, model.train_loss.avg,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
        # eval
        if epoch % conf.eval_per_epoch == 0:
            batches = BatchGen(
                dev, batch_size=conf.batch_size,
                pos_size=conf.pos_size, ner_size=conf.ner_size,
                gpu=conf.cuda, evaluation=True)
            predictions = []
            for batch in batches:
                predictions.extend(model.predict(batch))
            em, f1 = score(predictions, dev_y)
            log.warning("dev EM: {} F1: {}".format(em, f1))
        # save
        if not conf.save_last_only or epoch == epoch_0 + conf.epochs - 1:
            model_file = 'results/baseline_epoch_{}.pt'.format(epoch)
            model.save(model_file, epoch)
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(model_file, os.path.join('results/baseline.pt'))
                log.info('[new best model saved.]')
def main():
    args, log = setup()
    log.info('[Program starts. Loading data...]')
    train, dev, dev_y, embedding, opt = load_data(vars(args))
    log.info(opt)
    log.info('[Data loaded.]')

    if args.save_dawn_logs:
        dawn_start = datetime.now()
        log.info('dawn_entry: epoch\tf1Score\thours')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(os.path.join(args.model_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        # synchronize random seed
        random.setstate(checkpoint['random_state'])
        torch.random.set_rng_state(checkpoint['torch_state'])
        if args.cuda:
            torch.cuda.set_rng_state(checkpoint['torch_cuda_state'])
        if args.reduce_lr:
            lr_decay(model.optimizer, lr_decay=args.reduce_lr)
            log.info('[learning rate reduced by {}]'.format(args.reduce_lr))
        batches = BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda)
        predictions = []
        for i, batch in enumerate(batches):
            predictions.extend(model.predict(batch))
            log.debug('> evaluating [{}/{}]'.format(i, len(batches)))
        em, f1 = score(predictions, dev_y)
        log.info("[dev EM: {} F1: {}]".format(em, f1))
        if math.fabs(em - checkpoint['em']) > 1e-3 or math.fabs(f1 - checkpoint['f1']) > 1e-3:
            log.info('Inconsistent: recorded EM: {} F1: {}'.format(checkpoint['em'], checkpoint['f1']))
            log.error('Error loading model: current code is inconsistent with code used to train the previous model.')
            exit(1)
        best_val_score = checkpoint['best_eval']
    else:
        model = DocReaderModel(opt, embedding)
        epoch_0 = 1
        best_val_score = 0.0

    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(train, batch_size=args.batch_size, gpu=args.cuda)
        start = datetime.now()
        for i, batch in enumerate(batches):
            model.update(batch)
            if i % args.log_per_updates == 0:
                log.info('> epoch [{0:2}] updates[{1:6}] train loss[{2:.5f}] remaining[{3}]'.format(
                    epoch, model.updates, model.train_loss.value,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
        log.debug('\n')
        # eval
        batches = BatchGen(dev, batch_size=args.batch_size, evaluation=True, gpu=args.cuda)
        predictions = []
        for i, batch in enumerate(batches):
            predictions.extend(model.predict(batch))
            log.debug('> evaluating [{}/{}]'.format(i, len(batches)))
        em, f1 = score(predictions, dev_y)
        log.warning("dev EM: {} F1: {}".format(em, f1))
        if args.save_dawn_logs:
            time_diff = datetime.now() - dawn_start
            log.warning("dawn_entry: {}\t{}\t{}".format(
                epoch, f1 / 100.0, float(time_diff.total_seconds() / 3600.0)))
        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            model_file = os.path.join(args.model_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
            model.save(model_file, epoch, [em, f1, best_val_score])
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(model_file, os.path.join(args.model_dir, 'best_model.pt'))
                log.info('[new best model saved.]')
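# The resume branch above restores the Python, CPU-torch, and CUDA RNG states
# from the checkpoint, so the checkpoint writer has to capture them at save
# time. A minimal sketch of the save-side counterpart; the checkpoint keys
# match the ones read above, but the helper itself and how it is merged into
# the checkpoint are assumptions, not taken from the original model.save.
import random
import torch

def capture_rng_state(cuda=False):
    state = {
        'random_state': random.getstate(),
        'torch_state': torch.random.get_rng_state(),
    }
    if cuda:
        state['torch_cuda_state'] = torch.cuda.get_rng_state()
    return state

# e.g. merged into the checkpoint dict before writing it:
# checkpoint.update(capture_rng_state(cuda=args.cuda))
# torch.save(checkpoint, model_file)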
def main():
    log.info('[program starts.]')
    train, dev, dev_y, embedding, opt = load_data(vars(args))
    log.info('[Data loaded.]')

    if args.resume:
        log.info('[loading previous model...]')
        checkpoint = torch.load(os.path.join(model_dir, args.resume))
        if args.resume_options:
            opt = checkpoint['config']
        state_dict = checkpoint['state_dict']
        model = DocReaderModel(opt, embedding, state_dict)
        epoch_0 = checkpoint['epoch'] + 1
        # synchronize random seed
        for i in range(checkpoint['epoch']):
            random.shuffle(list(range(len(train))))
        if args.reduce_lr:
            lr_decay(model.optimizer, lr_decay=args.reduce_lr)
    else:
        model = DocReaderModel(opt, embedding)
        epoch_0 = 1

    if args.cuda:
        model.cuda()

    # containers for the EM / F1 / loss curves plotted below
    point_em = []
    point_f1 = []
    point_loss = []
    stacked_epoch = []
    train_loss_list = []

    if args.resume:
        batches = BatchGen(dev, batch_size=1, evaluation=True, gpu=args.cuda)
        predictions = []
        for batch in batches:
            predictions.extend(model.predict(batch))
        em, f1 = score(predictions, dev_y)

        # restore the EM / F1 / loss history recorded up to epoch (epoch_0 - 1)
        for i in range(epoch_0 - 1):
            stacked_epoch.append(i + 1)  # stack epochs from 1 to (epoch_0 - 1)
        for em_resume in open(em_dir + "/until_epoch_" + str(epoch_0 - 1) + "_em.txt").read().strip().split('\n'):
            point_em.append(float(em_resume))
        for f1_resume in open(f1_dir + "/until_epoch_" + str(epoch_0 - 1) + "_f1.txt").read().strip().split('\n'):
            point_f1.append(float(f1_resume))
        for loss_resume in open(loss_dir + "/until_epoch_" + str(epoch_0 - 1) + "_loss.txt").read().strip().split('\n'):
            point_loss.append(float(loss_resume))
        save_each_plot(stacked_epoch, point_em, EM_graph_dir, 'EM')
        save_each_plot(stacked_epoch, point_f1, F1_graph_dir, 'F1')
        save_each_plot(stacked_epoch, point_loss, Loss_graph_dir, 'Loss')

        log.info("[dev EM: {} F1: {}]".format(em, f1))
        best_val_score = f1
    else:
        best_val_score = 0.0

    for epoch in range(epoch_0, epoch_0 + args.epochs):
        log.warning('Epoch {}'.format(epoch))
        # train
        batches = BatchGen(train, batch_size=args.batch_size, gpu=args.cuda)
        start = datetime.now()
        for i, batch in enumerate(batches):
            model.update(batch)
            if i % args.log_per_updates == 0:
                log.info('updates[{0:6}] train loss[{1:.5f}] remaining[{2}]'.format(
                    model.updates, model.train_loss.avg,
                    str((datetime.now() - start) / (i + 1) * (len(batches) - i - 1)).split('.')[0]))
            train_loss_list.append(model.train_loss.avg)  # record the running train loss for every batch
        train_loss_avg = np.sum(train_loss_list) / len(train_loss_list)  # average loss over recorded batches
        print(train_loss_avg)
        # eval
        if epoch % args.eval_per_epoch == 0:
            batches = BatchGen(dev, batch_size=1, evaluation=True, gpu=args.cuda)
            predictions = []
            for batch in batches:
                predictions.extend(model.predict(batch))
            em, f1 = score(predictions, dev_y)

            # record and plot the EM / F1 / loss history up to the current epoch
            stacked_epoch.append(epoch)
            point_em.append(em)
            point_f1.append(f1)
            point_loss.append(train_loss_avg)
            print("train_loss:")
            print(model.train_loss.avg)
            with open(em_dir + "/until_epoch_" + str(epoch) + "_em.txt", "wb") as f:
                np.savetxt(f, point_em)
            print("until_epoch_" + str(epoch) + "em.txt saved.")
            with open(f1_dir + "/until_epoch_" + str(epoch) + "_f1.txt", "wb") as f:
                np.savetxt(f, point_f1)
            print("until_epoch_" + str(epoch) + "f1.txt saved.")
            with open(loss_dir + "/until_epoch_" + str(epoch) + "_loss.txt", "wb") as f:
                np.savetxt(f, point_loss)
            print("until_epoch_" + str(epoch) + "loss.txt saved.")
            save_each_plot(stacked_epoch, point_em, EM_graph_dir, 'EM')
            save_each_plot(stacked_epoch, point_f1, F1_graph_dir, 'F1')
            save_each_plot(stacked_epoch, point_loss, Loss_graph_dir, 'Loss')

            log.warning("dev EM: {} F1: {}".format(em, f1))
        # save
        if not args.save_last_only or epoch == epoch_0 + args.epochs - 1:
            model_file = os.path.join(model_dir, 'checkpoint_epoch_{}.pt'.format(epoch))
            model.save(model_file, epoch)
            if f1 > best_val_score:
                best_val_score = f1
                copyfile(model_file, os.path.join(model_dir, 'best_model.pt'))
                log.info('[new best model saved.]')

    # after processing all epochs, save the combined plots (all previous + present)
    save_all_model_plot('../result/data/em_data', 'EM', EM_graph_dir)
    save_all_model_plot('../result/data/f1_data', 'F1', F1_graph_dir)
    save_all_model_plot('../result/data/loss_data', 'Loss', Loss_graph_dir)
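# `save_each_plot` and `save_all_model_plot` are only called in the variant
# above, not defined in this section. A minimal matplotlib sketch of what
# `save_each_plot` could look like, inferred from how it is called (epochs on
# the x-axis, one metric on the y-axis, written into a per-metric directory).
# This is an assumption about the helper, not its actual definition, and the
# output filename pattern is made up.
import os
import matplotlib
matplotlib.use('Agg')  # render to files without a display
import matplotlib.pyplot as plt

def save_each_plot(epochs, values, graph_dir, metric_name):
    plt.figure()
    plt.plot(epochs, values, marker='o')
    plt.xlabel('epoch')
    plt.ylabel(metric_name)
    plt.title('{} per epoch'.format(metric_name))
    plt.savefig(os.path.join(graph_dir, '{}_until_epoch_{}.png'.format(metric_name, epochs[-1])))
    plt.close()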