def evaluate_process(model, valid_loader, log_start_time, global_t, epoch, logger, tb_writer, api):
    model.eval()
    valid_loader.epoch_init(1, shuffle=False)  # batch_size is 1; repeat each context 10 times to compute BLEU
    f_eval = open(
        "./output/{}/{}/eval_global_{}_epoch{}.txt".format(
            args.expname, log_start_time, global_t, epoch), "w")
    repeat = 10

    # Evaluate the current model
    # Define the metrics
    metrics = Metrics(model.embedder)
    recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2 \
        = evaluate(model, metrics, valid_loader, api.vocab, api.rev_vocab, f_eval, repeat)
    logger.info("Avg recall BLEU %f, avg precision BLEU %f, bow_extrema %f, bow_avg %f, bow_greedy %f, intra_dist1 %f,"
                " intra_dist2 %f, avg_len %f, \ninter_dist1 %f, inter_dist2 %f (only 1 ref, not final results)"
                % (recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2,
                   avg_len, inter_dist1, inter_dist2))
    if args.visual:
        tb_writer.add_scalar('recall_bleu', recall_bleu, epoch)
        tb_writer.add_scalar('prec_bleu', prec_bleu, epoch)
        tb_writer.add_scalar('bow_extrema', bow_extrema, epoch)
        tb_writer.add_scalar('bow_avg', bow_avg, epoch)
        tb_writer.add_scalar('bow_greedy', bow_greedy, epoch)
        tb_writer.add_scalar('intra_dist1', intra_dist1, epoch)
        tb_writer.add_scalar('intra_dist2', intra_dist2, epoch)
        tb_writer.add_scalar('inter_dist1', inter_dist1, epoch)
        tb_writer.add_scalar('inter_dist2', inter_dist2, epoch)
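# --- Illustration (not part of the original code) ---------------------------
# evaluate() above returns a recall BLEU and a precision BLEU computed over
# `repeat` sampled hypotheses per context. The helper below is only a minimal
# sketch of how such multi-hypothesis BLEU scores are commonly defined
# (max over hypotheses for recall, mean for precision); the function name and
# smoothing choice are assumptions, not the repository's actual evaluate().
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction

def multi_hypothesis_bleu(reference_tokens, hypothesis_token_lists):
    """Score one tokenized reference against several sampled hypotheses (hypothetical helper)."""
    smooth = SmoothingFunction().method7
    scores = [sentence_bleu([reference_tokens], hyp, smoothing_function=smooth)
              for hyp in hypothesis_token_lists]
    recall = max(scores)                    # best sample vs. the reference
    precision = sum(scores) / len(scores)   # average quality of all samples
    return recall, precision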
            logger.info(log)

            itr += 1
            itr_global += 1

        if epoch % args.eval_every == 0:  # evaluate the model on the validation set
            model.eval()
            valid_loader.epoch_init(1, config['diaglen'], 1, shuffle=False)
            f_eval = open(
                "./output/{}/{}/{}/tmp_results/epoch{}.txt".format(
                    args.model, args.expname, args.dataset, epoch), "w")
            repeat = 10
            recall_bleu, prec_bleu, bow_extrema, bow_avg, bow_greedy, intra_dist1, intra_dist2, avg_len, inter_dist1, inter_dist2 \
                = evaluate(model, metrics, valid_loader, vocab, ivocab, f_eval, repeat)
            if args.visual:
                tb_writer.add_scalar('recall_bleu', recall_bleu, epoch)
                tb_writer.add_scalar('prec_bleu', prec_bleu, epoch)
                tb_writer.add_scalar('bow_extrema', bow_extrema, epoch)
                tb_writer.add_scalar('bow_avg', bow_avg, epoch)
                tb_writer.add_scalar('bow_greedy', bow_greedy, epoch)
                tb_writer.add_scalar('intra_dist1', intra_dist1, epoch)
                tb_writer.add_scalar('intra_dist2', intra_dist2, epoch)
                tb_writer.add_scalar('inter_dist1', inter_dist1, epoch)
                tb_writer.add_scalar('inter_dist2', inter_dist2, epoch)

        # end of epoch ----------------------------
        model.adjust_lr()
        save_model(model, epoch)  # save model after each epoch
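# --- Illustration (not part of the original code) ---------------------------
# intra_dist / inter_dist logged above are the usual distinct-n diversity
# metrics: intra-dist is the ratio of unique n-grams within each context's
# sampled responses (averaged over contexts), inter-dist the ratio over all
# responses pooled together. The sketch below shows the standard definition;
# it is not the repository's Metrics implementation.
def distinct_n(token_lists, n):
    """Fraction of unique n-grams over all n-grams in a list of token lists."""
    ngrams = [tuple(tokens[i:i + n])
              for tokens in token_lists
              for i in range(len(tokens) - n + 1)]
    return len(set(ngrams)) / max(len(ngrams), 1)

# inter-dist: pool every sampled response into one call to distinct_n;
# intra-dist: call distinct_n per context and average the per-context values.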
                                     itr_global)
            logger.info(log)

            itr += 1
            itr_global += 1

            if itr_global % args.eval_every == 0:  # evaluate the model on the dev set
                model.eval()
                valid_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=1,
                                                           shuffle=False, num_workers=1)
                f_eval = open(
                    "./output/{}/{}/tmp_results/iter{}.txt".format(
                        args.model, args.expname, itr_global), "w")
                repeat = 10
                recall_bleu, prec_bleu = evaluate(model, metrics, valid_loader,
                                                  vocab_desc, vocab_api, f_eval, repeat)
                if args.visual:
                    tb_writer.add_scalar('recall_bleu', recall_bleu, itr_global)
                    tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)
                save_model(model, itr_global)  # save the model at each evaluation point

        # end of epoch ----------------------------
        model.adjust_lr()
def train(args):
    timestamp = datetime.now().strftime('%Y%m%d%H%M')

    # LOG #
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")  # format="%(asctime)s: %(name)s: %(levelname)s: %(message)s"
    tb_writer = None
    if args.visual:
        # make the output directories if they don't already exist
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models', exist_ok=True)
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/temp_results', exist_ok=True)
        # create a file handler which logs even debug messages
        fh = logging.FileHandler(f"./output/{args.model}/{args.expname}/{timestamp}/logs.txt")
        logger.addHandler(fh)  # add the handler to the logger
        tb_writer = SummaryWriter(f"./output/{args.model}/{args.expname}/{timestamp}/logs/")
        # save arguments
        json.dump(vars(args), open(f'./output/{args.model}/{args.expname}/{timestamp}/args.json', 'w'))

    # Device #
    if args.gpu_id < 0:
        device = torch.device("cuda")
    else:
        device = torch.device(f"cuda:{args.gpu_id}" if torch.cuda.is_available() and args.gpu_id > -1 else "cpu")
    print(device)
    n_gpu = torch.cuda.device_count() if args.gpu_id < 0 else 1
    print(f"num of gpus:{n_gpu}")

    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    def save_model(model, epoch, timestamp):
        """Save model parameters to a checkpoint"""
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models', exist_ok=True)
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Saving model parameters to {ckpt_path}')
        torch.save(model.state_dict(), ckpt_path)

    def load_model(model, epoch, timestamp):
        """Load model parameters from a checkpoint"""
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Loading model parameters from {ckpt_path}')
        model.load_state_dict(torch.load(ckpt_path))

    config = getattr(configs, 'config_' + args.model)()

    ###############################################################################
    # Load dataset
    ###############################################################################
    train_set = APIDataset(args.data_path + 'train.desc.h5', args.data_path + 'train.apiseq.h5', config['max_sent_len'])
    valid_set = APIDataset(args.data_path + 'test.desc.h5', args.data_path + 'test.apiseq.h5', config['max_sent_len'])
    train_loader = torch.utils.data.DataLoader(dataset=train_set, batch_size=config['batch_size'],
                                               shuffle=True, num_workers=1)
    valid_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=config['batch_size'],
                                               shuffle=True, num_workers=1)
    print("Loaded dataset!")

    ###############################################################################
    # Define the models
    ###############################################################################
    model = getattr(models, args.model)(config)
    if args.reload_from >= 0:
        load_model(model, args.reload_from, timestamp)
    model = model.to(device)

    ###############################################################################
    # Prepare the Optimizer
    ###############################################################################
    no_decay = ['bias', 'LayerNorm.weight']
    optimizer_grouped_parameters = [
        {'params': [p for n, p in model.named_parameters() if not any(nd in n for nd in no_decay)],
         'weight_decay': 0.01},
        {'params': [p for n, p in model.named_parameters() if any(nd in n for nd in no_decay)],
         'weight_decay': 0.0}
    ]
    optimizer = torch.optim.AdamW(optimizer_grouped_parameters, lr=config['lr'], eps=config['adam_epsilon'])
    scheduler = get_cosine_schedule_with_warmup(
        optimizer, num_warmup_steps=config['warmup_steps'],
        num_training_steps=len(train_loader) * config['epochs'])  # do not forget to adjust this when the dataset changes

    ###############################################################################
    # Training
    ###############################################################################
    logger.info("Training...")
    itr_global = 1
    start_epoch = 1 if args.reload_from == -1 else args.reload_from + 1
    for epoch in range(start_epoch, config['epochs'] + 1):
        epoch_start_time = time.time()
        itr_start_time = time.time()

        # shuffle (re-define) the dataset between epochs
        for batch in train_loader:  # loop through all batches in the training dataset
            model.train()
            batch_gpu = [tensor.to(device) for tensor in batch]
            loss = model(*batch_gpu)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), config['clip'])
            optimizer.step()
            scheduler.step()
            model.zero_grad()

            if itr_global % args.log_every == 0:
                elapsed = time.time() - itr_start_time
                log = '%s-%s|@gpu%d epo:[%d/%d] iter:%d step_time:%ds loss:%f' \
                      % (args.model, args.expname, args.gpu_id, epoch, config['epochs'], itr_global, elapsed, loss)
                if args.visual:
                    tb_writer.add_scalar('loss', loss, itr_global)
                logger.info(log)
                itr_start_time = time.time()

            if itr_global % args.valid_every == 0:
                model.eval()
                loss_records = {}
                for batch in valid_loader:
                    batch_gpu = [tensor.to(device) for tensor in batch]
                    with torch.no_grad():
                        valid_loss = model.valid(*batch_gpu)
                    for loss_name, loss_value in valid_loss.items():
                        v = loss_records.get(loss_name, [])
                        v.append(loss_value)
                        loss_records[loss_name] = v
                log = 'Validation '
                for loss_name, loss_values in loss_records.items():
                    log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
                    if args.visual:
                        tb_writer.add_scalar(loss_name, np.mean(loss_values), itr_global)
                logger.info(log)

            itr_global += 1

            if itr_global % args.eval_every == 0:  # evaluate the model on the dev set
                model.eval()
                save_model(model, itr_global, timestamp)  # save the model at each evaluation point

                valid_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=1,
                                                           shuffle=False, num_workers=1)
                vocab_api = load_dict(args.data_path + 'vocab.apiseq.json')
                vocab_desc = load_dict(args.data_path + 'vocab.desc.json')
                metrics = Metrics()
                os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/temp_results', exist_ok=True)
                f_eval = open(f"./output/{args.model}/{args.expname}/{timestamp}/temp_results/iter{itr_global}.txt", "w")
                repeat = 1
                decode_mode = 'sample'
                recall_bleu, prec_bleu = evaluate(model, metrics, valid_loader, vocab_desc, vocab_api,
                                                  repeat, decode_mode, f_eval)
                if args.visual:
                    tb_writer.add_scalar('recall_bleu', recall_bleu, itr_global)
                    tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)

        # end of epoch ----------------------------
        model.adjust_lr()
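# --- Illustration (not part of the original code) ---------------------------
# A minimal sketch of an entry point that could drive the train() function
# above. The flag names mirror the attributes referenced in the code
# (args.model, args.expname, args.data_path, ...); the default values here are
# placeholders, not the repository's actual defaults.
if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', required=True, help='model class name, resolved via getattr(models, args.model)')
    parser.add_argument('--expname', default='basic', help='experiment name used in the output path')
    parser.add_argument('--data_path', default='./data/', help='directory with the *.h5 and vocab.*.json files')
    parser.add_argument('--gpu_id', type=int, default=0, help='GPU id; a negative value means use all visible GPUs')
    parser.add_argument('--seed', type=int, default=1111, help='random seed')
    parser.add_argument('--reload_from', type=int, default=-1, help='checkpoint epoch to resume from; -1 trains from scratch')
    parser.add_argument('--visual', action='store_true', help='enable TensorBoard and file logging')
    parser.add_argument('--log_every', type=int, default=100, help='iterations between training log lines')
    parser.add_argument('--valid_every', type=int, default=1000, help='iterations between validation passes')
    parser.add_argument('--eval_every', type=int, default=5000, help='iterations between BLEU evaluations')
    train(parser.parse_args())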
def train(args):
    timestamp = datetime.now().strftime('%Y%m%d%H%M')

    # LOG #
    logger = logging.getLogger(__name__)
    logging.basicConfig(level=logging.DEBUG, format="%(message)s")  # format="%(asctime)s: %(name)s: %(levelname)s: %(message)s"
    tb_writer = None
    if args.visual:
        # make the output directories if they don't already exist
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models', exist_ok=True)
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/temp_results', exist_ok=True)
        # create a file handler which logs even debug messages
        fh = logging.FileHandler(f"./output/{args.model}/{args.expname}/{timestamp}/logs.txt")
        logger.addHandler(fh)  # add the handler to the logger
        tb_writer = SummaryWriter(f"./output/{args.model}/{args.expname}/{timestamp}/logs/")
        # save arguments
        json.dump(vars(args), open(f'./output/{args.model}/{args.expname}/{timestamp}/args.json', 'w'))

    # Device #
    if args.gpu_id < 0:
        device = torch.device("cuda")
    else:
        device = torch.device(f"cuda:{args.gpu_id}" if torch.cuda.is_available() and args.gpu_id > -1 else "cpu")
    print(device)
    n_gpu = torch.cuda.device_count() if args.gpu_id < 0 else 1
    print(f"num of gpus:{n_gpu}")

    # Set the random seed manually for reproducibility.
    random.seed(args.seed)
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)

    def save_model(model, epoch, timestamp):
        """Save model parameters to a checkpoint"""
        os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/models', exist_ok=True)
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Saving model parameters to {ckpt_path}')
        torch.save(model.state_dict(), ckpt_path)

    def load_model(model, epoch, timestamp):
        """Load model parameters from a checkpoint"""
        ckpt_path = f'./output/{args.model}/{args.expname}/{timestamp}/models/model_epo{epoch}.pkl'
        print(f'Loading model parameters from {ckpt_path}')
        model.load_state_dict(torch.load(ckpt_path))

    config = getattr(configs, 'config_' + args.model)()

    ###############################################################################
    # Load data
    ###############################################################################
    train_set = APIDataset(args.data_path + 'train.desc.h5',
                           args.data_path + 'train.apiseq.h5', config['max_sent_len'])
    valid_set = APIDataset(args.data_path + 'test.desc.h5',
                           args.data_path + 'test.apiseq.h5', config['max_sent_len'])
    print("Loaded data!")

    ###############################################################################
    # Define the models
    ###############################################################################
    model = getattr(models, args.model)(config)
    if args.reload_from >= 0:
        load_model(model, args.reload_from, timestamp)
    model = model.to(device)

    ###############################################################################
    # Training
    ###############################################################################
    logger.info("Training...")
    itr_global = 1
    start_epoch = 1 if args.reload_from == -1 else args.reload_from + 1
    for epoch in range(start_epoch, config['epochs'] + 1):
        epoch_start_time = time.time()
        itr_start_time = time.time()

        # shuffle (re-define) the data between epochs
        train_loader = torch.utils.data.DataLoader(dataset=train_set,
                                                   batch_size=config['batch_size'],
                                                   shuffle=True, num_workers=1)
        train_data_iter = iter(train_loader)
        n_iters = len(train_loader)

        itr = 1
        while True:  # loop through all batches in the training data
            model.train()
            try:
                descs, apiseqs, desc_lens, api_lens = next(train_data_iter)
            except StopIteration:  # end of epoch
                break

            batch = [tensor.to(device)
                     for tensor in [descs, desc_lens, apiseqs, api_lens]]
            loss_AE = model.train_AE(*batch)

            if itr % args.log_every == 0:
                elapsed = time.time() - itr_start_time
                log = '%s-%s|@gpu%d epo:[%d/%d] iter:[%d/%d] step_time:%ds elapsed:%s \n ' \
                      % (args.model, args.expname, args.gpu_id, epoch, config['epochs'],
                         itr, n_iters, elapsed, timeSince(epoch_start_time, itr / n_iters))
                for loss_name, loss_value in loss_AE.items():
                    log = log + loss_name + ':%.4f ' % (loss_value)
                    if args.visual:
                        tb_writer.add_scalar(loss_name, loss_value, itr_global)
                logger.info(log)
                itr_start_time = time.time()

            if itr % args.valid_every == 0:
                valid_loader = torch.utils.data.DataLoader(dataset=valid_set,
                                                           batch_size=config['batch_size'],
                                                           shuffle=True, num_workers=1)
                model.eval()
                loss_records = {}
                for descs, apiseqs, desc_lens, api_lens in valid_loader:
                    batch = [tensor.to(device)
                             for tensor in [descs, desc_lens, apiseqs, api_lens]]
                    valid_loss = model.valid(*batch)
                    for loss_name, loss_value in valid_loss.items():
                        v = loss_records.get(loss_name, [])
                        v.append(loss_value)
                        loss_records[loss_name] = v
                log = 'Validation '
                for loss_name, loss_values in loss_records.items():
                    log = log + loss_name + ':%.4f ' % (np.mean(loss_values))
                    if args.visual:
                        tb_writer.add_scalar(loss_name, np.mean(loss_values), itr_global)
                logger.info(log)

            itr += 1
            itr_global += 1

            if itr_global % args.eval_every == 0:  # evaluate the model on the dev set
                model.eval()
                save_model(model, itr_global, timestamp)  # save the model at each evaluation point

                valid_loader = torch.utils.data.DataLoader(dataset=valid_set, batch_size=1,
                                                           shuffle=False, num_workers=1)
                vocab_api = load_dict(args.data_path + 'vocab.apiseq.json')
                vocab_desc = load_dict(args.data_path + 'vocab.desc.json')
                metrics = Metrics()
                os.makedirs(f'./output/{args.model}/{args.expname}/{timestamp}/temp_results', exist_ok=True)
                f_eval = open(f"./output/{args.model}/{args.expname}/{timestamp}/temp_results/iter{itr_global}.txt", "w")
                repeat = 1
                decode_mode = 'sample'
                recall_bleu, prec_bleu = evaluate(model, metrics, valid_loader, vocab_desc, vocab_api,
                                                  repeat, decode_mode, f_eval)
                if args.visual:
                    tb_writer.add_scalar('recall_bleu', recall_bleu, itr_global)
                    tb_writer.add_scalar('prec_bleu', prec_bleu, itr_global)

        # end of epoch ----------------------------
        model.adjust_lr()
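# --- Illustration (not part of the original code) ---------------------------
# load_dict() is called above but not defined in this section. Assuming the
# vocab.*.json files store a plain token-to-id mapping, a minimal version could
# look like the sketch below; the actual helper in the repository may differ.
import json

def load_dict(filename):
    """Load a token-to-id vocabulary from a JSON file (hypothetical sketch)."""
    with open(filename, 'r') as f:
        return json.load(f)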