import json
import os


def main(_):
    if not os.path.exists(args.save_dir):
        os.makedirs(args.save_dir)

    # Load data.
    if args.dataset == 'multi30k':
        train_data_loader = Multi30KLoader(args, 'train')
        val_data_loader = Multi30KLoader(args, 'val')
    else:
        train_data_loader = COCOLoader(args, 'train')
        val_data_loader = COCOLoader(args, 'val')
    vecs = train_data_loader.vecs

    restore_path = args.restore_path
    best_perf = 0.
    best_epoch = 0
    for epoch in range(args.max_num_epoch):
        # Universal-embedding pretraining only fires on the first epoch of a
        # fresh run (no restore path).
        pretrain_universal_embedding = (args.univ_pretrain and epoch == 0
                                        and not restore_path)
        if epoch % 5 == 0 or epoch == args.max_num_epoch - 1:
            if pretrain_universal_embedding:
                restore_path = train_language_universal_embedding(
                    train_data_loader, vecs)
        vecs, restore_path = process_epoch(epoch, train_data_loader,
                                           restore_path, vecs,
                                           pretrain_universal_embedding)
        perf = test_epoch(args, restore_path + '.meta', val_data_loader)
        if perf > best_perf:
            best_perf = perf
            best_epoch = epoch + 1

    # Record the best validation result alongside the full flag namespace.
    args.val_best_perf = best_perf
    args.val_best_epoch = best_epoch
    json_outfile = os.path.join(args.save_dir, 'results.json')
    print('best epoch at %i with val score %.1f' % (best_epoch, best_perf))
    with open(json_outfile, 'w') as outfile:
        json.dump(vars(args), outfile)
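# Illustrative downstream use (not part of the source): the best-validation
# fields written above can be read back out of results.json.
import json
import os

with open(os.path.join(args.save_dir, 'results.json')) as f:
    results = json.load(f)
print('best epoch %d, score %.1f' % (results['val_best_epoch'],
                                     results['val_best_perf']))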
def run_model(model, train_data, valid_data, test_data, crit, optimizer,
              adv_optimizer, scheduler, opt, data_dict):
    logger = evals.Logger(opt)
    valid_losses = []
    losses = []

    if opt.test_only:
        start = time.time()
        all_predictions, all_targets, test_loss = test_epoch(
            model, test_data, opt, data_dict, '(Testing)')
        elapsed = (time.time() - start) / 60
        print('\n(Testing) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        test_loss = test_loss / len(test_data._src_insts)
        print('B : ' + str(test_loss))
        test_metrics = evals.compute_metrics(all_predictions, all_targets, 0,
                                             opt, elapsed, all_metrics=True)
        return

    loss_file = open(path.join(opt.model_name, 'losses.csv'), 'w+')
    for epoch_i in range(opt.epoch):
        print('================= Epoch', epoch_i + 1, '=================')
        if scheduler and opt.lr_decay > 0:
            scheduler.step()

        ################################## TRAIN ###################################
        start = time.time()
        all_predictions, all_targets, train_loss = train_epoch(
            model, train_data, crit, optimizer, adv_optimizer, epoch_i + 1,
            data_dict, opt)
        elapsed = (time.time() - start) / 60
        print('\n(Training) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        train_loss = train_loss / len(train_data._src_insts)
        print('B : ' + str(train_loss))
        if 'reuters' in opt.dataset or 'bibtext' in opt.dataset:
            torch.save(all_predictions,
                       path.join(opt.model_name, 'epochs',
                                 'train_preds' + str(epoch_i + 1) + '.pt'))
            torch.save(all_targets,
                       path.join(opt.model_name, 'epochs',
                                 'train_targets' + str(epoch_i + 1) + '.pt'))
        train_metrics = evals.compute_metrics(all_predictions, all_targets, 0,
                                              opt, elapsed, all_metrics=True)

        ################################## VALID ###################################
        start = time.time()
        all_predictions, all_targets, valid_loss = test_epoch(
            model, valid_data, opt, data_dict, '(Validation)')
        elapsed = (time.time() - start) / 60
        print('\n(Validation) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        valid_loss = valid_loss / len(valid_data._src_insts)
        print('B : ' + str(valid_loss))
        torch.save(all_predictions,
                   path.join(opt.model_name, 'epochs',
                             'valid_preds' + str(epoch_i + 1) + '.pt'))
        torch.save(all_targets,
                   path.join(opt.model_name, 'epochs',
                             'valid_targets' + str(epoch_i + 1) + '.pt'))
        valid_metrics = evals.compute_metrics(all_predictions, all_targets, 0,
                                              opt, elapsed, all_metrics=True)
        valid_losses += [valid_loss]

        ################################## TEST ####################################
        start = time.time()
        all_predictions, all_targets, test_loss = test_epoch(
            model, test_data, opt, data_dict, '(Testing)')
        elapsed = (time.time() - start) / 60
        print('\n(Testing) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        test_loss = test_loss / len(test_data._src_insts)
        print('B : ' + str(test_loss))
        torch.save(all_predictions,
                   path.join(opt.model_name, 'epochs',
                             'test_preds' + str(epoch_i + 1) + '.pt'))
        torch.save(all_targets,
                   path.join(opt.model_name, 'epochs',
                             'test_targets' + str(epoch_i + 1) + '.pt'))
        test_metrics = evals.compute_metrics(all_predictions, all_targets, 0,
                                             opt, elapsed, all_metrics=True)

        best_valid, best_test = logger.evaluate(train_metrics, valid_metrics,
                                                test_metrics, epoch_i,
                                                opt.total_num_parameters)
        print(opt.model_name)

        losses.append([epoch_i + 1, train_loss, valid_loss, test_loss])
        if 'test' not in opt.model_name and not opt.test_only:
            utils.save_model(opt, epoch_i, model, valid_loss, valid_losses)

        loss_file.write(str(int(epoch_i + 1)))
        loss_file.write(',' + str(train_loss))
        loss_file.write(',' + str(valid_loss))
        loss_file.write(',' + str(test_loss))
        loss_file.write('\n')
    loss_file.close()
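# A minimal alternative sketch for the losses.csv bookkeeping above, using only
# the Python standard library: csv.writer handles the separators, and the
# context manager guarantees the file is flushed and closed even if an epoch
# raises partway through.
import csv
from os import path

with open(path.join(opt.model_name, 'losses.csv'), 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['epoch', 'train_loss', 'valid_loss', 'test_loss'])
    writer.writerows(losses)  # rows collected as [epoch, train, valid, test]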
stop_criterion = EarlyStopping()
for i in range(opt.begin_epoch, opt.n_epochs + 1):
    if not opt.no_train:
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)
    if not opt.no_val:
        validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                    val_logger)
        stop_criterion.eval_loss(validation_loss)
        # Stop once validation has failed to improve for 10 checks.
        if stop_criterion.get_nsteps() >= 10:
            break
    if not opt.no_train and not opt.no_val:
        scheduler.step(validation_loss)

if not opt.no_test:
    print('Setting up test_loader')
    test_data = get_data_set(opt, split='test_3d')
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    test_logger = Logger(os.path.join(opt.result_path, 'test.log'),
                         ['loss', 'acc'])
    test_loss = test_epoch(test_loader, model, criterion, opt, test_logger)
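# The EarlyStopping helper above is not defined in this excerpt. A minimal
# sketch consistent with the two calls it receives (eval_loss, get_nsteps),
# assuming "nsteps" counts consecutive epochs without improvement:
class EarlyStopping:
    def __init__(self, min_delta=0.0):
        self.best_loss = float('inf')
        self.nsteps = 0
        self.min_delta = min_delta

    def eval_loss(self, loss):
        # Reset the counter on improvement, otherwise accumulate.
        if loss < self.best_loss - self.min_delta:
            self.best_loss = loss
            self.nsteps = 0
        else:
            self.nsteps += 1

    def get_nsteps(self):
        return self.nsteps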
opt.begin_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['state_dict'])
del checkpoint

scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     milestones=opt.milestones,
                                     gamma=opt.lr_decay)
if not opt.no_train:
    # Fast-forward the freshly built scheduler to the resumed epoch.
    for _ in range(1, opt.begin_epoch):
        scheduler.step()
    cudnn.benchmark = True
    for i in range(opt.begin_epoch, opt.n_epochs + 1):
        scheduler.step()
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, writer)
        if i % opt.checkpoint == 0:
            save_file_path = os.path.join(opt.save_path,
                                          'train_' + str(i + 1) + '_model.pth')
            states = {
                'epoch': i + 1,
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
            }
            torch.save(states, save_file_path)
        if i % opt.test_per_epoches == 0:
            test_epoch(i, test_loader, model, opt, test_logger, writer)
elif not opt.no_test:
    test_epoch(0, test_loader, model, opt, test_logger, writer)
writer.close()
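# Alternative resume sketch (an assumption, not this code's method): PyTorch's
# MultiStepLR accepts last_epoch directly, replacing the manual fast-forward
# loop above. With last_epoch != -1, each param group must carry 'initial_lr'.
for group in optimizer.param_groups:
    group.setdefault('initial_lr', group['lr'])
scheduler = lr_scheduler.MultiStepLR(optimizer,
                                     milestones=opt.milestones,
                                     gamma=opt.lr_decay,
                                     last_epoch=opt.begin_epoch - 1)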
train_epoch(i, train_loader, model, criterion, optimizer, opt,
            train_logger, train_batch_logger)

if not opt.no_val:
    validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                val_logger)
    stop_criterion.eval_loss(validation_loss)
    if stop_criterion.get_nsteps() >= 10:
        break
if not opt.no_train and not opt.no_val:
    scheduler.step(validation_loss)

if not opt.no_test:
    print('Setting up test_loader')
    test_data = get_data_set(opt, split='test')
    test_loader = torch.utils.data.DataLoader(test_data,
                                              batch_size=opt.batch_size,
                                              shuffle=False,
                                              num_workers=opt.n_threads,
                                              pin_memory=True)
    test_logger = Logger(os.path.join(opt.result_path, 'test.log'),
                         ['loss', 'acc'])
    # Test subjects are listed one per row in a single-column CSV.
    test_subject_dirs = pd.read_csv(opt.test_subjects_file)
    test_subject_dirs = test_subject_dirs['0'].tolist()
    test_loss = test_epoch(test_subject_dirs, model, criterion, opt,
                           test_logger)
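# Illustrative note (an assumption about how opt.test_subjects_file was made):
# a plain Python list saved through a default pandas DataFrame gets a single
# column named '0', which is why the code above indexes df['0'].
import pandas as pd

pd.DataFrame(['subj_01', 'subj_02']).to_csv('test_subjects.csv', index=False)
print(pd.read_csv('test_subjects.csv')['0'].tolist())  # ['subj_01', 'subj_02']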
reg_criterion = RegLoss() if cfg.LOSS.REG else None

# Create optimizer
optimizer = optim.Adam(model.parameters(), lr=cfg.HYPER.LEARNING_RATE)

best_loss = float('Inf')
for epoch in range(cfg.HYPER.EPOCHS):
    # Start training
    train_loss = train_epoch(model, ee_criterion, vec_criterion,
                             col_criterion, lim_criterion, ori_criterion,
                             reg_criterion, optimizer, train_loader,
                             train_target, epoch, logger,
                             cfg.OTHERS.LOG_INTERVAL, writer, device)
    # Start testing
    test_loss = test_epoch(model, ee_criterion, vec_criterion, col_criterion,
                           lim_criterion, ori_criterion, reg_criterion,
                           test_loader, test_target, epoch, logger,
                           cfg.OTHERS.LOG_INTERVAL, writer, device)
    # Save the best model so far
    if test_loss < best_loss:
        best_loss = test_loss
        torch.save(
            model.state_dict(),
            os.path.join(cfg.OTHERS.SAVE,
                         "best_model_epoch_{:04d}.pth".format(epoch)))
        logger.info("Epoch {} Model Saved".format(epoch + 1).center(60, '-'))
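# Optional follow-on sketch (an assumption, not in the source): also keep a
# stable 'best_model.pth' alias so evaluation code need not glob for the most
# recent best_model_epoch_*.pth file.
import os
import shutil

best_path = os.path.join(cfg.OTHERS.SAVE,
                         'best_model_epoch_{:04d}.pth'.format(epoch))
shutil.copyfile(best_path, os.path.join(cfg.OTHERS.SAVE, 'best_model.pth'))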
opt.begin_epoch = checkpoint['epoch']
model.load_state_dict(checkpoint['state_dict'])
if not opt.no_train:
    optimizer.load_state_dict(checkpoint['optimizer'])

print('run')
# pdb.set_trace()
for i in range(opt.begin_epoch, opt.n_epochs + 1):
    if not opt.no_train:
        train_epoch(i, train_loader, model, criterion, optimizer, opt,
                    train_logger, train_batch_logger)
    if not opt.no_val:
        validation_loss = val_epoch(i, val_loader, model, criterion, opt,
                                    val_logger)
    if not opt.no_test:
        test_epoch(i, test_loader, model, criterion, opt, test_logger)
    # if not opt.no_train and not opt.no_val:
    #     scheduler.step(validation_loss)

# if opt.test:
#     spatial_transform = Compose([
#         Scale(int(opt.sample_size / opt.scale_in_test)),
#         CornerCrop(opt.sample_size, opt.crop_position_in_test),
#         ToTensor(opt.norm_value), norm_method
#     ])
#     temporal_transform = LoopPadding(opt.sample_duration)
#     target_transform = VideoID()
#
#     test_data = get_test_set(opt, spatial_transform, temporal_transform,
#                              target_transform)
#     test_loader = torch.utils.data.DataLoader(
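# A common resume pitfall worth guarding against (hedged: not from this
# source): if the model is moved to the GPU only after optimizer.load_state_dict
# runs, the restored optimizer state tensors can stay on the CPU. Moving them
# keeps optimizer.step() from mixing devices.
for state in optimizer.state.values():
    for k, v in state.items():
        if torch.is_tensor(v):
            state[k] = v.cuda()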
def run_model(
    model,
    train_data,
    valid_data,
    test_data,
    crit,
    optimizer,
    adv_optimizer,
    scheduler,
    opt,
    data_dict,
):
    #logger = evals.Logger(opt)
    valid_losses = []
    train_logger = Logger(opt, 'train')
    valid_logger = Logger(opt, 'valid')
    losses = []

    if opt.test_only:
        start = time.time()
        all_predictions, all_targets, test_loss = test_epoch(
            model, test_data, opt, data_dict, '(Testing)')
        elapsed = (time.time() - start) / 60
        print('\n(Testing) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        test_loss = test_loss / len(test_data._src_insts)
        print('B : ' + str(test_loss))
        # test_metrics = evals.compute_metrics(all_predictions, all_targets,
        #                                      0, opt, elapsed, all_metrics=True)
        return

    #loss_file = open(path.join(opt.model_name, 'losses.csv'), 'w+')
    for epoch_i in range(opt.epoch):
        print('================= Epoch', epoch_i + 1, '=================')
        if scheduler and opt.lr_decay > 0:
            scheduler.step()

        ################################## TRAIN ###################################
        start = time.time()
        all_predictions, all_targets, train_loss = train_epoch(
            model,
            train_data,
            crit,
            optimizer,
            adv_optimizer,
            epoch_i + 1,
            data_dict,
            opt,
            train_logger,
        )
        elapsed = (time.time() - start) / 60
        print('\n(Training) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        train_loss = train_loss / len(train_data._src_insts)
        print('B : ' + str(train_loss))
        """
        if 'reuters' in opt.dataset or 'bibtext' in opt.dataset:
            torch.save(all_predictions, path.join(opt.model_name, 'epochs', 'train_preds' + str(epoch_i + 1) + '.pt'))
            torch.save(all_targets, path.join(opt.model_name, 'epochs', 'train_targets' + str(epoch_i + 1) + '.pt'))
        """
        train_metrics = evals.compute_metrics(
            all_predictions,
            all_targets,
            train_loss,
            opt,
            elapsed,
            all_metrics=True,
        )
        train_logger.push_metrics(train_metrics)
        train_logger.push_loss(train_loss)

        ################################## VALID ###################################
        start = time.time()
        all_predictions, all_targets, valid_loss = test_epoch(
            model, valid_data, opt, data_dict, '(Validation)')
        elapsed = (time.time() - start) / 60
        print('\n(Validation) elapse: {elapse:3.3f} min'.format(elapse=elapsed))
        valid_loss = valid_loss / len(valid_data._src_insts)
        print('B : ' + str(valid_loss))
        """
        torch.save(all_predictions, path.join(opt.model_name, 'epochs', 'valid_preds' + str(epoch_i + 1) + '.pt'))
        torch.save(all_targets, path.join(opt.model_name, 'epochs', 'valid_targets' + str(epoch_i + 1) + '.pt'))
        """
        valid_metrics = evals.compute_metrics(
            all_predictions,
            all_targets,
            valid_loss,
            opt,
            elapsed,
            all_metrics=True,
        )
        valid_logger.push_metrics(valid_metrics)
        valid_logger.push_loss(valid_loss)
        #valid_losses += [valid_loss]

        ################################## TEST ####################################
        print(opt.model_name)
        #losses.append([epoch_i + 1, train_loss, valid_loss, test_loss])
        # if 'test' not in opt.model_name and not opt.test_only:
        #     utils.save_model(opt, epoch_i, model, valid_loss, valid_losses)

        # Persist the accumulated logs at the end of every epoch.
        summary = Summary(opt)
        summary.add_log(train_logger.log)
        summary.add_log(valid_logger.log)
        torch.save(train_logger.log, 'experiments/' + opt.exo_name + 'train')
        torch.save(valid_logger.log, 'experiments/' + opt.exo_name + 'valid')
        summary.close()
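# Logger and Summary here are project-specific and not defined in this excerpt.
# A minimal Logger stand-in consistent with the calls above (push_metrics,
# push_loss, and a .log attribute that torch.save can serialize):
class Logger:
    def __init__(self, opt, split):
        self.opt = opt
        self.split = split
        self.log = {'metrics': [], 'loss': []}

    def push_metrics(self, metrics):
        self.log['metrics'].append(metrics)

    def push_loss(self, loss):
        self.log['loss'].append(loss)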