def test(score_path, data_path):
    """Evaluate a pretrained DrBC model on the graph described by *data_path*.

    The checkpoint is loaded with the model on GPU (load() presumably needs a
    CUDA model -- TODO confirm), then the model is moved to CPU for scoring.
    """
    net = DrBC()
    net.cuda()
    load(net, MODEL_PATH)
    net = net.cpu()
    graph = TestData(score_path, data_path)
    val(net, graph, cuda=False)
def main():
    """Train or evaluate HourglassNet3D on H36M according to CLI options."""
    opt = opts().parse()
    now = datetime.datetime.now()
    logger = Logger(opt.saveDir + '/logs_{}'.format(now.isoformat()))

    # Resume from a checkpoint when one is given, otherwise build fresh.
    if opt.loadModel != 'none':
        model = torch.load(opt.loadModel).cuda()
    else:
        model = HourglassNet3D(opt.nStack, opt.nModules, opt.nFeats,
                               opt.nRegModules).cuda()

    criterion = torch.nn.MSELoss().cuda()
    optimizer = torch.optim.RMSprop(
        model.parameters(), opt.LR,
        alpha=ref.alpha, eps=ref.epsilon,
        weight_decay=ref.weightDecay, momentum=ref.momentum)

    val_loader = torch.utils.data.DataLoader(
        H36M(opt, 'val'), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))

    # Test-only mode: a single validation pass, then exit.
    if opt.test:
        val(0, opt, val_loader, model, criterion)
        return

    train_loader = torch.utils.data.DataLoader(
        H36M(opt, 'train'), batch_size=opt.trainBatch,
        shuffle=True if opt.DEBUG == 0 else False,
        num_workers=int(ref.nThreads))

    for epoch in range(1, opt.nEpochs + 1):
        loss_train, acc_train, mpjpe_train, loss3d_train = train(
            epoch, opt, train_loader, model, criterion, optimizer)
        logger.scalar_summary('loss_train', loss_train, epoch)
        logger.scalar_summary('acc_train', acc_train, epoch)
        logger.scalar_summary('mpjpe_train', mpjpe_train, epoch)
        logger.scalar_summary('loss3d_train', loss3d_train, epoch)

        if epoch % opt.valIntervals == 0:
            loss_val, acc_val, mpjpe_val, loss3d_val = val(
                epoch, opt, val_loader, model, criterion)
            logger.scalar_summary('loss_val', loss_val, epoch)
            logger.scalar_summary('acc_val', acc_val, epoch)
            logger.scalar_summary('mpjpe_val', mpjpe_val, epoch)
            logger.scalar_summary('loss3d_val', loss3d_val, epoch)
            torch.save(model,
                       os.path.join(opt.saveDir, 'model_{}.pth'.format(epoch)))
            logger.write(
                '{:8f} {:8f} {:8f} {:8f} {:8f} {:8f} {:8f} {:8f} \n'.format(
                    loss_train, acc_train, mpjpe_train, loss3d_train,
                    loss_val, acc_val, mpjpe_val, loss3d_val))
        else:
            logger.write('{:8f} {:8f} {:8f} {:8f} \n'.format(
                loss_train, acc_train, mpjpe_train, loss3d_train))

        adjust_learning_rate(optimizer, epoch, opt.dropLR, opt.LR)
    logger.close()
def main():
    """Train or evaluate a pose model on MPII; predictions saved as .mat."""
    opt = opts().parse()
    now = datetime.datetime.now()
    logger = Logger(opt.saveDir, now.isoformat())

    model, optimizer = getModel(opt)
    criterion = torch.nn.MSELoss().cuda()
    model = model.cuda()

    val_loader = torch.utils.data.DataLoader(
        MPII(opt, 'val'), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))

    # Test-only mode: one validation pass, dump predictions, done.
    if opt.test:
        log_dict_train, preds = val(0, opt, val_loader, model, criterion)
        sio.savemat(os.path.join(opt.saveDir, 'preds.mat'),
                    mdict={'preds': preds})
        return

    # Training loader; deterministic order when debugging (opt.DEBUG != 0).
    train_loader = torch.utils.data.DataLoader(
        MPII(opt, 'train'), batch_size=opt.trainBatch,
        shuffle=True if opt.DEBUG == 0 else False,
        num_workers=int(ref.nThreads))

    for epoch in range(1, opt.nEpochs + 1):
        log_dict_train, _ = train(epoch, opt, train_loader, model,
                                  criterion, optimizer)
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))

        if epoch % opt.valIntervals == 0:
            log_dict_val, preds = val(epoch, opt, val_loader, model, criterion)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            torch.save(model,
                       os.path.join(opt.saveDir, 'model_{}.pth'.format(epoch)))
            sio.savemat(os.path.join(opt.saveDir,
                                     'preds_{}.mat'.format(epoch)),
                        mdict={'preds': preds})

        logger.write('\n')

        # Step-decay learning-rate schedule every dropLR epochs.
        if epoch % opt.dropLR == 0:
            lr = opt.LR * (0.1 ** (epoch // opt.dropLR))
            print('Drop LR to {}'.format(lr))
            adjust_learning_rate(optimizer, lr)

    logger.close()
    torch.save(model.cpu(), os.path.join(opt.saveDir, 'model_cpu.pth'))
def sceneflow_predict(ckpt_path, vis=True, save_fig=True):
    """Run Scene Flow evaluation for a PSMNet checkpoint.

    :param ckpt_path: path of the TensorFlow checkpoint to restore
    :param vis: whether to visualise disparity results during evaluation
    :param save_fig: whether to write the visualisations to disk
    """
    with tf.Session() as sess:
        # Build the network graph before restoring weights into it.
        model = PSMNet(width=config.TRAIN_CROP_WIDTH,
                       height=config.TRAIN_CROP_HEIGHT,
                       channels=config.IMG_N_CHANNEL,
                       head_type=config.HEAD_STACKED_HOURGLASS,
                       batch_size=config.VAL_BATCH_SIZE)
        model.build_net()
        saver = tf.train.Saver()
        saver.restore(sess, save_path=ckpt_path)
        # NOTE(review): the loader uses TRAIN_BATCH_SIZE while the model was
        # built with VAL_BATCH_SIZE -- confirm the two are meant to differ.
        test_loader = DataLoaderSceneFlow(batch_size=config.TRAIN_BATCH_SIZE,
                                          max_disp=config.MAX_DISP)
        val(sess, model, data_loader=test_loader, vis=vis, save_fig=save_fig)
def exp_itl_only(exp_str='exp1', seed=0, lambdas=np.logspace(-6, 3, num=10),
                 gamma=None, n_processes=30, w_bar=4, y_snr=100, task_std=1,
                 n_tasks=100, n_train=100, n_dims=30, n_tasks_test=200,
                 n_test=100, val_perc=0.0, h_start=None, exp_dir=EXP_FOLDER,
                 inner_solver_test_str=('ssubgd', 'fista'), show_plot=False,
                 verbose=0):
    """Grid-search the ITL baseline only (no meta-training) and plot results."""
    loss_class, tasks_gen, inner_exp_name, \
        metric_dict, val_metric = select_exp(exp_str, seed=seed,
                                             task_std=task_std, y_snr=y_snr,
                                             n_train_tasks=n_tasks,
                                             n_val_tasks=n_tasks_test,
                                             n_dims=n_dims, val_perc=val_perc,
                                             w_bar=w_bar)

    exp_name = 'grid_searchITL' + inner_exp_name + 'seed' + str(seed) \
        + 'vm' + val_metric + 'is' + 'ist' + str(inner_solver_test_str) \
        + 'n' + str(n_train) + 'val_perc' + str(val_perc)

    # locals() is taken exactly here so exp_name (and everything above) is
    # recorded in the saved parameter dict.
    exp_parameters = locals()
    print('parameters ' + exp_name, exp_parameters)

    n_tasks_val = n_tasks_test // 2  # half the test tasks serve as validation

    data_train, oracle_train = tasks_gen(n_tasks=n_tasks, n_train=n_train,
                                         n_test=n_test, sel='train')
    data_valid, oracle_valid = tasks_gen(n_tasks=n_tasks_val, n_train=n_train,
                                         n_test=n_test, sel='val')
    data_test, oracle_test = tasks_gen(n_tasks=n_tasks_test, n_train=n_train,
                                       n_test=n_test, sel='test')
    print('n_tasks train, val ,test', len(data_train['X_train']),
          len(data_valid['X_train']), len(data_test['X_train']))

    exp_dir_path = make_exp_dir(os.path.join(exp_dir, exp_name))
    save_exp_parameters(exp_parameters, exp_dir_path)

    res_dict = {}
    for ts in inner_solver_test_str:
        inner_solver_test_class = inner_solver_selector(ts)

        # Meta-validation for the ITL baseline.
        results = Results(save_dir=exp_dir_path, do_plot=False,
                          show_plot=show_plot, name='ITL-ts' + ts)
        if h_start is None:
            h = np.zeros(tasks_gen.n_dims)
        else:
            h = np.copy(h_start)
        itl_res = val(h, val_metric, lambdas, gamma, inner_solver_test_class,
                      loss_class, data_valid, data_test, metric_dict, results,
                      n_processes=n_processes, verbose=verbose)
        res_dict[itl_res.name] = itl_res

    plot_resultsList(n_tasks + 1, res_dict, save_dir_path=exp_dir_path,
                     show_plot=show_plot, filename='ltl_plots')
    return res_dict
# Observe that all parameters are being optimized.
optimizer_ft = optim.SGD(model.parameters(), lr=conf['lr'],
                         momentum=conf['momentum'])

from train import accuracy, iou

epochs = 6
since = time.time()
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer_ft, epoch, criterion, conf)
    val(model, device, validation_loader, epoch, dataset_sizes['val'], conf)
    print("Time Taken for epoch%d: %d sec" % (epoch, time.time() - since))
    since = time.time()
    # Checkpoint after every epoch.
    if epoch % 1 == 0:
        torch.save(model.state_dict(), "./models/FCN8_ep%d.net" % epoch)
def main():
    """Train/validate the 2D hourglass prediction model on PENN_CROP."""
    # Parse the options and pin the primary GPU.
    opts = Opts().parse()
    opts.device = torch.device(f'cuda:{opts.gpu[0]}')
    print(opts.expID, opts.task)
    time_start = time.time()

    # TODO: select the dataset by the options
    train_loader_unit = PENN_CROP(opts, 'train')
    train_loader = tud.DataLoader(train_loader_unit,
                                  batch_size=opts.trainBatch,
                                  shuffle=False,
                                  num_workers=int(opts.num_workers))
    val_loader = tud.DataLoader(PENN_CROP(opts, 'val'),
                                batch_size=1,
                                shuffle=False,
                                num_workers=int(opts.num_workers))
    # Number of joints (output dimensionality) is read from the dataset.
    opts.nJoints = train_loader_unit.part.shape[1]

    # Model: fresh network or a saved checkpoint.
    if opts.loadModel == 'none':
        model = Hourglass2DPrediction(opts).cuda(device=opts.device)
    else:
        model = torch.load(opts.loadModel).cuda(device=opts.device)

    criterion = torch.nn.MSELoss(reduce=False).cuda(device=opts.device)
    optimizer = torch.optim.RMSprop(model.parameters(), opts.LR,
                                    alpha=opts.alpha, eps=opts.epsilon,
                                    weight_decay=opts.weightDecay,
                                    momentum=opts.momentum)

    # TEST mode: validate once and stop.
    # TODO: save the validate results to mat or hdf5
    if opts.test:
        loss_test, pck_test = val(0, opts, val_loader, model, criterion)
        print(f"test: | loss_test: {loss_test}| PCK_val: {pck_test}\n")
        return

    # Train, validating every valIntervals epochs.
    for epoch in range(1, opts.nEpochs + 1):
        loss_train, pck_train = train(epoch, opts, train_loader, model,
                                      criterion, optimizer)
        time_elapsed = time.time() - time_start
        print(
            f"epoch: {epoch} | loss_train: {loss_train} | PCK_train: {pck_train} | {time_elapsed//60:.0f}min {time_elapsed%60:.0f}s\n"
        )
        if epoch % opts.valIntervals == 0:
            loss_val, pck_val = val(epoch, opts, val_loader, model, criterion)
            print(
                f"epoch: {epoch} | loss_val: {loss_val}| PCK_val: {pck_val}\n")
            torch.save(model,
                       os.path.join(opts.save_dir, f"model_{epoch}.pth"))
        # Step-decay learning rate every dropLR epochs.
        if epoch % opts.dropLR == 0:
            lr = adjust_learning_rate(optimizer, epoch, opts.dropLR, opts.LR)
            print(f"Drop LR to {lr}\n")
epoch = 1 itr = 0 start_time = time.time() # training with early stopping while (epoch < args.epochs + 1) and (iteration < args.patience): if args.arc == 'Parallel': parallel_train(train_loader1, model, optimizer, epoch, args.cuda, args.log_interval, args.loss_func) valid_loss = parallel_val(valid_loader1, model, args.cuda, args.loss_func) else: train(train_loader, model, optimizer, epoch, args.cuda, args.log_interval, args.loss_func) valid_loss = val(valid_loader, model, args.cuda, args.loss_func) if valid_loss > best_valid_loss: iteration += 1 print('Loss was not improved, iteration {0}'.format(str(iteration))) if (iteration == 1): itr += 1 else: print('Saving model...') iteration = 0 best_valid_loss = valid_loss state = { 'net': model.module if args.cuda else model, 'acc': valid_loss, 'epoch': epoch, } if not os.path.isdir('checkpoint'):
def main():
    """Train/evaluate the temporal Pose3D model on Human3.6M."""
    opt = opts().parse()
    torch.cuda.set_device(opt.gpu_id)
    print('Using GPU ID: ', str(torch.cuda.current_device()))
    now = datetime.datetime.now()
    logger = Logger(opt.saveDir + '/logs_{}'.format(now.isoformat()))

    # Model creation: inflate a 2D net, train from scratch, or load a
    # checkpoint (state dict or whole pickled model).
    if opt.loadModel == 'none':
        model = inflate(opt).cuda()
    elif opt.loadModel == 'scratch':
        model = Pose3D(opt.nChannels, opt.nStack, opt.nModules,
                       opt.numReductions, opt.nRegModules, opt.nRegFrames,
                       ref.nJoints, ref.temporal).cuda()
    else:
        if opt.isStateDict:
            model = Pose3D(opt.nChannels, opt.nStack, opt.nModules,
                           opt.numReductions, opt.nRegModules, opt.nRegFrames,
                           ref.nJoints, ref.temporal).cuda()
            model.load_state_dict(torch.load(opt.loadModel))
            model = model.cuda()
            print("yaya")
        else:
            model = torch.load(opt.loadModel).cuda()

    val_loader = torch.utils.data.DataLoader(h36m('val', opt),
                                             batch_size=1,
                                             shuffle=False,
                                             num_workers=int(ref.nThreads))

    # Complete test: sweep the validation set in chunks and report the
    # frame-weighted average MPJPE.
    if opt.completeTest:
        mp = 0.
        cnt = 0.
        for i in range(6000 // opt.nVal):
            opt.startVal = 120 * i
            # (removed the no-op `opt.nVal = opt.nVal` self-assignment)
            a, b = val(i, opt, val_loader, model)
            mp += a * b
            cnt += b
            print("This Round " + str(a) + " MPJPE in " + str(b) + " frames!!")
            print("Average MPJPE so far " + str(mp / cnt))
            print("")
        print("------Finally--------")
        print("Final MPJPE ==> :" + str(mp / cnt))
        return

    if (opt.test):
        val(0, opt, val_loader, model)
        return

    train_loader = torch.utils.data.DataLoader(
        FusionDataset('train', opt) if opt.loadMpii else h36m('train', opt),
        batch_size=opt.dataloaderSize,
        shuffle=True,
        num_workers=int(ref.nThreads))

    # Separate learning rates for the hourglass (hg) and regression (dr) parts.
    optimizer = torch.optim.RMSprop(
        [{'params': model.hg.parameters(), 'lr': opt.LRhg},
         {'params': model.dr.parameters(), 'lr': opt.LRdr}],
        alpha=ref.alpha,
        eps=ref.epsilon,
        weight_decay=ref.weightDecay,
        momentum=ref.momentum)

    def hookdef(grad):
        # Gradient hook for 5-D (3D-conv) weights: scale by freezefac; when
        # the kernel has temporal extent, only slice index 1 is scaled.
        newgrad = grad.clone()
        if (grad.shape[2] == 1):
            newgrad = grad * opt.freezefac
        else:
            newgrad[:, :, 1, :, :] = grad[:, :, 1, :, :] * opt.freezefac
        return newgrad

    def hookdef1(grad):
        # Gradient hook for 2-D (linear) weights: scale columns 4096:8192.
        newgrad = grad.clone()
        newgrad[:, 4096:8192] = newgrad[:, 4096:8192] * opt.freezefac
        return newgrad

    for i in (model.parameters()):
        if len(i.shape) == 5:
            _ = i.register_hook(hookdef)
        if len(i.shape) == 2:
            _ = i.register_hook(hookdef1)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=opt.dropMag, patience=opt.patience,
        verbose=True, threshold=opt.threshold)

    for epoch in range(1, opt.nEpochs + 1):
        loss_train, loss3d_train, mpjpe_train, acc_train = train(
            epoch, opt, train_loader, model, optimizer)
        logger.scalar_summary('loss_train', loss_train, epoch)
        #logger.scalar_summary('acc_train', acc_train, epoch)
        logger.scalar_summary('mpjpe_train', mpjpe_train, epoch)
        logger.scalar_summary('loss3d_train', loss3d_train, epoch)
        if epoch % opt.valIntervals == 0:
            loss_val, loss3d_val, mpjpe_val, acc_val = val(
                epoch, opt, val_loader, model)
            logger.scalar_summary('loss_val', loss_val, epoch)
            # logger.scalar_summary('acc_val', acc_val, epoch)
            logger.scalar_summary('mpjpe_val', mpjpe_val, epoch)
            logger.scalar_summary('loss3d_val', loss3d_val, epoch)
            torch.save(model.state_dict(),
                       os.path.join(opt.saveDir,
                                    'model_{}.pth'.format(epoch)))
            # BUG FIX: the format string had 6 placeholders for 8 arguments,
            # so str.format silently dropped the last two values.
            logger.write(
                '{:8f} {:8f} {:8f} {:8f} {:8f} {:8f} {:8f} {:8f} \n'.format(
                    loss_train, mpjpe_train, loss3d_train, acc_val,
                    loss_val, mpjpe_val, loss3d_val, acc_train))
        else:
            # BUG FIX: 3 placeholders for 4 arguments -- acc_train was dropped.
            logger.write('{:8f} {:8f} {:8f} {:8f} \n'.format(
                loss_train, mpjpe_train, loss3d_train, acc_train))
        #adjust_learning_rate(optimizer, epoch, opt.dropLR, opt.LR)
        # Plateau scheduler driven by the metric selected via opt.scheduler.
        if opt.scheduler == 1:
            scheduler.step(int(loss_train))
        elif opt.scheduler == 2:
            scheduler.step(int(loss3d_train))
        elif opt.scheduler == 3:
            scheduler.step(int(loss_train + loss3d_train))
        elif opt.scheduler == 4:
            scheduler.step(int(mpjpe_train))
    logger.close()
def main():
    """Train or test the adversarial-defense denoiser defined by the experiment.

    Reads splits and attack lists from the experiment directory, optionally
    resumes a checkpoint, and either runs the test set (``args.test == 1``)
    or trains with periodic checkpointing.
    """
    global args
    args = parser.parse_args()
    modelpath = os.path.join(os.path.abspath('../pixel_Exps'), args.exp)
    train_data = np.load(os.path.join(modelpath, 'train_split.npy'))
    val_data = np.load(os.path.join(modelpath, 'val_split.npy'))
    with open(os.path.join(modelpath, 'train_attack.txt'), 'r') as f:
        train_attack = f.readlines()
    # Keep only the first token of each line (strip spaces, commas, newline).
    train_attack = [attack.split(' ')[0].split(',')[0].split('\n')[0]
                    for attack in train_attack]

    sys.path.append(modelpath)
    model = import_module('model')
    config, net = model.get_model()
    start_epoch = args.start_epoch
    save_dir = args.save_dir

    # Resume: restore weights and, if unset, the epoch/save dir.
    if args.resume:
        checkpoint = torch.load(args.resume)
        if start_epoch == 0:
            start_epoch = checkpoint['epoch'] + 1
        if not save_dir:
            save_dir = checkpoint['save_dir']
        else:
            save_dir = os.path.join(modelpath, 'results', save_dir)
        net.load_state_dict(checkpoint['state_dict'])
    else:
        if start_epoch == 0:
            start_epoch = 1
        if not save_dir:
            exp_id = time.strftime('%Y%m%d-%H%M%S', time.localtime())
            save_dir = os.path.join(modelpath, 'results', exp_id)
        else:
            save_dir = os.path.join(modelpath, 'results', save_dir)

    if args.test == 1:
        net = net.net
    if args.debug:
        net = net.cuda()
    else:
        net = DataParallel(net).cuda()
    cudnn.benchmark = True

    # TEST mode: evaluate on the test split and exit.
    if args.test == 1:
        with open(os.path.join(modelpath, 'test_attack.txt'), 'r') as f:
            test_attack = f.readlines()
        test_attack = [attack.split(' ')[0].split(',')[0].split('\n')[0]
                       for attack in test_attack]
        test_data = np.load(os.path.join(modelpath, 'test_split.npy'))
        dataset = DefenseDataset(config, 'test', test_data, test_attack)
        test_loader = DataLoader(
            dataset,
            batch_size=args.batch_size,
            shuffle=False,
            num_workers=args.workers,
            pin_memory=True)
        resumeid = args.resume.split('.')[-2].split('/')[-1]
        print(args.defense)
        args.defense = args.defense == 1
        if args.defense:
            name = 'result_%s_%s' % (args.exp, resumeid)
        else:
            name = 'result_%s_%s_nodefense' % (args.exp, resumeid)
        test(net, test_loader, name, args.defense)
        return

    dataset = DefenseDataset(config, 'train', train_data, train_attack)
    train_loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)
    dataset = DefenseDataset(config, 'val', val_data, train_attack)
    val_loader = DataLoader(
        dataset,
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.workers,
        pin_memory=True)

    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    logfile = os.path.join(save_dir, 'log')
    sys.stdout = Logger(logfile)
    # Snapshot the source files alongside the results for reproducibility.
    pyfiles = [f for f in os.listdir('./') if f.endswith('.py')]
    for f in pyfiles:
        shutil.copy(f, os.path.join(save_dir, f))

    # Only the denoiser sub-network is optimized.
    if isinstance(net, DataParallel):
        params = net.module.net.denoise.parameters()
    else:
        params = net.net.denoise.parameters()
    if args.optimizer == 'sgd':
        optimizer = optim.SGD(
            params,
            lr=args.lr,
            momentum=0.9,
            weight_decay=args.weight_decay)
    elif args.optimizer == 'adam':
        optimizer = optim.Adam(
            params,
            lr=args.lr,
            weight_decay=args.weight_decay)
    else:
        exit('Wrong optimizer')

    def get_lr(epoch):
        # Piecewise-constant schedule: full LR for 60% of training,
        # then 10x and 100x decays.
        if epoch <= args.epochs * 0.6:
            return args.lr
        elif epoch <= args.epochs * 0.9:
            return args.lr * 0.1
        else:
            return args.lr * 0.01

    for epoch in range(start_epoch, args.epochs + 1):
        requires_control = epoch == start_epoch
        train(epoch, net, train_loader, optimizer, get_lr,
              config['loss_idcs'], requires_control=requires_control)
        val(epoch, net, val_loader, requires_control=requires_control)

        if epoch % args.save_freq == 0:
            # BUG FIX: narrowed a bare `except:` (which would also swallow
            # KeyboardInterrupt and real errors) to AttributeError -- only a
            # non-DataParallel net lacks `.module`.
            try:
                state_dict = net.module.state_dict()
            except AttributeError:
                state_dict = net.state_dict()
            for key in state_dict.keys():
                state_dict[key] = state_dict[key].cpu()
            torch.save({
                'epoch': epoch,
                'save_dir': save_dir,
                'state_dict': state_dict,
                'args': args}, os.path.join(save_dir, '%03d.ckpt' % epoch))
def exp(exp_str='exp1', seed=0, lambdas=np.logspace(-6, 3, num=10),
        alphas=np.logspace(-6, 3, num=10), gamma=None, n_processes=30,
        w_bar=4, y_snr=100, task_std=1, n_tasks=100, n_train=100, n_dims=30,
        n_tasks_test=200, n_test=100, val_perc=0.0, exp_dir=EXP_FOLDER,
        inner_solver_str=('ssubgd', 'fista'),
        inner_solver_test_str=('ssubgd', 'fista'),
        show_plot=False, verbose=0):
    """Full grid search: ITL and oracle MEAN baselines plus LTL meta-learning."""
    loss_class, tasks_gen, inner_exp_name, \
        metric_dict, val_metric = select_exp(exp_str, seed=seed,
                                             task_std=task_std, y_snr=y_snr,
                                             n_train_tasks=n_tasks,
                                             n_val_tasks=n_tasks_test,
                                             n_dims=n_dims, val_perc=val_perc,
                                             w_bar=w_bar)

    exp_name = 'grid_search' + inner_exp_name + 'seed' + str(seed) \
        + 'vm' + val_metric + 'is' + str(inner_solver_str) \
        + 'ist' + str(inner_solver_test_str) + 'n' + str(n_train) \
        + 'val_perc' + str(val_perc)

    n_tasks_val = n_tasks_test // 2  # half the test tasks serve as validation

    # locals() is taken exactly here so exp_name and n_tasks_val are recorded.
    exp_parameters = locals()
    print('parameters ' + exp_name, exp_parameters)

    exp_dir_path = make_exp_dir(os.path.join(exp_dir, exp_name))
    save_exp_parameters(exp_parameters, exp_dir_path)
    with open(os.path.join(exp_dir_path, tasks_gen.desc + ".txt"), "w") as text_file:
        text_file.write(tasks_gen.desc)

    data_train, oracle_train = tasks_gen(n_tasks=n_tasks, n_train=n_train,
                                         n_test=n_test, sel='train')
    data_valid, oracle_valid = tasks_gen(n_tasks=n_tasks_val, n_train=n_train,
                                         n_test=n_test, sel='val')
    data_test, oracle_test = tasks_gen(n_tasks=n_tasks_test, n_train=n_train,
                                       n_test=n_test, sel='test')

    res_dict = {}
    for ts in inner_solver_test_str:
        inner_solver_test_class = inner_solver_selector(ts)

        # Meta-validation for the ITL baseline (zero bias vector).
        results = Results(save_dir=exp_dir_path, do_plot=False,
                          show_plot=show_plot, name='ITL-ts' + ts)
        h = np.zeros(tasks_gen.n_dims)
        itl_res = val(h, val_metric, lambdas, gamma, inner_solver_test_class,
                      loss_class, data_valid, data_test, metric_dict, results,
                      n_processes=n_processes, verbose=verbose)
        res_dict[itl_res.name] = itl_res

        # Meta-validation for the oracle MEAN baseline (true w_bar).
        if oracle_valid is not None:
            results = Results(save_dir=exp_dir_path, do_plot=False,
                              show_plot=show_plot, name='MEAN-ts' + ts)
            h = oracle_valid['w_bar']
            oracle_res = val(h, val_metric, lambdas, gamma,
                             inner_solver_test_class, loss_class,
                             data_valid, data_test, metric_dict, results,
                             n_processes=n_processes, verbose=verbose)
            res_dict[oracle_res.name] = oracle_res

        # LTL: meta-train with each inner solver, then meta-validate.
        for s in inner_solver_str:
            inner_solver_class = inner_solver_selector(s)
            results = Results(save_dir=exp_dir_path, do_plot=False,
                              show_plot=show_plot,
                              name='LTL-tr' + s + 'ts' + ts)
            h0 = np.zeros(tasks_gen.n_dims)
            ltl_res = meta_val(val_metric=val_metric, h0=h0, alphas=alphas,
                               lambdas=lambdas, gamma=gamma,
                               inner_solver_class=inner_solver_class,
                               inner_solver_test_class=inner_solver_test_class,
                               loss_class=loss_class, data_train=data_train,
                               data_valid=data_valid, data_test=data_test,
                               metric_dict=metric_dict, results=results,
                               n_processes=n_processes, verbose=verbose)
            res_dict[ltl_res.name] = ltl_res

    plot_resultsList(n_tasks + 1, res_dict, save_dir_path=exp_dir_path,
                     show_plot=show_plot, filename='plots')
    return res_dict
def main(config):
    """Train a one-vs-rest binary model per class and save the combined model.

    For each class, trains a fresh 2-way network, tracks the best validation
    checkpoint, and finally assembles all best models into an OVRModel whose
    state dict is written under the configured save root.
    """
    if torch.cuda.is_available():
        print_and_log('Successfully loaded CUDA!')
        device = 'cuda:' + str(config['Training']['gpu'])
    else:
        print('Could not load CUDA!')
        logging.warning('Could not load CUDA!')
        device = 'cpu'

    seed = config['Training']['seed']
    if seed is not None:
        torch.random.manual_seed(seed)
        np.random.seed(seed)

    num_classes = config['Model Kwargs']['num_classes']
    config['Model Kwargs']['num_classes'] = 2  # each OVR model is binary
    ovr_models = []
    loader_gen = data_util.from_config_ovr(config)

    for nclass in range(num_classes):
        print_and_log('Training {} vs. Rest'.format(
            config['Data Files']['datasets'][nclass]))
        train_loader, val_loader = next(loader_gen)
        model = nn_models.from_config(config).to(device)
        start_lr = config['Training']['lr']
        epochs = config['Training']['epochs']
        optimizer = optim.Adam(model.parameters(), lr=start_lr)
        lr_scheduler = optim.lr_scheduler.CosineAnnealingLR(
            optimizer, epochs, eta_min=0.000005)
        criterion = nn.CrossEntropyLoss()
        train_losses, val_losses = [], []
        train_accs, val_accs = [], []
        lambd1 = config['Training']['l1_loss']
        best_val_loss, best_model = float('inf'), model

        for epoch in range(epochs):
            print_and_log('Epoch: {}'.format(epoch))
            # Train
            train_loss, train_acc, train_ce = train.train(
                model, optimizer, criterion, train_loader,
                lambd1=lambd1, device=device)
            print_and_log('\tTrain Loss: {}'.format(train_ce))
            print_and_log('\tTrain Accuracy: {}'.format(train_acc))
            train_losses.append(train_loss)
            train_accs.append(train_acc)
            # Val
            val_loss, val_acc, val_ce = train.val(
                model, criterion, val_loader, lambd1=lambd1, device=device)
            print_and_log('\tVal Loss: {}'.format(val_ce))
            print_and_log('\tVal Accuracy: {}'.format(val_acc))
            val_losses.append(val_loss)
            val_accs.append(val_acc)
            lr_scheduler.step()
            if val_loss < best_val_loss:
                # BUG FIX: best_val_loss was never updated, so the condition
                # held on (almost) every epoch and best_model effectively
                # tracked the *last* epoch instead of the best one.
                best_val_loss = val_loss
                best_model = copy.deepcopy(model)

        num_classes = model.num_classes
        conf_mat = train.confusion_matrix(model, val_loader, num_classes,
                                          device=device)
        print_and_log('Final eval confusion matrix:')
        print_and_log(conf_mat)
        ovr_models.append(best_model)

    final_model = nn_models.OVRModel(ovr_models)

    # Save model
    save_dir = config['Logging']['save_root']
    try:
        os.mkdir(save_dir)
    except FileExistsError:
        pass
    model_path = os.path.join(
        save_dir,
        '{}_{}_{}seed{}.pt'.format('OVRModel',
                                   "".join(config['Data Files']['datasets']),
                                   config['Logging']['tag'], seed))
    torch.save(final_model.state_dict(), model_path)
    print_and_log('Saved model to ' + model_path)
# loss_fn = nn.CrossEntropyLoss(weight = conf['classWeights'].to(device), ignore_index = 0)
# loss_fn = functools.partial(cross_entropy2d, **conf['loss_params'])

# One epoch suffices in evaluation mode; training runs the configured count.
if conf['train']:
    epochs = conf['max_epochs']
    print("Starting Training with {} epochs".format(epochs))
else:
    epochs = 1
    print("Starting Evaluation with {} epochs".format(epochs))

num_classes = 21
print("Saving Conf Parameters: {}".format(conf['save_path']))
conf_path = "{}/conf.pickle".format(conf['save_path'])

since = time.time()
for epoch in range(start_epoch, epochs + 1):
    if conf['train']:
        train(model, device, train_loader, optimizer_ft, epoch, conf,
              loss_fn, num_classes)
    val(model, device, validation_loader, epoch, dataset_sizes['val'], conf,
        loss_fn, num_classes, dst)
    print("Time Taken for epoch%d: %d sec" % (epoch, time.time() - since))
    since = time.time()

    # Persist the configuration every epoch.
    with open(conf_path, "wb") as f:
        pickle.dump(conf, f)
        f.close()  # redundant: the with-block closes too (kept as-is)

    if epoch % 5 == 0:
        torch.save(model.state_dict(),
                   "{}/FCN8_vgg16_{}.net".format(conf['model_path'], epoch))
def main(opt):
    """Train/evaluate an LSTM or Conv3D sequence model on H36M clips.

    Returns the final epoch's training loss.
    """
    if opt.disable_cudnn:
        torch.backends.cudnn.enabled = False
        print('Cudnn is disabled.')

    timestep = 4
    logger = Logger(opt)
    opt.device = torch.device('cuda:{}'.format(opt.gpus[0]))
    Dataset = dataset_factory[opt.dataset]
    LstmData = SeqH36m(Dataset(opt, 'train', 1), timestep)
    # task_factory yields the (train, val) routines for the chosen task.
    train, val = task_factory[opt.task]

    if opt.task == "conv3d":
        model, optimizer, start_epoch = create_conv3d(opt, timestep)
    else:
        model, optimizer, start_epoch = create_lstm(opt, timestep)

    if len(opt.gpus) > 1:
        model = torch.nn.DataParallel(model,
                                      device_ids=opt.gpus).cuda(opt.device)
    else:
        model = model.cuda(opt.device)

    val_loader = torch.utils.data.DataLoader(
        SeqH36m(Dataset(opt, 'val', 1), timestep),
        batch_size=1, shuffle=False, num_workers=1, pin_memory=True)

    # Test-only mode: validate once, dump predictions, done.
    if opt.test:
        log_dict_train, preds = val(0, opt, val_loader, model)
        sio.savemat(os.path.join(opt.save_dir, 'preds.mat'),
                    mdict={'preds': preds})
        return

    train_loader = torch.utils.data.DataLoader(
        LstmData,
        batch_size=opt.batch_size * len(opt.gpus),
        shuffle=True,  # if opt.debug == 0 else False,
        num_workers=opt.num_workers,
        pin_memory=True)

    best = -1
    for epoch in range(start_epoch, opt.num_epochs + 1):
        mark = epoch if opt.save_all_models else 'last'
        log_dict_train, _ = train(epoch, opt, train_loader, model,
                                  optimizer, timestep)
        for k, v in log_dict_train.items():
            logger.scalar_summary('train_{}'.format(k), v, epoch)
            logger.write('{} {:8f} | '.format(k, v))

        if opt.val_intervals > 0 and epoch % opt.val_intervals == 0:
            save_model(
                os.path.join(opt.save_dir,
                             'model_lstm_{}.pth'.format(mark)),
                epoch, model, optimizer)
            log_dict_val, preds = val(epoch, opt, val_loader, model, timestep)
            for k, v in log_dict_val.items():
                logger.scalar_summary('val_{}'.format(k), v, epoch)
                logger.write('{} {:8f} | '.format(k, v))
            # Keep a separate checkpoint for the best validation metric.
            if log_dict_val[opt.metric] > best:
                best = log_dict_val[opt.metric]
                save_model(os.path.join(opt.save_dir, 'model_lstm_best.pth'),
                           epoch, model)
        else:
            save_model(os.path.join(opt.save_dir, 'model_lstm_last.pth'),
                       epoch, model, optimizer)

        logger.write('\n')

        # Step-decay schedule at the configured epochs.
        if epoch in opt.lr_step:
            lr = opt.lr * (0.1 ** (opt.lr_step.index(epoch) + 1))
            print('Drop LR to', lr)
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr

    # logger.close()
    return log_dict_train['loss']
def main():
    """Train an hourglass model on fused data; validate on H36M and MPII."""
    opt = opts().parse()
    now = datetime.datetime.now()
    logger = Logger(opt.saveDir + '/logs_{}'.format(now.isoformat()))

    # Model creation: inflate, train from scratch, or load a checkpoint.
    if opt.loadModel == 'none':
        model = inflate(opt).cuda()
    elif opt.loadModel == 'scratch':
        model = HourglassNet3D(opt.nChannels, opt.nStack, opt.nModules,
                               opt.numReductions, ref.nJoints).cuda()
    else:
        model = torch.load(opt.loadModel).cuda()

    val_loader1 = torch.utils.data.DataLoader(
        h36m('val', opt), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))
    val_loader2 = torch.utils.data.DataLoader(
        mpii('val', opt), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))

    if (opt.test):
        val(0, opt, val_loader1, model)
        val(0, opt, val_loader2, model)
        # BUG FIX: this branch ended with `pass`, so test-only runs fell
        # through into full training; sibling scripts return here.
        return

    train_loader = torch.utils.data.DataLoader(
        #h36m('train',opt),
        FusionDataset('train', opt),
        #posetrack('train', opt),
        batch_size=opt.dataloaderSize,
        shuffle=True,
        num_workers=int(ref.nThreads))

    optimizer = torch.optim.RMSprop(
        [{'params': model.parameters(), 'lr': opt.LRhg}],
        alpha=ref.alpha, eps=ref.epsilon,
        weight_decay=ref.weightDecay, momentum=ref.momentum)

    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer, 'min', factor=opt.dropMag, patience=opt.patience,
        verbose=True, threshold=opt.threshold)

    for epoch in range(1, opt.nEpochs + 1):
        loss_train, acc_train = train(epoch, opt, train_loader, model,
                                      optimizer)
        logger.scalar_summary('loss_train', loss_train, epoch)
        logger.scalar_summary('acc_train', acc_train, epoch)

        if epoch % opt.valIntervals == 0:
            # Validate on both datasets; each writes its own log line.
            loss_val, acc_val = val(epoch, opt, val_loader1, model)
            logger.scalar_summary('loss_val', loss_val, epoch)
            logger.scalar_summary('acc_val', acc_val, epoch)
            logger.write('{:8f} {:8f} {:8f} {:8f} \n'.format(
                loss_train, acc_train, loss_val, acc_val))
            loss_val, acc_val = val(epoch, opt, val_loader2, model)
            logger.scalar_summary('loss_val', loss_val, epoch)
            logger.scalar_summary('acc_val', acc_val, epoch)
            logger.write('{:8f} {:8f} {:8f} {:8f} \n'.format(
                loss_train, acc_train, loss_val, acc_val))
            torch.save(model,
                       os.path.join(opt.saveDir,
                                    'model_{}.pth'.format(epoch)))
        else:
            logger.write('{:8f} {:8f} \n'.format(loss_train, acc_train))

        #adjust_learning_rate(optimizer, epoch, opt.dropLR, opt.LR)
        if opt.scheduler == 1:
            scheduler.step(int(loss_train))
    logger.close()
def main():
    """Run a single training epoch of MSSH on MPII (legacy Variable API)."""
    opt = opts().parse()
    now = datetime.datetime.now()
    logger = Logger(opt.saveDir, now.isoformat())

    model = MSSH().cuda()
    optimizer = torch.optim.RMSprop(model.parameters(), opt.LR,
                                    alpha=ref.alpha, eps=ref.epsilon,
                                    weight_decay=ref.weightDecay,
                                    momentum=ref.momentum)
    criterion = torch.nn.MSELoss().cuda()

    val_loader = torch.utils.data.DataLoader(
        MPII(opt, 'val'), batch_size=1, shuffle=False,
        num_workers=int(ref.nThreads))

    # Test-only mode: validate, dump predictions, done.
    if opt.test:
        log_dict_train, preds = val(0, opt, val_loader, model, criterion)
        sio.savemat(os.path.join(opt.saveDir, 'preds.mat'),
                    mdict={'preds': preds})
        return

    train_loader = torch.utils.data.DataLoader(
        MPII(opt, 'train'), batch_size=opt.trainBatch,
        shuffle=True if opt.DEBUG == 0 else False,
        num_workers=int(ref.nThreads))

    for epoch in range(1):
        model.train()
        Loss, Acc = AverageMeter(), AverageMeter()
        preds = []
        nIters = len(train_loader)
        bar = Bar('{}'.format(opt.expID), max=nIters)
        for i, (input, target, meta) in enumerate(train_loader):
            input_var = torch.autograd.Variable(input).float().cuda()
            target_var = torch.autograd.Variable(target).float().cuda()
            output = model(input_var)
            loss = criterion(output, target_var)
            Loss.update(loss.data[0], input.size(0))
            Acc.update(Accuracy((output.data).cpu().numpy(),
                                (target_var.data).cpu().numpy()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            Bar.suffix = '{split} Epoch: [{0}][{1}/{2}]| Total: {total:} | ETA: {eta:} | Loss {loss.avg:.6f} | Acc {Acc.avg:.6f} ({Acc.val:.6f})'.format(epoch, i, nIters, total=bar.elapsed_td, eta=bar.eta_td, loss=Loss, Acc=Acc, split = "train")
            bar.next()
        bar.finish()
# Interior of the CRNN training loop (loop header is defined elsewhere).
cost = trainBatch(crnn, criterion, optimizer, data, converter,
                  use_cuda=opt.cuda)
loss_avg.add(cost)
i += 1

# Periodic progress report.
if i % opt.displayInterval == 0:
    print('[%d/%d][%d/%d] Loss: %f' %
          (epoch, opt.niter, i, len(train_dataloader), loss_avg.val()))
    loss_avg.reset()

# Periodic validation.
if i % opt.valInterval == 0:
    val(crnn, test_dataloader, criterion, converter, opt.n_test_disp,
        opt.batchSize, use_cuda=opt.cuda)

# do checkpointing
if i % opt.saveInterval == 0:
    #save_checkpoint()
    torch.save(
        crnn.state_dict(),
        '{0}/netCRNN_{1}_{2}.pth'.format(opt.checkpoints_folder, epoch, i))