def compute_loss(miximages, mixlabels, model, use_gpu):
    doubleset = TensorDataset(miximages, mixlabels)
    doubleloader = torch.utils.data.DataLoader(doubleset, batch_size=8192,
                                               shuffle=False)
    criterion = CrossEntropyLoss(size_average=True)
    meter = AverageMeter()
    for imgs, lbls in doubleloader:
        if use_gpu:
            imgs = imgs.cuda()
            lbls = lbls.cuda()
        with torch.no_grad():  # this line is key to preventing CUDA out of memory error
            predictions = model(imgs)
        meter.update(criterion(predictions, lbls), imgs.shape[0])
    return meter.avg
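# The two helpers used by compute_loss are defined elsewhere in the repo. Below is
# a minimal, hypothetical sketch of what `AverageMeter` (a running weighted mean)
# and the custom soft-label `CrossEntropyLoss` (cross entropy against mixed/one-hot
# targets) might look like; the real implementations may differ.
import torch
import torch.nn as nn
import torch.nn.functional as F


class AverageMeter(object):
    """Tracks a running weighted average, e.g. of a per-batch loss."""

    def __init__(self):
        self.sum = 0.0
        self.count = 0
        self.avg = 0.0

    def update(self, value, n=1):
        self.sum += float(value) * n
        self.count += n
        self.avg = self.sum / max(self.count, 1)


class CrossEntropyLoss(nn.Module):
    """Cross entropy against soft targets: -sum(t * log_softmax(logits))."""

    def __init__(self, size_average=True):  # mimics the old reduction kwarg used above
        super().__init__()
        self.size_average = size_average

    def forward(self, logits, soft_targets):
        log_probs = F.log_softmax(logits, dim=1)
        loss = -(soft_targets * log_probs).sum(dim=1)
        return loss.mean() if self.size_average else loss.sum()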
def main():
    # parse command line argument and generate config dictionary
    config = parse_args()
    logger.info(json.dumps(config, indent=2))

    run_config = config['run_config']
    optim_config = config['optim_config']

    # TensorBoard SummaryWriter
    if run_config['tensorboard']:
        writer = SummaryWriter()
    else:
        writer = None

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = run_config['outdir']
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # save config as json file in output directory
    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    # load data loaders
    train_loader, test_loader = get_loader(config['data_config'])

    # load model
    logger.info('Loading model...')
    model = load_model(config['model_config'])
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('n_params: {}'.format(n_params))
    if run_config['use_gpu']:
        model = nn.DataParallel(model)
        model.cuda()
    logger.info('Done')

    if config['data_config']['use_mixup']:
        train_criterion = CrossEntropyLoss(size_average=True)
    else:
        train_criterion = nn.CrossEntropyLoss(size_average=True)
    test_criterion = nn.CrossEntropyLoss(size_average=True)

    # create optimizer
    optim_config['steps_per_epoch'] = len(train_loader)
    optimizer, scheduler = create_optimizer(model.parameters(), optim_config)

    # run test before start training
    if run_config['test_first']:
        test(0, model, test_criterion, test_loader, run_config, writer)

    state = {
        'config': config,
        'state_dict': None,
        'optimizer': None,
        'epoch': 0,
        'accuracy': 0,
        'best_accuracy': 0,
        'best_epoch': 0,
    }
    for epoch in range(1, optim_config['epochs'] + 1):
        # train
        train(epoch, model, optimizer, scheduler, train_criterion, train_loader,
              config, writer)

        # test
        accuracy = test(epoch, model, test_criterion, test_loader, run_config,
                        writer)

        # update state dictionary
        state = update_state(state, epoch, accuracy, model, optimizer)

        # save model
        save_checkpoint(state, outdir)

    if run_config['tensorboard']:
        outpath = os.path.join(outdir, 'all_scalars.json')
        writer.export_scalars_to_json(outpath)
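# `update_state` and `save_checkpoint` are imported helpers. This is a hypothetical
# sketch of what they might do, inferred from the `state` dictionary built above
# (tracking the best test accuracy and serializing checkpoints); the repo's actual
# helpers and file names may differ.
import os
import torch


def update_state(state, epoch, accuracy, model, optimizer):
    state['state_dict'] = model.state_dict()
    state['optimizer'] = optimizer.state_dict()
    state['epoch'] = epoch
    state['accuracy'] = accuracy
    # keep track of the best test accuracy seen so far
    if accuracy > state['best_accuracy']:
        state['best_accuracy'] = accuracy
        state['best_epoch'] = epoch
    return state


def save_checkpoint(state, outdir):
    # assumed file names for illustration only
    torch.save(state, os.path.join(outdir, 'model_state.pth'))
    if state['epoch'] == state['best_epoch']:
        torch.save(state, os.path.join(outdir, 'model_best_state.pth'))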
def train(trainset, model, args, devset):
    global WORD_VECTOR
    optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    # optimizer = torch.optim.SGD(model.parameters(), lr=args.learning_rate)
    # loss_fn = torch.nn.CrossEntropyLoss(size_average=False)
    loss_fn = CrossEntropyLoss(smooth_eps=args.smoothing)
    if args.cuda:
        model.cuda()
        loss_fn.cuda()

    best_acc = 0
    last_step = 0
    last_epoch = 0  # initialize so the final report works even if dev accuracy never improves
    model.train()
    for epoch in range(1, args.epochs + 1):
        # shuffle the data
        permutation = np.random.permutation(len(trainset["text"]))
        trainset["text"] = trainset["text"][permutation]
        trainset["label"] = trainset["label"][permutation]

        for steps in range(0, len(trainset["text"]), args.batch_size):
            feature = get_batch(trainset["text"][steps:steps + args.batch_size],
                                WORD_VECTOR, embed_size=args.embed_size)
            target = trainset["label"][steps:steps + args.batch_size]
            # seq_len, batch_size, embed_size = feature.size()
            # feature = feature.view(-1, (batch_size, seq_len, embed_size))
            feature = Variable(feature.cuda()) if args.cuda else Variable(feature.cpu())
            target = (Variable(torch.LongTensor(target)).cuda() if args.cuda
                      else Variable(torch.LongTensor(target)).cpu())
            # if args.cuda:
            #     feature = Variable(feature.cuda())
            #     target = Variable(torch.LongTensor(target)).cuda()

            optimizer.zero_grad()
            logit = model(feature)
            loss = loss_fn(logit, target)
            loss.backward()
            optimizer.step()

            if steps % args.log_freq == 0:
                corrects = (torch.max(logit, 1)[1].view(
                    target.size()).data == target.data).sum()
                accuracy = 100.0 * corrects / args.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.data, accuracy, corrects, args.batch_size))

            if devset and steps % args.val_freq == 0:
                dev_acc = evaluation(devset, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    last_epoch = epoch
                    # if args.save_best:
                    if True:
                        save(model, args.save_dir, 'best', "0")
                else:
                    # note: this only reports the early-stop condition, it does not break out
                    if steps - last_step >= args.early_stop:
                        print('early stop by {} steps.'.format(args.early_stop))
            elif steps % args.save_freq == 0:
                save(model, args.save_dir, 'snapshot', steps)

    if devset:
        print("Best evaluate acc is %s reached at step %s epoch: %s" %
              (best_acc.tolist(), last_step, last_epoch))
    else:
        print("saving the last model..")
        save(model, args.save_dir, "best", "0")
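# The `CrossEntropyLoss(smooth_eps=...)` used above is a custom label-smoothing
# criterion from the repo. Below is a minimal, hypothetical sketch of such a loss
# (gold label gets 1 - eps + eps/K probability mass, every class gets eps/K);
# the actual class may be implemented differently.
import torch
import torch.nn as nn
import torch.nn.functional as F


class LabelSmoothingCrossEntropy(nn.Module):
    def __init__(self, smooth_eps=0.1):
        super().__init__()
        self.smooth_eps = smooth_eps

    def forward(self, logits, target):
        n_classes = logits.size(1)
        log_probs = F.log_softmax(logits, dim=1)
        # spread smooth_eps uniformly over all classes, keep the rest on the gold label
        smooth_target = torch.full_like(log_probs, self.smooth_eps / n_classes)
        smooth_target.scatter_(1, target.unsqueeze(1),
                               1.0 - self.smooth_eps + self.smooth_eps / n_classes)
        return -(smooth_target * log_probs).sum(dim=1).mean()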
def main():
    # parse command line argument and generate config dictionary
    config = parse_args()
    logger.info(json.dumps(config, indent=2))

    run_config = config['run_config']
    optim_config = config['optim_config']

    # TensorBoard SummaryWriter
    if run_config['tensorboard']:
        writer = SummaryWriter()
    else:
        writer = None

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = run_config['outdir']
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # save config as json file in output directory
    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    # load data loaders
    train_loader, test_loader = get_loader(config['data_config'])

    # compute covariances
    full_train_loader, _ = get_loader(config['data_config'], return_full=True)
    for batch in full_train_loader:
        full_images, full_labels = batch
    num_classes = config['data_config']['n_classes']
    full_targets = onehot(full_labels, num_classes)
    xbar, ybar, xxcov, xycov, T = taylor.compute_moments(full_images, full_targets)
    # torch.save(xbar, 'xbar.pt')
    # torch.save(ybar, 'ybar.pt')
    # torch.save(xxcov, 'xxcov.pt')
    # torch.save(xycov, 'xycov.pt')

    num_components = config['data_config']['cov_components']
    Uxx, Sxx, Vxx = taylor.decomposition(xxcov, num_components)
    Uxy, Sxy, Vxy = taylor.decomposition(xycov, 10)

    xdim = T.shape[1]
    # svd's of T[i,:,:] slices
    T_U = torch.zeros((num_classes, xdim, num_components))
    T_S = torch.zeros((num_classes, num_components))
    T_V = torch.zeros((num_classes, xdim, num_components))
    for i in range(num_classes):
        T_U[i, :, :], T_S[i, :], T_V[i, :, :] = taylor.decomposition(
            T[i, :, :], num_components)
    # torch.save(Uxx, 'Uxx.pt')
    # torch.save(Sxx, 'Sxx.pt')
    # torch.save(Vxx, 'Vxx.pt')
    # torch.save(Uxy, 'Uxy.pt')
    # torch.save(Sxy, 'Sxy.pt')
    # torch.save(Vxy, 'Vxy.pt')

    if run_config['use_gpu']:
        moment_dict = {
            'Uxx': Uxx.cuda(),
            'Uxy': Uxy.cuda(),
            'Sxx': Sxx.cuda(),
            'Sxy': Sxy.cuda(),
            'Vxx': Vxx.cuda(),
            'Vxy': Vxy.cuda(),
            'xbar': xbar.reshape(full_images.shape[1:]).cuda(),
            'ybar': ybar.cuda(),
            'T_U': T_U.cuda(),
            'T_S': T_S.cuda(),
            'T_V': T_V.cuda()
        }
    else:
        moment_dict = {
            'Uxx': Uxx,
            'Uxy': Uxy,
            'Sxx': Sxx,
            'Sxy': Sxy,
            'Vxx': Vxx,
            'Vxy': Vxy,
            'xbar': xbar.reshape(full_images.shape[1:]),
            'ybar': ybar,
            'T_U': T_U,
            'T_S': T_S,
            'T_V': T_V
        }

    # set up dataframe for recording results:
    dfcols = []
    dfcols.append('epoch')
    dfcols.append('train_loss')
    dfcols.append('train_acc')
    if config['data_config']['use_mixup']:
        dfcols.append('doublesum_train')
        dfcols.append('doublesum_eval')
        dfcols.append('doublesum_eval2')
    if config['data_config']['cov_components'] > 0:
        dfcols.append('taylor_base')
        dfcols.append('taylor_de')
        for k in [1, 2, 5, 20, 50, 200]:
            dfcols.append('taylor_d2_' + str(k))
        for k in [1, 2, 5, 20, 50, 200]:
            dfcols.append('taylor_d2e_' + str(k))
    dfcols.append('test_loss')
    dfcols.append('test_acc')
    resultsdf = pd.DataFrame(columns=dfcols)

    # load model
    logger.info('Loading model...')
    model = load_model(config['model_config'])
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('n_params: {}'.format(n_params))
    if run_config['use_gpu']:
        model = nn.DataParallel(model)
        model.cuda()
    logger.info('Done')

    if config['data_config']['use_mixup']:
        train_criterion = CrossEntropyLoss(size_average=True)
    else:
        train_criterion = nn.CrossEntropyLoss(size_average=True)
    test_criterion = nn.CrossEntropyLoss(size_average=True)

    # create optimizer
    optim_config['steps_per_epoch'] = len(train_loader)
    optimizer, scheduler = create_optimizer(model.parameters(), optim_config)

    # run test before start training
    if run_config['test_first']:
        test(0, model, test_criterion, test_loader, run_config, writer)

    state = {
        'config': config,
        'state_dict': None,
        'optimizer': None,
        'epoch': 0,
        'accuracy': 0,
        'best_accuracy': 0,
        'best_epoch': 0,
    }
    for epoch in range(1, optim_config['epochs'] + 1):
        # train
        dfrow = train(epoch, model, optimizer, scheduler, train_criterion,
                      train_loader, config, writer, moment_dict)

        # test
        test_loss, accuracy = test(epoch, model, test_criterion, test_loader,
                                   run_config, writer)
        dfrow.append(test_loss)
        dfrow.append(accuracy)

        if epoch <= 4 or epoch % 5 == 0:
            resultsdf.loc[resultsdf.shape[0]] = list(dfrow)
            resultsdf.to_csv(os.path.join(outdir, 'results.csv'))

        # update state dictionary
        state = update_state(state, epoch, accuracy, model, optimizer)

        # save model
        save_checkpoint(state, outdir)

    if run_config['tensorboard']:
        outpath = os.path.join(outdir, 'all_scalars.json')
        writer.export_scalars_to_json(outpath)
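# `onehot` and `taylor.decomposition` are imported helpers. The sketch below is a
# hypothetical illustration: `onehot` as a standard label-to-one-hot conversion,
# and `decomposition` assumed to be a rank-k truncated SVD, shown here via
# torch.svd_lowrank. The real `taylor` module may compute these differently.
import torch


def onehot(labels, num_classes):
    targets = torch.zeros(labels.size(0), num_classes)
    targets[torch.arange(labels.size(0)), labels] = 1.0
    return targets


def decomposition(matrix, num_components):
    # rank-`num_components` approximation: matrix ~= U @ diag(S) @ V.T
    U, S, V = torch.svd_lowrank(matrix, q=num_components)
    return U, S, V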
def main():
    # parse command line argument and generate config dictionary
    config = parse_args()
    logger.info(json.dumps(config, indent=2))

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    human_tune = run_config['human_tune']
    print('human tune type: ', type(human_tune), human_tune)
    # if human_tune:
    human_tune_scores = []

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    if not run_config['no_output']:
        # create output directory
        outdir = run_config['outdir']
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        # save config as json file in output directory
        outpath = os.path.join(outdir, 'config.json')
        with open(outpath, 'w') as fout:
            json.dump(config, fout, indent=2)

    # load model
    logger.info('Loading model...')
    model = load_model(config['model_config'])
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('n_params: {}'.format(n_params))
    if run_config['use_gpu']:
        model = nn.DataParallel(model)
        model.cuda()
    logger.info('Done')

    test_criterion = CrossEntropyLoss(size_average=True)

    master_scores = []
    master_labels = []
    master_outputs = []
    master_probs = []

    # load pretrained weights if given
    master_resume = run_config['resume']
    print('master directory is: ', master_resume)

    # load data loaders
    print('loading data loaders')
    print('loading human tune test loaders')
    test_loaders = get_loader(config['data_config'])

    # NOTE: `fold` is not defined in this function; the checkpoint path below
    # assumes an enclosing loop over cross-validation folds that is not shown here.
    run_config['resume'] = '{0}/fold_{1}/model_best_state_c10h_val_c10_acc.pth'.format(
        master_resume, fold)
    if os.path.isfile(run_config['resume']):
        print("=> loading checkpoint '{}'".format(run_config['resume']))
        checkpoint = torch.load(run_config['resume'])
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (epoch {})".format(
            run_config['resume'], checkpoint['epoch']))
    else:
        print("=> no checkpoint found at '{}'".format(run_config['resume']))

    # get labels
    scores_test, labels_test, outputs_test, probs_test = test(
        checkpoint['epoch'], model, test_criterion, test_loaders, run_config,
        human_tune)
    master_scores.append(scores_test)
    master_labels.append(labels_test)
    master_outputs.append(outputs_test)
    master_probs.append(probs_test)

    master_labels = np.concatenate(master_labels)
    print('master labels shape: ', master_labels.shape)
    print('master labels argmax[:5]: {0}, \n master argmax labels[-5:]: {1}'.format(
        np.argmax(master_labels[:5], axis=1), np.argmax(master_labels[-5:], axis=1)))
    master_outputs = np.vstack(master_outputs)
    print('master outputs shape: ', master_outputs.shape)
    master_probs = np.vstack(master_probs)
    print('master probs shape: ', master_probs.shape)

    c10h_outdir = run_config['c10h_scores_outdir']
    if not os.path.exists(c10h_outdir):
        os.makedirs(c10h_outdir)

    identifier = run_config['resume'].split('/')[-3] + '_' + run_config['resume'].split('/')[-4]
    print('identifier reduction: {0} to {1}'.format(str(run_config['resume']), identifier))
    s_dir = os.path.join(str(c10h_outdir), identifier)

    # resave (overwrite) scores file with latest entries
    keys = master_scores[0].keys()
    print('keys: ', keys)
    with open(os.path.join(s_dir + '_master_scores.csv'), 'w') as output_file:  # changed from above
        dict_writer = csv.DictWriter(output_file, keys)
        dict_writer.writeheader()
        dict_writer.writerows(master_scores)
def main():
    # parse command line arguments
    config = parse_args()
    logger.info(json.dumps(config, indent=2))

    run_config = config['run_config']
    optim_config = config['optim_config']
    data_config = config['data_config']

    # TensorBoard SummaryWriter
    writer = SummaryWriter() if run_config['tensorboard'] else None

    # set random seed
    seed = run_config['seed']
    torch.manual_seed(seed)
    np.random.seed(seed)
    random.seed(seed)

    # create output directory
    outdir = run_config['outdir']
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    # save config as json file in output directory
    outpath = os.path.join(outdir, 'config.json')
    with open(outpath, 'w') as fout:
        json.dump(config, fout, indent=2)

    # data loaders
    train_loader, test_loader = get_loader(optim_config['batch_size'],
                                           run_config['num_workers'])

    # model
    model = load_model(config['model_config'])
    model.cuda()
    n_params = sum([param.view(-1).size()[0] for param in model.parameters()])
    logger.info('n_params: {}'.format(n_params))

    if config['data_config']['use_mixup']:
        train_criterion = CrossEntropyLoss(size_average=True)
    else:
        train_criterion = nn.CrossEntropyLoss(size_average=True)
    test_criterion = nn.CrossEntropyLoss(size_average=True)

    # optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                lr=optim_config['base_lr'],
                                momentum=optim_config['momentum'],
                                weight_decay=optim_config['weight_decay'],
                                nesterov=optim_config['nesterov'])
    scheduler = torch.optim.lr_scheduler.MultiStepLR(
        optimizer,
        milestones=optim_config['milestones'],
        gamma=optim_config['lr_decay'])

    # run test before start training
    test(0, model, test_criterion, test_loader, run_config, writer)

    for epoch in range(1, optim_config['epochs'] + 1):
        scheduler.step()

        train(epoch, model, optimizer, train_criterion, train_loader, config,
              writer)
        accuracy = test(epoch, model, test_criterion, test_loader, run_config,
                        writer)

        state = OrderedDict([
            ('config', config),
            ('state_dict', model.state_dict()),
            ('optimizer', optimizer.state_dict()),
            ('epoch', epoch),
            ('accuracy', accuracy),
        ])
        model_path = os.path.join(outdir, 'model_state.pth')
        torch.save(state, model_path)

    if run_config['tensorboard']:
        outpath = os.path.join(outdir, 'all_scalars.json')
        writer.export_scalars_to_json(outpath)
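# Several of the mains above branch on data_config['use_mixup'] and pair it with the
# soft-label criterion. For reference, this is a minimal sketch of standard mixup
# (Zhang et al., 2018): convex combinations of input pairs and of their one-hot
# labels, with the mixing weight drawn from Beta(alpha, alpha). The repo's own
# loader/collate code may apply mixup differently; `mixup_batch` is illustrative only.
import numpy as np
import torch


def mixup_batch(images, onehot_labels, alpha=1.0):
    lam = np.random.beta(alpha, alpha)
    index = torch.randperm(images.size(0))
    mixed_images = lam * images + (1.0 - lam) * images[index]
    mixed_labels = lam * onehot_labels + (1.0 - lam) * onehot_labels[index]
    return mixed_images, mixed_labels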