# Script-setup fragment: create output directories, open the run log, and do
# deferred imports. Assumes `param`, `base_dir` and `logs_dir` are defined
# earlier in the file -- TODO confirm.
os.mkdir(f"{base_dir}/models")
if param.gen_extra_images > 0:
    os.mkdir(f"{base_dir}/images/extra")
# where we save the output
log_output = open(f"{logs_dir}/log.txt", 'w')
# echo the full hyper-parameter set both to stdout and the log file
print(param)
print(param, file=log_output)

import torch
import torch.autograd as autograd
from torch.autograd import Variable

# For plotting the Loss of D and G using tensorboard
from tensorboard_logger import configure, log_value
configure(logs_dir, flush_secs=5)

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transf
import torchvision.models as models
import torchvision.utils as vutils

if param.cuda:
    import torch.backends.cudnn as cudnn
    # benchmark mode auto-tunes conv algorithms for fixed-size inputs
    cudnn.benchmark = True

# To see images
from IPython.display import Image
to_img = transf.ToPILImage()
# (fragment) these are the trailing arguments of a parser.add_argument(...)
# call whose opening parenthesis is outside this chunk
                    type=int, default=200,
                    help="Number of epochs to train for")
parser.add_argument("-run_name", type=str, default="sem_seg_run_1",
                    help="Name for run in tensorboard_logger")

# data directory lives next to this file
BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')
lr_clip = 1e-5   # lower bound when decaying the learning rate
bnm_clip = 1e-2  # lower bound when decaying batch-norm momentum

if __name__ == "__main__":
    args = parser.parse_args()
    tb_log.configure('runs/{}'.format(args.run_name))

    # NOTE(review): "data_precent" (sic) must match the Indoor3DSemSeg
    # keyword exactly -- presumably a misspelling of "data_percent" in the
    # dataset class itself; do not "fix" it here alone.
    test_set = Indoor3DSemSeg(args.num_points, BASE_DIR, train=False,
                              data_precent=0.01)
    test_loader = DataLoader(test_set, batch_size=args.batch_size,
                             shuffle=True, pin_memory=True, num_workers=2)
    train_set = Indoor3DSemSeg(args.num_points, BASE_DIR, data_precent=1.0)
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              pin_memory=True,
                              # (fragment) this call continues past the chunk
# (fragment) trailing arguments of an eval-loader DataLoader(...) call whose
# opening is outside this chunk
                         batch_size=args.batch_size, shuffle=False,
                         pin_memory=True, num_workers=args.workers)

if args.mode == 'train':
    # fixed 8192 points per cloud for training
    train_set = Indoor3DSemSeg(8192, train=True)
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, pin_memory=True,
                              num_workers=args.workers)

    # output dir config
    output_dir = os.path.join(args.output_dir, args.extra_tag)
    os.makedirs(output_dir, exist_ok=True)
    tb_log.configure(os.path.join(output_dir, 'tensorboard'))
    ckpt_dir = os.path.join(output_dir, 'ckpt')
    os.makedirs(ckpt_dir, exist_ok=True)

    log_file = os.path.join(output_dir, 'log.txt')
    log_f = open(log_file, 'w')
    # dump the full CLI configuration to the log for reproducibility
    for key, val in vars(args).items():
        log_print("{:16} {}".format(key, val), log_f=log_f)

    # train and eval
    train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir, log_f)
    log_f.close()
elif args.mode == 'eval':
    epoch = load_checkpoint(model, args.ckpt)
# Load checkpoint if checkpoint_path: print("Load checkpoint from: {}".format(checkpoint_path)) checkpoint = torch.load(checkpoint_path) model.load_state_dict(checkpoint["state_dict"]) optimizer.load_state_dict(checkpoint["optimizer"]) try: global_step = checkpoint["global_step"] global_epoch = checkpoint["global_epoch"] except: # TODO pass # Setup tensorboard logger tensorboard_logger.configure(log_path) print(hparams_debug_string()) # Train! try: train(model, train_loader, val_loader, optimizer, init_lr=hparams.initial_learning_rate, checkpoint_dir=checkpoint_dir, checkpoint_interval=hparams.checkpoint_interval, nepochs=hparams.nepochs, clip_thresh=hparams.clip_thresh) except KeyboardInterrupt:
def init(opt):
    """Build every object the SRGAN-style training loop needs.

    Returns the tuple (normalize, scale, dataloader, generator,
    discriminator, feature_extractor, content_criterion,
    adversarial_criterion, ones_const, optim_generator,
    optim_discriminator, low_res).
    """
    # [folder] create folder for checkpoints
    try:
        os.makedirs(opt.out)
    except OSError:
        # directory already exists -- nothing to do
        pass
    # [cuda] check cuda, if cuda is available, then display warning
    if torch.cuda.is_available() and not opt.cuda:
        sys.stdout.write('[WARNING] : You have a CUDA device, so you should probably run with --cuda')
    # [normalization] __return__ normalize images, set up mean and std
    # (ImageNet statistics -- matches the pretrained VGG feature extractor)
    normalize = transforms.Normalize(mean = [0.485, 0.456, 0.406],
                                     std = [0.229, 0.224, 0.225])
    # [scale] __return__
    scale = transforms.Compose([transforms.ToPILImage(),
                                transforms.Resize(opt.imageSize),
                                transforms.ToTensor(),
                                transforms.Normalize(mean = [0.485, 0.456, 0.406],
                                                     std = [0.229, 0.224, 0.225])])
    # [transform] up sampling transforms
    transform = transforms.Compose([transforms.RandomCrop((opt.imageSize[0] * opt.upSampling, opt.imageSize[1] * opt.upSampling)),
                                    transforms.ToTensor()])
    # [dataset] training dataset
    if opt.dataset == 'folder':
        dataset = datasets.ImageFolder(root = opt.dataroot, transform = transform)
    elif opt.dataset == 'cifar10':
        dataset = datasets.CIFAR10(root = opt.dataroot, train = True, download = True, transform = transform)
    elif opt.dataset == 'cifar100':
        # NOTE(review): download=False here unlike cifar10 -- confirm intended
        dataset = datasets.CIFAR100(root = opt.dataroot, train = True, download = False, transform = transform)
    assert dataset
    # [dataloader] __return__ loading dataset
    dataloader = torch.utils.data.DataLoader(dataset, batch_size = opt.batchSize,
                                             shuffle = True,
                                             num_workers = int(opt.workers))
    # [generator] __return__ generator of GAN (optionally warm-started)
    generator = Generator(16, opt.upSampling)
    if opt.generatorWeights != '' and os.path.exists(opt.generatorWeights):
        generator.load_state_dict(torch.load(opt.generatorWeights))
    # [discriminator] __return__ discriminator of GAN
    discriminator = Discriminator()
    if opt.discriminatorWeights != '' and os.path.exists(opt.discriminatorWeights):
        discriminator.load_state_dict(torch.load(opt.discriminatorWeights))
    # [extractor] __return__ feature extractor of GAN
    # For the content loss
    feature_extractor = FeatureExtractor(torchvision.models.vgg19(pretrained = True))
    # [loss] __return__ loss function
    content_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()
    # constant "real" target for the adversarial loss
    ones_const = Variable(torch.ones(opt.batchSize, 1))
    # [cuda] if gpu is to be used
    if opt.cuda:
        generator.cuda()
        discriminator.cuda()
        feature_extractor.cuda()
        content_criterion.cuda()
        adversarial_criterion.cuda()
        ones_const = ones_const.cuda()
    # [optimizer] __return__ Optimizer for GAN
    optim_generator = optim.Adam(generator.parameters(), lr = opt.generatorLR)
    optim_discriminator = optim.Adam(discriminator.parameters(), lr = opt.discriminatorLR)
    # record configure (the odd "{} -{}" spacing in the path is preserved
    # verbatim -- existing log directories depend on it)
    configure('logs/{}-{}-{} -{}'.format(opt.dataset, str(opt.batchSize), str(opt.generatorLR), str(opt.discriminatorLR)), flush_secs = 5)
    # visualizer = Visualizer(image_size = (opt.imageSize[0] * opt.upSampling, opt.imageSize[1] * opt.upSampling))
    # __return__ low resolution images (reusable input buffer)
    low_res = torch.FloatTensor(opt.batchSize, 3, opt.imageSize[0], opt.imageSize[1])
    return normalize,\
        scale,\
        dataloader,\
        generator,\
        discriminator,\
        feature_extractor,\
        content_criterion,\
        adversarial_criterion,\
        ones_const,\
        optim_generator,\
        optim_discriminator,\
        low_res
def main():
    """Parse args, build the model/dataloaders, then run the
    train/validate loop, checkpointing the best validation AP.

    Fixes in this revision:
    - `os.exit(1)` does not exist (`os._exit` skips cleanup); use
      `sys.exit(1)`.
    - `lr` was logged into scores.tsv without ever being assigned
      (NameError on the first iteration); it is now read from the
      optimizer's first param group.
    """
    # parse arg and start experiment
    global args
    best_ap = -1.
    best_iter = 0

    args = parser.parse_args()
    args.config_of_data = config.datasets[args.data]
    # args.num_classes = config.datasets[args.data]['num_classes']
    if configure is None:
        args.tensorboard = False
        print(Fore.RED +
              'WARNING: you don\'t have tesnorboard_logger installed' +
              Fore.RESET)

    # optionally resume from a checkpoint
    if args.resume:
        if args.resume and os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            old_args = checkpoint['args']
            print('Old args:')
            print(old_args)
            # set args based on checkpoint
            if args.start_iter <= 0:
                args.start_iter = checkpoint['iter'] + 1
            best_iter = args.start_iter - 1
            best_ap = checkpoint['best_ap']
            # architecture-defining options must come from the checkpoint
            for name in arch_resume_names:
                if name in vars(args) and name in vars(old_args):
                    setattr(args, name, getattr(old_args, name))
            model = get_model(**vars(args))
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (iter {})"
                  .format(args.resume, checkpoint['iter']))
        else:
            print(
                "=> no checkpoint found at '{}'".format(
                    Fore.RED + args.resume + Fore.RESET),
                file=sys.stderr)
            return
    else:
        # create model
        print("=> creating model '{}'".format(args.arch))
        model = get_model(**vars(args))

    # cudnn.benchmark = True
    cudnn.enabled = False

    # create dataloader
    # NOTE(review): splits=('val') is a plain string, not a 1-tuple --
    # getDataloaders presumably does substring membership tests; confirm
    # before changing.
    if args.evaluate == 'val':
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('val'), **vars(args))
        validate(val_loader, model, best_iter)
        return
    elif args.evaluate == 'test':
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('test'), **vars(args))
        validate(test_loader, model, best_iter)
        return
    else:
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('train', 'val'), **vars(args))

    # define optimizer
    optimizer = get_optimizer(model, args)

    # check if the folder exists
    if os.path.exists(args.save):
        print(Fore.RED + args.save + Fore.RESET + ' already exists!',
              file=sys.stderr)
        if not args.force:
            ans = input('Do you want to overwrite it? [y/N]:')
            if ans not in ('y', 'Y', 'yes', 'Yes'):
                # BUG FIX: was os.exit(1), which raises AttributeError --
                # sys.exit raises SystemExit as intended.
                sys.exit(1)
        print('remove existing ' + args.save)
        shutil.rmtree(args.save)
    os.makedirs(args.save)
    print('create folder: ' + Fore.GREEN + args.save + Fore.RESET)

    # copy code to save folder (skipped for debug runs)
    if args.save.find('debug') < 0:
        shutil.copytree(
            '.', os.path.join(args.save, 'src'), symlinks=True,
            ignore=shutil.ignore_patterns(
                '*.pyc', '__pycache__', '*.path.tar', '*.pth', '*.ipynb',
                '.*', 'data', 'save', 'save_backup'))

    # set up logging: log_print mirrors stdout into log.txt
    global log_print, f_log
    f_log = open(os.path.join(args.save, 'log.txt'), 'w')

    def log_print(*args):
        print(*args)
        print(*args, file=f_log)
    log_print('args:')
    log_print(args)
    print('model:', file=f_log)
    print(model, file=f_log, flush=True)
    log_print('# of params:',
              str(sum([p.numel() for p in model.parameters()])))
    torch.save(args, os.path.join(args.save, 'args.pth'))
    scores = ['iter\tlr\ttrain_loss\tval_ap']

    if args.tensorboard:
        configure(args.save, flush_secs=5)

    for i in range(args.start_iter, args.niters + 1, args.eval_freq):
        # BUG FIX: `lr` was used below without being defined; take the
        # current learning rate from the optimizer for logging.
        lr = optimizer.param_groups[0]['lr']
        # train for args.eval_freq iterations
        train_loss = train(train_loader, model, optimizer, i, args.eval_freq)
        i += args.eval_freq - 1
        # evaluate on validation set
        val_ap = validate(val_loader, model, i)

        # save scores to a tsv file, rewrite the whole file to prevent
        # accidental deletion
        scores.append(('{}\t{}' + '\t{:.4f}' * 2)
                      .format(i, lr, train_loss, val_ap))
        with open(os.path.join(args.save, 'scores.tsv'), 'w') as f:
            print('\n'.join(scores), file=f)

        # remember best err@1 and save checkpoint
        is_best = val_ap > best_ap
        if is_best:
            best_ap = val_ap
            best_iter = i
            print(Fore.GREEN + 'Best var_err1 {}'.format(best_ap) +
                  Fore.RESET)
        save_checkpoint({
            'args': args,
            'iter': i,
            'best_iter': best_iter,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_ap': best_ap,
        }, is_best, args.save)
        # early stop when no improvement for `patience` iterations (>0)
        if not is_best and i - best_iter >= args.patience > 0:
            break

    print('Best val_ap: {:.4f} at iter {}'.format(best_ap, best_iter))
def main():
    """Train a DenseNet classifier on CIFAR-10/100 or SVHN.

    Reads CLI options from the module-level `parser`; `best_prec1` is
    assumed to be initialised at module level -- TODO confirm.
    """
    global args, best_prec1
    args = parser.parse_args()
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    # Data loading code
    # (per-channel statistics are given in 0-255 scale, hence the /255)
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        # standard CIFAR augmentation: pad-and-crop plus horizontal flip
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    if args.dataset == "cifar10":
        train_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            './data', train=True, download=True, transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(datasets.CIFAR10(
            './data', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
    elif args.dataset == "cifar100":
        train_loader = torch.utils.data.DataLoader(datasets.CIFAR100(
            './data', train=True, download=True, transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(datasets.CIFAR100(
            './data', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
    elif args.dataset == "svhn":
        train_loader = torch.utils.data.DataLoader(datasets.SVHN(
            './data', split="train", download=True, transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(datasets.SVHN(
            './data', split="test", transform=transform_test, download=True),
            batch_size=args.batch_size, shuffle=True, **kwargs)

    # create model
    model = dn.DenseNet3(args.layers, args.num_class, args.z_dim, args.growth,
                         reduction=args.reduce, bottleneck=args.bottleneck,
                         dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=True,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    print('Best accuracy: ', best_prec1)
def main():
    # Python 2 script: train/evaluate the hierarchical scene-graph model on
    # Visual Genome, selecting one of four optimizer configurations
    # (optimizer_select 0-3) depending on the resume/finetune mode.
    global args, optimizer_select
    # To set the model name automatically
    print args
    lr = args.lr
    args = get_model_name(args)
    print 'Model name: {}'.format(args.model_name)

    # To set the random seed
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set..."),
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set, batch_size=1,
                                               shuffle=True, num_workers=8,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set, batch_size=1,
                                              shuffle=False, num_workers=8,
                                              pin_memory=True)

    # Model declaration
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        n_vocab=train_set.voc_size,
        voc_sign=train_set.voc_sign,
        max_word_length=train_set.max_size,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        # NOTE(review): "droptout" (sic) must match the model's kwarg name
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)
    params = list(net.parameters())
    for param in params:
        print param.size()
    print net

    # To group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    logger_path = "log/logger/{}".format(args.model_name)
    # start each run with a clean tensorboard log directory
    if os.path.exists(logger_path):
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    # network.weights_normal_init(net, dev=0.01)
    if args.finetune_language_model:
        print 'Only finetuning the language model from: {}'.format(args.resume_model)
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3
    elif args.load_RPN:
        print 'Loading pretrained RPN: {}'.format(args.saved_model_path)
        args.train_all = False
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1
    elif args.resume_training:
        print 'Resume training from: {}'.format(args.resume_model)
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2
    else:
        print 'Training from scratch.'
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    # report Recall@50 and Recall@100
    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    if args.evaluate:
        recall = test(test_loader, net, top_Ns)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print(
                '[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)'
                .format(top_N=top_N, recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100))
        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)

            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            # Testing
            # network.set_trainable(net, False) # Without backward(), requires_grad takes no effect
            recall = test(test_loader, net, top_Ns)

            # keep a "_best" snapshot only when BOTH recalls improve
            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(args.output_dir,
                                         '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    '\t[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)'
                    .format(top_N=top_N, recall=recall[idx] * 100,
                            best_recall=best_recall[idx] * 100)),

            # updating learning policy: /10 every step_size epochs, and
            # switch to training all layers (optimizer_select 2)
            if epoch % args.step_size == 0 and epoch > 0:
                lr /= 10
                args.lr = lr
                print '[learning rate: {}]'.format(lr)
                args.enable_clip_gradient = False
                if not args.finetune_language_model:
                    args.train_all = True
                    optimizer_select = 2
                # update optimizer and correponding requires_grad state
                optimizer = network.get_optimizer(lr, optimizer_select, args,
                                                  vgg_features_var,
                                                  rpn_features, hdn_features,
                                                  language_features)
def main():
    """Train SRGAN with a WGAN-GP critic.

    Phase 1 (fresh runs only): pre-train the generator with MSE for
    `n_epoch_pretrain` epochs. Phase 2: adversarial training, per-epoch
    tensorboard logging, checkpointing under cp/ and visualisations under
    vis/.

    Fix in this revision: the pre-train loop accumulated the autograd
    tensor (`cache['g_loss'] += image_loss`), keeping every iteration's
    graph alive and growing memory -- it now accumulates the Python float
    via .item(), matching the main loop.
    """
    n_epoch_pretrain = 2
    use_tensorboard = True

    parser = argparse.ArgumentParser(description='SRGAN Train')
    parser.add_argument('--crop_size', default=96, type=int, help='training images crop size')
    parser.add_argument('--num_epochs', default=500, type=int, help='training epoch')
    parser.add_argument('--batch_size', default=32, type=int, help='training batch size')
    parser.add_argument('--train_set', default='data/train', type=str, help='train set path')
    parser.add_argument('--check_point', type=int, default=-1, help="continue with previous check_point")
    opt = parser.parse_args()

    input_size = opt.crop_size
    n_epoch = opt.num_epochs
    batch_size = opt.batch_size
    check_point = opt.check_point

    check_point_path = 'cp/'
    if not os.path.exists(check_point_path):
        os.makedirs(check_point_path)

    train_set = TrainDataset(opt.train_set, crop_size=input_size, upscale_factor=4)
    train_loader = DataLoader(dataset=train_set, num_workers=2, batch_size=batch_size, shuffle=True)
    dev_set = DevDataset('data/dev', upscale_factor=4)
    dev_loader = DataLoader(dataset=dev_set, num_workers=1, batch_size=1, shuffle=False)

    mse = nn.MSELoss()

    if not torch.cuda.is_available():
        print ('!!!!!!!!!!!!!!USING CPU!!!!!!!!!!!!!')

    netG = Generator()
    print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
    netD = Discriminator_WGAN()
    print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

    if torch.cuda.is_available():
        netG.cuda()
        netD.cuda()
        mse.cuda()

    if use_tensorboard:
        configure('log', flush_secs=5)

    # Pre-train generator using only MSE loss
    if check_point == -1:
        optimizerG = optim.Adam(netG.parameters())
        for epoch in range(1, n_epoch_pretrain + 1):
            train_bar = tqdm(train_loader)
            netG.train()
            cache = {'g_loss': 0}
            for lowres, real_img_hr in train_bar:
                if torch.cuda.is_available():
                    real_img_hr = real_img_hr.cuda()
                if torch.cuda.is_available():
                    lowres = lowres.cuda()

                fake_img_hr = netG(lowres)

                # Train G
                netG.zero_grad()
                image_loss = mse(fake_img_hr, real_img_hr)
                # BUG FIX: accumulate the detached float, not the tensor
                cache['g_loss'] += image_loss.item()
                image_loss.backward()
                optimizerG.step()

                # Print information by tqdm
                train_bar.set_description(desc='[%d/%d] Loss_G: %.4f' % (epoch, n_epoch_pretrain, image_loss))

    # fresh optimizers for the adversarial phase
    optimizerG = optim.Adam(netG.parameters(), lr=1e-4)
    optimizerD = optim.Adam(netD.parameters(), lr=1e-4)

    # resume networks + optimizer state from the requested checkpoint
    if check_point != -1:
        if torch.cuda.is_available():
            netG.load_state_dict(torch.load('cp/netG_epoch_' + str(check_point) + '_gpu.pth'))
            netD.load_state_dict(torch.load('cp/netD_epoch_' + str(check_point) + '_gpu.pth'))
            optimizerG.load_state_dict(torch.load('cp/optimizerG_epoch_' + str(check_point) + '_gpu.pth'))
            optimizerD.load_state_dict(torch.load('cp/optimizerD_epoch_' + str(check_point) + '_gpu.pth'))
        else:
            netG.load_state_dict(torch.load('cp/netG_epoch_' + str(check_point) + '_cpu.pth'))
            netD.load_state_dict(torch.load('cp/netD_epoch_' + str(check_point) + '_cpu.pth'))
            optimizerG.load_state_dict(torch.load('cp/optimizerG_epoch_' + str(check_point) + '_cpu.pth'))
            optimizerD.load_state_dict(torch.load('cp/optimizerD_epoch_' + str(check_point) + '_cpu.pth'))

    for epoch in range(1 + max(check_point, 0), n_epoch + 1 + max(check_point, 0)):
        train_bar = tqdm(train_loader)
        netG.train()
        netD.train()
        cache = {'mse_loss': 0, 'adv_loss': 0, 'g_loss': 0, 'd_loss': 0,
                 'ssim': 0, 'psnr': 0,
                 'd_top_grad': 0, 'd_bot_grad': 0,
                 'g_top_grad': 0, 'g_bot_grad': 0}

        for lowres, real_img_hr in train_bar:
            #print ('lr size : ' + str(data.size()))
            #print ('hr size : ' + str(target.size()))
            if torch.cuda.is_available():
                real_img_hr = real_img_hr.cuda()
                lowres = lowres.cuda()

            fake_img_hr = netG(lowres)

            # Train D (WGAN-GP critic: fake - real + 10 * gradient penalty)
            netD.zero_grad()
            logits_real = netD(real_img_hr).mean()
            logits_fake = netD(fake_img_hr).mean()
            gradient_penalty = compute_gradient_penalty(netD, real_img_hr, fake_img_hr)
            d_loss = logits_fake - logits_real + 10*gradient_penalty
            cache['d_loss'] += d_loss.item()
            d_loss.backward(retain_graph=True)
            optimizerD.step()

            dtg, dbg = get_grads_D_WAN(netD)
            cache['d_top_grad'] += dtg
            cache['d_bot_grad'] += dbg

            # Train G: MSE content loss + small adversarial term
            netG.zero_grad()
            image_loss = mse(fake_img_hr, real_img_hr)
            adversarial_loss = -1*netD(fake_img_hr).mean()
            g_loss = image_loss + 1e-3*adversarial_loss
            cache['mse_loss'] += image_loss.item()
            cache['adv_loss'] += adversarial_loss.item()
            cache['g_loss'] += g_loss.item()
            g_loss.backward()
            optimizerG.step()

            gtg, gbg = get_grads_G(netG)
            cache['g_top_grad'] += gtg
            cache['g_bot_grad'] += gbg

            # Print information by tqdm
            train_bar.set_description(desc='[%d/%d] D grads:(%f, %f) G grads:(%f, %f) Loss_D: %.4f Loss_G: %.4f = %.4f + %.4f' % (
                epoch, n_epoch, dtg, dbg, gtg, gbg, d_loss, g_loss, image_loss, adversarial_loss))

        if use_tensorboard:
            log_value('d_loss', cache['d_loss']/len(train_loader), epoch)
            log_value('mse_loss', cache['mse_loss']/len(train_loader), epoch)
            log_value('adv_loss', cache['adv_loss']/len(train_loader), epoch)
            log_value('g_loss', cache['g_loss']/len(train_loader), epoch)
            log_value('D top layer gradient', cache['d_top_grad']/len(train_loader), epoch)
            log_value('D bot layer gradient', cache['d_bot_grad']/len(train_loader), epoch)
            log_value('G top layer gradient', cache['g_top_grad']/len(train_loader), epoch)
            log_value('G bot layer gradient', cache['g_bot_grad']/len(train_loader), epoch)

        # Save model parameters (G every epoch, D/optimizers every 5)
        if torch.cuda.is_available():
            torch.save(netG.state_dict(), 'cp/netG_epoch_%d_gpu.pth' % (epoch))
            if epoch%5 == 0:
                torch.save(netD.state_dict(), 'cp/netD_epoch_%d_gpu.pth' % (epoch))
                torch.save(optimizerG.state_dict(), 'cp/optimizerG_epoch_%d_gpu.pth' % (epoch))
                torch.save(optimizerD.state_dict(), 'cp/optimizerD_epoch_%d_gpu.pth' % (epoch))
        else:
            torch.save(netG.state_dict(), 'cp/netG_epoch_%d_cpu.pth' % (epoch))
            if epoch%5 == 0:
                torch.save(netD.state_dict(), 'cp/netD_epoch_%d_cpu.pth' % (epoch))
                torch.save(optimizerG.state_dict(), 'cp/optimizerG_epoch_%d_cpu.pth' % (epoch))
                torch.save(optimizerD.state_dict(), 'cp/optimizerD_epoch_%d_cpu.pth' % (epoch))

        # Visualize results
        with torch.no_grad():
            netG.eval()
            out_path = 'vis/'
            if not os.path.exists(out_path):
                os.makedirs(out_path)

            dev_bar = tqdm(dev_loader)
            valing_results = {'mse': 0, 'ssims': 0, 'psnr': 0, 'ssim': 0, 'batch_sizes': 0}
            dev_images = []
            for val_lr, val_hr_restore, val_hr in dev_bar:
                batch_size = val_lr.size(0)
                lr = val_lr
                hr = val_hr
                if torch.cuda.is_available():
                    lr = lr.cuda()
                    hr = hr.cuda()
                sr = netG(lr)

                psnr = 10 * log10(1 / ((sr - hr) ** 2).mean().item())
                ssim = pytorch_ssim.ssim(sr, hr).item()
                dev_bar.set_description(desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (psnr, ssim))
                cache['ssim'] += ssim
                cache['psnr'] += psnr

                # Avoid out of memory crash on 8G GPU
                if len(dev_images) < 60:
                    dev_images.extend([to_image()(val_hr_restore.squeeze(0)),
                                       to_image()(hr.data.cpu().squeeze(0)),
                                       to_image()(sr.data.cpu().squeeze(0))])

            # group (restored, HR, SR) triplets into one grid image each
            dev_images = torch.stack(dev_images)
            dev_images = torch.chunk(dev_images, dev_images.size(0) // 3)
            dev_save_bar = tqdm(dev_images, desc='[saving training results]')
            index = 1
            for image in dev_save_bar:
                image = utils.make_grid(image, nrow=3, padding=5)
                utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
                index += 1

            if use_tensorboard:
                log_value('ssim', cache['ssim']/len(dev_loader), epoch)
                log_value('psnr', cache['psnr']/len(dev_loader), epoch)
# Imports and task dispatch for the NTM/DNC experiments.
# BUG FIX: json.load() is used below but `json` was never imported,
# which raises NameError at startup.
import json

import torch
from torch import nn, optim
from tensorboard_logger import configure, log_value
from ntm import NTM
from ntm.datasets import CopyDataset, RepeatCopyDataset, AssociativeDataset, NGram, PrioritySort
from args import get_parser
from marnn import *
from dnc import DNC
from dnc.sam import SAM

args = get_parser().parse_args()
print("args:\n", args)
# NOTE(review): all runs log into the same "runs/" directory even though
# args.name is available -- confirm whether per-run dirs were intended.
configure("runs/")
print('name:', args.name)

# ----------------------------------------------------------------------------
# -- initialize datasets, model, criterion and optimizer
# ----------------------------------------------------------------------------
'''
'''
if args.task == 'copy':
    args.task_json = 'ntm/tasks/copy.json'
    task_params = json.load(open(args.task_json))
    task_params['max_seq_len'] = args.max_seq_len
    dataset = CopyDataset(task_params)
torch.save(state, args.cv_dir+'/ckpt_E_%d_R_%.2E'%(epoch, reward)) #--------------------------------------------------------------------------------------------------------# trainset, testset = utils.get_dataset(args.img_size, args.data_dir) trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers) testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers) agent = utils.get_model(num_actions) # ---- Load the pre-trained model ---------------------- start_epoch = 0 if args.load is not None: checkpoint = torch.load(args.load) agent.load_state_dict(checkpoint['agent']) start_epoch = checkpoint['epoch'] + 1 print 'loaded agent from', args.load # Parallelize the models if multiple GPUs available - Important for Large Batch Size if args.parallel: agent = nn.DataParallel(agent) agent.cuda() # Update the parameters of the policy network optimizer = optim.Adam(agent.parameters(), lr=args.lr) # Save the args to the checkpoint directory configure(args.cv_dir+'/log', flush_secs=5) for epoch in range(start_epoch, start_epocH+args.max_epochs+1): train(epoch) if epoch % 10 == 0: test(epoch)
def main():
    """Train the dual-encoding video-text retrieval model.

    Builds the experiment directory name from all hyper-parameters, loads
    vocabularies/features, optionally resumes, then trains with early
    stopping (10 non-improving epochs) and learning-rate halving.

    Fixes in this revision:
    - `map(int, ...)` results are wrapped in `list(...)`: under Python 3
      `map` is a lazy iterator, which breaks the `len(...)` calls and the
      `...mapping_layers[0] = ...` item assignments below (works unchanged
      on Python 2).
    - vocabulary pickle files are opened via `with` so the handles close.
    """
    opt = parse_args()
    print(json.dumps(vars(opt), indent=2))

    rootpath = opt.rootpath
    trainCollection = opt.trainCollection
    valCollection = opt.valCollection

    if opt.loss_fun == "mrl" and opt.measure == "cosine":
        assert opt.text_norm is True
        assert opt.visual_norm is True

    # checkpoint path
    model_info = '%s_concate_%s_dp_%.1f_measure_%s' % (opt.model, opt.concate, opt.dropout, opt.measure)
    # text-side multi-level encoding info
    text_encode_info = 'vocab_%s_word_dim_%s_text_rnn_size_%s_text_norm_%s' % \
        (opt.vocab, opt.word_dim, opt.text_rnn_size, opt.text_norm)
    text_encode_info += "_kernel_sizes_%s_num_%s" % (opt.text_kernel_sizes, opt.text_kernel_num)
    # video-side multi-level encoding info
    visual_encode_info = 'visual_feature_%s_visual_rnn_size_%d_l_2_norm_%d_visual_norm_%s' % \
        (opt.visual_feature, opt.visual_rnn_size, opt.do_visual_feas_norm, opt.visual_norm)
    visual_encode_info += "_kernel_sizes_%s_num_%s" % (opt.visual_kernel_sizes, opt.visual_kernel_num)
    # common space learning info
    mapping_info = "mapping_text_%s_img_%s" % (opt.text_mapping_layers, opt.visual_mapping_layers)
    loss_info = 'loss_func_%s_margin_%s_direction_%s_max_violation_%s_cost_style_%s' % \
        (opt.loss_fun, opt.margin, opt.direction, opt.max_violation, opt.cost_style)
    optimizer_info = 'optimizer_%s_lr_%s_decay_%.2f_grad_clip_%.1f_val_metric_%s' % \
        (opt.optimizer, opt.learning_rate, opt.lr_decay_rate, opt.grad_clip, opt.val_metric)

    opt.logger_name = os.path.join(rootpath, trainCollection, opt.cv_name, valCollection,
                                   model_info, text_encode_info, visual_encode_info,
                                   mapping_info, loss_info, optimizer_info, opt.postfix)
    print(opt.logger_name)

    # skip finished/locked experiments unless --overwrite
    if checkToSkip(os.path.join(opt.logger_name, 'model_best.pth.tar'), opt.overwrite):
        sys.exit(0)
    if checkToSkip(os.path.join(opt.logger_name, 'val_metric.txt'), opt.overwrite):
        sys.exit(0)
    makedirsforfile(os.path.join(opt.logger_name, 'val_metric.txt'))

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # BUG FIX: materialize the lazy map object (see docstring)
    opt.text_kernel_sizes = list(map(int, opt.text_kernel_sizes.split('-')))
    opt.visual_kernel_sizes = list(map(int, opt.visual_kernel_sizes.split('-')))

    # collections: trian, val
    collections = {'train': trainCollection, 'val': valCollection}
    cap_file = {'train': '%s.caption.txt' % trainCollection,
                'val': '%s.caption.txt' % valCollection}
    # caption
    caption_files = {x: os.path.join(rootpath, collections[x], 'TextData', cap_file[x])
                     for x in collections}
    # Load visual features
    visual_feat_path = {x: os.path.join(rootpath, collections[x], 'FeatureData', opt.visual_feature)
                        for x in collections}
    visual_feats = {x: BigFile(visual_feat_path[x]) for x in visual_feat_path}
    opt.visual_feat_dim = visual_feats['train'].ndims

    # set bow vocabulary and encoding
    bow_vocab_file = os.path.join(rootpath, opt.trainCollection, 'TextData',
                                  'vocabulary', 'bow', opt.vocab + '.pkl')
    with open(bow_vocab_file, 'rb') as f:
        bow_vocab = pickle.load(f)
    bow2vec = get_text_encoder('bow')(bow_vocab)
    opt.bow_vocab_size = len(bow_vocab)

    # set rnn vocabulary
    rnn_vocab_file = os.path.join(rootpath, opt.trainCollection, 'TextData',
                                  'vocabulary', 'rnn', opt.vocab + '.pkl')
    with open(rnn_vocab_file, 'rb') as f:
        rnn_vocab = pickle.load(f)
    opt.vocab_size = len(rnn_vocab)

    # initialize word embedding (pretrained w2v only for 500-d words)
    opt.we_parameter = None
    if opt.word_dim == 500:
        w2v_data_path = os.path.join(rootpath, "word2vec", 'flickr', 'vec500flickr30m')
        opt.we_parameter = get_we_parameter(rnn_vocab, w2v_data_path)

    # mapping layer structure; the first layer size depends on the
    # concatenation mode of the three encoding levels
    opt.text_mapping_layers = list(map(int, opt.text_mapping_layers.split('-')))
    opt.visual_mapping_layers = list(map(int, opt.visual_mapping_layers.split('-')))
    if opt.concate == 'full':
        opt.text_mapping_layers[0] = opt.bow_vocab_size + opt.text_rnn_size * 2 + opt.text_kernel_num * len(opt.text_kernel_sizes)
        opt.visual_mapping_layers[0] = opt.visual_feat_dim + opt.visual_rnn_size * 2 + opt.visual_kernel_num * len(opt.visual_kernel_sizes)
    elif opt.concate == 'reduced':
        opt.text_mapping_layers[0] = opt.text_rnn_size * 2 + opt.text_kernel_num * len(opt.text_kernel_sizes)
        opt.visual_mapping_layers[0] = opt.visual_rnn_size * 2 + opt.visual_kernel_num * len(opt.visual_kernel_sizes)
    else:
        raise NotImplementedError('Model %s not implemented' % opt.model)

    # set data loader
    video2frames = {x: read_dict(os.path.join(rootpath, collections[x], 'FeatureData',
                                              opt.visual_feature, 'video2frames.txt'))
                    for x in collections}
    data_loaders = data.get_data_loaders(
        caption_files, visual_feats, rnn_vocab, bow2vec, opt.batch_size,
        opt.workers, opt.n_caption, opt.do_visual_feas_norm,
        video2frames=video2frames)

    # Construct the model
    model = get_model(opt.model)(opt)
    opt.we_parameter = None

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
            validate(opt, data_loaders['val'], model, measure=opt.measure)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    best_rsum = 0
    no_impr_counter = 0
    lr_counter = 0
    best_epoch = None
    fout_val_metric_hist = open(os.path.join(opt.logger_name, 'val_metric_hist.txt'), 'w')
    for epoch in range(opt.num_epochs):
        print('Epoch[{0} / {1}] LR: {2}'.format(epoch, opt.num_epochs,
                                                get_learning_rate(model.optimizer)[0]))
        print('-' * 10)
        # train for one epoch
        train(opt, data_loaders['train'], model, epoch)

        # evaluate on validation set
        rsum = validate(opt, data_loaders['val'], model, measure=opt.measure)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        print(' * Current perf: {}'.format(rsum))
        print(' * Best perf: {}'.format(best_rsum))
        print('')
        fout_val_metric_hist.write('epoch_%d: %f\n' % (epoch, rsum))
        fout_val_metric_hist.flush()

        if is_best:
            save_checkpoint({
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            }, is_best, filename='checkpoint_epoch_%s.pth.tar' % epoch,
                prefix=opt.logger_name + '/', best_epoch=best_epoch)
            best_epoch = epoch

        lr_counter += 1
        decay_learning_rate(opt, model.optimizer, opt.lr_decay_rate)
        if not is_best:
            # Early stop occurs if the validation performance does not
            # improve in ten consecutive epochs
            no_impr_counter += 1
            if no_impr_counter > 10:
                save_checkpoint({
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'best_rsum': best_rsum,
                    'opt': opt,
                    'Eiters': model.Eiters,
                }, 0, filename='checkpoint_epoch_%s.pth.tar' % epoch,
                    prefix=opt.logger_name + '/')
                print('Early stopping happended.\n')
                break
            # When the validation performance decreased after an epoch,
            # we divide the learning rate by 2 and continue training;
            # but we use each learning rate for at least 3 epochs.
            if lr_counter > 2:
                decay_learning_rate(opt, model.optimizer, 0.5)
                lr_counter = 0
        else:
            no_impr_counter = 0

    # routine snapshot after the loop finishes/breaks
    save_checkpoint({
        'epoch': epoch + 1,
        'model': model.state_dict(),
        'best_rsum': best_rsum,
        'opt': opt,
        'Eiters': model.Eiters,
    }, 0, filename='checkpoint_epoch_%s.pth.tar' % epoch,
        prefix=opt.logger_name + '/')

    fout_val_metric_hist.close()
    print('best performance on validation: {}\n'.format(best_rsum))
    with open(os.path.join(opt.logger_name, 'val_metric.txt'), 'w') as fout:
        fout.write('best performance on validation: ' + str(best_rsum))
def fit(self):
    """Adversarially train the landmark-conditioned video generator.

    Per batch: update the discriminator on (real, wrong-landmark, fake)
    triples, then update the generator to fool it.  Sample grids are
    written to ``config.sample_dir`` roughly 50 times per epoch, model
    weights to ``config.model_dir`` once per epoch, and losses plus an
    ETA estimate are printed every 10 steps.
    """
    config = self.config
    configure("{}".format(config.log_dir), flush_secs=5)

    num_steps_per_epoch = len(self.data_loader)
    # FIX: the original guard was ``step % (num_steps_per_epoch / 50) == 0``;
    # true division yields a float, so the condition essentially never fired
    # unless num_steps_per_epoch was a multiple of 50.  Integer interval,
    # clamped to 1 for tiny loaders.
    sample_interval = max(num_steps_per_epoch // 50, 1)
    cc = 0
    config.perceptual = False

    def _to_var(tensor):
        # Wrap for autograd, on GPU when configured.
        return Variable(tensor).cuda() if config.cuda else Variable(tensor)

    for epoch in range(self.start_epoch, config.max_epochs):
        for step, (example_image, example_lms, right_imgs, right_lmss,
                   wrong_imgs, wrong_lmss) in enumerate(self.data_loader):
            t1 = time.time()

            example_image = _to_var(example_image)
            example_lms = _to_var(example_lms)
            right_lmss = _to_var(right_lmss)
            right_imgs = _to_var(right_imgs)
            wrong_imgs = _to_var(wrong_imgs)
            wrong_lmss = _to_var(wrong_lmss)

            fake_im = self.generator(example_image, right_lmss)
            real_im = right_imgs

            # --- discriminator update ---------------------------------
            # real pair -> 1; mismatched-landmark pair and detached fake -> 0.
            D_real = self.discriminator(example_image, real_im, right_lmss)
            D_wrong = self.discriminator(example_image, real_im, wrong_lmss)
            D_fake = self.discriminator(example_image, fake_im.detach(),
                                        right_lmss)

            loss_real = self.bce_loss_fn(D_real, self.ones)
            loss_wrong = self.bce_loss_fn(D_wrong, self.zeros)
            loss_fake = self.bce_loss_fn(D_fake, self.zeros)
            loss_disc = loss_real + 0.5 * (loss_wrong + loss_fake)

            loss_disc.backward()
            self.opt_d.step()
            self._reset_gradients()

            # --- generator update -------------------------------------
            fake_im = self.generator(example_image, right_lmss)
            D_fake = self.discriminator(example_image, fake_im, right_lmss)
            loss_gen = self.bce_loss_fn(D_fake, self.ones)
            loss = loss_gen

            loss.backward()
            self.opt_g.step()
            self._reset_gradients()

            t2 = time.time()

            if (step + 1) % 10 == 0 or (step + 1) == num_steps_per_epoch:
                # Steps left in this epoch plus all remaining full epochs.
                # FIX: the original ``num_steps_per_epoch - step + 1`` (and
                # the matching epoch term) lacked parentheses and
                # over-counted the remaining work by two in each term.
                steps_remain = (num_steps_per_epoch - (step + 1)
                                + (config.max_epochs - (epoch + 1))
                                * num_steps_per_epoch)
                # ETA extrapolates the duration of the current step only.
                eta = int((t2 - t1) * steps_remain)

                print(
                    "[{}/{}][{}/{}] Loss_G: {:.4f}, Loss_D: {:.4f}, ETA: {} second"
                    .format(epoch + 1, config.max_epochs, step + 1,
                            num_steps_per_epoch, loss_gen.data[0],
                            loss_disc.data[0], eta))

            if step % sample_interval == 0:
                # Each clip's 16 frames are unrolled into one 16-column grid.
                fake_store = fake_im.data.permute(
                    0, 2, 1, 3, 4).contiguous().view(
                        config.batch_size * 16, 3, 64, 64)
                torchvision.utils.save_image(
                    fake_store,
                    "{}fake_{}.png".format(config.sample_dir, cc),
                    nrow=16, normalize=True)
                real_store = right_imgs.data.permute(
                    0, 2, 1, 3, 4).contiguous().view(
                        config.batch_size * 16, 3, 64, 64)
                torchvision.utils.save_image(
                    real_store,
                    "{}real_{}.png".format(config.sample_dir, cc),
                    nrow=16, normalize=True)
                cc += 1

        # Snapshot both networks once per epoch, indexed by the running
        # sample counter (as in the original naming scheme).
        torch.save(self.generator.state_dict(),
                   "{}/generator_{}.pth".format(config.model_dir, cc))
        torch.save(self.discriminator.state_dict(),
                   "{}/discriminator_{}.pth".format(config.model_dir, cc))
import numpy as np from evaluate import evaluate from args import vocab_pkl_path, train_caption_pkl_path, feature_h5_path from args import num_epochs, batch_size, learning_rate, ss_factor from args import projected_size, hidden_size, mid_size from args import feature_size, max_frames, max_words from args import use_cuda, use_checkpoint from args import banet_pth_path, optimizer_pth_path from args import best_banet_pth_path, best_optimizer_pth_path from args import test_range, test_prediction_txt_path, test_reference_txt_path from args import log_environment from tensorboard_logger import configure, log_value sys.path.append('./coco-caption/') from pycocotools.coco import COCO configure(log_environment, flush_secs=10) # 加载词典 with open(vocab_pkl_path, 'rb') as f: vocab = pickle.load(f) vocab_size = len(vocab) # 构建模型 banet = BANet(feature_size, projected_size, mid_size, hidden_size, max_frames, max_words, vocab) if os.path.exists(banet_pth_path) and use_checkpoint: banet.load_state_dict(torch.load(banet_pth_path)) if use_cuda:
import torch import torch.nn as nn from sklearn.utils import shuffle from datasets import arxiv2 from transformers import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel from opt import OpenAIAdam from text_utils import TextEncoder from utils import (encode_dataset2, iter_data, ResultLogger, make_path) from loss import SummarizationLossCompute2 import pickle from tensorboard_logger import configure, log_value configure("./gpt2_analysis", flush_secs=5) def transform_arxiv(X1,X2): n_batch = len(X1) delimiter = [encoder['<|TL;DR|>']] end_token = [encoder['<|endoftext|>']] xmb = np.zeros((n_batch, n_ctx), dtype=np.int32) mmb = np.zeros((n_batch, n_ctx), dtype=np.float32) for i, (x1,x2), in enumerate(zip(X1,X2)): new_x1 = x1[:800] new_x2 = x2[:200] x12 = new_x1 + delimiter x13 = new_x2 + end_token xmb[i,:len(x12)] = x12 xmb[i,len(x12):len(x12)+len(x13)] = x13 mmb[i,:len(x12)] = 1
# Restore model weights (and, when present, training counters) from an
# existing checkpoint, then launch training with a save-on-interrupt guard.
print("Load checkpoint from: {}".format(checkpoint_path))
if use_cuda:
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cuda'))
else:
    checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
model.load_state_dict(checkpoint["state_dict"])
# optimizer.load_state_dict(checkpoint["optimizer"])
try:
    # Older checkpoints may not carry these counters; training then
    # restarts from the module-level defaults.
    global_step = checkpoint["global_step"]
    global_epoch = checkpoint["global_epoch"]
except:
    # TODO
    # NOTE(review): bare except — should be narrowed to KeyError so real
    # errors are not silently swallowed.
    pass

# Setup tensorboard logger
tensorboard_logger.configure("log/run-test")

print(hparams_debug_string())

# Train!
try:
    train(model, data_loader, optimizer,
          init_lr=hparams.initial_learning_rate,
          checkpoint_dir=checkpoint_dir,
          checkpoint_interval=hparams.checkpoint_interval,
          nepochs=hparams.nepochs,
          clip_thresh=hparams.clip_thresh)
except:
    # Persist current state on any interruption (including Ctrl-C)
    # before reporting the error.
    save_checkpoint(
        model, optimizer, global_step, checkpoint_dir, global_epoch)
    traceback.print_exc()
# Top-level setup for MSVD video-captioning training: logging, device
# selection, video-id discovery, and the word-embedding dictionary.
import torch
import numpy as np
from train_parameters import *
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm
import os
from glob import glob
import random
import math

from tensorboard_logger import configure, log_value

# Training curves for this run go under the MSVD dataset directory.
configure("../../dataset/MSVD/tensorboard/run-1")

device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

# GET VIDEO ID'S
# Caption files are named "<video_id>.??"; the last 3 characters are
# stripped to recover the id (presumably a ".pt"-style suffix — TODO confirm).
video_ids_tr = os.listdir(caption_tr_path)
video_ids_tr = [item[:-3] for item in video_ids_tr]
video_ids_vl = os.listdir(caption_vl_path)
video_ids_vl = [item[:-3] for item in video_ids_vl]
all_video_ids = video_ids_tr + video_ids_vl

# Dictionary of word:vector
word2vec = torch.load(word2vec_path)
# Sentence-boundary tokens get fixed embeddings: SOS = all zeros, EOS = all ones.
word2vec['SOS'] = np.zeros((word_dim))
word2vec['EOS'] = np.ones((word_dim))
args = parser.parse_args() args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) if args.debug: args.save += "_debug" utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py')) log_format = '%(asctime)s %(message)s' logging.basicConfig(stream=sys.stdout, level=logging.INFO, format=log_format, datefmt='%m/%d %I:%M:%S %p') fh = logging.FileHandler(os.path.join(args.save, 'log.txt')) fh.setFormatter(logging.Formatter(log_format)) logging.getLogger().addHandler(fh) configure(args.save + "/%s" % (args.name)) CIFAR_CLASSES = 10 def main(): if not torch.cuda.is_available(): logging.info('no gpu device available') sys.exit(1) np.random.seed(args.seed) torch.cuda.set_device(args.gpu) cudnn.benchmark = True torch.manual_seed(args.seed) cudnn.enabled = True torch.cuda.manual_seed(args.seed)
def main():
    """Train the MobileNetV2 + NASNet-A ensemble scene classifier.

    Builds the ensemble (warm-started from single-model checkpoints or a
    full resume checkpoint), sets up train/val loaders, then runs the
    train/validate loop with periodic checkpointing.
    """
    global best_prec1
    if args.tensorboard:
        # Encode the key hyper-parameters into the tensorboard run name.
        configure('log/'+args.arch.lower() + '_bs' + str(args.batch_size)
                  + '_ep' + str(args.epochs) + '_loglr' + str(args.lr)
                  + '_size' + str(args.img_size) + '_wd' + str(args.weight_decay))
    print(args)
    # create model
    print("=> creating model '{}'".format(args.arch))
    #if args.arch.lower().startswith('resnet'):
    #    model.avgpool = nn.AvgPool2d(args.img_size // 32, 1)
    #model.fc = nn.Linear(model.fc.in_features, args.num_classes)
    # default parameter n_class=1000, input_size=224, width_mult=1.
    model = Ensemble()
    if not args.resume:
        # Warm-start both branches from their single-model pretrained
        # weights; skipped when a full resume checkpoint is given below.
        model.MobileNetV2.load_state_dict(torch.load('mobilenet_pretrained.pth'))
        model.NASNetAMobile.load_state_dict(torch.load('nasnet_pretrained.pth'))
    model = torch.nn.DataParallel(model).cuda()
    print(model)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    import datautil
    '''
    norm_dict = {
    #320:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 1., 1., 1.]),
    320:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[0.2295, 0.2385, 0.2479]),
    0:transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
    256:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 0.2295, 0.2385, 0.2479]),
    224:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 0.2295, 0.2385, 0.2479]),
    }
    norm_default = norm_dict[0]
    normalize = norm_dict[args.img_size]
    currrent tensor([[ 0.4828, 0.4693, 0.4602]], device='cuda:0') tensor([[ 45.3332, 41.1241, 45.7719]], device='cuda:0')
    '''
    #normalize =
    # NOTE(review): the next line constructs a Normalize and discards it —
    # dead code left over from the commented-out per-size normalization.
    transforms.Normalize(mean=[0.48280172,0.46929353,0.46019437],std=[0.25859008,0.28414325,0.288328])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
                                     std=[0.229, 0.224, 0.225])
    train_data = datautil.SceneDataset(args.data,img_transform= transforms.Compose([
        transforms.RandomResizedCrop(args.img_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        normalize]))
    train_loader = torch.utils.data.DataLoader(train_data,batch_size=args.batch_size,shuffle=True,num_workers=args.workers,pin_memory=True)
    if args.val:
        val_data = datautil.SceneDataset(args.val, img_transform= transforms.Compose([
            #transforms.Scale(256),
            transforms.Resize((args.img_size,args.img_size)),
            transforms.ToTensor(),
            normalize]))
        # Half batch size for validation (presumably to fit memory at the
        # larger eval resolution — TODO confirm).
        val_loader = torch.utils.data.DataLoader(val_data,
                                                 batch_size=args.batch_size//2,
                                                 shuffle=False,
                                                 num_workers=args.workers,
                                                 pin_memory=True)

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)
    #optimizer = torch.optim.RMSprop(model.parameters(), args.lr,
    #                                momentum=args.momentum,
    #                                weight_decay=args.weight_decay,eps=1)

    if args.evaluate:
        validate(val_loader, model, criterion,0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if args.val:
            prec1 = validate(val_loader, model, criterion,epoch)

            # remember best prec@1 and save checkpoint
            # NOTE(review): is_best is computed but never used — confirm
            # whether save_checkpoint below should receive it.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
        if epoch % args.interval == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
            })
args.src_dataset, args.src_split, args.tgt_dataset, args.tgt_split, args.input_ch) if args.net in ["fcn", "psp"]: model_name = "%s-%s-%s-res%s" % (args.method, args.savename, args.net, args.res) else: model_name = "%s-%s-%s" % (args.method, args.savename, args.net) outdir = os.path.join(args.base_outdir, mode) # Create Model Dir pth_dir = os.path.join(outdir, "pth") mkdir_if_not_exist(pth_dir) # Create Model Dir and Set TF-Logger tflog_dir = os.path.join(outdir, "tflog", model_name) mkdir_if_not_exist(tflog_dir) configure(tflog_dir, flush_secs=5) # Save param dic if resume_flg: json_fn = os.path.join(outdir, "param-%s_resume.json" % model_name) else: json_fn = os.path.join(outdir, "param-%s.json" % model_name) check_if_done(json_fn) save_dic_to_json(args.__dict__, json_fn) train_img_shape = tuple([int(x) for x in args.train_img_shape]) use_crop = True if args.crop_size > 0 else False joint_transform = get_joint_transform(crop_size=args.crop_size, rotate_angle=args.rotate_angle) if use_crop else None
def main():
    """Train a multi-task WideResNet on CIFAR-10/100.

    Parses CLI args, builds (optionally augmented) CIFAR loaders,
    constructs the model, optionally resumes from a checkpoint, then runs
    the train/validate loop checkpointing the best top-1 accuracy.
    """
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    if args.augment:
        # Replicate-pad by 4, random 32x32 crop, horizontal flip.
        # NOTE(review): Variable(..., volatile=True) is the pre-0.4 torch
        # API — era-appropriate for this file, left unchanged.
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: F.pad(
                Variable(x.unsqueeze(0), requires_grad=False, volatile=True),
                (4, 4, 4, 4), mode='replicate').data.squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert (args.dataset == 'cifar10' or args.dataset == 'cifar100')
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data', train=True, download=True,
                                                transform=transform_train),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data', train=False,
                                                transform=transform_test),
        batch_size=args.batch_size, shuffle=True, **kwargs)

    # create model
    # FIX: replaced the fragile ``cond and 10 or 100`` idiom with a
    # conditional expression (same result: 10 classes for cifar10,
    # 100 otherwise).
    num_classes = 10 if args.dataset == 'cifar10' else 100
    model = WideResNetMulti(args.layers, num_classes,
                            args.num_rotate_classes, args.widen_factor,
                            dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum, nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)

    # FIX: the original ended with a Python 2 print *statement*
    # (``print 'Best accuracy: ', best_prec1``) — a SyntaxError under
    # Python 3 and inconsistent with every other print() in this file.
    # Output text is unchanged.
    print('Best accuracy: ', best_prec1)
def main():
    """Train a BERT-tokenized VSE image-text matching model.

    Handles checkpoint resume, a staged backbone-unfreezing warm-up
    schedule, per-epoch validation, and best-R@sum checkpointing.
    """
    # Hyper Parameters
    parser = arguments.get_argument_parser()
    opt = parser.parse_args()

    if not os.path.exists(opt.model_name):
        os.makedirs(opt.model_name)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)
    logger = logging.getLogger(__name__)
    logger.info(opt)

    # Load Tokenizer and Vocabulary
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    vocab = tokenizer.vocab
    opt.vocab_size = len(vocab)

    train_loader, val_loader = image_caption.get_loaders(
        opt.data_path, opt.data_name, tokenizer, opt.batch_size, opt.workers, opt)

    model = VSEModel(opt)

    # Epochs at which the learning rate is decayed.
    lr_schedules = [
        opt.lr_update,
    ]

    # optionally resume from a checkpoint
    start_epoch = 0
    if opt.resume:
        if os.path.isfile(opt.resume):
            logger.info("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            # Checkpoints are saved from a DataParallel-wrapped model, so
            # wrap before loading the state dict.
            if not model.is_data_parallel:
                model.make_data_parallel()
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another training
            model.Eiters = checkpoint['Eiters']
            logger.info(
                "=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                    opt.resume, start_epoch, best_rsum))
            # validate(opt, val_loader, model)
            if opt.reset_start_epoch:
                start_epoch = 0
        else:
            logger.info("=> no checkpoint found at '{}'".format(opt.resume))

    if not model.is_data_parallel:
        model.make_data_parallel()

    # Train the Model
    # NOTE(review): best_rsum restored from a resumed checkpoint is reset
    # to 0 here — confirm this is intended.
    best_rsum = 0
    for epoch in range(start_epoch, opt.num_epochs):
        logger.info(opt.logger_name)
        logger.info(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch, lr_schedules)

        # After the warm-up epochs, switch the rank loss from sum to
        # max-violation mode.
        if epoch >= opt.vse_mean_warmup_epochs:
            opt.max_violation = True
            model.set_max_violation(opt.max_violation)

        # Set up the all warm-up options
        # Staged unfreezing: embeddings only, then progressively deeper
        # backbone blocks (3 -> 2 -> 1 -> 0 = fully trainable).
        if opt.precomp_enc_type == 'backbone':
            if epoch < opt.embedding_warmup_epochs:
                model.freeze_backbone()
                logger.info(
                    'All backbone weights are frozen, only train the embedding layers'
                )
            else:
                model.unfreeze_backbone(3)

            if epoch < opt.embedding_warmup_epochs:
                logger.info('Warm up the embedding layers')
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs:
                model.unfreeze_backbone(
                    3)  # only train the last block of resnet backbone
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs * 2:
                model.unfreeze_backbone(2)
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs * 3:
                model.unfreeze_backbone(1)
            else:
                model.unfreeze_backbone(0)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if not os.path.exists(opt.model_name):
            os.mkdir(opt.model_name)
        # NOTE(review): the filename has no '{}' placeholder, so .format(epoch)
        # is a no-op and every epoch overwrites 'checkpoint.pth'.
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint.pth'.format(epoch),
            prefix=opt.model_name + '/')
# Build the per-dataset input transforms, seed CUDA, point the tensorboard
# logger at this run's directory, and launch complementary-label GAN training.
opt = args()
opt.data_r = opt.dataset
if opt.data_r == 'MNIST':
    ## MNIST: resize + normalize only, no augmentation.
    tsfm = transforms.Compose([
        transforms.Resize(opt.image_size),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
else:
    ## Other datasets: add crop/flip augmentation.
    tsfm = transforms.Compose([
        transforms.Resize(opt.image_size),
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])
torch.cuda.manual_seed(opt.manual_seed)
# e.g. "MNIST" -> "MNIST/MNIST_<p2>"; p2 presumably parameterizes the
# dataset variant — TODO confirm against args().
opt.dataset = os.path.join(opt.dataset, opt.dataset + '_' + str(opt.p2))  #opt.datasetNIST/MNIST_1.0
configure(os.path.join(opt.savingroot, opt.dataset,
                       str(opt.p1 * 100) + '%complementary/' + '/logs'),
          flush_secs=5)
##tensorboard_logger.configure(logdir, flush_secs=2)
##Configure logging: a file will be written to logdir, and flushed every flush_secs
train_gan(opt)
from torch.autograd import Variable import torch.nn as nn import torch.utils.data as dd import torch # Setup arguments and constants =============================================== # %% params = parser.parse_args() try: os.mkdir(params.checkpoint_dir) os.mkdir(params.log_dir) except FileExistsError: pass configure(params.log_dir) input_dim = 300 output_dim = 512 print('Starting with params:', params) # Pre-process data and create dataloaders ===================================== # %% print('Pre-processing data') (train, valid, test) = data.make_dataset(params) train_loader = dd.DataLoader(dataset=train, batch_size=params.batch_size, shuffle=True) valid_loader = dd.DataLoader(dataset=valid,
else: config = read_config.Config("config.yml") model_name = config.model_path.format(config.proportion, config.top_k, config.hidden_size, config.batch_size, config.optim, config.lr, config.weight_decay, config.dropout, "mix", config.mode) print(config.config) config.write_config("log/configs/{}_config.json".format(model_name)) configure("log/tensorboard/{}".format(model_name), flush_secs=5) callback = Callbacks(config.batch_size, "log/db/{}".format(model_name)) callback.add_element(["train_loss", "test_loss", "train_mse", "test_mse"]) data_labels_paths = {3: "data/one_op/expressions.txt", 5: "data/two_ops/expressions.txt", 7: "data/three_ops/expressions.txt"} proportion = config.proportion # proportion is in percentage. vary from [1, 100]. # First is training size and second is validation size per program length dataset_sizes = { 3: [proportion * 250, proportion * 50], 5: [proportion * 1000, proportion * 100],
# set output path ========================================================== path_out = '../trained_models/batch12_/' + args.path_out if not os.path.exists(path_out): # create output path os.makedirs(path_out) # create output for models path_models = os.path.join(path_out, 'models') if not os.path.exists(path_models): os.makedirs(path_models) # tensorboard configure("{}".format(path_out), flush_secs=5) # data ===================================================================== batch_size = args.batch_size n_epochs = args.n_epochs lr = args.lr DEPTH = args.depth AUGMENT = args.augment COORD = args.coord FLOW = args.flow # Datasets for DHF1K ds_train = DHF1K(mode=TRAIN, transformation=True, depth=DEPTH, d_augm=AUGMENT, coord=COORD) ds_validate = DHF1K(mode=VAL, transformation=True, depth=DEPTH, d_augm=AUGMENT, coord=COORD) # Dataloaders dataloader = {
from __future__ import print_function import torch import torch.nn as nn import torch.nn.functional as F import torch.optim as optim import torchvision from torchvision import datasets, transforms from torch.autograd import Variable import numpy as np import models.joint_resnet3 from tensorboard_logger import configure, log_value import config use_cuda = torch.cuda.is_available() OUTPATH = './checkpoint/checkpoint_joint3' configure("runs/run-joint3", flush_secs=5) EPOCH = config.EPOCH BATCH = config.BATCH # Training dataset train_loader = torch.utils.data.DataLoader(datasets.CIFAR100( root='.', train=True, download=True, transform=transforms.Compose([ transforms.RandomCrop(32, padding=4), transforms.RandomHorizontalFlip(), transforms.ToTensor(), transforms.Normalize((0.5071, 0.4867, 0.4408), (0.2675, 0.2565, 0.2761)), ])),
def main():
    """Train the SCAN image-text matching model on f30k/coco features.

    Parses hyper-parameters, builds data loaders and the model, optionally
    resumes from a checkpoint, then runs the train/validate loop saving
    the best-R@sum checkpoint each epoch.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/data',
        help='path to datasets')
    parser.add_argument('--data_name', default='f30k_precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument(
        '--vocab_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/vocab/',
        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=20, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--decoder_dim', default=512, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=10, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=4, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=30, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name', default='./runs/runX/checkpoint',
                        help='Path to save the model.')
    parser.add_argument(
        '--resume',
        default=
        '/data3/zhangyf/cross_modal_retrieval/vsepp_next_train_12_31_f30k/run/coco_vse++_ft_128_f30k_next/model_best.pth.tar',
        type=str, metavar='PATH',
        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm', action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument('--precomp_enc_type', default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--reset_train', action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type', default='resnet152',
                        help="""The CNN used for image encoder (e.g. vgg19, resnet152)""")
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = pickle.load(
        open(os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name),
             'rb'))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model
    model = SCAN(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    # NOTE(review): a resumed best_rsum is reset to 0 here (as in the
    # original) — confirm this is intended.
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        # FIX: the original assigned the returned best score to a
        # misspelled 'bset_rsum', silently discarding the best-R@sum that
        # train() updates via its mid-epoch validations.
        best_rsum = train(opt, train_loader, model, epoch, val_loader,
                          best_rsum)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if not os.path.exists(opt.model_name):
            os.mkdir(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
def main():
    """Train a rotation-aware classifier on CIFAR-style 32x32 images.

    Parses CLI args, builds train/val loaders, constructs the model and
    SGD optimizer, optionally resumes, then runs the train/validate loop
    under a MultiStepLR schedule, checkpointing the best top-1 accuracy.
    """
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    # Edge-pad by 4 then random 32x32 crop + horizontal flip.
    transform_train = transforms.Compose([
        transforms.Pad(4, padding_mode='edge'),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data', train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=args.batch_size, shuffle=True, **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data', train=False,
                                                transform=transform_test),
        batch_size=args.batch_size, shuffle=False, **kwargs)

    # create model
    model = get_model(args.arch, args.dataset, args.num_rotate_classes)
    model = model.cuda()

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define learning rate scheduler
    # Default (no milestones given): a single milestone at args.epochs,
    # i.e. the LR never decays during training.
    if not args.milestones:
        milestones = [args.epochs]
    else:
        milestones = args.milestones
    # last_epoch aligns the schedule with a resumed start_epoch.
    scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=milestones,
                                         gamma=args.gamma,
                                         last_epoch=args.start_epoch - 1)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().cuda()

    for epoch in range(args.start_epoch, args.epochs):
        # NOTE(review): scheduler.step() before train() is the pre-1.1
        # PyTorch ordering; fine for the torch version this file targets.
        scheduler.step()

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model_state': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)

    print('Best accuracy: {}'.format(best_prec1))
def main():
    """Train SCAN (Stacked Cross Attention) on precomputed features.

    Parses hyper-parameters, loads the vocabulary and data loaders,
    builds the model, optionally resumes, then runs the train/validate
    loop, checkpointing the best R@K sum per epoch.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='./data/',
                        help='path to datasets')
    parser.add_argument('--data_name', default='precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument('--vocab_path', default='./vocab/',
                        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name', default='./runs/runX/checkpoint',
                        help='Path to save the model.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm', action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument(
        '--raw_feature_norm', default="clipped_l2norm",
        help='clipped_l2norm|l2norm|clipped_l1norm|l1norm|no_norm|softmax')
    parser.add_argument('--agg_func', default="LogSumExp",
                        help='LogSumExp|Mean|Max|Sum')
    parser.add_argument('--cross_attn', default="t2i",
                        help='t2i|i2t')
    parser.add_argument('--precomp_enc_type', default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--bi_gru', action='store_true',
                        help='Use bidirectional GRU.')
    parser.add_argument('--lambda_lse', default=6., type=float,
                        help='LogSumExp temp.')
    parser.add_argument('--lambda_softmax', default=9., type=float,
                        help='Attention softmax temperature.')
    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model
    model = SCAN(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    # NOTE(review): a resumed best_rsum is reset to 0 here — confirm intended.
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if not os.path.exists(opt.model_name):
            os.mkdir(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
def __init__(self, config, data_loader):
    """Create a DenseNet trainer from parsed command-line options.

    Params
    ------
    - config: object containing command line arguments.
    - data_loader: `(train_loader, valid_loader)` when `config.is_train`,
      otherwise a single test loader.
    """
    self.config = config

    # Data iterators: a pair in training mode, a single loader otherwise.
    if config.is_train:
        self.train_loader, self.valid_loader = data_loader[0], data_loader[1]
    else:
        self.test_loader = data_loader

    # DenseNet architecture hyper-parameters.
    self.num_blocks = config.num_blocks
    self.num_layers_total = config.num_layers_total
    self.growth_rate = config.growth_rate
    self.bottleneck = config.bottleneck
    self.theta = config.compression

    # Optimization hyper-parameters and bookkeeping.
    self.epochs = config.epochs
    self.start_epoch = 0
    self.best_valid_acc = 0.
    self.init_lr = config.init_lr
    self.lr = self.init_lr
    self.momentum = config.momentum
    self.weight_decay = config.weight_decay
    self.dropout_rate = config.dropout_rate

    # An empty schedule string disables LR decay entirely; otherwise the
    # comma-separated milestones are parsed as floats.
    self.is_decay = config.lr_sched != ''
    if self.is_decay:
        self.lr_decay = [float(tok) for tok in config.lr_sched.split(',')]

    # Environment / logging options.
    self.ckpt_dir = config.ckpt_dir
    self.logs_dir = config.logs_dir
    self.num_gpu = config.num_gpu
    self.use_tensorboard = config.use_tensorboard
    self.resume = config.resume
    self.print_freq = config.print_freq

    # Label count per dataset; anything unrecognized is treated as
    # an ImageNet-sized problem (1000 classes).
    self.dataset = config.dataset
    self.num_classes = {'cifar10': 10, 'cifar100': 100}.get(self.dataset, 1000)

    # build densenet model
    self.model = DenseNet(self.num_blocks, self.num_layers_total,
                          self.growth_rate, self.num_classes,
                          self.bottleneck, self.dropout_rate, self.theta)

    print('[*] Number of model parameters: {:,}'.format(
        sum(p.data.nelement() for p in self.model.parameters())))

    # define loss and optimizer
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.SGD(self.model.parameters(),
                                     lr=self.init_lr,
                                     momentum=self.momentum,
                                     weight_decay=self.weight_decay)

    if self.num_gpu > 0:
        self.model.cuda()
        self.criterion.cuda()

    # finally configure tensorboard logging (directory created on demand)
    if self.use_tensorboard:
        tb_dir = self.logs_dir + self.get_model_name()
        print('[*] Saving tensorboard logs to {}'.format(tb_dir))
        if not os.path.exists(tb_dir):
            os.makedirs(tb_dir)
        configure(tb_dir)
def __init__(self, config, data_loader):
    """Build a RAM (recurrent attention model) trainer.

    Args
    ----
    - config: object containing command line arguments.
    - data_loader: `(train_loader, valid_loader)` when `config.is_train`,
      otherwise a single test loader.
    """
    self.config = config

    # Glimpse network hyper-parameters.
    self.patch_size = config.patch_size
    self.glimpse_scale = config.glimpse_scale
    self.num_patches = config.num_patches
    self.loc_hidden = config.loc_hidden
    self.glimpse_hidden = config.glimpse_hidden

    # Core network hyper-parameters.
    self.num_glimpses = config.num_glimpses
    self.hidden_size = config.hidden_size

    # REINFORCE parameters.
    self.std = config.std
    self.M = config.M

    # Data iterators and dataset sizes.
    if config.is_train:
        self.train_loader, self.valid_loader = data_loader[0], data_loader[1]
        self.num_train = len(self.train_loader.sampler.indices)
        self.num_valid = len(self.valid_loader.sampler.indices)
    else:
        self.test_loader = data_loader
        self.num_test = len(self.test_loader.dataset)
    # Hard-coded: 10-way classification over single-channel images.
    self.num_classes = 10
    self.num_channels = 1

    # Training schedule.
    self.epochs = config.epochs
    self.start_epoch = 0
    self.momentum = config.momentum
    self.lr = config.init_lr

    # Misc / logging options.
    self.use_gpu = config.use_gpu
    self.best = config.best
    self.ckpt_dir = config.ckpt_dir
    self.logs_dir = config.logs_dir
    self.best_valid_acc = 0.
    self.counter = 0
    self.lr_patience = config.lr_patience
    self.train_patience = config.train_patience
    self.use_tensorboard = config.use_tensorboard
    self.resume = config.resume
    self.print_freq = config.print_freq
    self.plot_freq = config.plot_freq

    # Run identifier encodes glimpse count, patch geometry and scale.
    self.model_name = 'ram_{}_{}x{}_{}'.format(
        config.num_glimpses, config.patch_size,
        config.patch_size, config.glimpse_scale
    )

    self.plot_dir = './plots/' + self.model_name + '/'
    if not os.path.exists(self.plot_dir):
        os.makedirs(self.plot_dir)

    # configure tensorboard logging
    if self.use_tensorboard:
        tb_dir = self.logs_dir + self.model_name
        print('[*] Saving tensorboard logs to {}'.format(tb_dir))
        if not os.path.exists(tb_dir):
            os.makedirs(tb_dir)
        configure(tb_dir)

    # build RAM model
    self.model = RecurrentAttention(
        self.patch_size, self.num_patches, self.glimpse_scale,
        self.num_channels, self.loc_hidden, self.glimpse_hidden,
        self.std, self.hidden_size, self.num_classes,
    )
    if self.use_gpu:
        self.model.cuda()

    print('[*] Number of model parameters: {:,}'.format(
        sum(p.data.nelement() for p in self.model.parameters())))

    # NOTE: an SGD + ReduceLROnPlateau setup used to live here; the code
    # now uses Adam with a fixed 3e-4 learning rate instead.
    self.optimizer = optim.Adam(self.model.parameters(), lr=3e-4)
def main():
    """Train an open-set classifier with `num_classes + 1` outputs (ROWL).

    Reads every setting from the module-level ``args`` namespace: builds the
    in-distribution loaders for the chosen dataset, constructs the model and
    a targeted L-inf PGD adversary, optionally resumes from a checkpoint,
    then runs the train/validate loop, saving a checkpoint every
    ``args.save_epoch`` epochs.

    Raises:
        ValueError: if ``args.in_dataset`` or ``args.model_arch`` is not
            one of the supported choices.
    """
    if args.tensorboard:
        configure("runs/%s"%(args.name))

    # Training-time augmentation is optional; the test transform is plain.
    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    if args.in_dataset == "CIFAR-10":
        # Data loading code. Normalization is handed to the model via the
        # `normalizer` argument rather than applied in the transform.
        normalizer = transforms.Normalize(
            mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
            std=[x/255.0 for x in [63.0, 62.1, 66.7]])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/CIFAR-10',
                                             transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('./datasets/cifar10', train=False,
                             transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        num_classes = 10
        lr_schedule = [50, 75, 90]
    elif args.in_dataset == "CIFAR-100":
        # Data loading code
        normalizer = transforms.Normalize(
            mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
            std=[x/255.0 for x in [63.0, 62.1, 66.7]])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/CIFAR-100',
                                             transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100('./datasets/cifar100', train=False,
                              transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        num_classes = 100
        lr_schedule = [50, 75, 90]
    elif args.in_dataset == "SVHN":
        # Data loading code. SVHN runs un-normalized and on a shorter budget.
        normalizer = None
        transform = transforms.Compose([transforms.ToTensor(),])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/SVHN',
                                             transform=transform),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            svhn.SVHN('datasets/svhn/', split='test',
                      transform=transforms.ToTensor(), download=False),
            batch_size=args.batch_size, shuffle=False, **kwargs)
        args.epochs = 20
        args.save_epoch = 2
        lr_schedule = [10, 15, 18]
        num_classes = 10
    else:
        # Fail fast: previously an unknown dataset fell through silently and
        # crashed later with a NameError on `normalizer` / `lr_schedule`.
        raise ValueError('Not supported in_dataset: {}'.format(args.in_dataset))

    # create model (one extra logit reserved for the "unknown" class)
    if args.model_arch == 'densenet':
        model = dn.DenseNet3(args.layers, num_classes + 1, args.growth,
                             reduction=args.reduce, bottleneck=args.bottleneck,
                             dropRate=args.droprate, normalizer=normalizer)
    elif args.model_arch == 'wideresnet':
        model = wn.WideResNet(args.depth, num_classes + 1,
                              widen_factor=args.width,
                              dropRate=args.droprate, normalizer=normalizer)
    else:
        # `raise` instead of `assert False`: asserts vanish under `python -O`.
        raise ValueError('Not supported model arch: {}'.format(args.model_arch))

    # Targeted PGD adversary used during adversarial training.
    attack = LinfPGDAttack(model=model, eps=args.epsilon, nb_iter=args.iters,
                           eps_iter=args.iter_size, rand_init=True,
                           targeted=True, num_classes=num_classes + 1,
                           loss_func='CE', elementwise_best=True)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum(p.data.nelement() for p in model.parameters())))

    model = model.cuda()
    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                nesterov=True,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, lr_schedule)

        # train for one epoch
        train_rowl(train_loader, model, criterion, optimizer, epoch,
                   num_classes, attack)

        # evaluate on validation set (prec1 is computed for logging inside
        # validate; it is not used for best-model selection here)
        prec1 = validate(val_loader, model, criterion, num_classes, epoch)

        # periodic checkpoint
        if (epoch + 1) % args.save_epoch == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
            }, epoch + 1)
def test(rank, args, shared_model, dtype):
    """Evaluation worker for the A3C Love Letter agent.

    Runs greedy (argmax) episodes forever against a random opponent,
    re-syncing weights from `shared_model` at the start of each episode,
    logging episode rewards to tensorboard, and saving checkpoints whenever
    a new best reward or win-rate vs. a random agent is observed.

    Params
    ------
    - rank: worker index, used to decorrelate RNG seeds across processes.
    - args: parsed command-line options (seed, save_name, evaluate,
      max_episode_length, ...).
    - shared_model: ActorCritic whose parameters are shared with trainers.
    - dtype: tensor type to run inference with (CPU or CUDA float).
    """
    test_ctr = 0
    torch.manual_seed(args.seed + rank)

    # set up logger
    timestring = str(date.today()) + '_' + \
        time.strftime("%Hh-%Mm-%Ss", time.localtime(time.time()))
    run_name = args.save_name + '_' + timestring
    configure("logs/run_" + run_name, flush_secs=5)

    env = LoveLetterEnv(AgentRandom(args.seed + rank), args.seed + rank)
    env.seed(args.seed + rank)

    state = env.reset()
    # Local copy of the network; weights are refreshed from shared_model below.
    model = ActorCritic(state.shape[0], env.action_space).type(dtype)
    model.eval()

    state = torch.from_numpy(state).type(dtype)
    reward_sum = 0
    max_reward = -99999999
    max_winrate = 0
    # Sliding window of the last 100 episode rewards.
    rewards_recent = deque([], 100)
    done = True
    start_time = time.time()

    episode_length = 0
    while True:
        episode_length += 1
        # Sync with the shared model
        # NOTE(review): `volatile=True` Variables are the pre-0.4 PyTorch way
        # of disabling autograd for inference.
        if done:
            model.load_state_dict(shared_model.state_dict())
            cx = Variable(torch.zeros(1, 256).type(dtype), volatile=True)
            hx = Variable(torch.zeros(1, 256).type(dtype), volatile=True)
        else:
            # Carry the LSTM hidden state across steps within an episode.
            cx = Variable(cx.data.type(dtype), volatile=True)
            hx = Variable(hx.data.type(dtype), volatile=True)

        value, logit, (hx, cx) = model((Variable(state.unsqueeze(0), volatile=True),
                                        (hx, cx)))
        prob = F.softmax(logit)
        # Greedy evaluation: always take the most probable action.
        action = prob.max(1)[1].data.cpu().numpy()

        state, reward, done, _ = env.step(action[0, 0])
        done = done or episode_length >= args.max_episode_length
        reward_sum += reward

        if done:
            rewards_recent.append(reward_sum)
            rewards_recent_avg = sum(rewards_recent) / len(rewards_recent)
            print(
                "{} | Episode Reward {: >4}, Length {: >2} | Avg Reward {:0.2f}"
                .format(
                    time.strftime("%Hh %Mm %Ss",
                                  time.gmtime(time.time() - start_time)),
                    reward_sum, episode_length, rewards_recent_avg))
            # if not stuck or args.evaluate:
            log_value('Reward', reward_sum, test_ctr)
            log_value('Reward Average', rewards_recent_avg, test_ctr)
            log_value('Episode length', episode_length, test_ctr)

            if reward_sum >= max_reward:
                # pickle.dump(shared_model.state_dict(), open(args.save_name + '_max' + '.p', 'wb'))
                path_output = args.save_name + '_max'
                torch.save(shared_model.state_dict(), path_output)
                # Also keep a timestamped copy of this best-so-far snapshot.
                path_now = "{}_{}".format(args.save_name,
                                          datetime.datetime.now().isoformat())
                torch.save(shared_model.state_dict(), path_now)
                max_reward = reward_sum

                # Benchmark the snapshot against a random agent over 800 games.
                win_rate_v_random = Arena.compare_agents_float(
                    lambda seed: AgentA3C(path_output, dtype, seed),
                    lambda seed: AgentRandom(seed), 800)
                msg = " {} | VsRandom: {: >4}%".format(
                    datetime.datetime.now().strftime("%c"),
                    round(win_rate_v_random * 100, 2))
                print(msg)
                log_value('Win Rate vs Random', win_rate_v_random, test_ctr)
                if win_rate_v_random > max_winrate:
                    print("Found superior model at {}".format(
                        datetime.datetime.now().isoformat()))
                    torch.save(
                        shared_model.state_dict(),
                        "{}_{}_best_{}".format(
                            args.save_name,
                            datetime.datetime.now().isoformat(),
                            win_rate_v_random))
                    max_winrate = win_rate_v_random

            reward_sum = 0
            episode_length = 0
            state = env.reset()
            test_ctr += 1

            # Periodic (non-best) checkpoint every 10 evaluation episodes.
            if test_ctr % 10 == 0 and not args.evaluate:
                # pickle.dump(shared_model.state_dict(), open(args.save_name + '.p', 'wb'))
                torch.save(shared_model.state_dict(), args.save_name)
            if not args.evaluate:
                # Throttle evaluation so trainers get CPU time between episodes.
                time.sleep(60)
            elif test_ctr == evaluation_episodes:
                # NOTE(review): `evaluation_episodes` is not defined in this
                # function — presumably a module-level constant; verify.
                # Ensure the environment is closed so we can complete the
                # submission
                env.close()
                # gym.upload('monitor/' + run_name, api_key=api_key)

        state = torch.from_numpy(state).type(dtype)
# Per-run output directories for generated images and model snapshots;
# assumes "%s/run-%d" and its "logs" subdirectory were created just above
# this chunk.
os.mkdir("%s/run-%d/images" % (param.output_folder, run))
os.mkdir("%s/run-%d/models" % (param.output_folder, run))
# where we save the output
log_output = open("%s/run-%d/logs/log.txt" % (param.output_folder, run), 'w')
# Echo the full hyper-parameter namespace to stdout and to the log file.
print(param)
print(param, file=log_output)

import numpy
import torch
import torch.autograd as autograd
from torch.autograd import Variable

# For plotting the Loss of D and G using tensorboard
from tensorboard_logger import configure, log_value
configure("%s/run-%d/logs" % (param.output_folder, run), flush_secs=5)

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transf
import torchvision.models as models
import torchvision.utils as vutils

if param.cuda:
    import torch.backends.cudnn as cudnn
    # Let cuDNN benchmark conv algorithms (faster for fixed input sizes).
    cudnn.benchmark = True

# To see images
from IPython.display import Image
to_img = transf.ToPILImage()
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from tensorboard_logger import configure, log_value

from utils import spacy_tokenizer, file_stats
from utils import create_word_vocab, create_weights
from utils import to_tensor
from model import ATAE_LSTM

# Tensorboard run directory for this experiment.
configure("runs/model_pd_10", flush_secs=5)

###################################################################
########################## Data ###################################
###################################################################
# Input data files (JSON); format is defined by the utils loaders.
restaurant_train_file = 'Data/Restaurant/restaurant_train.json'
restaurant_test_file = 'Data/Restaurant/restaurant_test.json'
pd_data = 'Data/targeted_data.json'

###################################################################
##################### Vocab Initialization ########################
###################################################################
# Vocabulary built incrementally; '<pad>' is appended first so it maps
# to index 0 once word2idx is populated (population continues below).
words = []
word2idx = {}
words.append('<pad>')