# Create the output sub-directories for this run. os.mkdir raises if the
# directory already exists or if its parent directory is missing.
os.mkdir(f"{base_dir}/models")
if param.gen_extra_images > 0:
	# Created only when param.gen_extra_images is positive.
	os.mkdir(f"{base_dir}/images/extra")

# where we save the output
# NOTE(review): this handle is not closed anywhere in the visible code.
log_output = open(f"{logs_dir}/log.txt", 'w')
# Echo the run parameters both to the console and to the log file.
print(param)
print(param, file=log_output)

import torch
import torch.autograd as autograd
from torch.autograd import Variable

# For plotting the Loss of D and G using tensorboard
from tensorboard_logger import configure, log_value
# Point tensorboard_logger at the run's log directory.
configure(logs_dir, flush_secs=5)

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transf
import torchvision.models as models
import torchvision.utils as vutils

if param.cuda:
	# cudnn is only imported and put in benchmark mode when CUDA was requested.
	import torch.backends.cudnn as cudnn
	cudnn.benchmark = True

# To see images
from IPython.display import Image
# Transform object that converts a tensor to a PIL image.
to_img = transf.ToPILImage()
Ejemplo n.º 2
0
                    type=int,
                    default=200,
                    help="Number of epochs to train for")
parser.add_argument("-run_name",
                    type=str,
                    default="sem_seg_run_1",
                    help="Name for run in tensorboard_logger")

BASE_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'data')

lr_clip = 1e-5
bnm_clip = 1e-2

if __name__ == "__main__":
    args = parser.parse_args()
    tb_log.configure('runs/{}'.format(args.run_name))

    test_set = Indoor3DSemSeg(args.num_points,
                              BASE_DIR,
                              train=False,
                              data_precent=0.01)
    test_loader = DataLoader(test_set,
                             batch_size=args.batch_size,
                             shuffle=True,
                             pin_memory=True,
                             num_workers=2)

    train_set = Indoor3DSemSeg(args.num_points, BASE_DIR, data_precent=1.0)
    train_loader = DataLoader(train_set,
                              batch_size=args.batch_size,
                              pin_memory=True,
Ejemplo n.º 3
0
                             batch_size=args.batch_size,
                             shuffle=False,
                             pin_memory=True,
                             num_workers=args.workers)

    if args.mode == 'train':
        train_set = Indoor3DSemSeg(8192, train=True)
        train_loader = DataLoader(train_set,
                                  batch_size=args.batch_size,
                                  shuffle=True,
                                  pin_memory=True,
                                  num_workers=args.workers)
        # output dir config
        output_dir = os.path.join(args.output_dir, args.extra_tag)
        os.makedirs(output_dir, exist_ok=True)
        tb_log.configure(os.path.join(output_dir, 'tensorboard'))
        ckpt_dir = os.path.join(output_dir, 'ckpt')
        os.makedirs(ckpt_dir, exist_ok=True)

        log_file = os.path.join(output_dir, 'log.txt')
        log_f = open(log_file, 'w')

        for key, val in vars(args).items():
            log_print("{:16} {}".format(key, val), log_f=log_f)

        # train and eval
        train_and_eval(model, train_loader, eval_loader, tb_log, ckpt_dir,
                       log_f)
        log_f.close()
    elif args.mode == 'eval':
        epoch = load_checkpoint(model, args.ckpt)
Ejemplo n.º 4
0
    # Load checkpoint
    if checkpoint_path:
        print("Load checkpoint from: {}".format(checkpoint_path))
        checkpoint = torch.load(checkpoint_path)
        model.load_state_dict(checkpoint["state_dict"])
        optimizer.load_state_dict(checkpoint["optimizer"])
        try:
            global_step = checkpoint["global_step"]
            global_epoch = checkpoint["global_epoch"]
        except:
            # TODO
            pass

    # Setup tensorboard logger
    tensorboard_logger.configure(log_path)

    print(hparams_debug_string())

    # Train!
    try:
        train(model,
              train_loader,
              val_loader,
              optimizer,
              init_lr=hparams.initial_learning_rate,
              checkpoint_dir=checkpoint_dir,
              checkpoint_interval=hparams.checkpoint_interval,
              nepochs=hparams.nepochs,
              clip_thresh=hparams.clip_thresh)
    except KeyboardInterrupt:
def init(opt):
    """Create the data pipeline, networks, losses and optimizers for GAN training.

    Side effects: creates the directory ``opt.out`` (errors from the creation
    are ignored), may write a warning to stdout, and configures the
    tensorboard logger under ``logs/``.

    Args:
        opt: parsed options object. The attributes read here are ``out``,
            ``cuda``, ``imageSize`` (indexed with ``[0]`` and ``[1]``),
            ``upSampling``, ``dataset``, ``dataroot``, ``batchSize``,
            ``workers``, ``generatorWeights``, ``discriminatorWeights``,
            ``generatorLR`` and ``discriminatorLR``.

    Returns:
        The 12-tuple ``(normalize, scale, dataloader, generator,
        discriminator, feature_extractor, content_criterion,
        adversarial_criterion, ones_const, optim_generator,
        optim_discriminator, low_res)``.

    Raises:
        ValueError: if ``opt.dataset`` is not one of ``'folder'``,
            ``'cifar10'`` or ``'cifar100'``.
    """
    # [folder] create folder for checkpoints; failures (e.g. the folder
    # already exists) are deliberately ignored, as before
    try:
        os.makedirs(opt.out)
    except OSError:
        pass

    # [cuda] check cuda, if cuda is available but unused, then display warning
    if torch.cuda.is_available() and not opt.cuda:
        # Terminate the line so later output does not run into the warning.
        sys.stdout.write('[WARNING] : You have a CUDA device, so you should probably run with --cuda\n')

    # Per-channel statistics shared by both Normalize transforms below.
    channel_mean = [0.485, 0.456, 0.406]
    channel_std = [0.229, 0.224, 0.225]

    # [normalization] __return__ normalize images, set up mean and std
    normalize = transforms.Normalize(mean=channel_mean, std=channel_std)

    # [scale] __return__ tensor -> PIL -> resize to opt.imageSize -> tensor -> normalize
    scale = transforms.Compose([
        transforms.ToPILImage(),
        transforms.Resize(opt.imageSize),
        transforms.ToTensor(),
        transforms.Normalize(mean=channel_mean, std=channel_std),
    ])

    # [transform] up sampling transforms: random crop of the up-sampled size
    crop_size = (opt.imageSize[0] * opt.upSampling,
                 opt.imageSize[1] * opt.upSampling)
    transform = transforms.Compose([
        transforms.RandomCrop(crop_size),
        transforms.ToTensor(),
    ])

    # [dataset] training dataset
    if opt.dataset == 'folder':
        dataset = datasets.ImageFolder(root=opt.dataroot, transform=transform)
    elif opt.dataset == 'cifar10':
        dataset = datasets.CIFAR10(root=opt.dataroot, train=True, download=True, transform=transform)
    elif opt.dataset == 'cifar100':
        dataset = datasets.CIFAR100(root=opt.dataroot, train=True, download=False, transform=transform)
    else:
        # Previously this fell through and failed with an unbound-variable
        # error on the assert below; report the real problem instead.
        raise ValueError(
            "unsupported dataset {!r}; expected 'folder', 'cifar10' or 'cifar100'".format(opt.dataset))
    assert dataset

    # [dataloader] __return__ loading dataset
    dataloader = torch.utils.data.DataLoader(
        dataset,
        batch_size=opt.batchSize,
        shuffle=True,
        num_workers=int(opt.workers))

    # [generator] __return__ generator of GAN; weights are restored only when
    # a path was given and the file exists
    generator = Generator(16, opt.upSampling)
    if opt.generatorWeights != '' and os.path.exists(opt.generatorWeights):
        generator.load_state_dict(torch.load(opt.generatorWeights))

    # [discriminator] __return__ discriminator of GAN, restored the same way
    discriminator = Discriminator()
    if opt.discriminatorWeights != '' and os.path.exists(opt.discriminatorWeights):
        discriminator.load_state_dict(torch.load(opt.discriminatorWeights))

    # [extractor] __return__ feature extractor of GAN
    # For the content loss
    feature_extractor = FeatureExtractor(torchvision.models.vgg19(pretrained=True))

    # [loss] __return__ loss function
    content_criterion = nn.MSELoss()
    adversarial_criterion = nn.BCELoss()
    ones_const = Variable(torch.ones(opt.batchSize, 1))

    # [cuda] if gpu is to be used
    if opt.cuda:
        generator.cuda()
        discriminator.cuda()
        feature_extractor.cuda()
        content_criterion.cuda()
        adversarial_criterion.cuda()
        ones_const = ones_const.cuda()

    # [optimizer] __return__ Optimizer for GAN
    optim_generator = optim.Adam(generator.parameters(), lr=opt.generatorLR)
    optim_discriminator = optim.Adam(discriminator.parameters(), lr=opt.discriminatorLR)

    # record configure: the log directory name encodes dataset, batch size and both LRs
    configure('logs/{}-{}-{} -{}'.format(opt.dataset, str(opt.batchSize), str(opt.generatorLR), str(opt.discriminatorLR)), flush_secs=5)
    # visualizer = Visualizer(image_size = (opt.imageSize[0] * opt.upSampling, opt.imageSize[1] * opt.upSampling))

    # __return__ low resolution images (uninitialized buffer of batch x 3 x H x W)
    low_res = torch.FloatTensor(opt.batchSize, 3, opt.imageSize[0], opt.imageSize[1])

    return (normalize,
            scale,
            dataloader,
            generator,
            discriminator,
            feature_extractor,
            content_criterion,
            adversarial_criterion,
            ones_const,
            optim_generator,
            optim_discriminator,
            low_res)
Ejemplo n.º 6
0
def main():
    """Run one experiment: parse arguments, then evaluate or train a model.

    Flow, as implemented below:
      1. Parse CLI arguments into the module-level ``args``.
      2. Either restore a model from ``args.resume`` or build a fresh one.
      3. If ``args.evaluate`` is ``'val'`` or ``'test'``, validate once and
         return.
      4. Otherwise (re)create ``args.save``, snapshot the source tree into it,
         set up logging, and train in chunks of ``args.eval_freq`` iterations.
         Each chunk is followed by validation, a rewrite of ``scores.tsv`` and
         a checkpoint. Training stops early when ``args.patience`` is positive
         and that many iterations passed without a new best ``val_ap``.

    Side effects: defines the module-level globals ``args``, ``log_print`` and
    ``f_log``; may delete and recreate ``args.save``; may terminate the
    process when the user declines to overwrite an existing folder.
    """
    # parse arg and start experiment
    global args
    best_ap = -1.
    best_iter = 0

    args = parser.parse_args()
    args.config_of_data = config.datasets[args.data]
    # args.num_classes = config.datasets[args.data]['num_classes']
    if configure is None:
        # No usable tensorboard ``configure`` callable: turn that logging off.
        args.tensorboard = False
        print(Fore.RED +
              'WARNING: you don\'t have tesnorboard_logger installed' +
              Fore.RESET)

    # optionally resume from a checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            print(
                "=> no checkpoint found at '{}'".format(
                    Fore.RED +
                    args.resume +
                    Fore.RESET),
                file=sys.stderr)
            return
        print("=> loading checkpoint '{}'".format(args.resume))
        checkpoint = torch.load(args.resume)
        old_args = checkpoint['args']
        print('Old args:')
        print(old_args)
        # set args based on checkpoint
        if args.start_iter <= 0:
            args.start_iter = checkpoint['iter'] + 1
        best_iter = args.start_iter - 1
        best_ap = checkpoint['best_ap']
        # Architecture-defining options come from the checkpoint so the
        # rebuilt model matches the saved weights.
        for name in arch_resume_names:
            if name in vars(args) and name in vars(old_args):
                setattr(args, name, getattr(old_args, name))
        model = get_model(**vars(args))
        model.load_state_dict(checkpoint['state_dict'])
        print("=> loaded checkpoint '{}' (iter {})"
              .format(args.resume, checkpoint['iter']))
    else:
        # create model
        print("=> creating model '{}'".format(args.arch))
        model = get_model(**vars(args))

    # cudnn.benchmark = True
    cudnn.enabled = False

    # create dataloader
    # NOTE: the single-split cases pass real one-element tuples; the previous
    # ('val') / ('test') were plain strings, not tuples.
    if args.evaluate == 'val':
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('val',), **vars(args))
        validate(val_loader, model, best_iter)
        return
    elif args.evaluate == 'test':
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('test',), **vars(args))
        validate(test_loader, model, best_iter)
        return
    else:
        train_loader, val_loader, test_loader = getDataloaders(
            splits=('train', 'val'), **vars(args))

    # define optimizer
    optimizer = get_optimizer(model, args)

    # check if the folder exists
    if os.path.exists(args.save):
        print(Fore.RED + args.save + Fore.RESET
              + ' already exists!', file=sys.stderr)
        if not args.force:
            ans = input('Do you want to overwrite it? [y/N]:')
            if ans not in ('y', 'Y', 'yes', 'Yes'):
                # os has no exit(); sys.exit is the intended call.
                sys.exit(1)
        print('remove existing ' + args.save)
        shutil.rmtree(args.save)
    os.makedirs(args.save)
    print('create folder: ' + Fore.GREEN + args.save + Fore.RESET)

    # copy code to save folder (skipped when the save path contains 'debug')
    if args.save.find('debug') < 0:
        shutil.copytree(
            '.',
            os.path.join(
                args.save,
                'src'),
            symlinks=True,
            ignore=shutil.ignore_patterns(
                '*.pyc',
                '__pycache__',
                '*.path.tar',
                '*.pth',
                '*.ipynb',
                '.*',
                'data',
                'save',
                'save_backup'))

    # set up logging
    # NOTE(review): f_log is a module-level handle and is never closed here.
    global log_print, f_log
    f_log = open(os.path.join(args.save, 'log.txt'), 'w')

    def log_print(*items):
        """Print to stdout and mirror the same line into the run's log file."""
        print(*items)
        print(*items, file=f_log)
    log_print('args:')
    log_print(args)
    print('model:', file=f_log)
    print(model, file=f_log, flush=True)
    # log_print('model:')
    # log_print(model)
    # log_print('optimizer:')
    # log_print(vars(optimizer))
    log_print('# of params:',
              str(sum([p.numel() for p in model.parameters()])))
    torch.save(args, os.path.join(args.save, 'args.pth'))
    scores = ['iter\tlr\ttrain_loss\tval_ap']
    if args.tensorboard:
        configure(args.save, flush_secs=5)

    for i in range(args.start_iter, args.niters + 1, args.eval_freq):
        # print('iter {:3d} lr = {:.6e}'.format(i, lr))
        # if args.tensorboard:
        #     log_value('lr', lr, i)

        # train for args.eval_freq iterations
        train_loss = train(train_loader, model, optimizer,
                           i, args.eval_freq)
        # Move i to the last iteration of the chunk that was just trained.
        i += args.eval_freq - 1

        # evaluate on validation set
        val_ap = validate(val_loader, model, i)

        # save scores to a tsv file, rewrite the whole file to prevent
        # accidental deletion
        # NOTE(review): `lr` is not assigned anywhere in this function; it
        # must exist as a module-level global or this line raises NameError
        # -- confirm against the rest of the file.
        scores.append(('{}\t{}' + '\t{:.4f}' * 2)
                      .format(i, lr, train_loss, val_ap))
        with open(os.path.join(args.save, 'scores.tsv'), 'w') as f:
            print('\n'.join(scores), file=f)

        # remember the best val_ap and save checkpoint
        # TODO: change this
        is_best = val_ap > best_ap
        if is_best:
            best_ap = val_ap
            best_iter = i
            print(Fore.GREEN + 'Best var_err1 {}'.format(best_ap) +
                  Fore.RESET)
        save_checkpoint({
            'args': args,
            'iter': i,
            'best_iter': best_iter,
            'arch': args.arch,
            'state_dict': model.state_dict(),
            'best_ap': best_ap,
        }, is_best, args.save)
        # Early stopping: only active when args.patience is positive.
        if not is_best and i - best_iter >= args.patience > 0:
            break
    print('Best val_ap: {:.4f} at iter {}'.format(best_ap, best_iter))
Ejemplo n.º 7
0
def main():
    """Train and validate a DenseNet3 model on CIFAR-10, CIFAR-100 or SVHN.

    Parses the CLI arguments into the module-level ``args``, builds the data
    loaders for ``args.dataset``, creates the model on the GPU, optionally
    restores a checkpoint from ``args.resume``, then runs the
    train / validate / checkpoint loop from ``args.start_epoch`` up to
    ``args.epochs``. The best validation precision is kept in the
    module-level ``best_prec1``.

    Raises:
        ValueError: if ``args.dataset`` is not ``"cifar10"``, ``"cifar100"``
            or ``"svhn"``.
    """
    global args, best_prec1
    args = parser.parse_args()
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    # Data loading code
    normalize = transforms.Normalize(
        mean=[x / 255.0 for x in [125.3, 123.0, 113.9]],
        std=[x / 255.0 for x in [63.0, 62.1, 66.7]])

    if args.augment:
        # Random crop (with padding) and horizontal flip on the training set.
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            normalize,
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            normalize,
        ])
    transform_test = transforms.Compose([transforms.ToTensor(), normalize])

    # Pick the datasets first; the loaders are built identically afterwards.
    if args.dataset == "cifar10":
        train_data = datasets.CIFAR10(
            './data', train=True, download=True, transform=transform_train)
        val_data = datasets.CIFAR10(
            './data', train=False, transform=transform_test)
    elif args.dataset == "cifar100":
        train_data = datasets.CIFAR100(
            './data', train=True, download=True, transform=transform_train)
        val_data = datasets.CIFAR100(
            './data', train=False, transform=transform_test)
    elif args.dataset == "svhn":
        train_data = datasets.SVHN(
            './data', split="train", download=True, transform=transform_train)
        val_data = datasets.SVHN(
            './data', split="test", transform=transform_test, download=True)
    else:
        # Previously an unknown name left the loaders undefined and the run
        # crashed later with a NameError inside the training loop.
        raise ValueError(
            "unsupported dataset {!r}; expected 'cifar10', 'cifar100' or "
            "'svhn'".format(args.dataset))

    kwargs = {'num_workers': 1, 'pin_memory': True}
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=True,
                                             **kwargs)

    # create model
    model = dn.DenseNet3(args.layers,
                         args.num_class,
                         args.z_dim,
                         args.growth,
                         reduction=args.reduce,
                         bottleneck=args.bottleneck,
                         dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint; a missing file is only reported
    # and training continues with the freshly initialized model
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=True,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                # stored as the epoch to resume from
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    print('Best accuracy: ', best_prec1)
Ejemplo n.º 8
0
def main():
    global args, optimizer_select
    # To set the model name automatically
    print args
    lr = args.lr
    args = get_model_name(args)
    print 'Model name: {}'.format(args.model_name)

    # To set the random seed
    random.seed(args.seed)
    torch.manual_seed(args.seed + 1)
    torch.cuda.manual_seed(args.seed + 2)

    print("Loading training set and testing set..."),
    train_set = visual_genome(args.dataset_option, 'train')
    test_set = visual_genome('small', 'test')
    print("Done.")

    train_loader = torch.utils.data.DataLoader(train_set,
                                               batch_size=1,
                                               shuffle=True,
                                               num_workers=8,
                                               pin_memory=True)
    test_loader = torch.utils.data.DataLoader(test_set,
                                              batch_size=1,
                                              shuffle=False,
                                              num_workers=8,
                                              pin_memory=True)

    # Model declaration
    net = Hierarchical_Descriptive_Model(
        nhidden=args.mps_feature_len,
        n_object_cats=train_set.num_object_classes,
        n_predicate_cats=train_set.num_predicate_classes,
        n_vocab=train_set.voc_size,
        voc_sign=train_set.voc_sign,
        max_word_length=train_set.max_size,
        MPS_iter=args.MPS_iter,
        use_language_loss=not args.disable_language_model,
        object_loss_weight=train_set.inverse_weight_object,
        predicate_loss_weight=train_set.inverse_weight_predicate,
        dropout=args.dropout,
        use_kmeans_anchors=not args.use_normal_anchors,
        gate_width=args.gate_width,
        nhidden_caption=args.nhidden_caption,
        nembedding=args.nembedding,
        rnn_type=args.rnn_type,
        rnn_droptout=args.caption_use_dropout,
        rnn_bias=args.caption_use_bias,
        use_region_reg=args.region_bbox_reg,
        use_kernel=args.use_kernel_function)

    params = list(net.parameters())
    for param in params:
        print param.size()
    print net

    # To group up the features
    vgg_features_fix, vgg_features_var, rpn_features, hdn_features, language_features = group_features(
        net)

    # Setting the state of the training model
    net.cuda()
    net.train()
    logger_path = "log/logger/{}".format(args.model_name)
    if os.path.exists(logger_path):
        shutil.rmtree(logger_path)
    configure(logger_path, flush_secs=5)  # setting up the logger

    network.set_trainable(net, False)
    #  network.weights_normal_init(net, dev=0.01)
    if args.finetune_language_model:
        print 'Only finetuning the language model from: {}'.format(
            args.resume_model)
        args.train_all = False
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        optimizer_select = 3

    elif args.load_RPN:
        print 'Loading pretrained RPN: {}'.format(args.saved_model_path)
        args.train_all = False
        network.load_net(args.saved_model_path, net.rpn)
        net.reinitialize_fc_layers()
        optimizer_select = 1

    elif args.resume_training:
        print 'Resume training from: {}'.format(args.resume_model)
        if len(args.resume_model) == 0:
            raise Exception('[resume_model] not specified')
        network.load_net(args.resume_model, net)
        args.train_all = True
        optimizer_select = 2

    else:
        print 'Training from scratch.'
        net.rpn.initialize_parameters()
        net.reinitialize_fc_layers()
        optimizer_select = 0
        args.train_all = True

    optimizer = network.get_optimizer(lr, optimizer_select, args,
                                      vgg_features_var, rpn_features,
                                      hdn_features, language_features)

    target_net = net
    if not os.path.exists(args.output_dir):
        os.mkdir(args.output_dir)

    top_Ns = [50, 100]
    best_recall = np.zeros(len(top_Ns))

    if args.evaluate:
        recall = test(test_loader, net, top_Ns)
        print('======= Testing Result =======')
        for idx, top_N in enumerate(top_Ns):
            print(
                '[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)'
                .format(top_N=top_N,
                        recall=recall[idx] * 100,
                        best_recall=best_recall[idx] * 100))

        print('==============================')
    else:
        for epoch in range(0, args.max_epoch):
            # Training
            train(train_loader, target_net, optimizer, epoch)
            # snapshot the state
            save_name = os.path.join(
                args.output_dir,
                '{}_epoch_{}.h5'.format(args.model_name, epoch))
            network.save_net(save_name, net)
            print('save model: {}'.format(save_name))

            # Testing
            # network.set_trainable(net, False) # Without backward(), requires_grad takes no effect

            recall = test(test_loader, net, top_Ns)

            if np.all(recall > best_recall):
                best_recall = recall
                save_name = os.path.join(args.output_dir,
                                         '{}_best.h5'.format(args.model_name))
                network.save_net(save_name, net)
                print('\nsave model: {}'.format(save_name))

            print('Epoch[{epoch:d}]:'.format(epoch=epoch)),
            for idx, top_N in enumerate(top_Ns):
                print(
                    '\t[Recall@{top_N:d}] {recall:2.3f}%% (best: {best_recall:2.3f}%%)'
                    .format(top_N=top_N,
                            recall=recall[idx] * 100,
                            best_recall=best_recall[idx] * 100)),

            # updating learning policy
            if epoch % args.step_size == 0 and epoch > 0:
                lr /= 10
                args.lr = lr
                print '[learning rate: {}]'.format(lr)

                args.enable_clip_gradient = False
                if not args.finetune_language_model:
                    args.train_all = True
                    optimizer_select = 2
                # update optimizer and correponding requires_grad state
                optimizer = network.get_optimizer(lr, optimizer_select, args,
                                                  vgg_features_var,
                                                  rpn_features, hdn_features,
                                                  language_features)
Ejemplo n.º 9
0
def main():
	"""Train a super-resolution generator against a Discriminator_WGAN critic.

	Steps, as implemented below:
	  1. Parse the CLI options and build the train / dev data loaders.
	  2. When no checkpoint is requested (``--check_point -1``), pre-train the
	     generator for ``n_epoch_pretrain`` epochs using only the MSE loss.
	  3. Otherwise restore generator, critic and both optimizers from
	     ``cp/*_epoch_<check_point>_{gpu,cpu}.pth``.
	  4. Run the adversarial loop: the critic loss is
	     ``fake - real + 10 * gradient_penalty``; the generator loss is
	     ``mse + 1e-3 * adversarial``.
	  5. After every epoch: log averages to tensorboard, save checkpoints
	     (generator every epoch, everything else every 5th epoch) and write
	     comparison images plus PSNR / SSIM for the dev set to ``vis/``.
	"""
	n_epoch_pretrain = 2
	use_tensorboard = True

	parser = argparse.ArgumentParser(description='SRGAN Train')
	parser.add_argument('--crop_size', default=96, type=int, help='training images crop size')
	parser.add_argument('--num_epochs', default=500, type=int, help='training epoch')
	parser.add_argument('--batch_size', default=32, type=int, help='training batch size')
	parser.add_argument('--train_set', default='data/train', type=str, help='train set path')
	parser.add_argument('--check_point', type=int, default=-1, help="continue with previous check_point")

	opt = parser.parse_args()

	input_size = opt.crop_size
	n_epoch = opt.num_epochs
	batch_size = opt.batch_size
	check_point = opt.check_point

	check_point_path = 'cp/'
	if not os.path.exists(check_point_path):
		os.makedirs(check_point_path)

	train_set = TrainDataset(opt.train_set, crop_size=input_size, upscale_factor=4)
	train_loader = DataLoader(dataset=train_set, num_workers=2, batch_size=batch_size, shuffle=True)

	dev_set = DevDataset('data/dev', upscale_factor=4)
	dev_loader = DataLoader(dataset=dev_set, num_workers=1, batch_size=1, shuffle=False)

	mse = nn.MSELoss()

	# Decide once whether to run on the GPU; the same flag selects the
	# '_gpu' / '_cpu' suffix of every checkpoint file name.
	use_cuda = torch.cuda.is_available()
	device_tag = 'gpu' if use_cuda else 'cpu'

	if not use_cuda:
		print ('!!!!!!!!!!!!!!USING CPU!!!!!!!!!!!!!')

	netG = Generator()
	print('# generator parameters:', sum(param.numel() for param in netG.parameters()))
	netD = Discriminator_WGAN()
	print('# discriminator parameters:', sum(param.numel() for param in netD.parameters()))

	if use_cuda:
		netG.cuda()
		netD.cuda()
		mse.cuda()

	if use_tensorboard:
		configure('log', flush_secs=5)

	# Pre-train generator using only MSE loss
	if check_point == -1:
		optimizerG = optim.Adam(netG.parameters())
		for epoch in range(1, n_epoch_pretrain + 1):
			train_bar = tqdm(train_loader)

			netG.train()

			cache = {'g_loss': 0}

			for lowres, real_img_hr in train_bar:
				if use_cuda:
					real_img_hr = real_img_hr.cuda()
					lowres = lowres.cuda()

				fake_img_hr = netG(lowres)

				# Train G
				netG.zero_grad()

				image_loss = mse(fake_img_hr, real_img_hr)
				# Accumulate a plain float: adding the tensor itself would
				# keep every iteration's autograd history reachable.
				cache['g_loss'] += image_loss.item()

				image_loss.backward()
				optimizerG.step()

				# Print information by tqdm
				train_bar.set_description(desc='[%d/%d] Loss_G: %.4f' % (epoch, n_epoch_pretrain, image_loss))

	# Fresh optimizers for the adversarial phase (the pre-training optimizer
	# state is discarded).
	optimizerG = optim.Adam(netG.parameters(), lr=1e-4)
	optimizerD = optim.Adam(netD.parameters(), lr=1e-4)

	if check_point != -1:
		# Restore both networks and both optimizers from the requested epoch.
		netG.load_state_dict(torch.load('cp/netG_epoch_%d_%s.pth' % (check_point, device_tag)))
		netD.load_state_dict(torch.load('cp/netD_epoch_%d_%s.pth' % (check_point, device_tag)))
		optimizerG.load_state_dict(torch.load('cp/optimizerG_epoch_%d_%s.pth' % (check_point, device_tag)))
		optimizerD.load_state_dict(torch.load('cp/optimizerD_epoch_%d_%s.pth' % (check_point, device_tag)))

	# Epoch numbering continues after the restored checkpoint, if any.
	for epoch in range(1 + max(check_point, 0), n_epoch + 1 + max(check_point, 0)):
		train_bar = tqdm(train_loader)

		netG.train()
		netD.train()

		cache = {'mse_loss': 0, 'adv_loss': 0, 'g_loss': 0, 'd_loss': 0, 'ssim': 0, 'psnr': 0, 'd_top_grad' : 0, 'd_bot_grad' : 0, 'g_top_grad' : 0, 'g_bot_grad' : 0}

		for lowres, real_img_hr in train_bar:
			#print ('lr size : ' + str(data.size()))
			#print ('hr size : ' + str(target.size()))

			if use_cuda:
				real_img_hr = real_img_hr.cuda()
				lowres = lowres.cuda()

			fake_img_hr = netG(lowres)

			# Train D
			netD.zero_grad()

			logits_real = netD(real_img_hr).mean()
			logits_fake = netD(fake_img_hr).mean()
			gradient_penalty = compute_gradient_penalty(netD, real_img_hr, fake_img_hr)

			d_loss = logits_fake - logits_real + 10*gradient_penalty

			cache['d_loss'] += d_loss.item()

			# The generator graph is reused for the G update below, so it
			# must survive this backward pass.
			d_loss.backward(retain_graph=True)
			optimizerD.step()

			dtg, dbg = get_grads_D_WAN(netD)

			cache['d_top_grad'] += dtg
			cache['d_bot_grad'] += dbg

			# Train G

			netG.zero_grad()

			image_loss = mse(fake_img_hr, real_img_hr)
			adversarial_loss = -1*netD(fake_img_hr).mean()

			g_loss = image_loss + 1e-3*adversarial_loss

			cache['mse_loss'] += image_loss.item()
			cache['adv_loss'] += adversarial_loss.item()
			cache['g_loss'] += g_loss.item()

			g_loss.backward()
			optimizerG.step()

			gtg, gbg = get_grads_G(netG)

			cache['g_top_grad'] += gtg
			cache['g_bot_grad'] += gbg

			# Print information by tqdm
			train_bar.set_description(desc='[%d/%d] D grads:(%f, %f) G grads:(%f, %f) Loss_D: %.4f Loss_G: %.4f = %.4f + %.4f' % (epoch, n_epoch, dtg, dbg, gtg, gbg, d_loss, g_loss, image_loss, adversarial_loss))

		if use_tensorboard:
			# Per-epoch averages over the number of training batches.
			log_value('d_loss', cache['d_loss']/len(train_loader), epoch)

			log_value('mse_loss', cache['mse_loss']/len(train_loader), epoch)
			log_value('adv_loss', cache['adv_loss']/len(train_loader), epoch)
			log_value('g_loss', cache['g_loss']/len(train_loader), epoch)

			log_value('D top layer gradient', cache['d_top_grad']/len(train_loader), epoch)
			log_value('D bot layer gradient', cache['d_bot_grad']/len(train_loader), epoch)
			log_value('G top layer gradient', cache['g_top_grad']/len(train_loader), epoch)
			log_value('G bot layer gradient', cache['g_bot_grad']/len(train_loader), epoch)

		# Save model parameters: generator every epoch, the rest every 5th epoch
		torch.save(netG.state_dict(), 'cp/netG_epoch_%d_%s.pth' % (epoch, device_tag))
		if epoch%5 == 0:
			torch.save(netD.state_dict(), 'cp/netD_epoch_%d_%s.pth' % (epoch, device_tag))
			torch.save(optimizerG.state_dict(), 'cp/optimizerG_epoch_%d_%s.pth' % (epoch, device_tag))
			torch.save(optimizerD.state_dict(), 'cp/optimizerD_epoch_%d_%s.pth' % (epoch, device_tag))

		# Visualize results
		with torch.no_grad():
			netG.eval()
			out_path = 'vis/'
			if not os.path.exists(out_path):
				os.makedirs(out_path)

			dev_bar = tqdm(dev_loader)
			dev_images = []
			for val_lr, val_hr_restore, val_hr in dev_bar:
				lr_img = val_lr
				hr_img = val_hr
				if use_cuda:
					lr_img = lr_img.cuda()
					hr_img = hr_img.cuda()

				sr_img = netG(lr_img)

				# PSNR with a peak value of 1 -- assumes images are scaled to
				# [0, 1]; TODO confirm against the dataset classes.
				psnr = 10 * log10(1 / ((sr_img - hr_img) ** 2).mean().item())
				ssim = pytorch_ssim.ssim(sr_img, hr_img).item()
				dev_bar.set_description(desc='[converting LR images to SR images] PSNR: %.4f dB SSIM: %.4f' % (psnr, ssim))

				cache['ssim'] += ssim
				cache['psnr'] += psnr

				# Avoid out of memory crash on 8G GPU
				if len(dev_images) < 60 :
					dev_images.extend([to_image()(val_hr_restore.squeeze(0)), to_image()(hr_img.data.cpu().squeeze(0)), to_image()(sr_img.data.cpu().squeeze(0))])

			# Regroup the flat image list into triples
			# (restored HR, ground-truth HR, super-resolved) for the grids.
			dev_images = torch.stack(dev_images)
			dev_images = torch.chunk(dev_images, dev_images.size(0) // 3)

			dev_save_bar = tqdm(dev_images, desc='[saving training results]')
			index = 1
			for image in dev_save_bar:
				image = utils.make_grid(image, nrow=3, padding=5)
				utils.save_image(image, out_path + 'epoch_%d_index_%d.png' % (epoch, index), padding=5)
				index += 1

			if use_tensorboard:
				log_value('ssim', cache['ssim']/len(dev_loader), epoch)
				log_value('psnr', cache['psnr']/len(dev_loader), epoch)
Ejemplo n.º 10
0
import torch
from torch import nn, optim
from tensorboard_logger import configure, log_value

from ntm import NTM
from ntm.datasets import CopyDataset, RepeatCopyDataset, AssociativeDataset, NGram, PrioritySort
from args import get_parser
from marnn import *
from dnc import DNC
from dnc.sam import SAM

# Parse the command line and echo the resulting configuration.
args = get_parser().parse_args()
print("args:\n",args)

# Tensorboard event files are written under "runs/".
configure("runs/")
print('name:',args.name)

# ----------------------------------------------------------------------------
# -- initialize datasets, model, criterion and optimizer
# ----------------------------------------------------------------------------

'''
'''


if args.task == 'copy':
    # Read the copy-task parameters from JSON, then override the maximum
    # sequence length with the value given on the command line.
    args.task_json = 'ntm/tasks/copy.json'
    # Context manager so the file handle is closed right after parsing.
    with open(args.task_json) as task_file:
        task_params = json.load(task_file)
    task_params['max_seq_len'] = args.max_seq_len
    dataset = CopyDataset(task_params)
Ejemplo n.º 11
0
    torch.save(state, args.cv_dir+'/ckpt_E_%d_R_%.2E'%(epoch, reward))

#--------------------------------------------------------------------------------------------------------#
# Build the datasets, their loaders and the agent model.
trainset, testset = utils.get_dataset(args.img_size, args.data_dir)
trainloader = torchdata.DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=args.num_workers)
testloader = torchdata.DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=args.num_workers)
agent = utils.get_model(num_actions)

# ---- Load the pre-trained model ----------------------
start_epoch = 0
if args.load is not None:
    checkpoint = torch.load(args.load)
    agent.load_state_dict(checkpoint['agent'])
    # Continue with the epoch after the one stored in the checkpoint.
    start_epoch = checkpoint['epoch'] + 1
    # Single pre-formatted argument: prints the same text on Python 2 and 3.
    print('loaded agent from %s' % args.load)

# Parallelize the models if multiple GPUs available - Important for Large Batch Size
if args.parallel:
    agent = nn.DataParallel(agent)
agent.cuda()

# Update the parameters of the policy network
optimizer = optim.Adam(agent.parameters(), lr=args.lr)

# Write tensorboard logs under the checkpoint directory
configure(args.cv_dir+'/log', flush_secs=5)
# Run max_epochs + 1 epochs counted from start_epoch; evaluate every 10th one.
# (The original bound read `start_epocH`, an undefined name.)
for epoch in range(start_epoch, start_epoch + args.max_epochs + 1):
    train(epoch)
    if epoch % 10 == 0:
        test(epoch)
Ejemplo n.º 12
0
def main():
    """Train a text/video matching model and checkpoint the best epoch.

    In order: parse the options, derive the run directory
    (``opt.logger_name``) from the hyper-parameters, exit with status 0 when
    ``checkToSkip`` returns true for ``model_best.pth.tar`` or
    ``val_metric.txt``, load the vocabularies and visual features, build the
    model, optionally load a checkpoint, then train for up to
    ``opt.num_epochs`` epochs. Each epoch is followed by validation; the
    learning rate is decayed every epoch and additionally halved when the
    validation score stops improving, and training stops early after more
    than 10 non-improving epochs.

    Raises:
        NotImplementedError: if ``opt.concate`` is neither ``'full'`` nor
            ``'reduced'``.
    """
    opt = parse_args()
    print(json.dumps(vars(opt), indent=2))

    rootpath = opt.rootpath
    trainCollection = opt.trainCollection
    valCollection = opt.valCollection

    if opt.loss_fun == "mrl" and opt.measure == "cosine":
        assert opt.text_norm is True
        assert opt.visual_norm is True

    # checkpoint path
    model_info = '%s_concate_%s_dp_%.1f_measure_%s' % (opt.model, opt.concate, opt.dropout, opt.measure)
    # text-side multi-level encoding info
    text_encode_info = 'vocab_%s_word_dim_%s_text_rnn_size_%s_text_norm_%s' % \
                       (opt.vocab, opt.word_dim, opt.text_rnn_size, opt.text_norm)
    text_encode_info += "_kernel_sizes_%s_num_%s" % (opt.text_kernel_sizes, opt.text_kernel_num)
    # video-side multi-level encoding info
    visual_encode_info = 'visual_feature_%s_visual_rnn_size_%d_l_2_norm_%d_visual_norm_%s' % \
                         (opt.visual_feature, opt.visual_rnn_size, opt.do_visual_feas_norm, opt.visual_norm)
    visual_encode_info += "_kernel_sizes_%s_num_%s" % (opt.visual_kernel_sizes, opt.visual_kernel_num)
    # common space learning info
    mapping_info = "mapping_text_%s_img_%s" % (opt.text_mapping_layers, opt.visual_mapping_layers)
    loss_info = 'loss_func_%s_margin_%s_direction_%s_max_violation_%s_cost_style_%s' % \
                (opt.loss_fun, opt.margin, opt.direction, opt.max_violation, opt.cost_style)
    optimizer_info = 'optimizer_%s_lr_%s_decay_%.2f_grad_clip_%.1f_val_metric_%s' % \
                     (opt.optimizer, opt.learning_rate, opt.lr_decay_rate, opt.grad_clip, opt.val_metric)

    opt.logger_name = os.path.join(rootpath, trainCollection, opt.cv_name, valCollection, model_info, text_encode_info,
                                   visual_encode_info, mapping_info, loss_info, optimizer_info, opt.postfix)
    print(opt.logger_name)

    # Nothing to do when results already exist (unless overwriting is requested).
    if checkToSkip(os.path.join(opt.logger_name, 'model_best.pth.tar'), opt.overwrite):
        sys.exit(0)
    if checkToSkip(os.path.join(opt.logger_name, 'val_metric.txt'), opt.overwrite):
        sys.exit(0)
    makedirsforfile(os.path.join(opt.logger_name, 'val_metric.txt'))
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Materialize as lists: len() is taken of these below, and on Python 3 a
    # bare map() is a one-shot iterator without len() or indexing.
    opt.text_kernel_sizes = list(map(int, opt.text_kernel_sizes.split('-')))
    opt.visual_kernel_sizes = list(map(int, opt.visual_kernel_sizes.split('-')))
    # collections: train, val
    collections = {'train': trainCollection, 'val': valCollection}
    cap_file = {'train': '%s.caption.txt' % trainCollection,
                'val': '%s.caption.txt' % valCollection}
    # caption
    caption_files = {x: os.path.join(rootpath, collections[x], 'TextData', cap_file[x])
                     for x in collections}
    # Load visual features
    visual_feat_path = {x: os.path.join(rootpath, collections[x], 'FeatureData', opt.visual_feature)
                        for x in collections}
    visual_feats = {x: BigFile(visual_feat_path[x]) for x in visual_feat_path}
    opt.visual_feat_dim = visual_feats['train'].ndims

    # set bow vocabulary and encoding
    bow_vocab_file = os.path.join(rootpath, opt.trainCollection, 'TextData', 'vocabulary', 'bow', opt.vocab + '.pkl')
    # NOTE: pickle executes arbitrary code on load; only use trusted vocabulary files.
    with open(bow_vocab_file, 'rb') as bow_file:
        bow_vocab = pickle.load(bow_file)
    bow2vec = get_text_encoder('bow')(bow_vocab)
    opt.bow_vocab_size = len(bow_vocab)

    # set rnn vocabulary
    rnn_vocab_file = os.path.join(rootpath, opt.trainCollection, 'TextData', 'vocabulary', 'rnn', opt.vocab + '.pkl')
    with open(rnn_vocab_file, 'rb') as rnn_file:
        rnn_vocab = pickle.load(rnn_file)
    opt.vocab_size = len(rnn_vocab)

    # initialize word embedding
    opt.we_parameter = None
    if opt.word_dim == 500:
        w2v_data_path = os.path.join(rootpath, "word2vec", 'flickr', 'vec500flickr30m')
        opt.we_parameter = get_we_parameter(rnn_vocab, w2v_data_path)

    # mapping layer structure
    # Lists are required here too: element 0 is overwritten just below.
    opt.text_mapping_layers = list(map(int, opt.text_mapping_layers.split('-')))
    opt.visual_mapping_layers = list(map(int, opt.visual_mapping_layers.split('-')))
    if opt.concate == 'full':
        opt.text_mapping_layers[0] = opt.bow_vocab_size + opt.text_rnn_size * 2 + opt.text_kernel_num * len(
            opt.text_kernel_sizes)
        opt.visual_mapping_layers[0] = opt.visual_feat_dim + opt.visual_rnn_size * 2 + opt.visual_kernel_num * len(
            opt.visual_kernel_sizes)
    elif opt.concate == 'reduced':
        opt.text_mapping_layers[0] = opt.text_rnn_size * 2 + opt.text_kernel_num * len(opt.text_kernel_sizes)
        opt.visual_mapping_layers[0] = opt.visual_rnn_size * 2 + opt.visual_kernel_num * len(opt.visual_kernel_sizes)
    else:
        # The unsupported value is opt.concate, so report that one.
        raise NotImplementedError('Concate style %s not implemented' % opt.concate)

    # set data loader
    video2frames = {
        x: read_dict(os.path.join(rootpath, collections[x], 'FeatureData', opt.visual_feature, 'video2frames.txt'))
        for x in collections}
    data_loaders = data.get_data_loaders(
        caption_files, visual_feats, rnn_vocab, bow2vec, opt.batch_size, opt.workers, opt.n_caption,
        opt.do_visual_feas_norm,
        video2frames=video2frames)

    # Construct the model
    model = get_model(opt.model)(opt)
    # Cleared after model construction; opt is later stored in checkpoints.
    opt.we_parameter = None

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
            validate(opt, data_loaders['val'], model, measure=opt.measure)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    # NOTE(review): best_rsum is reset here even after a resume, and the loop
    # below always starts at epoch 0 (start_epoch is unused) - confirm intended.
    best_rsum = 0
    no_impr_counter = 0
    lr_counter = 0
    best_epoch = None
    fout_val_metric_hist = open(os.path.join(opt.logger_name, 'val_metric_hist.txt'), 'w')
    for epoch in range(opt.num_epochs):
        print('Epoch[{0} / {1}] LR: {2}'.format(epoch, opt.num_epochs, get_learning_rate(model.optimizer)[0]))
        print('-' * 10)
        # train for one epoch
        train(opt, data_loaders['train'], model, epoch)

        # evaluate on validation set
        rsum = validate(opt, data_loaders['val'], model, measure=opt.measure)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        print(' * Current perf: {}'.format(rsum))
        print(' * Best perf: {}'.format(best_rsum))
        print('')
        fout_val_metric_hist.write('epoch_%d: %f\n' % (epoch, rsum))
        fout_val_metric_hist.flush()

        if is_best:
            save_checkpoint({
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            }, is_best, filename='checkpoint_epoch_%s.pth.tar' % epoch, prefix=opt.logger_name + '/',
                best_epoch=best_epoch)
            best_epoch = epoch

        lr_counter += 1
        decay_learning_rate(opt, model.optimizer, opt.lr_decay_rate)
        if not is_best:
            # Early stop occurs if the validation performance does not improve in ten consecutive epochs
            no_impr_counter += 1
            if no_impr_counter > 10:
                save_checkpoint({
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'best_rsum': best_rsum,
                    'opt': opt,
                    'Eiters': model.Eiters,
                }, 0, filename='checkpoint_epoch_%s.pth.tar' % epoch, prefix=opt.logger_name + '/')
                print('Early stopping happended.\n')
                break

            # When the validation performance decreased after an epoch,
            # we divide the learning rate by 2 and continue training;
            # but we use each learning rate for at least 3 epochs.
            if lr_counter > 2:
                decay_learning_rate(opt, model.optimizer, 0.5)
                lr_counter = 0
        else:
            no_impr_counter = 0
    # Always store the state of the last epoch that ran.
    save_checkpoint({
        'epoch': epoch + 1,
        'model': model.state_dict(),
        'best_rsum': best_rsum,
        'opt': opt,
        'Eiters': model.Eiters,
    }, 0, filename='checkpoint_epoch_%s.pth.tar' % epoch, prefix=opt.logger_name + '/')
    fout_val_metric_hist.close()

    print('best performance on validation: {}\n'.format(best_rsum))
    with open(os.path.join(opt.logger_name, 'val_metric.txt'), 'w') as fout:
        fout.write('best performance on validation: ' + str(best_rsum))
Ejemplo n.º 13
0
    def fit(self):
        """Run adversarial training of the generator and the discriminator.

        Configures the tensorboard logger on ``config.log_dir`` and sets
        ``config.perceptual`` to ``False``. For every epoch from
        ``self.start_epoch`` up to ``config.max_epochs`` it iterates over
        ``self.data_loader``; each step first updates the discriminator
        (real images, real images paired with ``wrong_lmss``, and detached
        generator output) and then the generator. Progress is printed every
        10 steps and at the last step of an epoch; image grids and model
        weights are written about 50 times per epoch.
        """
        config = self.config
        configure("{}".format(config.log_dir), flush_secs=5)

        num_steps_per_epoch = len(self.data_loader)
        # Integer save interval, at least 1. The original `/` gives a float
        # on Python 3 (so the modulo is almost never 0) and gives 0 on
        # Python 2 for loaders with fewer than 50 batches.
        save_every = max(1, num_steps_per_epoch // 50)
        cc = 0
        config.perceptual = False

        for epoch in range(self.start_epoch, config.max_epochs):
            for step, (example_image, example_lms, right_imgs, right_lmss,
                       wrong_imgs, wrong_lmss) in enumerate(self.data_loader):
                t1 = time.time()

                # Wrap every tensor of the batch, moving it to the GPU when
                # requested.
                batch = [Variable(tensor) for tensor in
                         (example_image, example_lms, right_imgs, right_lmss,
                          wrong_imgs, wrong_lmss)]
                if config.cuda:
                    batch = [tensor.cuda() for tensor in batch]
                (example_image, example_lms, right_imgs, right_lmss,
                 wrong_imgs, wrong_lmss) = batch

                fake_im = self.generator(example_image, right_lmss)
                real_im = right_imgs

                # train the discriminator

                D_real = self.discriminator(example_image, real_im, right_lmss)

                D_wrong = self.discriminator(example_image, real_im,
                                             wrong_lmss)

                # detach(): the discriminator update must not back-propagate
                # into the generator.
                D_fake = self.discriminator(example_image, fake_im.detach(),
                                            right_lmss)

                loss_real = self.bce_loss_fn(D_real, self.ones)
                loss_wrong = self.bce_loss_fn(D_wrong, self.zeros)
                loss_fake = self.bce_loss_fn(D_fake, self.zeros)

                loss_disc = loss_real + 0.5 * (loss_wrong + loss_fake)
                loss_disc.backward()
                self.opt_d.step()
                self._reset_gradients()

                # train the generator
                fake_im = self.generator(example_image, right_lmss)
                D_fake = self.discriminator(example_image, fake_im, right_lmss)

                loss_gen = self.bce_loss_fn(D_fake, self.ones)
                loss = loss_gen
                loss.backward()
                self.opt_g.step()
                self._reset_gradients()

                t2 = time.time()

                if (step + 1) % 10 == 0 or (step + 1) == num_steps_per_epoch:
                    # Steps left in this epoch plus all steps of the epochs
                    # still to come. The original lacked the parentheses
                    # around (step + 1) and (epoch + 1) and over-estimated.
                    steps_remain = (num_steps_per_epoch - (step + 1)) + \
                        (config.max_epochs - (epoch + 1)) * num_steps_per_epoch
                    eta = int((t2 - t1) * steps_remain)
                    # if config.perceptual:
                    #     print("[{}/{}][{}/{}]   Loss_G: {:.4f}, loss_perceptual: {:.4f}  ETA: {} second"
                    #           .format(epoch+1, config.max_epochs,
                    #                   step+1, num_steps_per_epoch, loss_gen.data[0], loss_perc.data[0],  eta))
                    #     log_value('generator_loss',loss_gen.data[0] , step + num_steps_per_epoch * epoch)
                    # else:

                    # NOTE(review): `.data[0]` on a scalar loss is the
                    # pre-0.4 PyTorch idiom; newer releases expect `.item()`.
                    # Confirm the targeted PyTorch version before changing it.
                    print(
                        "[{}/{}][{}/{}]   Loss_G: {:.4f}, Loss_D: {:.4f},  ETA: {} second"
                        .format(epoch + 1, config.max_epochs, step + 1,
                                num_steps_per_epoch, loss_gen.data[0],
                                loss_disc.data[0], eta))
                if step % save_every == 0:
                    fake_store = fake_im.data.permute(
                        0, 2, 1, 3,
                        4).contiguous().view(config.batch_size * 16, 3, 64, 64)
                    torchvision.utils.save_image(fake_store,
                                                 "{}fake_{}.png".format(
                                                     config.sample_dir, cc),
                                                 nrow=16,
                                                 normalize=True)
                    real_store = right_imgs.data.permute(
                        0, 2, 1, 3,
                        4).contiguous().view(config.batch_size * 16, 3, 64, 64)
                    torchvision.utils.save_image(real_store,
                                                 "{}real_{}.png".format(
                                                     config.sample_dir, cc),
                                                 nrow=16,
                                                 normalize=True)
                    # cc is incremented before the weights are saved, so the
                    # weight files are numbered one higher than the images.
                    cc += 1

                    torch.save(
                        self.generator.state_dict(),
                        "{}/generator_{}.pth".format(config.model_dir, cc))
                    torch.save(
                        self.discriminator.state_dict(),
                        "{}/discriminator_{}.pth".format(config.model_dir, cc))
Ejemplo n.º 14
0
import numpy as np
from evaluate import evaluate
from args import vocab_pkl_path, train_caption_pkl_path, feature_h5_path
from args import num_epochs, batch_size, learning_rate, ss_factor
from args import projected_size, hidden_size, mid_size
from args import feature_size, max_frames, max_words
from args import use_cuda, use_checkpoint
from args import banet_pth_path, optimizer_pth_path
from args import best_banet_pth_path, best_optimizer_pth_path
from args import test_range, test_prediction_txt_path, test_reference_txt_path
from args import log_environment
from tensorboard_logger import configure, log_value
sys.path.append('./coco-caption/')
from pycocotools.coco import COCO

# Point the tensorboard logger at the configured log directory.
configure(log_environment, flush_secs=10)


# Load the vocabulary
# NOTE: pickle executes arbitrary code on load; only use a trusted vocabulary file.
with open(vocab_pkl_path, 'rb') as f:
    vocab = pickle.load(f)
vocab_size = len(vocab)

# Build the model
banet = BANet(feature_size, projected_size, mid_size, hidden_size,
              max_frames, max_words, vocab)


# Restore saved weights when a checkpoint file exists and its use is enabled.
if os.path.exists(banet_pth_path) and use_checkpoint:
    banet.load_state_dict(torch.load(banet_pth_path))
if use_cuda:
Ejemplo n.º 15
0
import torch
import torch.nn as nn
from sklearn.utils import shuffle
from datasets import arxiv2
from transformers import GPT2Tokenizer, GPT2Model, GPT2LMHeadModel
from opt import OpenAIAdam
from text_utils import TextEncoder
from utils import (encode_dataset2, iter_data,
                   ResultLogger, make_path)
from loss import SummarizationLossCompute2
import pickle
from tensorboard_logger import configure, log_value



configure("./gpt2_analysis", flush_secs=5)

def transform_arxiv(X1,X2):
    n_batch = len(X1)
    delimiter = [encoder['<|TL;DR|>']]
    end_token = [encoder['<|endoftext|>']]
    xmb = np.zeros((n_batch, n_ctx), dtype=np.int32)
    mmb = np.zeros((n_batch, n_ctx), dtype=np.float32)
    for i, (x1,x2), in enumerate(zip(X1,X2)):
        new_x1 = x1[:800]
        new_x2 = x2[:200]
        x12 = new_x1 + delimiter
        x13 = new_x2 + end_token
        xmb[i,:len(x12)] = x12
        xmb[i,len(x12):len(x12)+len(x13)] = x13 
        mmb[i,:len(x12)] = 1
        print("Load checkpoint from: {}".format(checkpoint_path))
        if use_cuda:
            checkpoint = torch.load(checkpoint_path, map_location=torch.device('cuda'))
        else:
            checkpoint = torch.load(checkpoint_path, map_location=torch.device('cpu'))
        model.load_state_dict(checkpoint["state_dict"])
        # optimizer.load_state_dict(checkpoint["optimizer"])
        try:
            global_step = checkpoint["global_step"]
            global_epoch = checkpoint["global_epoch"]
        except:
            # TODO
            pass

    # Setup tensorboard logger
    tensorboard_logger.configure("log/run-test")

    print(hparams_debug_string())

    # Train!
    try:
        train(model, data_loader, optimizer,
              init_lr=hparams.initial_learning_rate,
              checkpoint_dir=checkpoint_dir,
              checkpoint_interval=hparams.checkpoint_interval,
              nepochs=hparams.nepochs,
              clip_thresh=hparams.clip_thresh)
    except:
        save_checkpoint(
            model, optimizer, global_step, checkpoint_dir, global_epoch)
        traceback.print_exc()
Ejemplo n.º 17
0
import torch
import numpy as np
from train_parameters import *
import torch.nn as nn
from torch import optim
import torch.nn.functional as F
from tqdm import tqdm
import os
from glob import glob
import random
import math
from tensorboard_logger import configure, log_value

configure("../../dataset/MSVD/tensorboard/run-1")

device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")

# Video ids: every entry of the caption directories with its last three
# characters removed.
video_ids_tr = [entry[:-3] for entry in os.listdir(caption_tr_path)]
video_ids_vl = [entry[:-3] for entry in os.listdir(caption_vl_path)]

all_video_ids = video_ids_tr + video_ids_vl

# Word -> vector table, extended with an all-zeros 'SOS' entry and an
# all-ones 'EOS' entry of length word_dim.
word2vec = torch.load(word2vec_path)
for marker, make_vector in (('SOS', np.zeros), ('EOS', np.ones)):
    word2vec[marker] = make_vector((word_dim))
Ejemplo n.º 18
0
args = parser.parse_args()

# Timestamped experiment directory name; debug runs get a "_debug" suffix.
args.save = 'search-{}-{}'.format(args.save, time.strftime("%Y%m%d-%H%M%S")) \
    + ("_debug" if args.debug else "")
utils.create_exp_dir(args.save, scripts_to_save=glob.glob('*.py'))

# Log to stdout and, with the same message format, to <args.save>/log.txt.
log_format = '%(asctime)s %(message)s'
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format=log_format,
    datefmt='%m/%d %I:%M:%S %p',
)
fh = logging.FileHandler(os.path.join(args.save, 'log.txt'))
fh.setFormatter(logging.Formatter(log_format))
logging.getLogger().addHandler(fh)

# Tensorboard logs go into a sub-directory named after the run.
configure(args.save + "/%s" % (args.name))

CIFAR_CLASSES = 10


def main():
    """Check for a GPU, then seed the random generators and set up CUDA.

    Exits with status 1 when no CUDA device is available.
    NOTE(review): only the setup part is visible in this excerpt; the rest
    of the function appears to be cut off.
    """
    if not torch.cuda.is_available():
        logging.info('no gpu device available')
        sys.exit(1)

    # Seed numpy, torch (CPU) and torch (CUDA) with the same user-given seed.
    np.random.seed(args.seed)
    torch.cuda.set_device(args.gpu)
    cudnn.benchmark = True
    torch.manual_seed(args.seed)
    cudnn.enabled = True
    torch.cuda.manual_seed(args.seed)
Ejemplo n.º 19
0
def main():
    """Train (or only evaluate) the ``Ensemble`` model on a scene dataset.

    Builds the model, loads either the two pretrained sub-model weight files
    or a resume checkpoint, creates the train (and optional validation)
    loaders, then runs the epoch loop from ``args.start_epoch`` to
    ``args.epochs``, saving a checkpoint every ``args.interval`` epochs.
    With ``args.evaluate`` set, a single validation pass is run instead.

    NOTE(review): the excerpt appears to be cut off after the
    ``save_checkpoint`` call; see the notes in the body.
    """
    global best_prec1
    if args.tensorboard:
        # The log directory name encodes the main hyper-parameters of the run.
        configure('log/'+args.arch.lower() + '_bs' + str(args.batch_size) + '_ep' + str(args.epochs) + '_loglr' + str(args.lr) +
                '_size' + str(args.img_size)+ '_wd' + str(args.weight_decay))
    print(args)
    # create model
    print("=> creating model '{}'".format(args.arch))
    
    #if args.arch.lower().startswith('resnet'):
    #    model.avgpool = nn.AvgPool2d(args.img_size // 32, 1)
    #model.fc = nn.Linear(model.fc.in_features, args.num_classes)

    # default parameter n_class=1000, input_size=224, width_mult=1.
    model = Ensemble()
    if not args.resume:
        # Fresh run: start both sub-models from their pretrained weights.
        model.MobileNetV2.load_state_dict(torch.load('mobilenet_pretrained.pth'))
        model.NASNetAMobile.load_state_dict(torch.load('nasnet_pretrained.pth'))

    model = torch.nn.DataParallel(model).cuda()
    print(model)
    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            # NOTE(review): the checkpoint dict saved at the end of this
            # function has no 'best_prec1' key in the visible code - confirm
            # against save_checkpoint / the untruncated source.
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # Data loading code
    import datautil
    '''
    norm_dict = {
    #320:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 1.,  1.,  1.]),
    320:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[0.2295,  0.2385,  0.2479]),
    0:transforms.Normalize(mean=[0.485, 0.456, 0.406],std=[0.229, 0.224, 0.225]),
    256:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 0.2295,  0.2385,  0.2479]),
    224:transforms.Normalize(mean=[0.4333,0.4429,0.4313],std=[ 0.2295,  0.2385,  0.2479]),
    }
    norm_default = norm_dict[0]
    normalize = norm_dict[args.img_size]

    currrent 
    tensor([[ 0.4828,  0.4693,  0.4602]], device='cuda:0')
    tensor([[ 45.3332,  41.1241,  45.7719]], device='cuda:0')

    '''
    #normalize = transforms.Normalize(mean=[0.48280172,0.46929353,0.46019437],std=[0.25859008,0.28414325,0.288328])
    normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225])
    # Training images: random resized crop + horizontal flip, then normalize.
    train_data = datautil.SceneDataset(args.data,img_transform=
                                             transforms.Compose([
                                             transforms.RandomResizedCrop(args.img_size),
                                             transforms.RandomHorizontalFlip(),
                                             transforms.ToTensor(),
                                             normalize]))


    train_loader = torch.utils.data.DataLoader(train_data,batch_size=args.batch_size,shuffle=True,num_workers=args.workers,pin_memory=True)
    if args.val:
        # Validation images: plain resize to a square, no augmentation;
        # the loader uses half the training batch size.
        val_data = datautil.SceneDataset(args.val, img_transform=
                                        transforms.Compose([
                                            #transforms.Scale(256),
                                            transforms.Resize((args.img_size,args.img_size)),
                                            transforms.ToTensor(),
                                            normalize]))
        val_loader = torch.utils.data.DataLoader(val_data, batch_size=args.batch_size//2, shuffle=False,
                                             num_workers=args.workers, pin_memory=True)
    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    # optimizer = torch.optim.Adam(model.parameters(), args.lr, weight_decay=args.weight_decay)

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                 momentum=args.momentum,
                                weight_decay=args.weight_decay)
    #optimizer = torch.optim.RMSprop(model.parameters(), args.lr,
    #                             momentum=args.momentum,
    #                            weight_decay=args.weight_decay,eps=1)

    if args.evaluate:
        # NOTE(review): val_loader only exists when args.val is set; this
        # path raises NameError otherwise - confirm the flags always go together.
        validate(val_loader, model, criterion,0)
        return

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        if args.val:
            prec1 = validate(val_loader, model, criterion,epoch)

        # remember best prec@1 and save checkpoint
            # NOTE(review): is_best is computed but not used in the visible code.
            is_best = prec1 > best_prec1
            best_prec1 = max(prec1, best_prec1)
        if epoch % args.interval == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'arch': args.arch,
                'state_dict': model.state_dict(),
            })
    args.src_dataset, args.src_split, args.tgt_dataset, args.tgt_split, args.input_ch)
if args.net in ["fcn", "psp"]:
    model_name = "%s-%s-%s-res%s" % (args.method, args.savename, args.net, args.res)
else:
    model_name = "%s-%s-%s" % (args.method, args.savename, args.net)

outdir = os.path.join(args.base_outdir, mode)

# Create Model Dir
pth_dir = os.path.join(outdir, "pth")
mkdir_if_not_exist(pth_dir)

# Create Model Dir and  Set TF-Logger
tflog_dir = os.path.join(outdir, "tflog", model_name)
mkdir_if_not_exist(tflog_dir)
configure(tflog_dir, flush_secs=5)

# Save param dic
if resume_flg:
    json_fn = os.path.join(outdir, "param-%s_resume.json" % model_name)
else:
    json_fn = os.path.join(outdir, "param-%s.json" % model_name)

check_if_done(json_fn)
save_dic_to_json(args.__dict__, json_fn)

train_img_shape = tuple([int(x) for x in args.train_img_shape])

# Cropping (and the joint crop/rotate transform) is enabled only for a
# positive crop size; the comparison already yields the boolean.
use_crop = args.crop_size > 0
joint_transform = get_joint_transform(crop_size=args.crop_size, rotate_angle=args.rotate_angle) if use_crop else None
def main():
    """Train ``WideResNetMulti`` on CIFAR-10/100 and report the best accuracy.

    Parses the command line into the module-level ``args``, builds the data
    loaders (with pad/crop/flip augmentation when ``args.augment`` is set),
    creates the model on the GPU, optionally resumes from ``args.resume``,
    then trains from ``args.start_epoch`` to ``args.epochs``, validating and
    checkpointing after every epoch. Updates the module-level ``best_prec1``.
    """
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    if args.augment:
        # Replicate-pad by 4 pixels on each side, then take a random 32x32
        # crop and a random horizontal flip.
        transform_train = transforms.Compose([
            transforms.ToTensor(),
            transforms.Lambda(lambda x: F.pad(
                Variable(x.unsqueeze(0), requires_grad=False, volatile=True),
                (4, 4, 4, 4),
                mode='replicate').data.squeeze()),
            transforms.ToPILImage(),
            transforms.RandomCrop(32),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])
    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}
    assert (args.dataset == 'cifar10' or args.dataset == 'cifar100')
    # The dataset class is looked up by its upper-cased name (CIFAR10 / CIFAR100).
    train_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=True,
                                                download=True,
                                                transform=transform_train),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)
    val_loader = torch.utils.data.DataLoader(
        datasets.__dict__[args.dataset.upper()]('../data',
                                                train=False,
                                                transform=transform_test),
        batch_size=args.batch_size,
        shuffle=True,
        **kwargs)

    # create model
    # 10 classes for CIFAR-10, 100 otherwise (explicit conditional instead of
    # the fragile `cond and a or b` idiom).
    model = WideResNetMulti(args.layers,
                            10 if args.dataset == 'cifar10' else 100,
                            args.num_rotate_classes,
                            args.widen_factor,
                            dropRate=args.droprate)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    # for training on multiple GPUs.
    # Use CUDA_VISIBLE_DEVICES=0,1 to specify which GPUs to use
    # model = torch.nn.DataParallel(model).cuda()
    model = model.cuda()

    # optionally resume from a checkpoint
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch + 1)

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
                'best_prec1': best_prec1,
            }, is_best)
    # Function-call form with one pre-formatted argument: valid on Python 3
    # and prints the same text as the old statement `print 'Best accuracy: ', x`.
    print('{} {}'.format('Best accuracy: ', best_prec1))
Ejemplo n.º 22
0
def main():
    """Train a VSE model, optionally resuming from a saved checkpoint.

    Reads the options from ``arguments.get_argument_parser()``, builds the
    BERT tokenizer, the data loaders and the model, then for every epoch
    adjusts the learning rate, applies the warm-up schedule, trains,
    validates and writes a checkpoint under ``opt.model_name``.

    When ``opt.resume`` points to an existing file, the model weights,
    ``Eiters``, the epoch counter and the best validation score are restored
    from it.  With ``opt.reset_start_epoch`` set, only the weights and
    ``Eiters`` are kept and training restarts from epoch 0.
    """
    # Hyper Parameters
    parser = arguments.get_argument_parser()
    opt = parser.parse_args()

    if not os.path.exists(opt.model_name):
        os.makedirs(opt.model_name)
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    logger = logging.getLogger(__name__)
    logger.info(opt)

    # Load Tokenizer and Vocabulary
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
    vocab = tokenizer.vocab
    opt.vocab_size = len(vocab)

    train_loader, val_loader = image_caption.get_loaders(
        opt.data_path, opt.data_name, tokenizer, opt.batch_size, opt.workers,
        opt)

    model = VSEModel(opt)

    lr_schedules = [
        opt.lr_update,
    ]

    # Defaults for a fresh run; a successful resume overrides them.  They are
    # set *before* the resume block on purpose: initialising best_rsum after
    # it would throw away the score restored from the checkpoint and let a
    # worse model be flagged as the best one.
    start_epoch = 0
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            logger.info("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            # The model is wrapped before load_state_dict is called.
            # NOTE(review): presumably the checkpoint was saved from a
            # data-parallel model, so the key names only match once wrapped.
            if not model.is_data_parallel:
                model.make_data_parallel()
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another training
            model.Eiters = checkpoint['Eiters']
            logger.info(
                "=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                    opt.resume, start_epoch, best_rsum))
            # validate(opt, val_loader, model)
            if opt.reset_start_epoch:
                # Only the weights are reused: restart both the epoch counter
                # and the best-score tracking.
                start_epoch = 0
                best_rsum = 0
        else:
            logger.info("=> no checkpoint found at '{}'".format(opt.resume))

    if not model.is_data_parallel:
        model.make_data_parallel()

    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):
        logger.info(opt.logger_name)
        logger.info(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch, lr_schedules)

        # After the mean-violation warm-up, switch the loss to max violation.
        if epoch >= opt.vse_mean_warmup_epochs:
            opt.max_violation = True
            model.set_max_violation(opt.max_violation)

        # Set up the all warm-up options
        if opt.precomp_enc_type == 'backbone':
            if epoch < opt.embedding_warmup_epochs:
                model.freeze_backbone()
                logger.info(
                    'All backbone weights are frozen, only train the embedding layers'
                )
            else:
                model.unfreeze_backbone(3)

            # The argument to unfreeze_backbone steps 3 -> 2 -> 1 -> 0, one
            # step every `backbone_warmup_epochs` epochs after the embedding
            # warm-up has finished.
            if epoch < opt.embedding_warmup_epochs:
                logger.info('Warm up the embedding layers')
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs:
                model.unfreeze_backbone(
                    3)  # only train the last block of resnet backbone
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs * 2:
                model.unfreeze_backbone(2)
            elif epoch < opt.embedding_warmup_epochs + opt.backbone_warmup_epochs * 3:
                model.unfreeze_backbone(1)
            else:
                model.unfreeze_backbone(0)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        if not os.path.exists(opt.model_name):
            os.makedirs(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            # A single rolling file: the name carries no epoch number.
            filename='checkpoint.pth',
            prefix=opt.model_name + '/')
Ejemplo n.º 23
0
    # Build the run options and remember the raw dataset name before
    # opt.dataset is rewritten into a path further down.
    opt = args()
    opt.data_r = opt.dataset

    # Pick the image transform pipeline for the chosen dataset.
    # NOTE(review): `tsfm` is not used anywhere in this visible fragment
    # (only `opt` is passed to train_gan) -- confirm whether it is needed.
    if opt.data_r == 'MNIST':  # MNIST: resize + normalise only, no augmentation
        tsfm = transforms.Compose([
            transforms.Resize(opt.image_size),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])
    else:  # any other dataset: additionally random-crop (32, padding 4) and flip
        tsfm = transforms.Compose([
            transforms.Resize(opt.image_size),
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
        ])

    # Seeds the CUDA random number generator; no CPU-side seeding is visible
    # in this fragment.
    torch.cuda.manual_seed(opt.manual_seed)
    # Turn the dataset name into "<name>/<name>_<p2>".
    opt.dataset = os.path.join(opt.dataset, opt.dataset + '_' + str(opt.p2))
    # e.g. with dataset 'MNIST' and p2 = 1.0, opt.dataset is now 'MNIST/MNIST_1.0'
    # Tensorboard logs go to
    # <savingroot>/<dataset>/<p1 * 100>%complementary//logs
    # (the two string pieces are concatenated, which yields a doubled slash).
    configure(os.path.join(opt.savingroot, opt.dataset,
                           str(opt.p1 * 100) + '%complementary/' + '/logs'),
              flush_secs=5)

    # Signature reminder: tensorboard_logger.configure(logdir, flush_secs=2)
    # configures logging so that a file is written to logdir and flushed
    # every flush_secs seconds.

    train_gan(opt)
Ejemplo n.º 24
0
from torch.autograd import Variable
import torch.nn as nn
import torch.utils.data as dd
import torch

# Setup arguments and constants ===============================================
# %%
params = parser.parse_args()

# Create the output directories independently of each other.  The earlier
# form ran both os.mkdir calls inside one try block, so an already existing
# checkpoint directory raised FileExistsError before the log directory was
# ever created.  makedirs(exist_ok=True) also creates missing parents.
os.makedirs(params.checkpoint_dir, exist_ok=True)
os.makedirs(params.log_dir, exist_ok=True)

# Point the tensorboard logger at the log directory.
configure(params.log_dir)

# Fixed dimensions used by the rest of the script.
input_dim = 300
output_dim = 512

print('Starting with params:', params)

# Pre-process data and create dataloaders =====================================
# %%
print('Pre-processing data')
(train, valid, test) = data.make_dataset(params)

# Only the training split is shuffled here.
train_loader = dd.DataLoader(dataset=train,
                             batch_size=params.batch_size,
                             shuffle=True)
valid_loader = dd.DataLoader(dataset=valid,
Ejemplo n.º 25
0
else:
    config = read_config.Config("config.yml")

# Build the run identifier by filling the configured path template with the
# hyper-parameters; the literal "mix" occupies the second-to-last slot.
model_name = config.model_path.format(config.proportion,
                                      config.top_k,
                                      config.hidden_size,
                                      config.batch_size,
                                      config.optim, config.lr,
                                      config.weight_decay,
                                      config.dropout,
                                      "mix",
                                      config.mode)
print(config.config)

# Save the configuration and route tensorboard output to per-run locations
# derived from model_name.
config.write_config("log/configs/{}_config.json".format(model_name))
configure("log/tensorboard/{}".format(model_name), flush_secs=5)


# Callback object writing to log/db/<model_name>; the four metric names
# below are registered with it.
callback = Callbacks(config.batch_size, "log/db/{}".format(model_name))
callback.add_element(["train_loss", "test_loss", "train_mse", "test_mse"])

# Expression files keyed by an integer (3, 5, 7).
# NOTE(review): the keys look like program lengths for one/two/three
# operations -- confirm against the code that consumes this mapping.
data_labels_paths = {3: "data/one_op/expressions.txt",
                     5: "data/two_ops/expressions.txt",
                     7: "data/three_ops/expressions.txt"}

proportion = config.proportion  # Proportion is a percentage; valid values range over [1, 100].

# First is training size and second is validation size per program length
dataset_sizes = {
    3: [proportion * 250, proportion * 50],
    5: [proportion * 1000, proportion * 100],
Ejemplo n.º 26
0

	# set output path ==========================================================
	path_out = '../trained_models/batch12_/' + args.path_out

	# create output path
	if not os.path.exists(path_out):
		os.makedirs(path_out)

	# create output for models.  This is done unconditionally: previously
	# `path_models` was only defined (and the directory only created) when
	# `path_out` did not exist yet, so a re-run into an existing output
	# directory left the name undefined and the folder possibly missing.
	path_models = os.path.join(path_out, 'models')
	if not os.path.exists(path_models):
		os.makedirs(path_models)

	# tensorboard
	configure("{}".format(path_out), flush_secs=5)

	# data =====================================================================
	# Copy the command-line options into local names.
	batch_size = args.batch_size
	n_epochs = args.n_epochs
	lr = args.lr
	DEPTH = args.depth
	AUGMENT = args.augment
	COORD = args.coord
	FLOW = args.flow
	# Datasets for DHF1K (training and validation splits share every option
	# except `mode`)
	ds_train = DHF1K(mode=TRAIN, transformation=True, depth=DEPTH, d_augm=AUGMENT, coord=COORD)
	ds_validate = DHF1K(mode=VAL, transformation=True, depth=DEPTH, d_augm=AUGMENT, coord=COORD)
	# Dataloaders
	dataloader = {
Ejemplo n.º 27
0
from __future__ import print_function
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
from torchvision import datasets, transforms
from torch.autograd import Variable
import numpy as np
import models.joint_resnet3
from tensorboard_logger import configure, log_value
import config

# Module-level run configuration.
use_cuda = torch.cuda.is_available()  # True when a CUDA device can be used
OUTPATH = './checkpoint/checkpoint_joint3'  # checkpoint path for this run
# Tensorboard logging is configured at import time, writing to runs/run-joint3.
configure("runs/run-joint3", flush_secs=5)
# Values taken from the project's `config` module.
# NOTE(review): presumably the epoch count and batch size -- confirm there.
EPOCH = config.EPOCH
BATCH = config.BATCH

# Training dataset
train_loader = torch.utils.data.DataLoader(datasets.CIFAR100(
    root='.',
    train=True,
    download=True,
    transform=transforms.Compose([
        transforms.RandomCrop(32, padding=4),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5071, 0.4867, 0.4408),
                             (0.2675, 0.2565, 0.2761)),
    ])),
Ejemplo n.º 28
0
def main():
    """Parse the options, build a SCAN model and train it.

    After parsing the command line this configures logging, loads the pickled
    vocabulary and the data loaders, optionally loads model weights from
    ``--resume`` (followed by one validation pass), and then trains for
    ``opt.num_epochs`` epochs.  Each epoch is validated and written to
    ``checkpoint_<epoch>.pth.tar`` under ``opt.model_name``.

    Note that the epoch loop always starts at 0 and the best score starts at
    0, even when a checkpoint was loaded: only the weights and ``Eiters`` of
    the checkpoint are carried over.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/data',
        help='path to datasets')
    parser.add_argument('--data_name',
                        default='f30k_precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument(
        '--vocab_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/vocab/',
        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=20,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    # NOTE(review): the help text below is identical to --word_dim and looks
    # copy-pasted; left unchanged because the meaning of decoder_dim is not
    # visible here.
    parser.add_argument('--decoder_dim',
                        default=512,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=10,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=4,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=30,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name',
                        default='./runs/runX/checkpoint',
                        help='Path to save the model.')
    parser.add_argument(
        '--resume',
        default=
        '/data3/zhangyf/cross_modal_retrieval/vsepp_next_train_12_31_f30k/run/coco_vse++_ft_128_f30k_next/model_best.pth.tar',
        type=str,
        metavar='PATH',
        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim',
                        default=2048,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm',
                        action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument('--precomp_enc_type',
                        default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--reset_train',
                        action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    parser.add_argument('--finetune',
                        action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type',
                        default='resnet152',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--crop_size',
                        default=224,
                        type=int,
                        help='Size of an image crop as the CNN input.')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper.  The file is opened in a `with` block so the
    # handle is closed as soon as the pickle has been read.
    vocab_file = os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name)
    with open(vocab_file, 'rb') as vocab_handle:
        vocab = pickle.load(vocab_handle)
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model
    model = SCAN(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            # start_epoch / best_rsum are only reported below; training still
            # starts from epoch 0 with best_rsum = 0.
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch.  The result used to be bound to a misspelled,
        # never-read name (`bset_rsum`), so whatever train() reported was
        # silently dropped.
        # NOTE(review): presumably train() returns the updated best rsum --
        # confirm against train().  A None result leaves best_rsum untouched.
        updated_best = train(opt, train_loader, model, epoch, val_loader,
                             best_rsum)
        if updated_best is not None:
            best_rsum = updated_best

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # makedirs (not mkdir) so a missing parent directory is created too.
        if not os.path.exists(opt.model_name):
            os.makedirs(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
def main():
    """Train and evaluate a classifier, keeping track of the best prec@1.

    Uses the module-level ``parser`` to fill the global ``args``, builds the
    train/validation loaders for ``args.dataset`` under ``../data``, creates
    the model, an SGD optimizer and a MultiStepLR scheduler, optionally
    restores everything from ``args.resume`` and then runs the epoch loop,
    saving a checkpoint after every epoch.
    """
    global args, best_prec1
    args = parser.parse_args()
    # torch.cuda.set_device(args.gpu)
    if args.tensorboard:
        print("Using TensorBoard")
        configure("exp/%s" % (args.name))

    # Data loading code
    # Training images: edge-pad by 4, take a random 32-pixel crop, random flip.
    train_steps = [
        transforms.Pad(4, padding_mode='edge'),
        transforms.RandomCrop(32),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]
    transform_train = transforms.Compose(train_steps)
    # Validation images are only converted to tensors.
    transform_test = transforms.Compose([transforms.ToTensor()])

    kwargs = dict(num_workers=1, pin_memory=True)

    # The dataset class is looked up by its upper-cased name.
    train_data = datasets.__dict__[args.dataset.upper()](
        '../data', train=True, download=True, transform=transform_train)
    train_loader = torch.utils.data.DataLoader(train_data,
                                               batch_size=args.batch_size,
                                               shuffle=True,
                                               **kwargs)
    val_data = datasets.__dict__[args.dataset.upper()](
        '../data', train=False, transform=transform_test)
    val_loader = torch.utils.data.DataLoader(val_data,
                                             batch_size=args.batch_size,
                                             shuffle=False,
                                             **kwargs)

    # create model
    model = get_model(args.arch, args.dataset, args.num_rotate_classes).cuda()

    # get the number of model parameters
    param_total = sum(p.data.nelement() for p in model.parameters())
    print('Number of model parameters: {}'.format(param_total))

    # define optimizer
    optimizer = torch.optim.SGD(model.parameters(),
                                args.lr,
                                momentum=args.momentum,
                                nesterov=args.nesterov,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    if args.resume:
        if not os.path.isfile(args.resume):
            print("=> no checkpoint found at '{}'".format(args.resume))
        else:
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            best_prec1 = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['model_state'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))

    cudnn.benchmark = True

    # define learning rate scheduler; without explicit milestones the only
    # milestone is the final epoch count
    milestones = args.milestones or [args.epochs]
    scheduler = lr_scheduler.MultiStepLR(optimizer,
                                         milestones=milestones,
                                         gamma=args.gamma,
                                         last_epoch=args.start_epoch - 1)

    # define loss function (criterion)
    criterion = nn.CrossEntropyLoss().cuda()

    for epoch in range(args.start_epoch, args.epochs):
        scheduler.step()

        # train for one epoch
        train(train_loader, model, criterion, optimizer, epoch)

        # evaluate on validation set
        prec1 = validate(val_loader, model, criterion, epoch)

        # remember best prec@1 and save checkpoint
        is_best = prec1 > best_prec1
        best_prec1 = max(prec1, best_prec1)
        snapshot = {
            'epoch': epoch + 1,
            'model_state': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'best_prec1': best_prec1,
        }
        save_checkpoint(snapshot, is_best)
    print('Best accuracy: {}'.format(best_prec1))
Ejemplo n.º 30
0
def main():
    """Parse the options, build a SCAN model and train it.

    After parsing the command line this configures logging, loads the
    vocabulary and the data loaders, builds the model and trains it up to
    ``opt.num_epochs`` epochs, validating and writing
    ``checkpoint_<epoch>.pth.tar`` under ``opt.model_name`` after each one.

    When ``--resume`` names an existing checkpoint, its weights, ``Eiters``,
    epoch counter and best validation score are restored and training
    continues from the stored epoch.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='./data/',
                        help='path to datasets')
    parser.add_argument('--data_name',
                        default='precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=30,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=15,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name',
                        default='./runs/runX/checkpoint',
                        help='Path to save the model.')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim',
                        default=2048,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm',
                        action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument(
        '--raw_feature_norm',
        default="clipped_l2norm",
        help='clipped_l2norm|l2norm|clipped_l1norm|l1norm|no_norm|softmax')
    parser.add_argument('--agg_func',
                        default="LogSumExp",
                        help='LogSumExp|Mean|Max|Sum')
    parser.add_argument('--cross_attn', default="t2i", help='t2i|i2t')
    parser.add_argument('--precomp_enc_type',
                        default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--bi_gru',
                        action='store_true',
                        help='Use bidirectional GRU.')
    parser.add_argument('--lambda_lse',
                        default=6.,
                        type=float,
                        help='LogSumExp temp.')
    parser.add_argument('--lambda_softmax',
                        default=9.,
                        type=float,
                        help='Attention softmax temperature.')
    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model
    model = SCAN(opt)

    # Defaults for a fresh run; a successful resume overrides them.  They are
    # set before the resume block so the restored values are not discarded:
    # previously best_rsum was reset to 0 afterwards and the loop always
    # started at epoch 0, which restarted the learning-rate schedule,
    # overwrote earlier checkpoint_<epoch> files and let a worse model be
    # flagged as the best one.
    start_epoch = 0
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # makedirs (not mkdir) so a missing parent directory is created too.
        if not os.path.exists(opt.model_name):
            os.makedirs(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
Ejemplo n.º 31
0
    def __init__(self, config, data_loader):
        """
        Set up a Trainer from the parsed configuration.

        Copies the network, training and bookkeeping options onto the
        instance, builds the DenseNet model with its loss and SGD optimizer,
        moves them to the GPU when requested and configures tensorboard
        logging.

        Params
        ------
        - config: object containing command line arguments.
        - data_loader: data iterator; indexed as (train, valid) when
          `config.is_train` is true, otherwise stored as the test loader.
        """
        self.config = config

        # data iterators
        if not config.is_train:
            self.test_loader = data_loader
        else:
            self.train_loader = data_loader[0]
            self.valid_loader = data_loader[1]

        # network params
        self.num_blocks = config.num_blocks
        self.num_layers_total = config.num_layers_total
        self.growth_rate = config.growth_rate
        self.bottleneck = config.bottleneck
        self.theta = config.compression

        # training params
        self.epochs = config.epochs
        self.start_epoch = 0
        self.best_valid_acc = 0.
        self.init_lr = config.init_lr
        self.lr = self.init_lr
        self.momentum = config.momentum
        self.weight_decay = config.weight_decay
        self.dropout_rate = config.dropout_rate

        # An empty schedule string switches learning-rate decay off;
        # otherwise it is parsed as a comma-separated list of floats.
        self.is_decay = config.lr_sched != ''
        if self.is_decay:
            self.lr_decay = [float(tok) for tok in config.lr_sched.split(',')]

        # other params
        self.ckpt_dir = config.ckpt_dir
        self.logs_dir = config.logs_dir
        self.num_gpu = config.num_gpu
        self.use_tensorboard = config.use_tensorboard
        self.resume = config.resume
        self.print_freq = config.print_freq
        self.dataset = config.dataset

        # 10 classes for cifar10, 100 for cifar100, 1000 for anything else
        self.num_classes = (
            10 if self.dataset == 'cifar10'
            else 100 if self.dataset == 'cifar100'
            else 1000
        )

        # build densenet model
        self.model = DenseNet(
            self.num_blocks,
            self.num_layers_total,
            self.growth_rate,
            self.num_classes,
            self.bottleneck,
            self.dropout_rate,
            self.theta,
        )

        param_total = sum(p.data.nelement() for p in self.model.parameters())
        print('[*] Number of model parameters: {:,}'.format(param_total))

        # define loss and optimizer
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.SGD(
            self.model.parameters(),
            lr=self.init_lr,
            momentum=self.momentum,
            weight_decay=self.weight_decay,
        )

        # move model and loss to the GPU when at least one is requested
        if self.num_gpu > 0:
            self.model.cuda()
            self.criterion.cuda()

        # finally configure tensorboard logging
        if self.use_tensorboard:
            tensorboard_dir = self.logs_dir + self.get_model_name()
            print('[*] Saving tensorboard logs to {}'.format(tensorboard_dir))
            if not os.path.exists(tensorboard_dir):
                os.makedirs(tensorboard_dir)
            configure(tensorboard_dir)
Ejemplo n.º 32
0
    def __init__(self, config, data_loader):
        """
        Set up a Trainer for the recurrent attention model.

        Copies the options from `config` onto the instance, creates the plot
        directory (and, if enabled, the tensorboard directory), builds the
        model and its Adam optimizer.

        Args
        ----
        - config: object containing command line arguments.
        - data_loader: data iterator; indexed as (train, valid) when
          `config.is_train` is true, otherwise stored as the test loader.
        """
        self.config = config

        # glimpse network params
        self.patch_size = config.patch_size
        self.glimpse_scale = config.glimpse_scale
        self.num_patches = config.num_patches
        self.loc_hidden = config.loc_hidden
        self.glimpse_hidden = config.glimpse_hidden

        # core network params
        self.num_glimpses = config.num_glimpses
        self.hidden_size = config.hidden_size

        # reinforce params
        self.std = config.std
        self.M = config.M

        # data params: split sizes come from the samplers' index lists when
        # training, from the dataset length when testing
        if not config.is_train:
            self.test_loader = data_loader
            self.num_test = len(self.test_loader.dataset)
        else:
            self.train_loader = data_loader[0]
            self.valid_loader = data_loader[1]
            self.num_train = len(self.train_loader.sampler.indices)
            self.num_valid = len(self.valid_loader.sampler.indices)
        self.num_classes = 10
        self.num_channels = 1

        # training params
        self.epochs = config.epochs
        self.start_epoch = 0
        self.momentum = config.momentum
        self.lr = config.init_lr

        # misc params
        self.use_gpu = config.use_gpu
        self.best = config.best
        self.ckpt_dir = config.ckpt_dir
        self.logs_dir = config.logs_dir
        self.best_valid_acc = 0.
        self.counter = 0
        self.lr_patience = config.lr_patience
        self.train_patience = config.train_patience
        self.use_tensorboard = config.use_tensorboard
        self.resume = config.resume
        self.print_freq = config.print_freq
        self.plot_freq = config.plot_freq

        # run identifier: ram_<glimpses>_<patch>x<patch>_<scale>
        name_fields = (
            config.num_glimpses,
            config.patch_size,
            config.patch_size,
            config.glimpse_scale,
        )
        self.model_name = 'ram_{}_{}x{}_{}'.format(*name_fields)

        # make sure the per-model plot directory exists
        self.plot_dir = './plots/' + self.model_name + '/'
        if not os.path.exists(self.plot_dir):
            os.makedirs(self.plot_dir)

        # configure tensorboard logging
        if self.use_tensorboard:
            tensorboard_dir = self.logs_dir + self.model_name
            print('[*] Saving tensorboard logs to {}'.format(tensorboard_dir))
            if not os.path.exists(tensorboard_dir):
                os.makedirs(tensorboard_dir)
            configure(tensorboard_dir)

        # build RAM model
        self.model = RecurrentAttention(
            self.patch_size,
            self.num_patches,
            self.glimpse_scale,
            self.num_channels,
            self.loc_hidden,
            self.glimpse_hidden,
            self.std,
            self.hidden_size,
            self.num_classes,
        )
        if self.use_gpu:
            self.model.cuda()

        param_total = sum(p.data.nelement() for p in self.model.parameters())
        print('[*] Number of model parameters: {:,}'.format(param_total))

        # Optimizer: Adam with a fixed learning rate of 3e-4.  self.lr,
        # self.momentum and self.lr_patience are stored above but are not
        # passed to the optimizer here (an SGD + ReduceLROnPlateau setup that
        # used them is disabled).
        self.optimizer = optim.Adam(self.model.parameters(), lr=3e-4)
def main():
    """Build the data loaders, model and optimizer, then run the training loop.

    Every setting is read from the module-level ``args`` namespace.  The
    network is created with ``num_classes + 1`` outputs (one more than the
    number of labels of the selected dataset), ``train_rowl`` is called once
    per epoch with a targeted ``LinfPGDAttack``, ``validate`` is called after
    every epoch, and a checkpoint is written every ``args.save_epoch`` epochs.

    Side effects visible here: selecting ``"SVHN"`` overwrites
    ``args.epochs`` and ``args.save_epoch``; resuming overwrites
    ``args.start_epoch``.

    Raises:
        ValueError: if ``args.in_dataset`` or ``args.model_arch`` is not one
            of the supported values.
    """
    if args.tensorboard:
        configure("runs/%s" % (args.name))

    # Augmentation (random crop + horizontal flip) only affects the training
    # transform; the test transform is always a plain tensor conversion.
    if args.augment:
        transform_train = transforms.Compose([
            transforms.RandomCrop(32, padding=4),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
        ])
    else:
        transform_train = transforms.Compose([
            transforms.ToTensor(),
        ])

    transform_test = transforms.Compose([
        transforms.ToTensor(),
    ])

    kwargs = {'num_workers': 1, 'pin_memory': True}

    if args.in_dataset == "CIFAR-10":
        # Data loading code
        # The normalizer is handed to the model constructor below rather than
        # being applied inside the data transforms.
        normalizer = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
                                          std=[x/255.0 for x in [63.0, 62.1, 66.7]])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/CIFAR-10', transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR10('./datasets/cifar10', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        num_classes = 10
        lr_schedule = [50, 75, 90]
    elif args.in_dataset == "CIFAR-100":
        # Data loading code
        normalizer = transforms.Normalize(mean=[x/255.0 for x in [125.3, 123.0, 113.9]],
                                          std=[x/255.0 for x in [63.0, 62.1, 66.7]])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/CIFAR-100', transform=transform_train),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        val_loader = torch.utils.data.DataLoader(
            datasets.CIFAR100('./datasets/cifar100', train=False, transform=transform_test),
            batch_size=args.batch_size, shuffle=True, **kwargs)

        num_classes = 100
        lr_schedule = [50, 75, 90]
    elif args.in_dataset == "SVHN":
        # Data loading code
        # SVHN uses no normalizer and ignores args.augment: training images
        # only go through ToTensor.
        normalizer = None
        transform = transforms.Compose([transforms.ToTensor(),])
        train_loader = torch.utils.data.DataLoader(
            torchvision.datasets.ImageFolder('./datasets/row_train_data/SVHN', transform=transform),
            batch_size=args.batch_size, shuffle=True, **kwargs)
        val_loader = torch.utils.data.DataLoader(
            svhn.SVHN('datasets/svhn/', split='test',
                      transform=transforms.ToTensor(), download=False),
            batch_size=args.batch_size, shuffle=False, **kwargs)

        # SVHN overrides the command-line epoch count and checkpoint interval.
        args.epochs = 20
        args.save_epoch = 2
        lr_schedule = [10, 15, 18]
        num_classes = 10
    else:
        # Without this branch an unknown dataset name only surfaced later as
        # a NameError on num_classes / normalizer.
        raise ValueError('Not supported dataset: {}'.format(args.in_dataset))

    # create model
    if args.model_arch == 'densenet':
        model = dn.DenseNet3(args.layers, num_classes + 1, args.growth, reduction=args.reduce,
                             bottleneck=args.bottleneck, dropRate=args.droprate, normalizer=normalizer)
    elif args.model_arch == 'wideresnet':
        model = wn.WideResNet(args.depth, num_classes + 1, widen_factor=args.width, dropRate=args.droprate, normalizer=normalizer)
    else:
        # Raise instead of `assert False`: asserts are removed under `python -O`.
        raise ValueError('Not supported model arch: {}'.format(args.model_arch))

    attack = LinfPGDAttack(model=model, eps=args.epsilon, nb_iter=args.iters, eps_iter=args.iter_size, rand_init=True, targeted=True, num_classes=num_classes+1, loss_func='CE', elementwise_best=True)

    # get the number of model parameters
    print('Number of model parameters: {}'.format(
        sum([p.data.nelement() for p in model.parameters()])))

    model = model.cuda()

    cudnn.benchmark = True

    # define loss function (criterion) and optimizer
    criterion = nn.CrossEntropyLoss().cuda()

    optimizer = torch.optim.SGD(model.parameters(), args.lr,
                                momentum=args.momentum,
                                nesterov=True,
                                weight_decay=args.weight_decay)

    # optionally resume from a checkpoint
    # Only the epoch counter and the model weights are restored; the
    # checkpoints written below do not contain optimizer state.
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume)
            args.start_epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['state_dict'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch, lr_schedule)

        # train for one epoch
        train_rowl(train_loader, model, criterion, optimizer, epoch, num_classes, attack)

        # evaluate on validation set (the returned value is not used here)
        validate(val_loader, model, criterion, num_classes, epoch)

        # save a checkpoint every args.save_epoch epochs
        if (epoch + 1) % args.save_epoch == 0:
            save_checkpoint({
                'epoch': epoch + 1,
                'state_dict': model.state_dict(),
            }, epoch + 1)
Ejemplo n.º 34
0
def test(rank, args, shared_model, dtype):
    """Evaluate ``shared_model`` episode by episode, logging and saving it.

    A local ``ActorCritic`` copy is reloaded from ``shared_model`` whenever an
    episode has just ended, then stepped through a ``LoveLetterEnv`` choosing
    the highest-probability action each step.  After every episode the reward
    statistics are printed and sent to tensorboard_logger.  When the episode
    reward ties or beats the best seen so far, the shared weights are saved
    and compared against ``AgentRandom`` via ``Arena.compare_agents_float``.

    Args:
        rank: worker index; added to ``args.seed`` to derive this worker's seed.
        args: settings namespace; reads ``seed``, ``save_name``,
            ``max_episode_length`` and ``evaluate``.
        shared_model: model whose ``state_dict()`` is copied and saved.
        dtype: tensor type passed to ``.type(...)`` for the model and its inputs.

    The loop has no exit condition, so this function never returns normally.
    """
    worker_seed = args.seed + rank
    torch.manual_seed(worker_seed)

    # set up logger: run name is "<save_name>_<date>_<time>"
    day_stamp = str(date.today())
    clock_stamp = time.strftime("%Hh-%Mm-%Ss", time.localtime(time.time()))
    run_name = '_'.join((args.save_name, day_stamp, clock_stamp))
    configure("logs/run_" + run_name, flush_secs=5)

    env = LoveLetterEnv(AgentRandom(worker_seed), worker_seed)
    env.seed(worker_seed)
    state = env.reset()

    model = ActorCritic(state.shape[0], env.action_space).type(dtype)
    model.eval()

    state = torch.from_numpy(state).type(dtype)

    episodes_done = 0
    episode_return = 0
    episode_length = 0
    best_return = -99999999
    best_winrate = 0
    recent_returns = deque([], 100)  # rolling window of the last 100 returns
    done = True
    started_at = time.time()

    while True:
        episode_length += 1

        if done:
            # New episode: sync with the shared model and zero the hidden state.
            model.load_state_dict(shared_model.state_dict())
            cx_data = torch.zeros(1, 256).type(dtype)
            hx_data = torch.zeros(1, 256).type(dtype)
        else:
            # Mid-episode: carry the hidden state over, re-wrapped from .data.
            cx_data = cx.data.type(dtype)
            hx_data = hx.data.type(dtype)
        cx = Variable(cx_data, volatile=True)
        hx = Variable(hx_data, volatile=True)

        observation = Variable(state.unsqueeze(0), volatile=True)
        _, logit, (hx, cx) = model((observation, (hx, cx)))
        # Greedy action: index of the largest softmax probability.
        action = F.softmax(logit).max(1)[1].data.cpu().numpy()

        state, reward, done, _ = env.step(action[0, 0])
        done = done or episode_length >= args.max_episode_length
        episode_return += reward

        if not done:
            state = torch.from_numpy(state).type(dtype)
            continue

        # ---- episode finished: report, maybe save, then reset ----
        recent_returns.append(episode_return)
        mean_recent_return = sum(recent_returns) / len(recent_returns)
        elapsed = time.strftime("%Hh %Mm %Ss",
                                time.gmtime(time.time() - started_at))
        print(
            "{} | Episode Reward {: >4}, Length {: >2} | Avg Reward {:0.2f}"
            .format(elapsed, episode_return, episode_length,
                    mean_recent_return))

        episode_scalars = (
            ('Reward', episode_return),
            ('Reward Average', mean_recent_return),
            ('Episode length', episode_length),
        )
        for tag, scalar in episode_scalars:
            log_value(tag, scalar, episodes_done)

        if episode_return >= best_return:
            # Save under the fixed "_max" name and under a timestamped name.
            max_path = args.save_name + '_max'
            torch.save(shared_model.state_dict(), max_path)
            stamped_path = "{}_{}".format(
                args.save_name, datetime.datetime.now().isoformat())
            torch.save(shared_model.state_dict(), stamped_path)
            best_return = episode_return

            win_rate = Arena.compare_agents_float(
                lambda seed: AgentA3C(max_path, dtype, seed),
                lambda seed: AgentRandom(seed), 800)
            print(" {} | VsRandom: {: >4}%".format(
                datetime.datetime.now().strftime("%c"),
                round(win_rate * 100, 2)))
            log_value('Win Rate vs Random', win_rate, episodes_done)

            if win_rate > best_winrate:
                print("Found superior model at {}".format(
                    datetime.datetime.now().isoformat()))
                weights = shared_model.state_dict()
                best_path = "{}_{}_best_{}".format(
                    args.save_name,
                    datetime.datetime.now().isoformat(),
                    win_rate)
                torch.save(weights, best_path)
                best_winrate = win_rate

        episode_return = 0
        episode_length = 0
        state = env.reset()
        episodes_done += 1

        if args.evaluate:
            if episodes_done == evaluation_episodes:
                # Ensure the environment is closed so we can complete the
                # submission
                env.close()
        else:
            # Outside evaluate mode: save every 10th episode, then wait 60 s
            # before the next evaluation episode.
            if episodes_done % 10 == 0:
                torch.save(shared_model.state_dict(), args.save_name)
            time.sleep(60)

        state = torch.from_numpy(state).type(dtype)
Ejemplo n.º 35
0
# Create the "images" and "models" sub-directories of this run's folder.
# os.mkdir raises if the directory already exists or if its parent
# ("<output_folder>/run-<run>") is missing, so the parent must exist by now.
os.mkdir("%s/run-%d/images" % (param.output_folder, run))
os.mkdir("%s/run-%d/models" % (param.output_folder, run))

# where we save the output
# open() does not create directories: the run's "logs" folder must already
# exist. NOTE(review): the handle is not closed anywhere in this excerpt.
log_output = open("%s/run-%d/logs/log.txt" % (param.output_folder, run), 'w')
# Echo the run parameters to both stdout and the log file.
print(param)
print(param, file=log_output)

import numpy
import torch
import torch.autograd as autograd
from torch.autograd import Variable

# For plotting the Loss of D and G using tensorboard
# tensorboard_logger is pointed at the same "logs" folder that holds log.txt.
# flush_secs=5 presumably sets the flush interval in seconds -- confirm
# against the tensorboard_logger documentation.
from tensorboard_logger import configure, log_value
configure("%s/run-%d/logs" % (param.output_folder, run), flush_secs=5)

import torchvision
import torchvision.datasets as dset
import torchvision.transforms as transf
import torchvision.models as models
import torchvision.utils as vutils

# cuDNN is only imported and configured when CUDA was requested; benchmark
# mode lets cuDNN try several convolution algorithms and keep the fastest.
if param.cuda:
	import torch.backends.cudnn as cudnn
	cudnn.benchmark = True

# To see images
# to_img is a reusable torchvision transform that converts a tensor (or
# ndarray) into a PIL image.
from IPython.display import Image
to_img = transf.ToPILImage()
Ejemplo n.º 36
0
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

from tensorboard_logger import configure, log_value

from utils import spacy_tokenizer, file_stats
from utils import create_word_vocab, create_weights
from utils import to_tensor

from model import ATAE_LSTM

# Send tensorboard_logger output for this run to "runs/model_pd_10".
# flush_secs=5 presumably sets the flush interval in seconds -- confirm
# against the tensorboard_logger documentation.
configure("runs/model_pd_10", flush_secs=5)

###################################################################
########################## Data ###################################
###################################################################

# Relative paths of the .json data files; they are only defined here, not
# opened. NOTE(review): the contents/schema of these files are not visible in
# this part of the script.
restaurant_train_file = 'Data/Restaurant/restaurant_train.json'
restaurant_test_file =  'Data/Restaurant/restaurant_test.json'
pd_data = 'Data/targeted_data.json'

###################################################################
##################### Vocab Initialization ########################
###################################################################
# `words` starts with the '<pad>' token as its first entry (index 0 of the
# list); `word2idx` starts out empty at this point.
words = []
word2idx = {}
words.append('<pad>')