Code example #1
0
File: train.py — Project: shenkev/vsepp
def main():
    """Train a visual-semantic embedding (VSE) model.

    Parses hyper-parameters, loads the vocabulary and data loaders,
    optionally resumes from a checkpoint, then runs the train/validate
    loop, checkpointing the model with the best R@K sum.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='/media/shenkev/data/Ubuntu/vsepp/data/data',
                        help='path to datasets')
    parser.add_argument('--data_name',
                        default='coco_precomp',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=30,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size',
                        default=224,
                        type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=15,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='runs/coco_vse',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    # NOTE(review): default=True together with action='store_true' means this
    # option is effectively always True and cannot be disabled from the CLI.
    # Kept as-is so existing run scripts keep their current behavior.
    parser.add_argument('--max_violation',
                        default=True,
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim',
                        default=4096,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune',
                        action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type',
                        default='vgg19',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--use_restval',
                        action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure',
                        default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--use_abs',
                        action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper (use a context manager so the pickle file
    # handle is closed; the original left it open).
    with open(os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name),
              'rb') as f:
        vocab = pickle.load(f)
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.crop_size, opt.batch_size,
                                                opt.workers, opt)

    # Construct the model
    model = VSE(opt)

    # Training state; overwritten below when resuming from a checkpoint.
    start_epoch = 0
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            # BUGFIX: the original reset best_rsum to 0 *after* this block,
            # discarding the checkpoint's best score on resume and letting a
            # worse model overwrite the saved best checkpoint.
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model, continuing from start_epoch when resuming (the
    # original always restarted at epoch 0 even after loading a checkpoint).
    for epoch in range(start_epoch, opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            prefix=opt.logger_name + '/')
Code example #2
0
def main():
    """Train or evaluate MobileNetV2.

    Parses module-level CLI args, seeds every RNG, builds the model,
    loss, optimizer and LR scheduler, optionally resumes from a
    checkpoint file or results directory, then launches training.
    """
    args = parser.parse_args()

    # Draw a random seed when none is given; it is printed so the run can
    # still be reproduced afterwards.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # BUGFIX: compare string *values* with '==' instead of identity ('is'),
    # which is implementation-dependent for literals and emits a
    # SyntaxWarning on modern CPython.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # All computation runs on the first listed GPU (DataParallel fans out
    # to the rest below), or on CPU when no GPUs are given.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = MobileNet2(input_size=args.input_size, scale=args.scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    # Per-GPU batch size is used for the FLOPs benchmark when training is
    # split across several GPUs.
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    args.batch_size // len(args.gpus) if args.gpus is not None else args.batch_size,
                                    device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size, args.batch_size, args.input_size,
                                           args.workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), args.learning_rate, momentum=args.momentum, weight_decay=args.decay,
                                nesterov=True)
    # Optional cyclical-LR range test: find LR bounds, dump plots, and exit.
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=args.min_lr,
                        max_lr=args.max_lr, step_size=args.epochs_per_step * len(train_loader), mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer, base_lr=args.min_lr, max_lr=args.max_lr,
                             step_size=args.epochs_per_step * len(train_loader), mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer, milestones=args.schedule, gamma=args.gamma)

    best_test = 0

    # optionally resume from a checkpoint; `data` carries previously logged
    # CSV rows when resuming from a results directory.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})"
                  .format(args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            # A directory is expected to hold checkpoint.pth.tar plus the
            # results.csv written by CsvLogger in the previous run.
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper's claimed accuracies for this input size / width
    # multiplier, when available, so they can be drawn on the result plots.
    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
            csv_logger.write_text(
                'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model, train_loader, val_loader, optimizer, criterion,
                  device, dtype, args.batch_size, args.log_interval, csv_logger, save_path, claimed_acc1, claimed_acc5,
                  best_test)
Code example #3
0
def main():
    """Entry point: configure logging, seed RNGs, build model and optimizer,
    optionally resume from a checkpoint, then run the iteration-bounded
    training loop and flush the tensorboard log."""
    opt = get_opt()
    tb_logger.configure(opt.logger_name, flush_secs=5, opt=opt)
    logfname = os.path.join(opt.logger_name, 'log.txt')
    logging.basicConfig(filename=logfname,
                        format='%(asctime)s %(message)s',
                        level=logging.INFO)
    # Mirror the log file to stdout as well.
    logging.getLogger().addHandler(logging.StreamHandler(sys.stdout))

    logging.info(str(opt.d))

    torch.manual_seed(opt.seed)
    if opt.cuda:
        # TODO: remove deterministic
        torch.backends.cudnn.deterministic = True
        torch.cuda.manual_seed(opt.seed)
        # NOTE(review): numpy is only seeded on the CUDA path — confirm this
        # is intentional; CPU-only runs keep an unseeded numpy RNG.
        np.random.seed(opt.seed)
    # helps with wide-resnet by reducing memory and time 2x
    cudnn.benchmark = True

    train_loader, test_loader, train_test_loader = get_loaders(opt)

    # epoch_iters == 0 means "derive from dataset size": one epoch is
    # ceil(num_examples / batch_size) iterations.
    if opt.epoch_iters == 0:
        opt.epoch_iters = int(
            np.ceil(1. * len(train_loader.dataset) / opt.batch_size))
    opt.maxiter = opt.epoch_iters * opt.epochs
    # With g_epoch set, gvar_start / g_optim_start are given in epochs and
    # are converted to iteration counts here.
    if opt.g_epoch:
        opt.gvar_start *= opt.epoch_iters
        opt.g_optim_start = (opt.g_optim_start * opt.epoch_iters) + 1

    model = models.init_model(opt)

    optimizer = OptimizerFactory(model, train_loader, tb_logger, opt)
    epoch = 0
    save_checkpoint = utils.SaveCheckpoint()

    # optionally resume from a checkpoint
    if not opt.noresume:
        model_path = os.path.join(opt.logger_name, opt.ckpt_name)
        if os.path.isfile(model_path):
            print("=> loading checkpoint '{}'".format(model_path))
            checkpoint = torch.load(model_path)
            best_prec1 = checkpoint['best_prec1']
            # Restore gradient-variance estimator state and the global
            # iteration counter alongside the model weights.
            optimizer.gvar.load_state_dict(checkpoint['gvar'])
            optimizer.niters = checkpoint['niters']
            epoch = checkpoint['epoch']
            model.load_state_dict(checkpoint['model'])
            save_checkpoint.best_prec1 = best_prec1
            print("=> loaded checkpoint '{}' (epoch {}, best_prec {})".format(
                model_path, epoch, best_prec1))
        else:
            print("=> no checkpoint found at '{}'".format(model_path))

    # An explicit --niters budget overrides the epochs-derived limit.
    if opt.niters > 0:
        max_iters = opt.niters
    else:
        max_iters = opt.epochs * opt.epoch_iters

    if opt.untrain_steps > 0:
        untrain(model, optimizer.gvar, opt)

    # The loop is bounded by optimizer.niters (total iterations), not by
    # epochs; train() returning -1 signals an early stop.
    while optimizer.niters < max_iters:
        optimizer.epoch = epoch
        utils.adjust_lr(optimizer, opt)
        ecode = train(tb_logger, epoch, train_loader, model, optimizer, opt,
                      test_loader, save_checkpoint, train_test_loader)
        if ecode == -1:
            break
        epoch += 1
    tb_logger.save_log()
Code example #4
0
                    required=False,
                    help='Model Save Directory')

parser.add_argument('--graph_dir',
                    type=str,
                    default="./graphs/cifar10_r10",
                    required=False,
                    help='Graph Save Directory')

args = parser.parse_args()
print(args)

# Prefer GPU when available; `device` is also used below to remap the
# checkpoint tensors.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

train_loader, test_loader = get_loaders(args)

asmv = AsymValley(args.graph_dir)

num_classes = 10

# ResNet-18 backbone with the final layer resized for 10 classes.
model = models.resnet18()
model.fc = nn.Linear(512, num_classes)

# BUGFIX: pass map_location so a state_dict saved on a GPU machine also
# loads on CPU-only hosts (torch.load otherwise tries to restore tensors
# onto their original CUDA device and crashes).
model.load_state_dict(torch.load("./checkpoints/r18_10", map_location=device))

criterion = nn.CrossEntropyLoss()

model = model.to(device)

asmv.draw(model, evalav, train_loader, test_loader, criterion, 1.8, 100)
Code example #5
0
def main():
    """Train a classifier with an optional group-robust (DRO) loss.

    Uses the module-level `args`; builds the data loaders, model, loss and
    optimizer, then runs the epoch loop with periodic group-wise validation,
    early-stopping checkpoints on worst-case group accuracy, and optional
    wandb logging.
    """
    if torch.cuda.is_available():
        args.device = torch.device('cuda')
        args.cuda = True
    else:
        args.device = torch.device('cpu')
        args.cuda = False

    # Make as reproducible as possible.
    # Please note that pytorch does not let us make things completely reproducible across machines.
    # See https://pytorch.org/docs/stable/notes/randomness.html
    if args.seed is not None:
        print('setting seed', args.seed)
        torch.manual_seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    # Get data
    train_loader, train_eval_loader, val_loader, _ = data.get_loaders(args)
    args.n_groups = train_loader.dataset.n_groups

    # Get model
    model = utils.get_model(args, image_shape=train_loader.dataset.image_shape)
    model = model.to(args.device)

    # Loss Fn: per-example losses feed the group-robust reweighting;
    # otherwise a plain mean cross-entropy is used.
    if args.use_robust_loss:
        loss_fn = nn.CrossEntropyLoss(reduction='none')
        loss_computer = LossComputer(loss_fn,
                                     is_robust=True,
                                     dataset=train_loader.dataset,
                                     step_size=args.robust_step_size,
                                     device=args.device,
                                     args=args)
    else:
        loss_fn = nn.CrossEntropyLoss()

    # Optimizer
    if args.optimizer == 'adam':  # This is used for MNIST.
        optimizer = torch.optim.Adam(model.parameters(), lr=args.learning_rate)
    elif args.optimizer == 'sgd':
        # From DRNN paper
        optimizer = torch.optim.SGD(filter(lambda p: p.requires_grad,
                                           model.parameters()),
                                    lr=args.learning_rate,
                                    momentum=0.9,
                                    weight_decay=args.weight_decay)
    else:
        # BUGFIX: fail fast instead of hitting a NameError on an unbound
        # `optimizer` further down.
        raise ValueError('Unknown optimizer: {}'.format(args.optimizer))

    # Train loop
    best_worst_case_acc = 0
    best_worst_case_acc_epoch = 0
    avg_val_acc = 0
    empirical_val_acc = 0

    for epoch in trange(args.num_epochs):
        total_loss = 0
        total_accuracy = 0
        total_examples = 0

        model.train()

        for batch_id, (images, labels, group_ids) in enumerate(
                tqdm(train_loader, desc='train loop')):

            # Put on GPU
            images = images.to(args.device)
            labels = labels.to(args.device)

            # Forward
            logits = model(images)
            if args.use_robust_loss:
                group_ids = group_ids.to(args.device)
                loss = loss_computer.loss(logits,
                                          labels,
                                          group_ids,
                                          is_training=True)
            else:
                loss = loss_fn(logits, labels)

            # Evaluate. np.sum over the boolean comparison yields the COUNT
            # of correct predictions in this batch.
            preds = np.argmax(logits.detach().cpu().numpy(), axis=1)
            accuracy = np.sum(
                preds == labels.detach().cpu().numpy().reshape(-1))
            # BUGFIX: the original added `accuracy * labels.shape[0]`, which
            # double-counts the batch size and inflated "Train Accuracy" by
            # a factor of the batch size. The count is accumulated directly;
            # dividing by total_examples below gives the true fraction.
            total_accuracy += accuracy
            total_loss += loss.item() * labels.shape[0]
            total_examples += labels.shape[0]

            # Backprop
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        if epoch % args.epochs_per_eval == 0:

            worst_case_acc, stats = utils.evaluate_groups(
                args, model, val_loader, epoch, split='val')  # validation

            # Track early stopping values with respect to worst case.
            if worst_case_acc > best_worst_case_acc:
                best_worst_case_acc = worst_case_acc

                save_model(model, ckpt_dir, epoch, args.device)

            # Log early stopping values
            if args.log_wandb:
                wandb.log({
                    "Train Loss": total_loss / total_examples,
                    "Best Worst Case Val Acc": best_worst_case_acc,
                    "Train Accuracy": total_accuracy / total_examples,
                    "epoch": epoch
                })

            # (dropped the useless f-prefix: the literal has no placeholders)
            print("Epoch: ", epoch, "Worst Case Acc: ", worst_case_acc)
Code example #6
0
def main():
    """Set up and optionally resume a VSRN image-text matching run.

    Parses hyper-parameters, loads the vocabulary (JSON) and stopword list,
    builds the data loaders and the VSRN model, and restores state from a
    checkpoint when --resume points at one.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='/data',
                        help='path to datasets')
    parser.add_argument('--data_name',
                        default='precomp',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin',
                        default=0.05,
                        type=float,
                        help='loss margin.')
    parser.add_argument('--temperature',
                        default=14,
                        type=int,
                        help='loss temperature.')
    parser.add_argument('--num_epochs',
                        default=9,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size',
                        default=2048,
                        type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=4,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='runs/runX',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--img_dim',
                        default=2048,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--measure',
                        default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--use_abs',
                        action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--seed', default=1, type=int, help='random seed.')
    parser.add_argument('--use_atten', action='store_true', help='use_atten')
    parser.add_argument('--lambda_softmax',
                        default=9.,
                        type=float,
                        help='Attention softmax temperature.')
    parser.add_argument('--use_box', action='store_true', help='use_box')
    parser.add_argument('--use_label', action='store_true', help='use_label')
    parser.add_argument('--use_mmd', action='store_true', help='use_mmd')
    parser.add_argument('--score_path',
                        default='../user_data/score.npy',
                        type=str)

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)

    # Load Vocabulary Wrapper from the JSON vocabulary for this dataset.
    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load the stopword list, one word per line.
    # BUGFIX: use a context manager so the file handle is closed (the
    # original opened it and never closed it).
    stoppath = os.path.join(opt.vocab_path, 'stopwords.txt')
    with open(stoppath, 'r') as f_stop:
        stopwords = [line.strip() for line in f_stop]

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                stopwords, opt.batch_size,
                                                opt.workers, opt, True)

    # Construct the model
    model = VSRN(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
Code example #7
0
File: runner.py — Project: xbassi/lossurface
def main(args, asmv, counter):
    """Train a ResNet-18 CIFAR-10 classifier with Adam, then hand the
    trained model to the asymmetric-valley plotting helper."""
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(device)

    train_loader, test_loader = get_loaders(args)

    num_classes = 10

    # ResNet-18 backbone with a fresh 10-way classification head.
    net = models.resnet18()
    net.fc = nn.Linear(512, num_classes)
    net = net.to(device)
    net.train()

    optimizer = torch.optim.Adam(net.parameters(),
                                 lr=args.lr_init,
                                 amsgrad=True,
                                 weight_decay=args.wd)

    criterion = nn.CrossEntropyLoss()

    score = 0

    for epoch in range(args.epochs):
        losses = AverageMeter('Loss', ':.6f')

        for step, (batch_x, batch_y) in enumerate(train_loader):
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            loss = criterion(net(batch_x), batch_y)

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Running average of the loss, weighted by batch size.
            losses.update(loss.item(), batch_x.size(0))

            if step % args.print_freq == 0:
                print("\tStep: ", step, str(losses))

        print("Epoch: ", epoch, str(losses))
        score = validate(test_loader, net, criterion, device)
        # Checkpoint after every epoch (always the latest weights).
        torch.save(net.state_dict(), f"./{args.model_dir}/r18_10")

    asmv.draw(net, evalav, train_loader, test_loader, criterion, 0.5, counter)
Code example #8
0
File: train_f30k.py — Project: wayne980/CVSE
def main():
    """Train the CVSE cross-modal retrieval model on Flickr30K.

    Parses hyper-parameters, loads the vocabulary and data loaders,
    builds the CVSE model (optionally resuming from a checkpoint), then
    runs the epoch loop with per-epoch validation and checkpointing of
    the best R@K-sum model.
    """
    # ---------------- Hyper-parameters ----------------
    parser = argparse.ArgumentParser()
    # NOTE(review): the flags declared with action='store_false' below
    # (--use_restval, --max_violation, --bi_gru, --use_BatchNorm) default
    # to True, so passing the flag DISABLES the feature even though the
    # help text reads as if it enables it.
    parser.add_argument('--data_path', default='./Bottom_up_atten_feature/data',
                        help='path to datasets')
    parser.add_argument('--data_name', default='f30k_precomp',
                        help='{coco_precomp_original, f30k_precomp}')
    parser.add_argument('--vocab_path', default='./vocab/',
                        help='Path to saved vocabulary json files.')
    parser.add_argument('--orig_img_path', default='./data/',
                        help='path to get the original image data')
    parser.add_argument('--orig_data_name', default='f30k', help='{coco,f30k}')
    parser.add_argument('--use_restval', action='store_false',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=50, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=25, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=40, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=200, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=2000, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='./runs/f30k/CVSE_f30k/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name', default='./runs/f30k/CVSE_f30k/',
                        help='Path to save the model.')
    parser.add_argument('--resume',
                        default='./runs/f30k/CVSE_f30k/model_best.pth.tar',
                        type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_false',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm', action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument('--precomp_enc_type', default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--bi_gru', action='store_false',
                        help='Use bidirectional GRU.')
    parser.add_argument('--use_BatchNorm', action='store_false',
                        help='Whether to use BN.')
    parser.add_argument('--activation_type', default='tanh',
                        help='choose type of activation functions.')
    parser.add_argument('--dropout_rate', default=0.4, type=float,
                        help='dropout rate.')
    parser.add_argument('--use_abs', action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--attribute_path',
                        default='data/f30k_annotations/Concept_annotations/',
                        help='path to get attribute json file'
                        )  # absolute path (get from path of SAN model)
    parser.add_argument('--num_attribute', default=300, type=int,
                        help='dimension of Attribute annotation')
    parser.add_argument('--input_channel', default=300, type=int,
                        help='dimension of initial word embedding')
    parser.add_argument(
        '--inp_name',
        default=
        'data/f30k_annotations/Concept_annotations/f30k_concepts_glove_word2vec.pkl',
        help='load the input glove word embedding file')
    parser.add_argument(
        '--adj_file',
        default=
        'data/f30k_annotations/Concept_annotations/f30k_adj_concepts.pkl',
        help='load the adj file')
    parser.add_argument('--learning_rate_MLGCN', default=.0002, type=float,
                        help='learning rate of module of MLGCN.')
    parser.add_argument('--lr_MLGCN_update', default=10, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--Concept_label_ratio', default=0.35, type=float,
                        help='The ratio of concept label.')
    parser.add_argument(
        '--concept_name',
        default=
        'data/f30k_annotations/Concept_annotations/category_concepts.json',
        help='load the input concrete words of concepts')
    parser.add_argument('--norm_func_type', default='sigmoid',
                        help='choose type of norm functions.')
    parser.add_argument(
        '--feature_fuse_type',
        default='weight_sum',
        help=
        'choose the fusing type for raw feature and attribute feature (multiple|concat|adap_sum|weight_sum))'
    )
    parser.add_argument('--wemb_type', default='glove',
                        choices=('glove', 'fasttext', 'random_init'), type=str,
                        help='Word embedding (glove|fasttext|random_init)')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load the vocabulary wrapper; the context manager closes the file
    # handle (the original pickle.load(open(...)) leaked it).
    vocab_file = os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name)
    with open(vocab_file, 'rb') as f:
        vocab = pickle.load(f)
    opt.vocab_size = len(vocab)
    # The word->index mapping is handed to the model (embedding init).
    word2idx = vocab.word2idx

    # Data loaders for training and validation.
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model.
    model = CVSE(word2idx, opt)

    # Optionally resume from a checkpoint.
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training run.
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the model.
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # Train for one epoch.
        train(opt, train_loader, model, epoch, val_loader)

        # Evaluate on the validation set.
        rsum = validate(opt, val_loader, model)

        # Remember the best R@K sum and save a checkpoint.
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # the original os.path.exists()/os.mkdir() pair.
        os.makedirs(opt.model_name, exist_ok=True)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
コード例 #9
0
def main():
    """Active-learning driver for VSE image-caption retrieval.

    Each round: select the most informative training examples with the
    configured acquisition function, move them into the active pool,
    retrain a fresh VSE model on the pool, and validate it.
    """
    # ---------------- Hyper-parameters ----------------
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='/data/stud/jorgebjorn/data/',
                        help='path to datasets')
    parser.add_argument('--data_name', default='f8k_precomp',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path',
                        default='/data/stud/jorgebjorn/data/vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    # Default run directory is stamped with the user name and start time
    # (evaluated once, when the parser is built).
    parser.add_argument(
        '--logger_name',
        default='/data/stud/jorgebjorn/runs/{}/{}'.format(
            getpass.getuser(),
            datetime.datetime.now().strftime("%d-%m-%y_%H:%M")),
        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--selection', default='uncertainty',
                        help='Active learning selection algorithm')
    parser.add_argument('--primary', default='images',
                        help='Image- or caption-centric active learning')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=4096, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--device', default=0, type=int,
                        help='which gpu to use')
    parser.add_argument('--cnn_type', default='vgg19',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--use_restval', action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--use_abs', action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--reset_train', action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    parser.add_argument('--no_log', action='store_true', default=False,
                        help='Disable logging')
    opt = parser.parse_args()
    opt.logger_name += "_" + opt.selection + "_" + opt.primary
    print(opt)
    if torch.cuda.is_available():
        torch.cuda.set_device(opt.device)

    # Set up the tensorboard logger unless logging is disabled.
    if not opt.no_log:
        logging.basicConfig(format='%(asctime)s %(message)s',
                            level=logging.INFO)
        tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load the vocabulary wrapper; the context manager closes the file
    # handle (the original pickle.load(open(...)) leaked it).
    with open(os.path.join(opt.vocab_path,
                           '%s_vocab.pkl' % opt.data_name), 'rb') as f:
        vocab = pickle.load(f)
    opt.vocab_size = len(vocab)

    # Load data loaders: the growing "active" pool plus train/val.
    active_loader, train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.crop_size, opt.batch_size, opt.workers, opt)

    # Construct the model.
    model = VSE(opt)
    if torch.cuda.is_available():
        model.cuda()

    # Optionally resume from a checkpoint.
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training run.
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    n_rounds = 234

    # Dispatch table replaces the original if/elif chain; unknown names
    # fall back to uncertainty sampling, exactly as before.
    selectors = {
        "uncertainty": select_uncertainty,
        "margin": select_margin,
        "random": select_random,
        "hybrid": select_hybrid,
        "all": select_all,
        "capsim": select_captionSimilarity,
    }
    selection = selectors.get(opt.selection, select_uncertainty)

    for r in range(n_rounds):
        # Pick the next batch of examples to move into the active pool.
        best_indices = selection(r, model, train_loader)

        for index in best_indices:
            active_loader.dataset.add_single(train_loader.dataset[index][0],
                                             train_loader.dataset[index][1])

        train_loader.dataset.delete_indices(best_indices)

        # Train the model on the enlarged active pool.
        print("Training on {} items ".format(len(active_loader)))

        # Reset the model: each round retrains from scratch.
        model = VSE(opt)
        if torch.cuda.is_available():
            model.cuda()

        best_rsum = 0
        for epoch in range(opt.num_epochs):
            adjust_learning_rate(opt, model.optimizer, epoch)

            # Train for one epoch.
            train(opt, active_loader, model, epoch, val_loader)

        # NOTE(review): validation runs once per ROUND (after the epoch
        # loop), not per epoch — this matches the original indentation
        # but is worth confirming against the author's intent.
        rsum = validate(opt, val_loader, model, not opt.no_log, r)
コード例 #10
0
ファイル: train.py プロジェクト: WangBenHui/CRGN
def main():
    """Train the SCAN-based CRGN cross-modal retrieval model.

    Parses hyper-parameters, loads the vocabulary and data loaders,
    builds the SCAN model (optionally resuming from a checkpoint), then
    runs the epoch loop with validation and best-checkpoint saving.
    """
    # ---------------- Hyper-parameters ----------------
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--data_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/data',
        help='path to datasets')
    parser.add_argument('--data_name', default='f30k_precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument(
        '--vocab_path',
        default='/data3/zhangyf/cross_modal_retrieval/SCAN/vocab/',
        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=20, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    # NOTE(review): help text below duplicates --word_dim's; presumably
    # this is the decoder hidden size — confirm before rewording.
    parser.add_argument('--decoder_dim', default=512, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=10, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=4, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=30, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name', default='./runs/runX/checkpoint',
                        help='Path to save the model.')
    parser.add_argument(
        '--resume',
        default=
        '/data3/zhangyf/cross_modal_retrieval/vsepp_next_train_12_31_f30k/run/coco_vse++_ft_128_f30k_next/model_best.pth.tar',
        type=str,
        metavar='PATH',
        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm', action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument('--precomp_enc_type', default="basic",
                        help='basic|weight_norm')
    parser.add_argument('--reset_train', action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type', default='resnet152',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load the vocabulary wrapper; the context manager closes the file
    # handle (the original pickle.load(open(...)) leaked it).
    with open(os.path.join(opt.vocab_path,
                           '%s_vocab.pkl' % opt.data_name), 'rb') as f:
        vocab = pickle.load(f)
    opt.vocab_size = len(vocab)

    # Data loaders for training and validation.
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model.
    model = SCAN(opt)

    # Optionally resume from a checkpoint.
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training run.
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the model.
    best_rsum = 0
    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # Train for one epoch. NOTE(review): the original bound this
        # return value to a misspelled, never-read 'bset_rsum'; the dead
        # store is dropped here. It probably meant to update best_rsum —
        # confirm train()'s return contract before changing behavior.
        train(opt, train_loader, model, epoch, val_loader, best_rsum)

        # Evaluate on the validation set.
        rsum = validate(opt, val_loader, model)

        # Remember the best R@K sum and save a checkpoint.
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # makedirs(exist_ok=True) avoids the check-then-create race of
        # the original os.path.exists()/os.mkdir() pair.
        os.makedirs(opt.model_name, exist_ok=True)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
コード例 #11
0
def main(premise_hidden_size,
         hypo_hidden_size,
         linear_hidden_size,
         interaction_type,
         device,
         kind,
         num_layers=1,
         bidirectional=True,
         kernel_size=3,
         lr=1e-4,
         test=False,
         model_dir='models'):
    """Train an SNLI classifier (RNN or CNN encoder) and persist metrics.

    Validates the interaction type, builds loaders and the model, runs
    the training loop via TrainHelper, then plots curves (on CPU runs)
    and pickles the loss/accuracy histories.
    """
    allowed_interactions = ('cat', 'element_wise_mult')
    if interaction_type not in allowed_interactions:
        raise ValueError('interaction_type can only be: ', allowed_interactions)

    # Data loading configuration.
    batch_size = 32
    save_freq = 500
    max_epochs = 40
    train_loader, val_loader = data.get_loaders(batch_size, test=test)

    # Build the model: shared positional arguments plus encoder-specific
    # keyword arguments depending on the requested kind.
    embed_size = 300
    ind2vec = data.get_table_lookup()
    shared_args = (ind2vec, embed_size, premise_hidden_size,
                   hypo_hidden_size, linear_hidden_size, interaction_type,
                   device)
    if kind == 'rnn':
        model = models.SNLI_Model(*shared_args,
                                  kind='rnn',
                                  num_layers=num_layers,
                                  bidirectional=bidirectional)
    else:
        model = models.SNLI_Model(*shared_args,
                                  kind='cnn',
                                  kernel_size=kernel_size)
    model = model.to(device)

    # Optimize only the parameters that require gradients.
    trainable_params = [p for p in model.parameters() if p.requires_grad]
    optimizer = torch.optim.Adam(trainable_params, lr=lr)
    loss_fn = torch.nn.CrossEntropyLoss()

    model_name = f'{kind}_model_{premise_hidden_size}_{interaction_type}'
    model_dir = os.path.join(model_dir, model_name)
    train_helper = train_helpers.TrainHelper(device, model, loss_fn, optimizer,
                                             models.batch_params_key,
                                             model_dir, test)
    train_loss, val_loss, train_acc, val_acc = train_helper.train_loop(
        train_loader, val_loader, max_epochs=max_epochs, save_freq=save_freq)

    # Figures are only produced for CPU runs.
    if 'cpu' in device:
        os.makedirs('figures', exist_ok=True)
        path = f'figures/{model_name}'
        utils.plot_curves(train_loss, val_loss, train_acc, val_acc, path)

    # Persist all four metric histories next to the model.
    for history, filename in ((train_loss, 'train_loss.p'),
                              (val_loss, 'val_loss.p'),
                              (train_acc, 'train_acc.p'),
                              (val_acc, 'val_acc.p')):
        utils.save_pkl_data(history, filename, data_dir=model_dir)
コード例 #12
0
    model.load_state_dict(model_dict['best'])
else:
    model.load_state_dict(model_dict['last'] if 'last' in
                          model_dict else model_dict)

# Dummy SGD optimizer with lr=0: no parameter updates are intended; an
# optimizer object is still required for AMP initialization and backprop.
opt = torch.optim.SGD(model.parameters(), lr=0)  # needed for backprop only
if half_prec:
    # Wrap model/optimizer for mixed precision (apex AMP, level O1).
    model, opt = amp.initialize(model, opt, opt_level="O1")
utils.model_eval(model, half_prec)

# Rescale the perturbation budget and PGD step sizes from pixel units
# (0..255) to the normalized [0, 1] image range.
eps, pgd_alpha, pgd_alpha_rr = eps / 255, pgd_alpha / 255, pgd_alpha_rr / 255

# Full evaluation split (-1 appears to mean "all examples" — confirm
# against data.get_loaders); no shuffling or augmentation.
eval_batches_all = data.get_loaders(
    args.dataset,
    -1,
    args.batch_size_eval,
    train_set=True if args.set == 'train' else False,
    shuffle=False,
    data_augm=False)
# Smaller subset of n_eval examples, same split and settings.
eval_batches = data.get_loaders(
    args.dataset,
    n_eval,
    args.batch_size_eval,
    train_set=True if args.set == 'train' else False,
    shuffle=False,
    data_augm=False)

time_start = time.time()
# Clean (unperturbed) accuracy: eps and alpha are both 0 here, which
# presumably disables the attack inside rob_acc — verify its signature.
acc_clean, loss_clean, _ = rob_acc(eval_batches, model, 0, 0, opt, half_prec,
                                   0, 1)
print('acc={:.2%}, loss={:.3f}'.format(acc_clean, loss_clean))
コード例 #13
0
def main():
    """Train the CARRN image-text matching model.

    Parses command-line hyper-parameters, loads the vocabulary and data
    loaders, optionally resumes from a checkpoint, then runs the
    train/validate loop, saving a checkpoint after every epoch.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='./data/',
                        help='path to datasets')
    parser.add_argument('--data_name',
                        default='precomp',
                        help='{coco, f30k}_precomp')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary json files')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=30,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=64,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_embed_size',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding')
    parser.add_argument('--hidden_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the joint embedding')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=0.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=15,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log')
    parser.add_argument('--val_step',
                        default=1000,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name',
                        default='./runs/runX/checkpoint',
                        help='Path to save the model')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default:none)')
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss')
    parser.add_argument('--img_size',
                        default=2048,
                        type=int,
                        help='Dimensionality of the image embedding')
    parser.add_argument('--norm',
                        action='store_true',
                        help='normalize the text and image embedding')
    parser.add_argument(
        '--norm_func',
        default='clipped_l2norm',
        help='clipped_leaky_l2norm|clipped_l2norm|l2norm|'
        'clipped_leaky_l1norm|clipped_l1norm|l1norm|no_norm|softmax')
    parser.add_argument('--agg_func',
                        default='Mean',
                        help='LogSumExp|Mean|Max|Sum')
    parser.add_argument('--bi_gru',
                        action='store_true',
                        help='Use bidirectional GRU.')
    parser.add_argument('--lambda_lse',
                        default=6.,
                        type=float,
                        help='LogSumExp temp')
    parser.add_argument('--lambda_softmax',
                        default=9.,
                        type=float,
                        help='Attention softmax temperature')
    parser.add_argument(
        '--activation_func',
        default='relu',
        help='activation function: relu|gelu|no_activation_fun')
    parser.add_argument(
        '--alpha',
        default=0.5,
        type=float,
        help='the weight of final score between i2t score and t2i score')
    parser.add_argument('--use_abs',
                        action='store_true',
                        help='take the absolute value of embedding vectors')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load the vocabulary wrapper; its size fixes the embedding table size.
    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders.
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model.
    model = CARRN(opt)
    best_rsum = 0
    start_epoch = 0

    # Optionally resume: restores epoch counter, best validation score,
    # weights, and the global iteration counter used for log continuity.
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch'] + 1
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the model.
    for epoch in range(start_epoch, opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # Train for one epoch.
        train(opt, train_loader, model, epoch, val_loader)

        # Evaluate on the validation set.
        rsum = validate(opt, val_loader, model)

        # Remember the best R@K sum and save a checkpoint.
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # makedirs (not mkdir): the default model_name is a nested path, and
        # exist_ok avoids a race with concurrent runs.
        os.makedirs(opt.model_name, exist_ok=True)
        save_checkpoint(
            {
                'epoch': epoch,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
コード例 #14
0
def main():
    """Entry point: train (or evaluate) a ShuffleNetV2 classifier.

    Seeds RNGs, resolves the device/dtype, builds the model, loaders,
    criterion, optimizer and LR scheduler, optionally resumes from a
    checkpoint, then either evaluates once or runs the full training loop.
    """
    args = parser.parse_args()

    # Reproducibility: fall back to a random seed if the user gave none.
    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # '==' (not 'is'): identity comparison with a literal is implementation-
    # dependent and raises SyntaxWarning on modern CPython.
    if args.save == '':
        args.save = time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    os.makedirs(save_path, exist_ok=True)

    # Device selection: first listed GPU is the primary DataParallel device.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = ShuffleNetV2(scale=args.scaling,
                         c_tag=args.c_tag,
                         SE=args.SE,
                         residual=args.residual,
                         groups=args.groups)
    num_parameters = sum(p.nelement() for p in model.parameters())
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    # FLOPs are counted per GPU, so divide the batch across the devices.
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            ShuffleNetV2, args.batch_size //
            len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling, 3, args.c_tag,
            1000, torch.nn.ReLU, args.SE, args.residual, args.groups)))

    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers)
    # Define loss function (criterion) and optimizer.
    criterion = torch.nn.CrossEntropyLoss()
    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    # Cyclical-LR bounds search mode: run the sweep and exit.
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    best_test = 0

    # Optionally resume from a checkpoint. A file path restores model and
    # optimizer state; a directory additionally reloads the results CSV so
    # logging continues where it left off.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device,
                                dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # Paper-claimed accuracy (stored as error rate) for this configuration,
    # used as a reference line in the logs when available.
    claimed_acc1 = None
    claimed_acc5 = None
    if args.SE in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.SE]:
            claimed_acc1 = 1 - claimed_acc_top1[args.SE][args.scaling]
            csv_logger.write_text('Claimed accuracy is {:.2f}% top-1'.format(
                claimed_acc1 * 100.))
    train_network(args.start_epoch, args.epochs, scheduler, model,
                  train_loader, val_loader, optimizer, criterion, device,
                  dtype, args.batch_size, args.log_interval, csv_logger,
                  save_path, claimed_acc1, claimed_acc5, best_test)
コード例 #15
0
def run_experiment(xp, xp_count, n_experiments):
  """Run one federated-distillation experiment described by *xp*.

  Builds clients and server from xp.hyperparameters, optionally loads
  pretrained feature extractors (and FedAUX scores), then runs the
  communication rounds with periodic evaluation and logging.

  Args:
    xp: experiment object carrying hyperparameters and a log() sink.
    xp_count: index of this experiment (for progress printing).
    n_experiments: total number of scheduled experiments.
  """
  t0 = time.time()
  print(xp)
  hp = xp.hyperparameters

  num_classes = {"cifar10" : 10, "cifar100" : 100}[hp["dataset"]]

  # One client per model copy: hp["models"] maps model name -> count.
  model_names = [model_name for model_name, k in hp["models"].items() for _ in range(k)]

  # Optimizer factories; experiment-level hp values override the per-optimizer
  # defaults stored alongside the optimizer name.
  optimizer, optimizer_hp = getattr(torch.optim, hp["local_optimizer"][0]), hp["local_optimizer"][1]
  optimizer_fn = lambda x : optimizer(x, **{k : hp[k] if k in hp else v for k, v in optimizer_hp.items()})

  distill_optimizer, distill_optimizer_hp = getattr(torch.optim, hp["distill_optimizer"][0]), hp["distill_optimizer"][1]
  distill_optimizer_fn = lambda x : distill_optimizer(x, **{k : hp[k] if k in hp else v for k, v in distill_optimizer_hp.items()})


  train_data, test_data = data.get_data(hp["dataset"], args.DATA_PATH)
  all_distill_data = data.get_data(hp["distill_dataset"], args.DATA_PATH)

  np.random.seed(hp["random_seed"])
  torch.manual_seed(hp["random_seed"])

  # Split the auxiliary dataset into a distillation part and a public part.
  n_distill = int(hp["n_distill_frac"] * len(all_distill_data))

  distill_data = data.IdxSubset(all_distill_data, np.random.permutation(len(all_distill_data))[:n_distill], return_index=True)
  public_data = data.IdxSubset(all_distill_data, np.random.permutation(len(all_distill_data))[n_distill:len(all_distill_data)], return_index=False)

  print(len(distill_data), len(public_data))

  client_loaders, test_loader = data.get_loaders(train_data, test_data, n_clients=len(model_names), 
        alpha=hp["alpha"], batch_size=hp["batch_size"], n_data=None, num_workers=0, seed=hp["random_seed"])
  distill_loader = torch.utils.data.DataLoader(distill_data, batch_size=hp["distill_batch_size"], shuffle=True, num_workers=8)
  public_loader = torch.utils.data.DataLoader(public_data, batch_size=128, shuffle=True, num_workers=8)

  clients = [Client(model_name, optimizer_fn, loader, idnum=i, num_classes=num_classes) for i, (loader, model_name) in enumerate(zip(client_loaders, model_names))]
  server = Server(np.unique(model_names), distill_optimizer_fn, test_loader, distill_loader, num_classes=num_classes)

  for client in clients:
    client.public_loader = public_loader
    client.distill_loader = distill_loader

  # Print model
  models.print_model(clients[0].model)

  # Pretraining: load feature-extractor weights for modes containing "P"
  # and for FedAUX (which additionally precomputes per-client scores).
  if "P" in hp["aggregation_mode"] or hp["aggregation_mode"] == "FedAUX":
    for model_name, model in server.model_dict.items():
      pretrained = hp["pretrained"] if hp["pretrained"] else "{}_{}.pth".format(model_name, hp["distill_dataset"])

      loaded_state = torch.load(args.CHECKPOINT_PATH + pretrained, map_location='cpu')
      loaded_layers = [key for key in loaded_state if key in model.state_dict()]
      model.load_state_dict(loaded_state, strict=False)

    for client in clients:
      client.synchronize_with_server(server)

    # NOTE: reports the layers/file of the *last* model in the dict.
    print("Successfully loaded layers {} from".format(loaded_layers), pretrained)


    if hp["aggregation_mode"] == "FedAUX":
      print("Computing Scores...")

      for client in clients:
        client.scores = client.extract_features_and_compute_scores(client.loader, public_loader, distill_loader, lambda_reg=hp["lambda_reg_score"], eps_delt=hp["eps_delt"]) 

        if hp["save_scores"]:
          xp.log({"client_{}_scores".format(client.id) : client.scores.detach().cpu().numpy()})


  # Linear-probe modes ("L"): freeze everything but the classification layer.
  if "L" in hp["aggregation_mode"]:
    for client in clients:
      for name, param in client.model.named_parameters():
        if "classification_layer" not in name:
          param.requires_grad = False

    for model in server.model_dict.values():
      for name, param in model.named_parameters():
        if "classification_layer" not in name:
          param.requires_grad = False



  # Start Distributed Training Process
  print("Start Distributed Training..\n")
  t1 = time.time()
  xp.log({"prep_time" : t1-t0})

  xp.log({"server_val_{}".format(key) : value for key, value in server.evaluate_ensemble().items()})
  for c_round in range(1, hp["communication_rounds"]+1):

    participating_clients = server.select_clients(clients, hp["participation_rate"])
    xp.log({"participating_clients" : np.array([c.id for c in participating_clients])})

    for client in participating_clients:
      client.synchronize_with_server(server)

      # FedProx proximal term only applies in "PROX" aggregation modes.
      train_stats = client.compute_weight_update(hp["local_epochs"], lambda_fedprox=hp["lambda_fedprox"] if "PROX" in hp["aggregation_mode"] else 0.0) 

      print(train_stats)


    # Averaging
    server.aggregate_weight_updates(participating_clients)
    avg_stats = server.evaluate_ensemble()

    xp.log({"averaging_{}".format(key) : value for key, value in avg_stats.items()})


    if hp["aggregation_mode"] in ["FedDF", "FedAUX", "FedDF+P"]:
      distill_mode = "weighted_logits_precomputed" if hp["aggregation_mode"]=="FedAUX" else "mean_logits"

      distill_stats = server.distill(participating_clients, hp["distill_epochs"], mode=distill_mode, num_classes=num_classes)
      xp.log({"distill_{}".format(key) : value for key, value in distill_stats.items()})


    # Logging
    if xp.is_log_round(c_round):
      print("Experiment: {} ({}/{})".format(args.schedule, xp_count+1, n_experiments))   

      xp.log({'communication_round' : c_round, 'epochs' : c_round*hp['local_epochs']})
      xp.log({key : clients[0].optimizer.__dict__['param_groups'][0][key] for key in optimizer_hp})

      # Evaluate  
      xp.log({"server_val_{}".format(key) : value for key, value in server.evaluate_ensemble().items()})

      xp.log({"epoch_time" : (time.time()-t1)/c_round})
      # Save results to Disk. Catch only Exception (not a bare except, which
      # would also swallow KeyboardInterrupt/SystemExit) and report the cause.
      try:
        xp.save_to_disc(path=args.RESULTS_PATH, name=hp['log_path'])
      except Exception as e:
        print("Saving results Failed!", e)

      # Timing
      e = int((time.time()-t1)/c_round*(hp['communication_rounds']-c_round))
      print("Remaining Time (approx.):", '{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60), e % 60), 
                "[{:.2f}%]\n".format(c_round/hp['communication_rounds']*100))

  # Save model to disk
  server.save_model(path=args.CHECKPOINT_PATH, name=hp["save_model"])

  # Delete objects to free up GPU memory
  del server; clients.clear()
  torch.cuda.empty_cache()
コード例 #16
0
def main():
    """Train a VSE image-text embedding model.

    Parses hyper-parameters, builds the tokenizer and data loaders,
    optionally resumes from a checkpoint (restoring epoch and best score),
    then runs the train/validate loop with per-epoch checkpointing.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='/A/VSE/data/',
                        help='path to datasets')
    parser.add_argument(
        '--data_name',
        default='resnet152_precomp',
        help='{coco,f8k,f30k,10crop,irv2,resnet152}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin',
                        default=0.05,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=30,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument(
        '--embed_size',
        default=1024,
        type=int,
        help=
        'Dimensionality of the joint embedding. [NOTE: this is used only if <embed_size> differs from <gru_units>]'
    )
    parser.add_argument('--gru_units',
                        default=1024,
                        type=int,
                        help='Number of GRU neurons.')
    parser.add_argument('--grad_clip',
                        default=1.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size',
                        default=224,
                        type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.001,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=15,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='runs/runX',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume',
                        default='',
                        type=str,
                        metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim',
                        default=2048,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune',
                        action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type',
                        default='vgg19',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--use_restval',
                        action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure',
                        default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument(
        '--test_measure',
        default=None,
        help=
        'Similarity used for retrieval (None<same used for training>|cosine|order)'
    )
    parser.add_argument('--use_abs',
                        action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--text_encoder',
                        default='seam-e',
                        choices=text_encoders.text_encoders_alias.keys())
    parser.add_argument(
        '--att_units',
        default=300,
        type=int,
        help=
        'Number of tanh neurons. When using --att_dim=None we apply a tanh directly to the att input. '
    )
    parser.add_argument('--att_hops',
                        default=30,
                        type=int,
                        help='Number of attention hops (viewpoints).')
    parser.add_argument(
        '--att_coef',
        default=0.,
        type=float,
        help='Influence of Frobenius divergence in the loss function.')

    opt = parser.parse_args()

    # Default the retrieval measure to the training measure.
    if opt.test_measure is None:
        opt.test_measure = opt.measure

    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    tokenizer, vocab_size = data.get_tokenizer(opt.vocab_path, opt.data_name)
    opt.vocab_size = vocab_size

    collate_fn = 'collate_fn'

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, tokenizer,
                                                opt.crop_size, opt.batch_size,
                                                opt.workers, opt, collate_fn)

    # Construct the model
    model = VSE(opt)
    print(model.txt_enc)

    # Initialize BEFORE the resume branch so resumed values are kept
    # (previously best_rsum was reset to 0 after resume and start_epoch was
    # ignored, so training restarted from scratch and could overwrite the
    # best checkpoint with a worse one).
    start_epoch = 0
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            prefix=opt.logger_name + '/')
コード例 #17
0
def main():
    """Train the VSRN image-text matching model with caption generation.

    Parses hyper-parameters (including AM scene-text and captioning
    options), loads the vocabulary and data loaders, optionally resumes
    from a checkpoint, then runs the train/validate loop with per-epoch
    checkpointing.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='/data',
                        help='path to datasets')
    parser.add_argument('--data_name', default='precomp',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path', default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=2048, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=10, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='runs/runX',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type', default='vgg19',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--use_restval', action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure', default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--use_abs', action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--reset_train', action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    ### AM Parameters
    parser.add_argument('--text_number', default=15, type=int,
                        help='Number of ocr tokens used (max. 20).')
    parser.add_argument('--text_dim', default=300, type=int,
                        help='Dimension of scene text embedding - default 300')

    ### caption parameters
    parser.add_argument(
        '--dim_vid',
        type=int,
        default=2048,
        help='dim of features of video frames')
    parser.add_argument(
        '--dim_hidden',
        type=int,
        default=512,
        help='size of the rnn hidden layer')
    parser.add_argument(
        "--bidirectional",
        type=int,
        default=0,
        help="0 for disable, 1 for enable. encoder/decoder bidirectional.")
    parser.add_argument(
        '--input_dropout_p',
        type=float,
        default=0.2,
        help='strength of dropout in the Language Model RNN')
    parser.add_argument(
        '--rnn_type', type=str, default='gru', help='lstm or gru')

    parser.add_argument(
        '--rnn_dropout_p',
        type=float,
        default=0.5,
        help='strength of dropout in the Language Model RNN')

    parser.add_argument(
        '--dim_word',
        type=int,
        default=300,  # 512
        help='the encoding size of each token in the vocabulary, and the video.'
    )
    parser.add_argument(
        "--max_len",
        type=int,
        default=60,
        help='max length of captions(containing <sos>,<eos>)')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper (with-block closes the file even on error).
    with open(os.path.join(opt.vocab_path,
                           '%s_vocab.pkl' % opt.data_name), 'rb') as f:
        vocab = pickle.load(f)
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.crop_size, opt.batch_size, opt.workers, opt)

    # Construct the model
    model = VSRN(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    best_rsum = 0

    for epoch in range(opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch; train() returns the (possibly updated)
        # best score from any mid-epoch validation it performed.
        best_rsum = train(opt, train_loader, model, epoch, val_loader, best_rsum)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
            'Eiters': model.Eiters,
        }, is_best, prefix=opt.logger_name + '/')
コード例 #18
0
ファイル: train.py プロジェクト: zhushaoquan/DASG_TCSVT2019
def main(opt):
    """Train an XRN image-text matching model.

    Loads a pickled vocabulary and the train/val data loaders, optionally
    resumes from a checkpoint, then alternates one training epoch with a
    validation pass, saving a checkpoint (and the best epoch's i2t distance
    matrix) after every epoch.
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = pickle.load(
        open(os.path.join(opt.vocab_path, 'vg_c_precomp_vocab.pkl'), 'rb'))
    opt.vocab_size = len(vocab)
    print(opt.vocab_size)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.crop_size, opt.batch_size,
                                                opt.workers, opt)

    # Construct the model
    model = XRN(opt)

    # Initialize BEFORE the resume block: a best_rsum restored from the
    # checkpoint below must not be clobbered (the original reset it to 0
    # unconditionally after resuming, discarding the restored value).
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    for epoch in range(opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum, d_i2t = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        if is_best:
            # persist the image-to-text distance matrix of the best epoch
            np.save('d_i2t.npy', d_i2t)

        best_rsum = max(rsum, best_rsum)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            prefix=opt.logger_name + '_' + opt.model_name + '/')
コード例 #19
0
def main():
    """Train or evaluate an STN-MobileNetV2 model.

    Parses CLI arguments, seeds all RNGs, selects device/dtype, builds the
    model, criteria, optimizer and LR scheduler, optionally resumes from a
    checkpoint file or results directory, then either runs a single test
    pass (``--evaluate``) or hands off to ``train_network``.
    """
    args = parser.parse_args()

    if args.seed is None:
        args.seed = random.randint(1, 10000)
    print("Random Seed: ", args.seed)
    random.seed(args.seed)
    torch.manual_seed(args.seed)
    if args.gpus:
        torch.cuda.manual_seed_all(args.seed)

    time_stamp = datetime.now().strftime('%Y-%m-%d_%H-%M-%S')
    if args.evaluate:
        args.results_dir = '/tmp'
    # Bug fix: the original used `args.save is ''` — an identity test with a
    # string literal, which is implementation-dependent and emits a
    # SyntaxWarning on modern CPython. Compare by value instead.
    if args.save == '':
        args.save = 'mar10_224_' + time_stamp
    save_path = os.path.join(args.results_dir, args.save)
    if not os.path.exists(save_path):
        os.makedirs(save_path)

    # Device selection: first listed GPU becomes the master device.
    if args.gpus is not None:
        args.gpus = [int(i) for i in args.gpus.split(',')]
        device = 'cuda:' + str(args.gpus[0])
        cudnn.benchmark = True
    else:
        device = 'cpu'

    if args.type == 'float64':
        dtype = torch.float64
    elif args.type == 'float32':
        dtype = torch.float32
    elif args.type == 'float16':
        dtype = torch.float16
    else:
        raise ValueError('Wrong type!')  # TODO int8

    model = STN_MobileNet2(input_size=args.input_size,
                           scale=args.scaling,
                           shearing=args.shearing)
    # print(model.stnmod.fc_loc[0].bias.data)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)
    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(
            STN_MobileNet2, args.batch_size //
            len(args.gpus) if args.gpus is not None else args.batch_size,
            device, dtype, args.input_size, 3, args.scaling)))

    train_loader, val_loader, test_loader = get_loaders(
        args.dataroot, args.batch_size, args.batch_size, args.input_size,
        args.workers, args.b_weights)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    L1_criterion = torch.nn.L1Loss()
    PW_criterion = torch.nn.CosineSimilarity(dim=2, eps=1e-6)

    if args.gpus is not None:
        model = torch.nn.DataParallel(model, args.gpus)
    model.to(device=device, dtype=dtype)

    criterion.to(device=device, dtype=dtype)
    L1_criterion.to(device=device, dtype=dtype)
    PW_criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(),
                                args.learning_rate,
                                momentum=args.momentum,
                                weight_decay=args.decay,
                                nesterov=True)
    # LR range test mode: sweep the learning rate and exit.
    if args.find_clr:
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        PW_criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=save_path)
        return

    if args.clr:
        print('Use CLR')
        scheduler = CyclicLR(optimizer,
                             base_lr=args.min_lr,
                             max_lr=args.max_lr,
                             step_size=args.epochs_per_step *
                             len(train_loader),
                             mode=args.mode)
    else:
        print('Use scheduler')
        scheduler = MultiStepLR(optimizer,
                                milestones=args.schedule,
                                gamma=args.gamma)

    # Lower is better: best_val tracks the smallest validation error so far.
    best_val = 500

    # optionally resume from a checkpoint
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            # args.start_epoch = checkpoint['epoch'] - 1
            # best_val = checkpoint['best_prec1']
            # best_test = checkpoint['best_prec1']
            args.start_epoch = 0
            best_val = 500
            state_dict = checkpoint['state_dict']

            # if weights from imagenet

            # Re-initialize the classifier head (fc) so an ImageNet-pretrained
            # checkpoint can be loaded against a 101-class model; every other
            # layer keeps its checkpoint weights.
            new_state_dict = OrderedDict()
            for k, v in state_dict.items():
                #     print(k, v.size())
                name = k
                if k == 'module.fc.bias':
                    new_state_dict[name] = torch.zeros(101)
                    continue
                elif k == 'module.fc.weight':
                    new_state_dict[name] = torch.ones(101, 1280)
                    continue
                else:
                    print('else:', name)
                    new_state_dict[name] = v

            model.load_state_dict(new_state_dict, strict=False)
            # optimizer.load_state_dict(checkpoint['optimizer'], strict=False)
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            # Resuming from a results directory: restore training state and
            # replay the CSV history into the logger.
            checkpoint_path = os.path.join(args.resume, 'checkpoint.pth.tar')
            csv_path = os.path.join(args.resume, 'results.csv')
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_val = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optimizer.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, test_mae = test(model, predefined_points, 0, test_loader,
                              PW_criterion, device, dtype)  # TODO
        return

    csv_logger = CsvLogger(filepath=save_path, data=data)
    csv_logger.save_params(sys.argv, args)

    # claimed_acc1 = None
    # claimed_acc5 = None
    # if args.input_size in claimed_acc_top1:
    #     if args.scaling in claimed_acc_top1[args.input_size]:
    #         claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
    #         claimed_acc5 = claimed_acc_top5[args.input_size][args.scaling]
    #         csv_logger.write_text(
    #             'Claimed accuracies are: {:.2f}% top-1, {:.2f}% top-5'.format(claimed_acc1 * 100., claimed_acc5 * 100.))

    train_network(args.start_epoch, args.epochs, scheduler, model,
                  predefined_points, train_loader, val_loader, test_loader,
                  optimizer, PW_criterion, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, save_path, best_val)
コード例 #20
0
# TODO: data parallel + normalisation + hidden sizes ? + flatten direct
# Ideas: binaryce +
import models
import data
import torch
import os
from tqdm import tqdm
import wandb
import numpy as np
import matplotlib.pyplot as plt
from utils import compute_accuracy, visualize_noise
from setup import (BATCH_SIZE, MODEL, DEVICE, LEARNING_RATE, NB_EPOCHS,
                   FILE_NAME, DROPOUT, WEIGHTS_INIT, ALPHA, KEEP_WEIGHTS,
                   SIGNAL_LENGTH, SUBSAMPLE)

# Data: train/test loaders built from the configured batch size.
train_loader, test_loader = data.get_loaders(BATCH_SIZE)

# Model: denoising network; optionally apply the custom weight init
# before moving it to the target device.
model = models.DeNoiser(dropout=DROPOUT)
if WEIGHTS_INIT:
    model.apply(models.init_weights)
model.to(DEVICE)

# Stream gradients and parameters to Weights & Biases.
wandb.watch(model)

# Warm start: reload saved weights when KEEP_WEIGHTS is set and the file
# exists; otherwise make sure the weights directory exists so later
# torch.save calls succeed.
if KEEP_WEIGHTS and os.path.exists(f"weights/{FILE_NAME}.pt"):
    print("Weights found")
    model.load_state_dict(torch.load(f"weights/{FILE_NAME}.pt"))
else:
    os.makedirs("weights/", exist_ok=True)
    print("Weights not found")
コード例 #21
0
ファイル: test_model.py プロジェクト: dmesq/NLP_hw2
import utils
import models
import data
import train_helpers


# Positional CLI layout:
#   argv[1] model_dir, argv[2] hidden_size, argv[3] interaction_type,
#   argv[4] kind, argv[5] epoch, argv[6] batch_ix
model_dir = sys.argv[1]
hidden_size = int(sys.argv[2])
interaction_type = sys.argv[3]
kind = sys.argv[4]
epoch = sys.argv[5]
batch_ix = sys.argv[6]
batch_size = 32

# Word-index -> embedding table and the validation loader.
ind2vec = utils.load_pkl_data('ind2vec.p', data_dir='vocab')
_, val_loader = data.get_loaders(batch_size, data_dir='hw2_data')
loss_fn = torch.nn.CrossEntropyLoss()
# Checkpoint filename encodes the epoch and batch index it was saved at.
fmodel = f'epoch_{epoch}_batch_{batch_ix}.pt'
print('model: ' + fmodel)
# NOTE(review): positional args 300 and 80 are presumably the word-embedding
# dim and max sequence length — confirm against models.SNLI_Model's signature.
model = models.SNLI_Model(ind2vec,
                            300,
                            hidden_size,
                            hidden_size,
                            80,
                            interaction_type,
                            'cpu',
                            kind,
                            num_layers=1,
                            bidirectional=True,
                            kernel_size=3)
model.load_state_dict(torch.load(f'{model_dir}/{fmodel}'))
コード例 #22
0
def main():
    """Adversarially train a classifier (FGSM / Free-AT / PGD variants),
    optionally with the GradAlign cosine regularizer, logging robust
    accuracy periodically and running a full multi-restart PGD evaluation
    after the final epoch.
    """
    args = get_args()
    os.environ["CUDA_VISIBLE_DEVICES"] = str(args.gpu)

    cur_timestamp = str(datetime.now())[:-3]  # we also include ms to prevent the probability of name collision
    model_width = {'linear': '', 'cnn': args.n_filters_cnn, 'lenet': '', 'resnet18': ''}[args.model]
    model_str = '{}{}'.format(args.model, model_width)
    # The run name encodes every hyperparameter so checkpoints are self-describing.
    model_name = '{} dataset={} model={} eps={} attack={} m={} attack_init={} fgsm_alpha={} epochs={} pgd={}-{} grad_align_cos_lambda={} lr_max={} seed={}'.format(
        cur_timestamp, args.dataset, model_str, args.eps, args.attack, args.minibatch_replay, args.attack_init, args.fgsm_alpha, args.epochs,
        args.pgd_alpha_train, args.pgd_train_n_iters, args.grad_align_cos_lambda, args.lr_max, args.seed)
    if not os.path.exists('models'):
        os.makedirs('models')
    logger = utils.configure_logger(model_name, args.debug)
    logger.info(args)
    half_prec = args.half_prec
    n_cls = 2 if 'binary' in args.dataset else 10

    np.random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # GradAlign needs gradients of gradients, hence a double backward pass.
    double_bp = True if args.grad_align_cos_lambda > 0 else False
    n_eval_every_k_iter = args.n_eval_every_k_iter
    args.pgd_alpha = args.eps / 4

    # eps values are specified in [0, 255] on the CLI; convert to [0, 1].
    eps, pgd_alpha, pgd_alpha_train = args.eps / 255, args.pgd_alpha / 255, args.pgd_alpha_train / 255
    train_data_augm = False if args.dataset in ['mnist'] else True
    # "fast" loaders are small subsets used for the periodic mid-training evals.
    train_batches = data.get_loaders(args.dataset, -1, args.batch_size, train_set=True, shuffle=True, data_augm=train_data_augm)
    train_batches_fast = data.get_loaders(args.dataset, n_eval_every_k_iter, args.batch_size, train_set=True, shuffle=False, data_augm=False)
    test_batches = data.get_loaders(args.dataset, args.n_final_eval, args.batch_size_eval, train_set=False, shuffle=False, data_augm=False)
    test_batches_fast = data.get_loaders(args.dataset, n_eval_every_k_iter, args.batch_size_eval, train_set=False, shuffle=False, data_augm=False)

    model = models.get_model(args.model, n_cls, half_prec, data.shapes_dict[args.dataset], args.n_filters_cnn).cuda()
    model.apply(utils.initialize_weights)
    model.train()

    if args.model == 'resnet18':
        opt = torch.optim.SGD(model.parameters(), lr=args.lr_max, momentum=0.9, weight_decay=args.weight_decay)
    elif args.model == 'cnn':
        opt = torch.optim.Adam(model.parameters(), lr=args.lr_max, weight_decay=args.weight_decay)
    elif args.model == 'lenet':
        opt = torch.optim.Adam(model.parameters(), lr=args.lr_max, weight_decay=args.weight_decay)
    else:
        raise ValueError('decide about the right optimizer for the new model')

    if half_prec:
        if double_bp:
            # batch_norm must stay in fp32 for a stable double backward under amp.
            amp.register_float_function(torch, 'batch_norm')
        model, opt = amp.initialize(model, opt, opt_level="O1")

    if args.attack == 'fgsm':  # needed here only for Free-AT
        delta = torch.zeros(args.batch_size, *data.shapes_dict[args.dataset][1:]).cuda()
        delta.requires_grad = True

    lr_schedule = utils.get_lr_schedule(args.lr_schedule, args.epochs, args.lr_max)
    loss_function = nn.CrossEntropyLoss()

    # Early-stopping state: keep the weights with the best train PGD accuracy.
    train_acc_pgd_best, best_state_dict = 0.0, copy.deepcopy(model.state_dict())
    start_time = time.time()
    time_train, iteration, best_iteration = 0, 0, 0
    for epoch in range(args.epochs + 1):
        train_loss, train_reg, train_acc, train_n, grad_norm_x, avg_delta_l2 = 0, 0, 0, 0, 0, 0
        for i, (X, y) in enumerate(train_batches):
            if i % args.minibatch_replay != 0 and i > 0:  # take new inputs only each `minibatch_replay` iterations
                X, y = X_prev, y_prev
            time_start_iter = time.time()
            # epoch=0 runs only for one iteration (to check the training stats at init)
            if epoch == 0 and i > 0:
                break
            X, y = X.cuda(), y.cuda()
            lr = lr_schedule(epoch - 1 + (i + 1) / len(train_batches))  # epoch - 1 since the 0th epoch is skipped
            opt.param_groups[0].update(lr=lr)

            # --- craft the adversarial perturbation `delta` for this batch ---
            if args.attack in ['pgd', 'pgd_corner']:
                pgd_rs = True if args.attack_init == 'random' else False
                n_eps_warmup_epochs = 5
                n_iterations_max_eps = n_eps_warmup_epochs * data.shapes_dict[args.dataset][0] // args.batch_size
                # SVHN uses a linear eps warmup to avoid early divergence.
                eps_pgd_train = min(iteration / n_iterations_max_eps * eps, eps) if args.dataset == 'svhn' else eps
                delta = utils.attack_pgd_training(
                    model, X, y, eps_pgd_train, pgd_alpha_train, opt, half_prec, args.pgd_train_n_iters, rs=pgd_rs)
                if args.attack == 'pgd_corner':
                    delta = eps * utils.sign(delta)  # project to the corners
                    delta = clamp(X + delta, 0, 1) - X

            elif args.attack == 'fgsm':
                if args.minibatch_replay == 1:
                    if args.attack_init == 'zero':
                        delta = torch.zeros_like(X, requires_grad=True)
                    elif args.attack_init == 'random':
                        delta = utils.get_uniform_delta(X.shape, eps, requires_grad=True)
                    else:
                        raise ValueError('wrong args.attack_init')
                else:  # if Free-AT, we just reuse the existing delta from the previous iteration
                    delta.requires_grad = True

                X_adv = clamp(X + delta, 0, 1)
                output = model(X_adv)
                loss = F.cross_entropy(output, y)
                if half_prec:
                    with amp.scale_loss(loss, opt) as scaled_loss:
                        grad = torch.autograd.grad(scaled_loss, delta, create_graph=True if double_bp else False)[0]
                        grad /= scaled_loss / loss  # reverse back the scaling
                else:
                    grad = torch.autograd.grad(loss, delta, create_graph=True if double_bp else False)[0]

                grad = grad.detach()

                argmax_delta = eps * utils.sign(grad)

                n_alpha_warmup_epochs = 5
                n_iterations_max_alpha = n_alpha_warmup_epochs * data.shapes_dict[args.dataset][0] // args.batch_size
                fgsm_alpha = min(iteration / n_iterations_max_alpha * args.fgsm_alpha, args.fgsm_alpha) if args.dataset == 'svhn' else args.fgsm_alpha
                # Project the step back into the eps-ball and the valid pixel range.
                delta.data = clamp(delta.data + fgsm_alpha * argmax_delta, -eps, eps)
                delta.data = clamp(X + delta.data, 0, 1) - X

            elif args.attack == 'random_corner':
                delta = utils.get_uniform_delta(X.shape, eps, requires_grad=False)
                delta = eps * utils.sign(delta)

            elif args.attack == 'none':
                delta = torch.zeros_like(X, requires_grad=False)
            else:
                raise ValueError('wrong args.attack')

            # extra FP+BP to calculate the gradient to monitor it
            if args.attack in ['none', 'random_corner', 'pgd', 'pgd_corner']:
                grad = get_input_grad(model, X, y, opt, eps, half_prec, delta_init='none',
                                      backprop=args.grad_align_cos_lambda != 0.0)

            delta = delta.detach()

            output = model(X + delta)
            loss = loss_function(output, y)

            # GradAlign regularizer: 1 - cosine between input gradients at two
            # perturbation initializations (zero vs. random uniform).
            reg = torch.zeros(1).cuda()[0]  # for .item() to run correctly
            if args.grad_align_cos_lambda != 0.0:
                grad2 = get_input_grad(model, X, y, opt, eps, half_prec, delta_init='random_uniform', backprop=True)
                grads_nnz_idx = ((grad**2).sum([1, 2, 3])**0.5 != 0) * ((grad2**2).sum([1, 2, 3])**0.5 != 0)
                grad1, grad2 = grad[grads_nnz_idx], grad2[grads_nnz_idx]
                grad1_norms, grad2_norms = l2_norm_batch(grad1), l2_norm_batch(grad2)
                grad1_normalized = grad1 / grad1_norms[:, None, None, None]
                grad2_normalized = grad2 / grad2_norms[:, None, None, None]
                cos = torch.sum(grad1_normalized * grad2_normalized, (1, 2, 3))
                reg += args.grad_align_cos_lambda * (1.0 - cos.mean())

            loss += reg

            # epoch 0 only measures init stats — no parameter update there.
            if epoch != 0:
                opt.zero_grad()
                utils.backward(loss, opt, half_prec)
                opt.step()

            time_train += time.time() - time_start_iter
            train_loss += loss.item() * y.size(0)
            train_reg += reg.item() * y.size(0)
            train_acc += (output.max(1)[1] == y).sum().item()
            train_n += y.size(0)

            with torch.no_grad():  # no grad for the stats
                grad_norm_x += l2_norm_batch(grad).sum().item()
                delta_final = clamp(X + delta, 0, 1) - X  # we should measure delta after the projection onto [0, 1]^d
                avg_delta_l2 += ((delta_final ** 2).sum([1, 2, 3]) ** 0.5).sum().item()

            # --- periodic mid-training evaluation on the "fast" subsets ---
            if iteration % args.eval_iter_freq == 0:
                train_loss, train_reg = train_loss / train_n, train_reg / train_n
                train_acc, avg_delta_l2 = train_acc / train_n, avg_delta_l2 / train_n

                # it'd be incorrect to recalculate the BN stats on the test sets and for clean / adversarial points
                utils.model_eval(model, half_prec)

                test_acc_clean, _, _ = rob_acc(test_batches_fast, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                test_acc_fgsm, test_loss_fgsm, fgsm_deltas = rob_acc(test_batches_fast, model, eps, eps, opt, half_prec, 1, 1, rs=False)
                test_acc_pgd, test_loss_pgd, pgd_deltas = rob_acc(test_batches_fast, model, eps, pgd_alpha, opt, half_prec, args.attack_iters, 1)
                cos_fgsm_pgd = utils.avg_cos_np(fgsm_deltas, pgd_deltas)
                train_acc_pgd, _, _ = rob_acc(train_batches_fast, model, eps, pgd_alpha, opt, half_prec, args.attack_iters, 1)  # needed for early stopping

                grad_x = utils.get_grad_np(model, test_batches_fast, eps, opt, half_prec, rs=False)
                grad_eta = utils.get_grad_np(model, test_batches_fast, eps, opt, half_prec, rs=True)
                cos_x_eta = utils.avg_cos_np(grad_x, grad_eta)

                time_elapsed = time.time() - start_time
                train_str = '[train] loss {:.3f}, reg {:.3f}, acc {:.2%} acc_pgd {:.2%}'.format(train_loss, train_reg, train_acc, train_acc_pgd)
                test_str = '[test] acc_clean {:.2%}, acc_fgsm {:.2%}, acc_pgd {:.2%}, cos_x_eta {:.3}, cos_fgsm_pgd {:.3}'.format(
                    test_acc_clean, test_acc_fgsm, test_acc_pgd, cos_x_eta, cos_fgsm_pgd)
                logger.info('{}-{}: {}  {} ({:.2f}m, {:.2f}m)'.format(epoch, iteration, train_str, test_str,
                                                                      time_train/60, time_elapsed/60))

                if train_acc_pgd > train_acc_pgd_best:  # catastrophic overfitting can be detected on the training set
                    best_state_dict = copy.deepcopy(model.state_dict())
                    train_acc_pgd_best, best_iteration = train_acc_pgd, iteration

                utils.model_train(model, half_prec)
                train_loss, train_reg, train_acc, train_n, grad_norm_x, avg_delta_l2 = 0, 0, 0, 0, 0, 0

            iteration += 1
            X_prev, y_prev = X.clone(), y.clone()  # needed for Free-AT

        # --- after the last epoch: save and run the full fp32 evaluation ---
        if epoch == args.epochs:
            torch.save({'last': model.state_dict(), 'best': best_state_dict}, 'models/{} epoch={}.pth'.format(model_name, epoch))
            # disable global conversion to fp16 from amp.initialize() (https://github.com/NVIDIA/apex/issues/567)
            context_manager = amp.disable_casts() if half_prec else utils.nullcontext()
            with context_manager:
                last_state_dict = copy.deepcopy(model.state_dict())
                half_prec = False  # final eval is always in fp32
                model.load_state_dict(last_state_dict)
                utils.model_eval(model, half_prec)
                opt = torch.optim.SGD(model.parameters(), lr=0)

                attack_iters, n_restarts = (50, 10) if not args.debug else (10, 3)
                test_acc_clean, _, _ = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                test_acc_pgd_rr, _, deltas_pgd_rr = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, attack_iters, n_restarts)
                logger.info('[last: test on 10k points] acc_clean {:.2%}, pgd_rr {:.2%}'.format(test_acc_clean, test_acc_pgd_rr))

                if args.eval_early_stopped_model:
                    model.load_state_dict(best_state_dict)
                    utils.model_eval(model, half_prec)
                    test_acc_clean, _, _ = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, 0, 1)
                    test_acc_pgd_rr, _, deltas_pgd_rr = rob_acc(test_batches, model, eps, pgd_alpha, opt, half_prec, attack_iters, n_restarts)
                    logger.info('[best: test on 10k points][iter={}] acc_clean {:.2%}, pgd_rr {:.2%}'.format(
                        best_iteration, test_acc_clean, test_acc_pgd_rr))

        utils.model_train(model, half_prec)

    logger.info('Done in {:.2f}m'.format((time.time() - start_time) / 60))
コード例 #23
0
def main():
    """Train a VSE image-text matching model.

    Defines all hyperparameters via argparse, loads the vocabulary and
    data loaders, optionally resumes from a checkpoint, then alternates
    training epochs with validation, checkpointing after each epoch.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='data',
                        help='path to datasets')
    parser.add_argument('--data_name', default='f30k',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path', default='vocab',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size', default=224, type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=2e-4, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=100, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=500, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='runs/test',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--finetune', action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--use_restval', action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--reset_train', action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')
    parser.add_argument('--K', default=2, type=int,help='num of JSR.')
    parser.add_argument('--feature_path', default='data/joint-pretrain/flickr30k/region_feat_gvd_wo_bgd/trainval/',
                        type=str, help='path to the pre-computed image features')
    parser.add_argument('--region_bbox_file',
                        default='data/joint-pretrain/flickr30k/region_feat_gvd_wo_bgd/flickr30k_detection_vg_thresh0.2_feat_gvd_checkpoint_trainvaltest.h5',
                        type=str, help='path to the region_bbox_file(.h5)')
    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = pickle.load(open(os.path.join(
        opt.vocab_path, '%s_vocab.pkl' % opt.data_name), 'rb'))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.crop_size, opt.batch_size, opt.workers, opt)

    # Construct the model
    model = VSE(opt)
    best_rsum = 0
    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
            # Free checkpoint memory. Bug fix: this `del` used to sit after
            # the if/else, so a missing checkpoint path (the else branch)
            # raised NameError because `checkpoint` was never bound.
            del checkpoint
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    # Train the Model

    for epoch in range(opt.num_epochs):
        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
            'Eiters': model.Eiters,
        }, is_best, epoch, prefix=opt.logger_name + '/')
コード例 #24
0
ファイル: test.py プロジェクト: vishwerine/vsepp-project
def main():
    """Entry point for the vsepp evaluation script.

    Parses hyper-parameters, loads the vocabulary and data loaders, builds a
    VSE model, optionally restores a checkpoint, and runs one validation pass.

    Relies on module-level project imports (``data``, ``VSE``, ``validate``,
    ``tb_logger``) defined elsewhere in the file.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path',
                        default='/home/dcsaero01/data/datasets/vsepp/',
                        help='path to datasets')
    parser.add_argument('--data_name',
                        default='minicsdv2_precomp',
                        help='{coco,f8k,f30k,10crop}_precomp|coco|f8k|f30k')
    parser.add_argument('--vocab_path',
                        default='./vocab/',
                        help='Path to saved vocabulary pickle files.')
    parser.add_argument('--margin',
                        default=0.2,
                        type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs',
                        default=300,
                        type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size',
                        default=128,
                        type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim',
                        default=300,
                        type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size',
                        default=1024,
                        type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip',
                        default=2.,
                        type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--crop_size',
                        default=224,
                        type=int,
                        help='Size of an image crop as the CNN input.')
    parser.add_argument('--num_layers',
                        default=1,
                        type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate',
                        default=.0002,
                        type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update',
                        default=15,
                        type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument(
        '--dropout_value',
        default=0,
        type=float,
        help='Probability value for dropout after linear layer')
    parser.add_argument('--workers',
                        default=10,
                        type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step',
                        default=10,
                        type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step',
                        default=500,
                        type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name',
                        default='runs/runX',
                        help='Path to save the model and Tensorboard log.')
    parser.add_argument(
        '--resume',
        default=
        '/home/dcsaero01/data/projects/vsepp/runs/minicsdv2/checkpoint.pth.tar',
        type=str,
        metavar='PATH',
        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation',
                        action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim',
                        default=4096,
                        type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--text_dim',
                        default=6000,
                        type=int,
                        # BUG FIX: help text previously said "image embedding"
                        # (copy-paste from --img_dim); this flag sizes the text
                        # features.
                        help='Dimensionality of the text embedding.')
    parser.add_argument('--finetune',
                        action='store_true',
                        help='Fine-tune the image encoder.')
    parser.add_argument('--cnn_type',
                        default='vgg19',
                        help="""The CNN used for image encoder
                        (e.g. vgg19, resnet152)""")
    parser.add_argument('--use_restval',
                        action='store_true',
                        help='Use the restval data for training on MSCOCO.')
    parser.add_argument('--measure',
                        default='cosine',
                        help='Similarity measure used (cosine|order)')
    parser.add_argument('--use_abs',
                        action='store_true',
                        help='Take the absolute value of embedding vectors.')
    parser.add_argument('--no_imgnorm',
                        action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--reset_train',
                        action='store_true',
                        help='Ensure the training is always done in '
                        'train mode (Not recommended).')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper.  The *_st_* precomp variants apparently ship
    # pre-tokenized features, so no vocab pickle is needed for them.
    if opt.data_name == 'coco_st_precomp' or opt.data_name == 'coco_st_ner_precomp':
        vocab = None
        opt.vocab_size = 0
    else:
        vocab = pickle.load(
            open(os.path.join(opt.vocab_path, '%s_vocab.pkl' % opt.data_name),
                 'rb'))
        opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.crop_size, opt.batch_size,
                                                opt.workers, opt)

    # Construct the model
    model = VSE(opt)

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            # NOTE(review): evaluation runs on the *training* loader here;
            # confirm this is intended (val_loader is also available).
            validate(opt, train_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    """
コード例 #25
0
def run_experiment(xp, xp_count, n_experiments):
    """Run one federated-distillation experiment described by ``xp``.

    Args:
        xp: experiment object exposing ``hyperparameters``, ``log``,
            ``is_log_round`` and ``save_to_disc``.
        xp_count: zero-based index of this experiment in the schedule
            (used only for progress output).
        n_experiments: total number of scheduled experiments.

    Side effects: reads module-level ``args`` for paths/schedule, trains the
    clients/server in place, writes checkpoints and result files to disk, and
    frees GPU memory at the end.
    """
    print(xp)
    hp = xp.hyperparameters

    # Build the model factory and an optimizer factory whose hyper-parameters
    # come from hp when present, falling back to the optimizer defaults.
    model_fn, optimizer, optimizer_hp = models.get_model(hp["net"])
    optimizer_fn = lambda x: optimizer(
        x, **{k: hp[k] if k in hp else v
              for k, v in optimizer_hp.items()})
    train_data, test_data = data.get_data(hp["dataset"], args.DATA_PATH)
    distill_data = data.get_data(hp["distill_dataset"], args.DATA_PATH)
    # Keep a random subset of n_distill samples for the distillation phase.
    distill_data = torch.utils.data.Subset(
        distill_data,
        np.random.permutation(len(distill_data))[:hp["n_distill"]])

    client_loaders, test_loader = data.get_loaders(
        train_data,
        test_data,
        n_clients=hp["n_clients"],
        classes_per_client=hp["classes_per_client"],
        batch_size=hp["batch_size"],
        n_data=None)
    distill_loader = torch.utils.data.DataLoader(distill_data,
                                                 batch_size=128,
                                                 shuffle=False)

    clients = [
        Client(model_fn, optimizer_fn, loader) for loader in client_loaders
    ]
    server = Server(model_fn, lambda x: torch.optim.Adam(x, lr=0.001),
                    test_loader, distill_loader)
    server.load_model(path=args.CHECKPOINT_PATH, name=hp["pretrained"])

    # print model
    models.print_model(server.model)

    # Start Distributed Training Process
    print("Start Distributed Training..\n")
    t1 = time.time()
    for c_round in range(1, hp["communication_rounds"] + 1):

        participating_clients = server.select_clients(clients,
                                                      hp["participation_rate"])

        for client in tqdm(participating_clients):
            client.synchronize_with_server(server)
            train_stats = client.compute_weight_update(hp["local_epochs"])

        if hp["aggregate"]:
            server.aggregate_weight_updates(participating_clients)

        if hp["use_distillation"]:
            server.distill(participating_clients,
                           hp["distill_epochs"],
                           compress=hp["compress"])

        # Logging
        if xp.is_log_round(c_round):
            print("Experiment: {} ({}/{})".format(args.schedule, xp_count + 1,
                                                  n_experiments))

            xp.log({
                'communication_round': c_round,
                'epochs': c_round * hp['local_epochs']
            })
            xp.log({
                key: clients[0].optimizer.__dict__['param_groups'][0][key]
                for key in optimizer_hp
            })

            # Evaluate
            # NOTE(review): train_stats holds the stats of the *last* client of
            # the round and is unbound if no client participated — confirm
            # participation_rate always selects at least one client.
            xp.log({
                "client_train_{}".format(key): value
                for key, value in train_stats.items()
            })
            xp.log({
                "server_val_{}".format(key): value
                for key, value in server.evaluate().items()
            })

            # Save results to Disk (best-effort; a failed save must not abort
            # the whole experiment, but we do want to see why it failed).
            try:
                xp.save_to_disc(path=args.RESULTS_PATH, name=hp['log_path'])
            except Exception as exc:  # was a bare `except:` that hid the cause
                print("Saving results Failed!", exc)

            # Timing
            e = int((time.time() - t1) / c_round *
                    (hp['communication_rounds'] - c_round))
            print(
                "Remaining Time (approx.):",
                '{:02d}:{:02d}:{:02d}'.format(e // 3600, (e % 3600 // 60),
                                              e % 60),
                "[{:.2f}%]\n".format(c_round / hp['communication_rounds'] *
                                     100))

    # Save model to disk
    server.save_model(path=args.CHECKPOINT_PATH, name=hp["save_model"])

    # Delete objects to free up GPU memory
    del server
    clients.clear()
    torch.cuda.empty_cache()
コード例 #26
0
def main():
    """Train (or evaluate) MnasNet.

    Builds loaders, model, loss, optimizer+scheduler; optionally resumes from
    a checkpoint file or directory; then hands off to ``train_network``.
    Relies on project helpers (``get_args``, ``get_loaders``, ``MnasNet``,
    ``flops_benchmark``, ``Mixup``, ``OptimizerWrapper``, ``CsvLogger``,
    ``train_network``, ...) imported elsewhere in the file.
    """
    args = get_args()
    device, dtype = args.device, args.dtype

    # Same batch size is used for both train and validation loaders.
    train_loader, val_loader = get_loaders(args.dataroot, args.batch_size,
                                           args.batch_size, args.input_size,
                                           args.workers, args.world_size,
                                           args.local_rank)

    # drop_prob=0.0 disables dropout; num_steps lets the model schedule
    # step-dependent behavior over the whole run.
    model = MnasNet(n_class=args.num_classes,
                    width_mult=args.scaling,
                    drop_prob=0.0,
                    num_steps=len(train_loader) * args.epochs)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    flops = flops_benchmark.count_flops(MnasNet,
                                        1,
                                        device,
                                        dtype,
                                        args.input_size,
                                        3,
                                        width_mult=args.scaling)
    if not args.child:
        # Only the parent process prints the (large) model summary.
        print(model)
        print('number of parameters: {}'.format(num_parameters))
        print('FLOPs: {}'.format(flops))

    # define loss function (criterion) and optimizer
    criterion = CrossEntropyLoss()
    mixup = Mixup(args.num_classes, args.mixup, args.smooth_eps)

    model, criterion = model.to(device=device,
                                dtype=dtype), criterion.to(device=device,
                                                           dtype=dtype)
    if args.dtype == torch.float16:
        # Keep batch-norm layers in float32 for numerical stability when the
        # rest of the model runs in half precision.
        for module in model.modules():  # FP batchnorm
            if is_bn(module):
                module.to(dtype=torch.float32)

    if args.distributed:
        args.device_ids = [args.local_rank]
        dist.init_process_group(backend=args.dist_backend,
                                init_method=args.dist_init,
                                world_size=args.world_size,
                                rank=args.local_rank)
        model = torch.nn.parallel.DistributedDataParallel(
            model, device_ids=[args.local_rank], output_device=args.local_rank)
        print('Node #{}'.format(args.local_rank))
    else:
        # NOTE(review): single-node path still pins DataParallel to
        # args.local_rank only — confirm multi-GPU DataParallel is not wanted.
        model = torch.nn.parallel.DataParallel(model,
                                               device_ids=[args.local_rank],
                                               output_device=args.local_rank)

    optimizer_class = torch.optim.SGD
    optimizer_params = {
        "lr": args.learning_rate,
        "momentum": args.momentum,
        "weight_decay": args.decay,
        "nesterov": True
    }
    if args.find_clr:
        # Cyclical-learning-rate bounds search: runs its own optimizer and
        # exits without training.
        optimizer = torch.optim.SGD(model.parameters(),
                                    args.learning_rate,
                                    momentum=args.momentum,
                                    weight_decay=args.decay,
                                    nesterov=True)
        find_bounds_clr(model,
                        train_loader,
                        optimizer,
                        criterion,
                        device,
                        dtype,
                        min_lr=args.min_lr,
                        max_lr=args.max_lr,
                        step_size=args.epochs_per_step * len(train_loader),
                        mode=args.mode,
                        save_path=args.save_path)
        return

    # Pick the LR scheduler from the CLI choice; anything else is an error.
    if args.sched == 'clr':
        scheduler_class = CyclicLR
        scheduler_params = {
            "base_lr": args.min_lr,
            "max_lr": args.max_lr,
            "step_size": args.epochs_per_step * len(train_loader),
            "mode": args.mode
        }
    elif args.sched == 'multistep':
        scheduler_class = MultiStepLR
        scheduler_params = {"milestones": args.schedule, "gamma": args.gamma}
    elif args.sched == 'cosine':
        scheduler_class = CosineLR
        scheduler_params = {
            "max_epochs": args.epochs,
            "warmup_epochs": args.warmup,
            "iter_in_epoch": len(train_loader)
        }
    elif args.sched == 'gamma':
        scheduler_class = StepLR
        scheduler_params = {"step_size": 30, "gamma": args.gamma}
    else:
        raise ValueError('Wrong scheduler!')

    # Shadow (master) FP32 weights are only needed when training in FP16.
    optim = OptimizerWrapper(model,
                             optimizer_class=optimizer_class,
                             optimizer_params=optimizer_params,
                             scheduler_class=scheduler_class,
                             scheduler_params=scheduler_params,
                             use_shadow_weights=args.dtype == torch.float16)
    best_test = 0

    # optionally resume from a checkpoint
    # `data` collects previously logged CSV rows so the CsvLogger can continue
    # an interrupted run.
    data = None
    if args.resume:
        if os.path.isfile(args.resume):
            print("=> loading checkpoint '{}'".format(args.resume))
            checkpoint = torch.load(args.resume, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                args.resume, checkpoint['epoch']))
        elif os.path.isdir(args.resume):
            # Directory resume: per-rank checkpoint and CSV history files.
            checkpoint_path = os.path.join(
                args.resume, 'checkpoint{}.pth.tar'.format(args.local_rank))
            csv_path = os.path.join(args.resume,
                                    'results{}.csv'.format(args.local_rank))
            print("=> loading checkpoint '{}'".format(checkpoint_path))
            checkpoint = torch.load(checkpoint_path, map_location=device)
            args.start_epoch = checkpoint['epoch'] - 1
            best_test = checkpoint['best_prec1']
            model.load_state_dict(checkpoint['state_dict'])
            optim.load_state_dict(checkpoint['optimizer'])
            print("=> loaded checkpoint '{}' (epoch {})".format(
                checkpoint_path, checkpoint['epoch']))
            data = []
            with open(csv_path) as csvfile:
                reader = csv.DictReader(csvfile)
                for row in reader:
                    data.append(row)
        else:
            print("=> no checkpoint found at '{}'".format(args.resume))

    if args.evaluate:
        loss, top1, top5 = test(model, val_loader, criterion, device, dtype,
                                args.child)  # TODO
        return

    csv_logger = CsvLogger(filepath=args.save_path,
                           data=data,
                           local_rank=args.local_rank)
    csv_logger.save_params(sys.argv, args)

    # Look up the paper-claimed top-1 accuracy for this input size / scaling,
    # if available, so training curves can be compared against it.
    claimed_acc1 = None
    claimed_acc5 = None
    if args.input_size in claimed_acc_top1:
        if args.scaling in claimed_acc_top1[args.input_size]:
            claimed_acc1 = claimed_acc_top1[args.input_size][args.scaling]
            if not args.child:
                csv_logger.write_text(
                    'Claimed accuracy is {:.2f}% top-1'.format(claimed_acc1 *
                                                               100.))
    # NOTE(review): args.start_epoch is only assigned in the resume branches —
    # confirm get_args() provides a default, otherwise a fresh run fails here.
    train_network(args.start_epoch, args.epochs, optim, model, train_loader,
                  val_loader, criterion, mixup, device, dtype, args.batch_size,
                  args.log_interval, csv_logger, args.save_path, claimed_acc1,
                  claimed_acc5, best_test, args.local_rank, args.child)
コード例 #27
0
    log_interval = 100
    mode = 'triangular2'
    evaluate = 'false'

    model = MobileNet2(input_size=input_size, scale=scaling)
    num_parameters = sum([l.nelement() for l in model.parameters()])
    print(model)


    print('number of parameters: {}'.format(num_parameters))
    print('FLOPs: {}'.format(
        flops_benchmark.count_flops(MobileNet2,
                                    batch_size // len(gpus) if gpus is not None else batch_size,
                                    device, dtype, input_size, 3, scaling)))

    train_loader, val_loader = get_loaders(dataroot, batch_size, batch_size, input_size,
                                           workers)
    # define loss function (criterion) and optimizer
    criterion = torch.nn.CrossEntropyLoss()
    if gpus is not None:
        model = torch.nn.DataParallel(model, gpus)
    model.to(device=device, dtype=dtype)
    criterion.to(device=device, dtype=dtype)

    optimizer = torch.optim.SGD(model.parameters(), learning_rate, momentum=momentum, weight_decay=decay,
                                nesterov=True)
    if args.find_clr:
        find_bounds_clr(model, train_loader, optimizer, criterion, device, dtype, min_lr=min_lr,
                        max_lr=max_lr, step_size=epochs_per_step * len(train_loader), mode=mode,
                        save_path=save_path)
        return
コード例 #28
0
def main():
    """Train the SCAN image-text matching model.

    Parses options via ``opts.parse_opt()``, pins the visible GPU(s), builds
    loaders and a DataParallel-wrapped SCAN model, optionally resumes from a
    checkpoint, then runs the epoch loop with periodic ``evalrank``
    validation and per-epoch checkpointing.
    """
    # Hyper Parameters
    opt = opts.parse_opt()

    # Restrict CUDA to the requested device string, then address it as GPU 0.
    device_id = opt.gpuid
    device_count = len(str(device_id).split(","))
    #assert device_count == 1 or device_count == 2
    print("use GPU:", device_id, "GPUs_count", device_count, flush=True)
    os.environ['CUDA_VISIBLE_DEVICES']=str(device_id)
    device_id = 0
    torch.cuda.set_device(0)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.batch_size, opt.workers, opt)

    # Construct the model
    model = SCAN(opt)
    model.cuda()
    model = nn.DataParallel(model)

    # Loss and Optimizer
    criterion = ContrastiveLoss(opt=opt, margin=opt.margin, max_violation=opt.max_violation)
    # BUG FIX: reduction="batchmean" is only valid for KLDivLoss; MSELoss
    # raises ValueError at forward time with it.  Use the standard "mean".
    # NOTE(review): mse_criterion is never used in this function — confirm
    # whether it belongs in the training step or can be removed.
    mse_criterion = nn.MSELoss(reduction="mean")
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)

    # optionally resume from a checkpoint
    if not os.path.exists(opt.model_name):
        os.makedirs(opt.model_name)
    start_epoch = 0
    best_rsum = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    # Baseline evaluation before (resumed) training begins.
    evalrank(model.module, val_loader, opt)

    print(opt, flush=True)

    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):
        message = "epoch: %d, model name: %s\n" % (epoch, opt.model_name)
        log_file = os.path.join(opt.logger_name, "performance.log")
        logging_func(log_file, message)
        print("model name: ", opt.model_name, flush=True)
        adjust_learning_rate(opt, optimizer, epoch)
        run_time = 0
        for i, (images, captions, lengths, masks, ids, _) in enumerate(train_loader):
            start_time = time.time()
            model.train()

            optimizer.zero_grad()

            # DataParallel splits the batch across GPUs; replicate the images
            # so each replica sees the full image set.
            if device_count != 1:
                images = images.repeat(device_count,1,1)

            score = model(images, captions, lengths, masks, ids)
            loss = criterion(score)

            loss.backward()
            if opt.grad_clip > 0:
                clip_grad_norm_(model.parameters(), opt.grad_clip)
            optimizer.step()
            run_time += time.time() - start_time
            # validate at every val_step
            if i % 100 == 0:
                log = "epoch: %d; batch: %d/%d; loss: %.4f; time: %.4f" % (epoch, 
                            i, len(train_loader), loss.data.item(), run_time / 100)
                print(log, flush=True)
                run_time = 0
            if (i + 1) % opt.val_step == 0:
                evalrank(model.module, val_loader, opt)

        print("-------- performance at epoch: %d --------" % (epoch))
        # evaluate on validation set
        rsum = evalrank(model.module, val_loader, opt)
        #rsum = -100
        filename = 'model_' + str(epoch) + '.pth.tar'
        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
        }, is_best, filename=filename, prefix=opt.model_name + '/')
コード例 #29
0
def main(opt):
    """Evaluate or train the UDAG model, depending on ``opt.evaluation``.

    Evaluation path: load the test loader, restore ``opt.resume``, validate
    once and dump the similarity matrix to ``<data_name>_sims.npy``.
    Training path: run the full epoch loop with per-epoch validation and
    checkpointing.

    Args:
        opt: parsed options namespace (data_name, batch_size, workers,
             logger_name, model_name, resume, evaluation, num_epochs, ...).
    """
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Construct the model
    model = UDAG(opt)

    if opt.evaluation:
        # Evaluation-only: restore a checkpoint and score the test split.
        val_loader = data.get_test_loader(opt.data_name, opt.batch_size,
                                          opt.workers, opt)

        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            # Eiters is used to show logs as the continuation of another training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})".format(
                opt.resume, start_epoch, best_rsum))
            # Persist the similarity matrix for offline analysis.
            _, sims = validate(opt, val_loader, model)
            np.save(opt.data_name + '_sims', sims)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    else:
        # Train the Model
        # Load data loaders
        train_loader, val_loader = data.get_loaders(opt.data_name,
                                                    opt.batch_size,
                                                    opt.workers, opt)
        best_rsum = 0
        for epoch in range(opt.num_epochs):
            adjust_learning_rate(opt, model.optimizer, epoch)

            # train for one epoch
            train(opt, train_loader, model, epoch, val_loader)

            # evaluate on validation set
            rsum = validate(opt, val_loader, model)

            # remember best R@ sum and save checkpoint
            is_best = rsum > best_rsum
            best_rsum = max(rsum, best_rsum)
            save_checkpoint(
                {
                    'epoch': epoch + 1,
                    'model': model.state_dict(),
                    'best_rsum': best_rsum,
                    'opt': opt,
                    'Eiters': model.Eiters,
                },
                is_best,
                prefix=opt.logger_name + '_' + opt.model_name + '/')
コード例 #30
0
ファイル: train.py プロジェクト: yangxiao2/GSMN
def main():
    """Train the GSMN image-text matching model.

    Parses hyper-parameters, loads the vocabulary and data loaders, builds the
    model, optionally resumes from a checkpoint, and runs the training loop
    with per-epoch validation and checkpointing.
    """
    # Hyper Parameters
    parser = argparse.ArgumentParser()
    parser.add_argument('--data_path', default='./data/',
                        help='path to datasets')
    parser.add_argument('--data_name', default='precomp',
                        help='{coco,f30k}_precomp')
    parser.add_argument('--vocab_path', default='./vocab/',
                        help='Path to saved vocabulary json files.')
    parser.add_argument('--margin', default=0.2, type=float,
                        help='Rank loss margin.')
    parser.add_argument('--num_epochs', default=30, type=int,
                        help='Number of training epochs.')
    parser.add_argument('--batch_size', default=128, type=int,
                        help='Size of a training mini-batch.')
    parser.add_argument('--word_dim', default=300, type=int,
                        help='Dimensionality of the word embedding.')
    parser.add_argument('--embed_size', default=1024, type=int,
                        help='Dimensionality of the joint embedding.')
    parser.add_argument('--grad_clip', default=2., type=float,
                        help='Gradient clipping threshold.')
    parser.add_argument('--num_layers', default=1, type=int,
                        help='Number of GRU layers.')
    parser.add_argument('--learning_rate', default=.0002, type=float,
                        help='Initial learning rate.')
    parser.add_argument('--lr_update', default=15, type=int,
                        help='Number of epochs to update the learning rate.')
    parser.add_argument('--workers', default=10, type=int,
                        help='Number of data loader workers.')
    parser.add_argument('--log_step', default=100, type=int,
                        help='Number of steps to print and record the log.')
    parser.add_argument('--val_step', default=15000, type=int,
                        help='Number of steps to run validation.')
    parser.add_argument('--logger_name', default='./runs/runX/log',
                        help='Path to save Tensorboard log.')
    parser.add_argument('--model_name', default='./runs/runX/checkpoint',
                        help='Path to save the models.')
    parser.add_argument('--resume', default='', type=str, metavar='PATH',
                        help='path to latest checkpoint (default: none)')
    parser.add_argument('--max_violation', action='store_true',
                        help='Use max instead of sum in the rank loss.')
    parser.add_argument('--img_dim', default=2048, type=int,
                        help='Dimensionality of the image embedding.')
    parser.add_argument('--no_imgnorm', action='store_true',
                        help='Do not normalize the image embeddings.')
    parser.add_argument('--no_txtnorm', action='store_true',
                        help='Do not normalize the text embeddings.')
    parser.add_argument('--bi_gru', action='store_true',
                        help='Use bidirectional GRU.')
    parser.add_argument('--lambda_softmax', default=9., type=float,
                        help='Attention softmax temperature.')
    parser.add_argument('--feat_dim', default=16, type=int,
                        help='Dimensionality of the similarity embedding.')
    parser.add_argument('--num_block', default=16, type=int,
                        help='Dimensionality of the similarity embedding.')
    parser.add_argument('--hid_dim', default=32, type=int,
                        help='Dimensionality of the hidden state during graph convolution.')
    parser.add_argument('--out_dim', default=1, type=int,
                        help='Dimensionality of the hidden state during graph convolution.')
    parser.add_argument('--is_sparse', action='store_true',
                        help='Whether models the text as a fully connected graph.')

    opt = parser.parse_args()
    print(opt)

    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(os.path.join(
        opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.batch_size, opt.workers, opt)

    # Construct the models
    model = GSMN(opt)

    # Defaults for a fresh run; a resumed checkpoint overrides both so that
    # training really continues where it left off.
    start_epoch = 0
    best_rsum = 0

    # optionally resume from a checkpoint
    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['models'])
            # Eiters is used to show logs as the continuation of another
            # training
            model.Eiters = checkpoint['Eiters']
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
            validate(opt, val_loader, model)
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))

    # Train the Model
    # BUG FIX: best_rsum was previously reset to 0 *after* the resume block
    # (discarding the restored best score) and the loop always started at
    # epoch 0; resume now continues from start_epoch with the restored best.
    for epoch in range(start_epoch, opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        rsum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        # BUG FIX: os.mkdir fails when intermediate directories are missing
        # (default model_name is './runs/runX/checkpoint'); makedirs with
        # exist_ok also avoids the check-then-create race.
        os.makedirs(opt.model_name, exist_ok=True)
        save_checkpoint({
            'epoch': epoch + 1,
            'models': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
            'Eiters': model.Eiters,
        }, is_best, filename='checkpoint_{}.pth.tar'.format(epoch), prefix=opt.model_name + '/')
コード例 #31
0
ファイル: test.py プロジェクト: nikitadhawan/arm
def test(args, eval_on):
    """Evaluate a trained checkpoint on one data split under group-mixture shifts.

    Args:
        args: experiment options (argparse.Namespace). Fields read here include
            device/cuda, dataset, use_context, debug, log_wandb, seed, binning,
            n_test_dists, eval_deterministic, ckpt_path, experiment_name.
        eval_on: which split to evaluate — 'train', 'val' or 'test'.

    Returns:
        The stats object produced by ``utils.evaluate_each_corner`` (when
        ``args.eval_deterministic``) or ``utils.evaluate_mixtures``.

    Raises:
        ValueError: if ``eval_on`` is not one of 'train', 'val', 'test'.
            (Previously an invalid split fell through every branch and crashed
            later with an unhelpful NameError.)
    """
    if eval_on not in ('train', 'val', 'test'):
        raise ValueError(
            "eval_on must be 'train', 'val' or 'test', got %r" % (eval_on,))

    # Cuda
    args.device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    tags = ['supervised', f'{args.dataset}', f'use_context_{args.use_context}']

    if args.debug:
        tags.append('debug')

    if args.log_wandb:
        wandb.init(name=args.experiment_name,
                   project=f"arm_test_{args.dataset}",
                   tags=tags,
                   reinit=True)
        wandb.config.update(args, allow_val_change=True)

    # Get data
    train_loader, train_eval_loader, val_loader, test_loader = data.get_loaders(
        args)
    args.n_groups = train_loader.dataset.n_groups

    # NOTE(review): seeding happens *after* data.get_loaders, so any
    # randomness during loader construction is not covered by the seed —
    # ordering preserved from the original code.
    if args.seed is not None:
        print('setting seed', args.seed)
        torch.manual_seed(args.seed)
        if args.cuda:
            torch.cuda.manual_seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    # Empirical group distribution of the split being evaluated. The 'train'
    # distribution deliberately comes from train_loader (the training-order
    # loader), while evaluation itself runs on train_eval_loader below.
    dist_loader = {'train': train_loader,
                   'val': val_loader,
                   'test': test_loader}[eval_on]
    empirical_dist = [dist_loader.dataset.group_dist]

    # One-hot "corner" distributions: all mass on a single group.
    corner_dists = np.array(get_one_hot(range(args.n_groups), args.n_groups))

    if args.binning:
        # Binned mode keeps the three families as a list (not concatenated),
        # matching what evaluate_mixtures expects in this mode.
        eval_dists = [corner_dists,
                      get_binned_dists(args.n_test_dists),
                      empirical_dist]
    elif args.n_test_dists > 0:
        # Random mixtures drawn from a flat Dirichlet over the groups.
        random_dists = np.random.dirichlet(np.ones(args.n_groups),
                                           size=args.n_test_dists)
        eval_dists = np.concatenate(
            [corner_dists, random_dists, empirical_dist])
    else:
        eval_dists = np.concatenate([corner_dists, empirical_dist])

    # Get model
    model = utils.get_model(args, image_shape=train_loader.dataset.image_shape)
    # Checkpoint is a tuple; element 0 is the state dict.
    state_dict = torch.load(args.ckpt_path)[0]

    # Remove the context net's classifier head — not used at test time.
    for key in ("context_net.classifier.weight",
                "context_net.classifier.bias"):
        state_dict.pop(key, None)

    # Strip DataParallel's 'module.' prefix so keys match the bare model.
    state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}

    model.load_state_dict(state_dict)

    model = model.to(args.device)
    model.eval()

    # Evaluation loader: 'train' uses the eval-mode training loader.
    eval_loader = {'train': train_eval_loader,
                   'val': val_loader,
                   'test': test_loader}[eval_on]

    if args.eval_deterministic:
        stats = utils.evaluate_each_corner(args,
                                           model,
                                           eval_loader,
                                           split=eval_on)
    else:
        worst_case_acc, avg_acc, empirical_case_acc, stats = utils.evaluate_mixtures(
            args, model, eval_loader, eval_dists, split=eval_on)

    return stats