    def __init__(self, config):

        # Config
        self.config = config

        self.start = 0  # Unless using pre-trained model

        # Create directories if not exist
        utils.make_folder(self.config.save_path)
        utils.make_folder(self.config.model_weights_path)
        utils.make_folder(self.config.sample_images_path)

        # Copy files
        utils.write_config_to_file(self.config, self.config.save_path)
        utils.copy_scripts(self.config.save_path)

        # Check for CUDA
        utils.check_for_CUDA(self)

        # Make dataloader
        self.dataloader, self.num_of_classes = utils.make_dataloader(
            self.config.batch_size_in_gpu, self.config.dataset,
            self.config.data_path, self.config.shuffle, self.config.drop_last,
            self.config.dataloader_args, self.config.resize,
            self.config.imsize, self.config.centercrop,
            self.config.centercrop_size)

        # Data iterator
        self.data_iter = iter(self.dataloader)

        # Build G and D
        self.build_models()

        if self.config.adv_loss == 'dcgan':
            self.criterion = nn.BCELoss()
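
Every snippet on this page delegates dataset construction to a make_dataloader helper whose implementation is not shown. Below is a minimal sketch of the variant called above, which returns a DataLoader together with the number of classes; the torchvision ImageFolder layout, the normalization values, and the exact handling of resize/centercrop are assumptions, not the original code.

# Hypothetical sketch of utils.make_dataloader as called above; not the
# original implementation. It builds an ImageFolder dataset with optional
# center-crop/resize transforms and returns (dataloader, num_of_classes).
import torch
from torchvision import datasets, transforms


def make_dataloader(batch_size, dataset, data_path, shuffle, drop_last,
                    dataloader_args, resize, imsize, centercrop,
                    centercrop_size):
    # `dataset` would select the dataset type in the real helper; only the
    # ImageFolder path is sketched here.
    tfms = []
    if centercrop:
        tfms.append(transforms.CenterCrop(centercrop_size))
    if resize:
        tfms.append(transforms.Resize((imsize, imsize)))
    tfms += [transforms.ToTensor(),
             transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]
    data = datasets.ImageFolder(data_path, transform=transforms.Compose(tfms))
    dataloader = torch.utils.data.DataLoader(data,
                                             batch_size=batch_size,
                                             shuffle=shuffle,
                                             drop_last=drop_last,
                                             **dataloader_args)
    return dataloader, len(data.classes)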
Example 2
def main():
    args = get_args()
    torch.manual_seed(args.seed)

    shape = (224, 224, 3)
    """ define dataloader """
    train_loader, valid_loader, test_loader = make_dataloader(args)
    """ define model architecture """
    model = get_model(args, shape, args.num_classes)

    if torch.cuda.device_count() >= 1:
        print('Model pushed to {} GPU(s), type {}.'.format(
            torch.cuda.device_count(), torch.cuda.get_device_name(0)))
        model = model.cuda()
    else:
        raise ValueError('CPU training is not supported')
    """ define loss criterion """
    criterion = nn.CrossEntropyLoss().cuda()
    """ define optimizer """
    optimizer = make_optimizer(args, model)
    """ define learning rate scheduler """
    scheduler = make_scheduler(args, optimizer)
    """ define trainer, evaluator, result_dictionary """
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': []
    }
    trainer = Trainer(model, criterion, optimizer, scheduler)
    evaluator = Evaluator(model, criterion)

    if args.evaluate:
        """ load model checkpoint """
        model.load()
        result_dict = evaluator.test(test_loader, args, result_dict)
    else:
        evaluator.save(result_dict)
        """ define training loop """
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch
            result_dict = trainer.train(train_loader, epoch, args, result_dict)
            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)
            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)

        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)
        """ save model checkpoint """
        model.save()

    print(result_dict)
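
Example 2 above (and the later main() variants) uses a make_dataloader(args) that returns train, validation, and test loaders in one call. The following is a hedged sketch under assumed args fields (data_path, batch_size, valid_fraction, num_workers); the real helper may read different attributes and datasets.

# Hypothetical three-way-split make_dataloader(args); not the original helper.
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms


def make_dataloader(args):
    tfm = transforms.Compose([transforms.Resize((224, 224)),
                              transforms.ToTensor()])
    full_train = datasets.ImageFolder(args.data_path + '/train', transform=tfm)
    test_set = datasets.ImageFolder(args.data_path + '/test', transform=tfm)
    # Hold out a fraction of the training images for validation
    n_valid = int(len(full_train) * args.valid_fraction)
    train_set, valid_set = random_split(
        full_train, [len(full_train) - n_valid, n_valid])
    train_loader = DataLoader(train_set, batch_size=args.batch_size,
                              shuffle=True, num_workers=args.num_workers)
    valid_loader = DataLoader(valid_set, batch_size=args.batch_size,
                              shuffle=False, num_workers=args.num_workers)
    test_loader = DataLoader(test_set, batch_size=args.batch_size,
                             shuffle=False, num_workers=args.num_workers)
    return train_loader, valid_loader, test_loader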
Example 3
def main(args):
    torch.manual_seed(0)

    train_fname = args.dataset_path + 'train.csv'
    test_fname = args.dataset_path + 'test.csv'

    dataloaders = make_dataloader(train_fname, test_fname)

    model = VDCNN(depth=args.depth, num_class=args.num_class)

    run_model(model, dataloaders, args.num_epochs)
Example 4
    def __init__(self, config):

        # Images data path & Output path
        self.dataset = config.dataset
        self.data_path = config.data_path
        self.save_path = os.path.join(config.save_path, config.name)

        # Training settings
        self.batch_size = config.batch_size
        self.total_step = config.total_step
        self.d_steps_per_iter = config.d_steps_per_iter
        self.g_steps_per_iter = config.g_steps_per_iter
        self.d_lr = config.d_lr
        self.g_lr = config.g_lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.inst_noise_sigma = config.inst_noise_sigma
        self.inst_noise_sigma_iters = config.inst_noise_sigma_iters
        self.start = 0  # Unless using pre-trained model

        # Image transforms
        self.shuffle = config.shuffle
        self.drop_last = config.drop_last
        self.resize = config.resize
        self.imsize = config.imsize
        self.centercrop = config.centercrop
        self.centercrop_size = config.centercrop_size
        self.tanh_scale = config.tanh_scale
        self.normalize = config.normalize

        # Step size
        self.log_step = config.log_step
        self.sample_step = config.sample_step
        self.model_save_step = config.model_save_step
        self.save_n_images = config.save_n_images
        self.max_frames_per_gif = config.max_frames_per_gif

        # Pretrained model
        self.pretrained_model = config.pretrained_model

        # Misc
        self.manual_seed = config.manual_seed
        self.disable_cuda = config.disable_cuda
        self.parallel = config.parallel
        self.dataloader_args = config.dataloader_args

        # Output paths
        self.model_weights_path = os.path.join(self.save_path,
                                               config.model_weights_dir)
        self.sample_path = os.path.join(self.save_path, config.sample_dir)

        # Model hyper-parameters
        self.adv_loss = config.adv_loss
        self.z_dim = config.z_dim
        self.g_conv_dim = config.g_conv_dim
        self.d_conv_dim = config.d_conv_dim
        self.lambda_gp = config.lambda_gp

        # Model name
        self.name = config.name

        # Create directories if not exist
        utils.make_folder(self.save_path)
        utils.make_folder(self.model_weights_path)
        utils.make_folder(self.sample_path)

        # Copy files
        utils.write_config_to_file(config, self.save_path)
        utils.copy_scripts(self.save_path)

        # Check for CUDA
        utils.check_for_CUDA(self)

        # Make dataloader
        self.dataloader, self.num_of_classes = utils.make_dataloader(
            self.batch_size, self.dataset, self.data_path, self.shuffle,
            self.drop_last, self.dataloader_args, self.resize, self.imsize,
            self.centercrop, self.centercrop_size)

        # Data iterator
        self.data_iter = iter(self.dataloader)

        # Build G and D
        self.build_models()

        # Start with pretrained model (if it exists)
        if self.pretrained_model != '':
            utils.load_pretrained_model(self)

        if self.adv_loss == 'dcgan':
            self.criterion = nn.BCELoss()
Example 5
def main():
    global args
    args = get_config()
    args.command = 'python ' + ' '.join(sys.argv)

    # Create saving directory
    if args.unigen:
        save_dir = './results_unigen/{0}/G{1}_glr{2}_dlr{3}_dstep{4}_zdim{5}_{6}/'.format(
            args.dataset, args.dec_dist, str(args.lr), str(args.lr_d),
            str(args.d_steps_per_iter), str(args.latent_dim), args.div)
    else:
        save_dir = './results/{0}/E{1}_G{2}_glr{3}_dlr{4}_gstep{5}_dstep{6}_zdim{7}_{8}/'.format(
            args.dataset, args.enc_dist, args.dec_dist, str(args.lr),
            str(args.lr_d), str(args.g_steps_per_iter),
            str(args.d_steps_per_iter), str(args.latent_dim), args.div)

    utils.make_folder(save_dir)
    utils.write_config_to_file(args, save_dir)

    global device
    device = torch.device('cuda')

    random.seed(args.seed)
    torch.manual_seed(args.seed)
    torch.cuda.manual_seed(args.seed)

    # Load datasets
    train_loader, test_loader = utils.make_dataloader(args)

    num_samples = len(train_loader.dataset)
    global num_iter_per_epoch
    num_iter_per_epoch = num_samples // args.batch_size

    # Losses file
    log_file_name = os.path.join(save_dir, 'log.txt')
    global log_file
    if args.resume:
        log_file = open(log_file_name, "at")
    else:
        log_file = open(log_file_name, "wt")

    # Build model
    if args.unigen:
        if args.dataset == 'mnist_stack':
            model = DCDecoder(args.latent_dim, 64, args.image_size, 3,
                              args.dec_dist)
            discriminator = DCDiscriminator(args.d_conv_dim, args.image_size)
        else:
            model = Generator(args.latent_dim, args.g_conv_dim,
                              args.image_size)
            discriminator = Discriminator(args.d_conv_dim, args.image_size)
        encoder_optimizer = None
        decoder_optimizer = optim.Adam(model.parameters(),
                                       lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        D_optimizer = optim.Adam(discriminator.parameters(),
                                 lr=args.lr_d,
                                 betas=(args.beta1, args.beta2))

    else:
        if args.dataset == 'mog':
            model = ToyAE(data_dim=2,
                          latent_dim=args.latent_dim,
                          enc_hidden_dim=500,
                          dec_hidden_dim=500,
                          enc_dist=args.enc_dist,
                          dec_dist=args.dec_dist)
            discriminator = DiscriminatorMLP(data_dim=2,
                                             latent_dim=args.latent_dim,
                                             hidden_dim_x=400,
                                             hidden_dim_z=400,
                                             hidden_dim=400)
        elif args.dataset in ['mnist', 'mnist_stack']:
            image_channel = 3 if args.dataset == 'mnist_stack' else 1
            tanh = args.prior == 'uniform' and args.enc_dist == 'deterministic'
            model = DCAE(args.latent_dim, 64, args.image_size, image_channel,
                         args.enc_dist, args.dec_dist, tanh)
            discriminator = DCJointDiscriminator(args.latent_dim, 64,
                                                 args.image_size,
                                                 image_channel,
                                                 args.dis_fc_size)
        else:
            model = BGM(args.latent_dim, args.g_conv_dim, args.image_size, 3,
                        args.enc_dist, args.enc_arch, args.enc_fc_size,
                        args.enc_noise_dim, args.dec_dist)
            discriminator = BigJointDiscriminator(args.latent_dim,
                                                  args.d_conv_dim,
                                                  args.image_size,
                                                  args.dis_fc_size)
        encoder_optimizer = optim.Adam(model.encoder.parameters(),
                                       lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        decoder_optimizer = optim.Adam(model.decoder.parameters(),
                                       lr=args.lr,
                                       betas=(args.beta1, args.beta2))
        D_optimizer = optim.Adam(discriminator.parameters(),
                                 lr=args.lr_d,
                                 betas=(args.beta1, args.beta2))

    # Load model from checkpoint
    if args.resume:
        ckpt_dir = args.ckpt_dir if args.ckpt_dir != '' else save_dir + 'model' + str(
            args.start_epoch - 1) + '.sav'
        checkpoint = torch.load(ckpt_dir)
        model.load_state_dict(checkpoint['model'])
        discriminator.load_state_dict(checkpoint['discriminator'])
        del checkpoint

    model = nn.DataParallel(model.to(device))
    discriminator = nn.DataParallel(discriminator.to(device))

    # Fixed noise from prior p_z for generating from G
    global fixed_noise
    if args.prior == 'gaussian':
        fixed_noise = torch.randn(args.save_n_samples,
                                  args.latent_dim,
                                  device=device)
    else:
        fixed_noise = torch.rand(
            args.save_n_samples, args.latent_dim, device=device) * 2 - 1

    # Train
    for i in range(args.start_epoch, args.start_epoch + args.n_epochs):
        train_age(i, model, discriminator, encoder_optimizer,
                  decoder_optimizer, D_optimizer, train_loader,
                  args.print_every, save_dir, args.sample_every, test_loader)
        if i % args.save_model_every == 0:
            torch.save(
                {
                    'model': model.module.state_dict(),
                    'discriminator': discriminator.module.state_dict()
                }, save_dir + 'model' + str(i) + '.sav')
Example 6
def main():
    # experiment settings via command line
    args = parse_arguments()
    # setup logging
    experiment_name = 'random' if args.random_acq else args.acq_func_ID
    logdir = './logs/{}/seed{}'.format(experiment_name, args.seed)
    os.makedirs('./logs/{}/checkpts/'.format(experiment_name), exist_ok=True) # make dir for saving models at checkpoints
    # TensorBoard logging
    writer1 = SummaryWriter(log_dir=logdir+'-1')
    writer2 = SummaryWriter(log_dir=logdir+'-2')
    writers = [writer1, writer2]
    # python logging
    logging.basicConfig(filename='./logs/{}/seed{}.log'.format(
        experiment_name, args.seed), level=logging.INFO)
    logging.getLogger().addHandler(logging.StreamHandler()) # makes messages print to stderr, too
    logging.info('Running experiment with the following settings:')
    for arg in vars(args):
        logging.info('{}: {}'.format(arg, getattr(args, arg)))
    # for reproducibility
    torch.manual_seed(args.seed)
    np.random.seed(args.seed)
    random.seed(args.seed)

    # load data
    train_data, test_data = load_data(args)
    # get idx of pretrain and validation data from train data
    pool_idx = set(range(len(train_data)))
    pretrain_idx, pool_idx = balanced_sample(train_data, n_classes=10, k=2, 
        idx_possible=pool_idx)
    valid_idx, pool_idx = balanced_sample(train_data, n_classes=10, k=10, 
        idx_possible=pool_idx) # Gal doesn't mention if validation set is balanced
    acq_idx = pretrain_idx.copy() # first 20 acquisitions are the pretraining data
    log_acquisitions(pretrain_idx, train_data, mean_info_gain=None, i_round=0,
                     writers=writers, cumulative_acqs=0)
    
    assert len(pretrain_idx) == 20
    assert len(valid_idx) == 100
    assert len(pool_idx) == len(train_data) - 120

    # make dataloaders
    train_loader = make_dataloader(train_data, args.train_batch_size, idx=acq_idx, random=True)
    valid_loader = make_dataloader(train_data, args.valid_batch_size, idx=valid_idx)
    test_loader  = make_dataloader(test_data, args.test_batch_size) # pytorch MNIST example shuffles test set, but seems unnecessary
    
    # pretraining
    # repeat for various choices of lambda:
    #    initial training on 20 points (random but *balanced*)
    #    compute validation error on 100 points
    # select lambda/model with lowest validation error
    weight_decay = compute_weight_decay(train_loader, valid_loader, args, writers, i_round=0)
    # weight_decay = 1.0
    writer1.add_scalar('optimal_lambda', weight_decay, 0)
    model = BayesianCNN()
    optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=weight_decay)
    fit(model, optimizer, train_loader, test_loader, args, writers, i_round=0) # do pretraining on 20 examples (not quite clear if Gal does this here, but I think so)
    torch.save(model.state_dict(), './logs/{}/checkpts/model_0.pt'.format(
        experiment_name))    
    
    # for short code tests: [also use --rounds 2 --epochs 1 --dropout_samples 10]
    # pool_idx.difference_update(set(range(100,60000)))
    for i_round in range(1, args.rounds + 1):
        logging.info('\nBEGIN ROUND {}\n'.format(i_round))
        # acquire args.acqs_per_round points from train_data according to acq_func
        new_idx, mean_info_gain = make_acquisitions(train_data, pool_idx, model, args)        
        acq_idx.update(new_idx)
        pool_idx.difference_update(new_idx)
        # log some data about those acquisitions
        n_acqs_previous = args.acqs_pretrain + (i_round-1) * args.acqs_per_round
        log_acquisitions(new_idx, train_data, mean_info_gain, i_round, writers, cumulative_acqs=n_acqs_previous)
        # build new train_loader using updated acq_idx
        train_loader = make_dataloader(train_data, args.train_batch_size, idx=acq_idx, random=True)
        # reoptimize weight decay given updated, larger training set. Unclear if Gal does this, but seems natural
        weight_decay = compute_weight_decay(train_loader, valid_loader, args, writers, i_round)
        writer1.add_scalar('optimal_lambda', weight_decay, i_round)
        # reinitialise model
        model = BayesianCNN()
        optimizer = optim.Adam(model.parameters(), lr=args.lr, weight_decay=weight_decay)
        # train model to convergence on all points acquired so far, computing test error as we go
        oldw1 = list(model.parameters())[0][0][0][0][0].item()
        fit(model, optimizer, train_loader, test_loader, args, writers, i_round)
        torch.save(model.state_dict(), './logs/{}/checkpts/model_{}.pt'.format(
            experiment_name, i_round))
        neww1 = list(model.parameters())[0][0][0][0][0].item()
        assert oldw1 != neww1, "fit(.) didn't update model parameters"
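
The active-learning code above calls make_dataloader(train_data, batch_size, idx=..., random=...), and make_acquisitions in Example 11 below iterates its batches as (data, target, idx), so the loader apparently also yields the original dataset indices. Here is a hedged sketch of such a helper, assuming a simple index-preserving subset wrapper; the real implementation is not shown.

# Hypothetical index-preserving subset loader; not the original helper.
from torch.utils.data import DataLoader, Dataset


class IndexedSubset(Dataset):
    """Restrict a dataset to `idx` and return the original index with each item."""

    def __init__(self, dataset, idx=None):
        self.dataset = dataset
        self.idx = sorted(idx) if idx is not None else list(range(len(dataset)))

    def __len__(self):
        return len(self.idx)

    def __getitem__(self, i):
        orig = self.idx[i]
        x, y = self.dataset[orig]
        return x, y, orig


def make_dataloader(dataset, batch_size, idx=None, random=False):
    # random=True shuffles the acquired subset; the pool is read sequentially
    return DataLoader(IndexedSubset(dataset, idx), batch_size=batch_size,
                      shuffle=random)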
Example 7
    utils.check_for_CUDA(args)

    # Load pth
    pth_dir_name = os.path.dirname(args.pth)
    print("Loading model", os.path.join(pth_dir_name, 'model.pth'))
    model = torch.load(os.path.join(pth_dir_name, 'model.pth'))
    print("Loading model state dict", args.pth)
    model.load_state_dict(torch.load(args.pth))
    model = model.to(args.device)

    # Make dataloader with Eval parameters
    print("Making dataloader")
    args.valid_split = 0
    args.centercrop = False
    args.shuffle = False
    args.drop_last = False
    eval_loader = utils.make_dataloader(args)

    # # Visualize
    # inputs, classes = next(iter(eval_loader))
    # import torchvision.utils
    # out = torchvision.utils.make_grid(inputs)
    # utils.imshow(out)
    # # Visualize end

    # Evaluate
    eval(args, model, eval_loader)

# EVAL
# python eval.py --eval --pth '/home/voletiv/EXPERIMENTS/CnD_experiments/20190208_223808_cnd_kaggle_pt_UNfreeze_ES/model_epoch_0136_batch_00000_of_00141.pth' --data_path '/home/voletiv/Datasets/CatsAndDogs/testset' --out_path '/home/voletiv/EXPERIMENTS/CnD_experiments/20190208_223808_cnd_kaggle_pt_UNfreeze_ES/' --no-cuda
Example 8
         'l1_channel': [0, 1e-4, 1e-3, 1e-2],
         'l1_spatial': [0, 1e-4, 1e-3, 1e-2],
         'l2': [0]
     }
 device = torch.device(
     'cuda' if use_cuda and torch.cuda.is_available() else 'cpu')
 torch.backends.cudnn.deterministic = True
 train_set = load_dataset(data_file, 'data_train', 'labels_train')
 valid_set = load_dataset(data_file, 'data_valid', 'labels_valid')
 test_set = load_dataset(data_file, 'data_test', 'labels_test')
 var_noise = load_var_noise(data_file, 'var_noise')
 input_channel, input_size = train_set.tensors[0].size(
     1), train_set.tensors[0].size(2)
 output_size = train_set.tensors[1].size(1)
 valid_loader = make_dataloader(valid_set,
                                'valid_set',
                                batch_size=max(param_grid['batch_size']))
 test_loader = make_dataloader(test_set,
                               'test_set',
                               batch_size=max(param_grid['batch_size']))
 criterion = nn.MSELoss()
 best_valid_RMSE = np.full(1, np.inf)
 for grid in ParameterGrid(param_grid):
     print(f"===> Hyper-parameters = {grid}:")
     # random.seed(seed)
     # numpy.random.seed(seed)
     torch.manual_seed(seed)
     torch.cuda.manual_seed_all(seed)
     importlib.reload(model)
     net = getattr(model,
                   model_name.upper())(input_channel=input_channel,
Example 9
from parameters import get_params

if __name__ == '__main__':

    # Get all parameters
    args = get_params()
    args.command = 'python ' + ' '.join(sys.argv)

    # CUDA
    utils.check_for_CUDA(args)

    # Seed
    torch.manual_seed(args.seed)

    # IMAGES DATALOADER
    train_loader, valid_loader = utils.make_dataloader(args)

    print(args)

    # OUT PATH
    if not os.path.exists(args.out_path):
        print("Making", args.out_path)
        os.makedirs(args.out_path)

    # Copy all scripts
    utils.copy_scripts(args.out_path)

    # Save all args
    utils.write_config_to_file(args, args.out_path)

    # MODEL
Example 10
def train_one(model,
              data,
              param,
              weight,
              first_layer_no_learn=False,
              show_every=1,
              return_model=False):

    tic = time.time()
    batch_size = param['batch_size']
    lr = param['lr']
    l1 = param['l1']
    l2 = param['l2']
    max_epoch = param['max_epoch']
    seed = param['seed']

    if seed != -1:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    input_channel, input_size = data[0].shape[1], data[0].shape[2]
    output_size = data[1].shape[0]
    train_loader = make_dataloader(data[0],
                                   data[1],
                                   batch_size=batch_size,
                                   is_train=True)
    valid_loader = make_dataloader(data[2],
                                   data[3],
                                   batch_size=batch_size,
                                   is_train=False)
    test_loader = make_dataloader(data[4],
                                  data[5],
                                  batch_size=batch_size,
                                  is_train=False)
    best_valCC = 0
    best_model = None

    if first_layer_no_learn:
        model = init_CNN(model, weight)
        optimizer = Adam([{
            'params': model.conv.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                         lr=lr,
                         l1=l1,
                         weight_decay=l2,
                         amsgrad=True)
    else:
        optimizer = Adam(model.parameters(),
                         lr=lr,
                         l1=l1,
                         weight_decay=l2,
                         amsgrad=True)
    loss = []
    val_corr = []
    for epoch in range(max_epoch):
        if (epoch + 1) % show_every == 0:
            print(f"Epoch {epoch + 1}:")
        loss.append(train(model, train_loader, optimizer))
        valid_CC = test(model, valid_loader, 'Validation')[1]
        valid_CC = sum(valid_CC) / len(valid_CC)
        val_corr.append(valid_CC)
        if (epoch + 1) % show_every == 0:
            print(valid_CC)
        if valid_CC > best_valCC:
            # keep the model with the best validation correlation
            best_valCC = valid_CC
            del best_model
            best_model = copy.deepcopy(model)

    print("Done Training")
    res = test(best_model, test_loader, 'Test')
    test_corr = res[1]
    pred = res[-1]
    test_corr = sum(test_corr) / len(test_corr)
    print(test_corr)
    torch.cuda.empty_cache()
    print("Finished.")
    if return_model:
        return best_model, test_corr, loss, val_corr, pred
    else:
        return test_corr, loss, val_corr, pred
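
train_one above (and its shorter variant in Example 15 further below) expects make_dataloader(X, y, batch_size=..., is_train=...) over in-memory arrays. A minimal sketch follows, assuming is_train only controls shuffling; the real helper may do more.

# Hypothetical tensor-pair make_dataloader used by train_one; not the original.
import torch
from torch.utils.data import DataLoader, TensorDataset


def make_dataloader(x, y, batch_size, is_train=True):
    dataset = TensorDataset(torch.as_tensor(x), torch.as_tensor(y))
    return DataLoader(dataset, batch_size=batch_size, shuffle=is_train)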
Example 11
def make_acquisitions(train_data, pool_idx, model, args):
    """
    Chooses pool points that maximise the acquisition function given in args.acq_func
    (Or acquires points uniformly at random if args.random_acq == True.)
    Returns indices of the top `args.acqs_per_round` points from the pool.
    The elements of train_data at pool_idx are the "pool points".

    Parameters
    ----------
    train_data: torch.utils.data.Dataset
        PyTorch dataset, superset of the pool data
    pool_idx: set
        indices specifying which points in train_data are in the pool
    model: torch.nn.Module
        find information gained about this PyTorch model
    args: Namespace object
        experiment arguments from argparse, including acq_func

    Returns
    -------
    new_idx: set
        indices of pool points which maximise acquisition function
        len(new_idx) = args.acqs_per_round
    mean_info: float (or None)
        the mean "informativenss" of these points, measuring using the given
        acqusition function (entropy, variation ratio or standard deviation)
    """
    if args.random_acq:
        new_idx = set(random.sample(
            pool_idx,
            k=args.acqs_per_round))  # random sample without replacement
        mean_info = None
    else:
        best_ent_idx = np.zeros(
            shape=(0, 2),
            dtype=np.float64)  # stores the top acqs_per_round (entropy, idx) pairs so far
        start = time.time()
        pool_loader = make_dataloader(train_data,
                                      args.test_batch_size,
                                      idx=pool_idx)  # note 1
        with torch.no_grad():
            for data, _, idx in pool_loader:
                logging.info(
                    'Computing info gain for points with (original) indices {}-{} in pool'
                    .format(idx[0], idx[-1]))
                logprobs = model.forward_stochastic(
                    data, k=args.dropout_samples).double(
                    )  # do entropy calcs in double precision
                # model outputs logprobs (final layer is log_softmax(.))
                # this is for numerical stability in softmax computation
                # convert these back to probs to do entropy calculations
                probs = logprobs.exp()
                info = args.acq_func(probs)
                # add new (entropy, index) tuples to array of best so far
                new_ent_idx = np.column_stack((info, idx))
                all_ent_idx = np.concatenate((new_ent_idx, best_ent_idx),
                                             axis=0)
                # sort by entropy and keep the top acqs_per_round so far
                sorted_ind = all_ent_idx[:, 0].argsort()
                best_ent_idx = all_ent_idx[sorted_ind][-args.acqs_per_round:]

        assert best_ent_idx.shape == (args.acqs_per_round, 2)
        end = time.time()
        logging.info("Time taken for {} acquisitions: {:.1f}s".format(
            args.acqs_per_round, end - start))
        new_idx = set(best_ent_idx[:, 1].astype(int))
        mean_info = best_ent_idx[:, 0].mean()
    return new_idx, mean_info
Example 12
def main():
    args = get_args()
    torch.manual_seed(args.seed)

    shape = (224, 224, 3)
    """ define dataloader """
    train_loader, valid_loader, test_loader = make_dataloader(args)
    """ define model architecture """
    model = get_model(args, shape, args.num_classes)

    if torch.cuda.device_count() >= 1:
        print('Model pushed to {} GPU(s), type {}.'.format(
            torch.cuda.device_count(), torch.cuda.get_device_name(0)))
        model = model.cuda()
    else:
        raise ValueError('CPU training is not supported')
    """ define loss criterion """
    criterion = nn.CrossEntropyLoss().cuda()
    """ define optimizer """
    optimizer = make_optimizer(args, model)
    """ define learning rate scheduler """
    scheduler = make_scheduler(args, optimizer)
    """ define loss scaler for automatic mixed precision """
    scaler = torch.cuda.amp.GradScaler()
    """ define trainer, evaluator, result_dictionary """
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': []
    }
    trainer = Trainer(model, criterion, optimizer, scheduler, scaler)
    evaluator = Evaluator(model, criterion)

    train_time_list = []
    valid_time_list = []

    if args.evaluate:
        """ load model checkpoint """
        model.load()
        result_dict = evaluator.test(test_loader, args, result_dict)
    else:
        evaluator.save(result_dict)

        best_val_acc = 0.0
        """ define training loop """
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch

            torch.cuda.synchronize()
            tic1 = time.time()

            result_dict = trainer.train(train_loader, epoch, args, result_dict)

            torch.cuda.synchronize()
            tic2 = time.time()
            train_time_list.append(tic2 - tic1)

            torch.cuda.synchronize()
            tic3 = time.time()

            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)

            torch.cuda.synchronize()
            tic4 = time.time()
            valid_time_list.append(tic4 - tic3)

            if result_dict['val_acc'][-1] > best_val_acc:
                print("{} epoch, best epoch was updated! {}%".format(
                    epoch, result_dict['val_acc'][-1]))
                best_val_acc = result_dict['val_acc'][-1]
                model.save(checkpoint_name='best_model')

            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)

        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)
        """ calculate test accuracy using best model """
        model.load(checkpoint_name='best_model')
        result_dict = evaluator.test(test_loader, args, result_dict)
        evaluator.save(result_dict)

    print(result_dict)

    np.savetxt(os.path.join(model.checkpoint_dir, model.checkpoint_name,
                            'train_time_amp.csv'),
               train_time_list,
               delimiter=',',
               fmt='%s')
    np.savetxt(os.path.join(model.checkpoint_dir, model.checkpoint_name,
                            'valid_time_amp.csv'),
               valid_time_list,
               delimiter=',',
               fmt='%s')
Example 13
def main():
    args = get_args()
    torch.manual_seed(args.seed)

    shape = (224, 224, 3)
    """ define dataloader """
    train_loader, valid_loader, test_loader = make_dataloader(args)
    """ define model architecture """
    model = get_model(args, shape, args.num_classes)

    if torch.cuda.device_count() >= 1:
        print('Model pushed to {} GPU(s), type {}.'.format(
            torch.cuda.device_count(), torch.cuda.get_device_name(0)))
        model = model.cuda()
    else:
        raise ValueError('CPU training is not supported')
    """ define loss criterion """
    criterion = nn.CrossEntropyLoss().cuda()
    """ define optimizer """
    optimizer = make_optimizer(args, model)
    """ define learning rate scheduler """
    scheduler = make_scheduler(args, optimizer)
    """ define trainer, evaluator, result_dictionary """
    result_dict = {
        'args': vars(args),
        'epoch': [],
        'train_loss': [],
        'train_acc': [],
        'val_loss': [],
        'val_acc': [],
        'test_acc': []
    }
    trainer = Trainer(model, criterion, optimizer, scheduler)
    evaluator = Evaluator(model, criterion)

    if args.evaluate:
        """ load model checkpoint """
        model.load("best_model")
        result_dict = evaluator.test(test_loader, args, result_dict, True)

        model.load("last_model")
        result_dict = evaluator.test(test_loader, args, result_dict, False)

    else:
        evaluator.save(result_dict)

        best_val_acc = 0.0
        """ define training loop """
        tolerance = 0
        for epoch in range(args.epochs):
            result_dict['epoch'] = epoch
            result_dict = trainer.train(train_loader, epoch, args, result_dict)
            result_dict = evaluator.evaluate(valid_loader, epoch, args,
                                             result_dict)

            tolerance += 1
            print("tolerance: ", tolerance)

            if result_dict['val_acc'][-1] > best_val_acc:
                tolerance = 0
                print("{} epoch, best epoch was updated! {}%".format(
                    epoch, result_dict['val_acc'][-1]))
                best_val_acc = result_dict['val_acc'][-1]
                model.save(checkpoint_name='best_model')

            evaluator.save(result_dict)
            plot_learning_curves(result_dict, epoch, args)

            if tolerance > 20:
                break

        result_dict = evaluator.test(test_loader, args, result_dict, False)
        evaluator.save(result_dict)
        """ save model checkpoint """
        model.save(checkpoint_name='last_model')
        """ calculate test accuracy using best model """
        model.load(checkpoint_name='best_model')
        result_dict = evaluator.test(test_loader, args, result_dict, True)
        evaluator.save(result_dict)

    print(result_dict)
Example 14
        self.eval = True
        self.imsize = 64
        self.seed = 29
        self.batch_size = 128
        self.shuffle = False
        self.drop_last = False
        self.val_data_path = ''
        self.kwargs = {}


# special_data_path = "/home/voletiv/Datasets/CatsAndDogs/special"
special_data_path = "/home/voletivi/scratch/catsndogs/data/special"

# Data
args = MyArgs(special_data_path)
dl = utils.make_dataloader(args)
data, classes = next(iter(dl))

# Model
# model_pth = '/home/voletiv/EXPERIMENTS/CnD_experiments/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model.pth'
# model_state_dict = '/home/voletiv/EXPERIMENTS/CnD_experiments/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model_epoch_0082_batch_00076_of_00282.pth'
model_pth = '/home/voletivi/scratch/catsndogs/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model.pth'
model_state_dict = '/home/voletivi/scratch/catsndogs/experiments/20190211_172747_cnd_kaggle_ResNet18_skip_expDecay/model_epoch_0082_batch_00076_of_00282.pth'
model = torch.load(model_pth)
model.load_state_dict(torch.load(model_state_dict))
# model.to('cpu')

occ_size = 7

# Data occ sample
data_occ = data.clone()
Example 15
def train_one(model,
              data,
              param,
              weight,
              first_layer_no_learn=False,
              return_model=False):

    tic = time.time()
    batch_size = param['batch_size']
    lr = param['lr']
    l1 = param['l1']
    l2 = param['l2']
    max_epoch = param['max_epoch']
    seed = param['seed']

    if seed != -1:
        torch.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    input_channel, input_size = data[0].shape[1], data[0].shape[2]
    output_size = data[1].shape[0]
    train_loader = make_dataloader(data[0], data[1], batch_size=batch_size)
    valid_loader = make_dataloader(data[2], data[3], batch_size=batch_size)
    test_loader = make_dataloader(data[4], data[5], batch_size=batch_size)
    best_valCC = 0
    best_model = None

    if first_layer_no_learn:
        model = init_CNN(model, weight)
        optimizer = Adam([{
            'params': model.conv.parameters()
        }, {
            'params': model.fc.parameters()
        }],
                         lr=lr,
                         l1=l1,
                         weight_decay=l2,
                         amsgrad=True)
    else:
        optimizer = Adam(model.parameters(),
                         lr=lr,
                         l1=l1,
                         weight_decay=l2,
                         amsgrad=True)
    for epoch in range(max_epoch):
        print(f"===> Training Epoch {epoch + 1}:")
        train(model, train_loader, optimizer)
        valid_CC = test(model, valid_loader, 'Validation')[-1]
        print(valid_CC)
        if valid_CC > best_valCC:
            best_valCC = valid_CC
            del best_model
            best_model = copy.deepcopy(model)

    print("===========>")
    test_corr = test(best_model, test_loader, 'Test')[-1][0]
    print(test_corr)
    torch.cuda.empty_cache()
    print("Finished.")
    toc = time.time()
    print("Elapsed time is {:.6f} seconds.".format(toc - tic))
    if return_model:
        return best_model, test_corr, toc - tic
    else:
        return test_corr, toc - tic