def dataset_split(X,
                  y,
                  perc=VALIDATION_PERCENTAGE,
                  random_state=RANDOM_SEED,
                  return_data='samples'):
    """
    Given two arrays of samples and label X and y, perform a random splitting in train and validation sets.
    :param X: numpy array of samples
    :param y: numpy array of labels
    :param perc: percentage of validation set
    :param random_state: random state of the splitter
    :param return_data: if True, return DataLoader objects instead of numpy arrays
    :return: (train_loader, val_loader) or (X_train, y_train), (X_val, y_val) or train_idx, val_idx
    """
    assert 0 <= perc <= 1

    sss = StratifiedShuffleSplit(n_splits=1,
                                 test_size=perc,
                                 random_state=random_state)
    train_idxs, valid_idxs = next(sss.split(X, y))

    X_train, X_valid = X[train_idxs], X[valid_idxs]
    y_train, y_valid = y[train_idxs], y[valid_idxs]

    if return_data == 'data_loader':
        return get_data_loader(X_train,
                               y_train), get_data_loader(X_valid, y_valid)
    elif return_data == 'samples':
        return (X_train, y_train), (X_valid, y_valid)
    elif return_data == 'indices':
        return train_idxs, valid_idxs
    else:
        raise ValueError("Unknown return_data option: {}".format(return_data))
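A minimal usage sketch of dataset_split with toy arrays (the names X, y and the shapes below are illustrative only):

import numpy as np

# 100 toy samples, 10 per class
X = np.random.rand(100, 3, 32, 32).astype(np.float32)
y = np.repeat(np.arange(10), 10)

# the return format is controlled by return_data
(X_tr, y_tr), (X_va, y_va) = dataset_split(X, y, perc=0.2, return_data='samples')
train_idx, val_idx = dataset_split(X, y, perc=0.2, return_data='indices')
# train_dl, val_dl = dataset_split(X, y, perc=0.2, return_data='data_loader')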
Example #2
def load_dataloaders_from_dataset(dataset):
    (X_train, y_train), (X_test, y_test) = load_dataset(dataset)

    # Scale pixels values
    X_train, X_mean, X_std = image_preprocessing(X_train, scale_only=False)
    X_test, _, _ = image_preprocessing(X_test,
                                       seq_mean=X_mean,
                                       seq_std=X_std,
                                       scale_only=False)

    # Flatten for the dataloader
    y_train = y_train.flatten()
    y_test = y_test.flatten()

    # Stratified split of training and validation
    sss = StratifiedShuffleSplit(n_splits=1,
                                 test_size=VALIDATION_PERCENTAGE,
                                 random_state=RANDOM_SEED)
    train_idx, val_idx = next(sss.split(X_train, y_train))
    (X_train, X_valid) = X_train[train_idx], X_train[val_idx]
    (y_train, y_valid) = y_train[train_idx], y_train[val_idx]

    # Generating data loaders
    train_dl = get_data_loader(X_train, y_train)
    val_dl = get_data_loader(X_valid, y_valid, shuffle=False)
    test_dl = get_data_loader(X_test, y_test, shuffle=False)

    return train_dl, val_dl, test_dl
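image_preprocessing is defined elsewhere in this codebase; judging from how it is called, it scales pixel values and, with scale_only=False, also standardizes with the training-set statistics. A minimal sketch of that assumed behavior (not the actual implementation):

import numpy as np

def image_preprocessing_sketch(X, seq_mean=None, seq_std=None, scale_only=True):
    X = X.astype(np.float32) / 255.0            # scale pixel values to [0, 1]
    if scale_only:
        return X, None, None
    if seq_mean is None or seq_std is None:     # fit statistics on the training set
        seq_mean, seq_std = X.mean(), X.std()
    X = (X - seq_mean) / (seq_std + 1e-8)       # standardize with training statistics
    return X, seq_mean, seq_std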
Example #3
def reparametrize_and_compute(dataset_name, break_after=-1):
    """
    Starting from a randomly initialized MLP,
    compute 1) approx radius of flatness before & after reparam
            2) local entropy before & after reparam
    """
    model = MnistMLP().to(DEVICE)
    network_params = model.state_dict()
    train_loader = get_data_loader(dataset_name, "train", 100)

    rad0 = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                               break_after=break_after))
    entr0 = _compute_local_entropy(model, train_loader, network_params,
                                   break_after=break_after)

    model = MnistMLP().to(DEVICE)
    network_params = model.state_dict()
    train_loader = get_data_loader(dataset_name, "train", 100)
    network_params["fc1.weight"] /= 5.
    network_params["fc2.weight"] *= 5.
    rad1 = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                               break_after=break_after))
    entr1 = _compute_local_entropy(model, train_loader, network_params,
                                   break_after=break_after)

    print("Radius of flatness before reparam: {:.3f}, after reparam: {:.3f}".format(rad0, rad1))
    print("Local entropy before reparam: {:.3f}, after reparam: {:.3f}".format(entr0, entr1))
Example #4
def train():
    mode = None  # set to "gan", "ae", or "vae" to select a training branch
    if mode == "gan":
        D = get_sym_ful_conv_ae2((4, 8, 16, 24, 32, 48, 64), (4,) * 7, None,
                                 (1, 2, 1, 2, 1, 2, 1), (32, 1), enc_fn=nn.Sigmoid)[0]
        G = get_sym_ful_conv_ae2((4, 8, 16, 24, 32), (4, 4, 4, 4, 4), None,
                                 (1, 2, 1, 2, 1), (256,))[1]
        d, g = D(), G()
        train_gan(d, g, "CelebAGAN2",
                  get_data_loader(CelebA, split="train"),
                  get_data_loader(CelebA, split="test"),
                  10, 9, Optim5, 0, 1)
    elif mode == "ae":
        for tae in train_CelebA_aes[-1:]:
            tae.train()
            del tae._encoder, tae._decoder
            torch.cuda.empty_cache()
    elif mode == "vae":
        for E, D, name, epochs, data, Optim, loss_type in train_CelebA_vaes[-1:]:
            train_vae(E(), D(), get_data_loader(data), device, name, epochs, 9,
                      Optim, loss_type, get_data_loader(data, split="test"))
Example #5
def visualize_interpolated_trajectory(exp_names, model_name, dataset_name, break_after=-1):
    """ repeat for three runs """
    for exp_name in exp_names:
        trajectory = load_history(exp_name)['trajectory']

        model = get_model(model_name).to(DEVICE)
        train_loader = get_data_loader(dataset_name, "train", 100)

        res = []
        epochs = []
        for i in range(len(trajectory)-1):
            ps = [trajectory[i], trajectory[i+1]]
            for alph in [0., 0.2, 0.4, 0.6, 0.8]:
                weights = [alph, 1-alph]
                network_params = average_with_weights(ps, weights)
                model.load_params(network_params)
                loss = compute_approx_train_loss(model, train_loader,
                                                 break_after)
                epochs.append(i+1+alph)
                res.append(loss)

        plt.plot(epochs, res)

    plt.xlabel("Interpolated epoch")
    plt.ylabel("Approx loss")
    plt.savefig("loss_interpolation_" + exp_name)
    plt.clf()
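average_with_weights is a repository helper not shown here; consistent with how it is called, it presumably forms a weighted combination of parameter dictionaries. A minimal sketch of that assumption:

def average_with_weights_sketch(param_dicts, weights):
    """Weighted sum of state dicts: for every key k, sum_i weights[i] * param_dicts[i][k]."""
    keys = param_dicts[0].keys()
    return {k: sum(w * p[k] for w, p in zip(weights, param_dicts)) for k in keys}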
Example #6
def main(**attrs):
    window_min = attrs["window_min"]
    window_size = int(window_min * 60 * 1000 / 200)
    step_min = attrs["step_min"]
    step_size = int(step_min * 60 * 1000 / 200)
    attrs["window_size"] = window_size
    attrs["step_size"] = step_size
    attrs["start"] = 0
    data_paths = attrs["data"].split(',')
    data_paths = list(map(lambda x: os.path.join(data_dir, x), data_paths))
    attrs["data_paths"] = data_paths
    attrs.pop("window_min", None)
    attrs.pop("step_min", None)
    attrs.pop("data", None)
    print(attrs)
    train_loader, val_loader, _ = get_data_loader(attrs)
    #return train_loader,val_loader,test_loader
    img_size = window_size
    train(
        attrs["cfg"],
        #opt.data_cfg,
        train_loader,
        val_loader,
        img_size=img_size,
        resume=attrs["resume"],
        epochs=attrs["epochs"],
        batch_size=attrs["batch_size"],
        accumulated_batches=attrs["accumulated_batches"],
        weights=attrs["weights"],
        #multi_scale=opt.multi_scale,
        #freeze_backbone=opt.freeze,
        var=attrs["var"],
    )
Example #7
def reparam_and_local_entropy(exp_name, model_name, dataset_name,
                              gamma=100, n_trials=10, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = model.state_dict()
    network_params["features.0.weight"] /= 5.
    network_params["features.3.weight"] *= 5.
    train_loader = get_data_loader(dataset_name, "train", 100)
    entr = _compute_local_entropy(model, train_loader, network_params,
                                  break_after=break_after)
    return entr
Example #8
def reparam_and_c_eps_flat(exp_name, model_name, dataset_name,
                           gamma=100, n_trials=10, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = model.state_dict()
    network_params["features.0.weight"] /= 5.
    network_params["features.3.weight"] *= 5.
    train_loader = get_data_loader(dataset_name, "train", 100)
    rad = np.mean(_compute_c_epsilon_flatness(model, train_loader, network_params,
                                               break_after=break_after))
    return rad
Example #9
def tests():
    a = (MNISTEncoder4, MNISTDecoder4, "AE4")
    b = (MNISTEncoder4, MNISTDecoder4, "FashionAE4")
    dl_a = get_data_loader("MNIST")
    dl_b = get_data_loader("FashionMNIST")
    encoder_a, decoder_a = load_model(*a)
    encoder_b, decoder_b = load_model(*b)
    mean_a, cov_a = latent_space_pca(encoder_a, dl_a)
    mean_b, cov_b = latent_space_pca(encoder_b, dl_a)  # NOTE: computed on dl_a and unused below; dl_b may have been intended
    ls = encoder_a.latent_size
    plot_images2(decoder_a(sample_in_pc(9, mean_a, cov_a)).detach())
    plot_images2(
        decoder_a(normal_to_pc(get_sample_k_of_d(9, 4, ls) * 3, mean_a,
                               cov_a)).detach())
    plot_images2(
        decoder_a(normal_to_pc(torch.eye(ls) * 3, mean_a, cov_a)).detach())
    batch_a = next(iter(dl_a))[0]
    batch_b = next(iter(dl_b))[0]
    plot_images2(decoder_a(encoder_a(batch_a)).detach())
    plot_images2(decoder_a(encoder_a(batch_b)).detach())
    plot_images2(decoder_a(encoder_b(batch_a)).detach())
    plot_images2(decoder_a(encoder_b(batch_b)).detach())
    plot_images2(decoder_b(encoder_a(batch_a)).detach())
    plot_images2(decoder_b(encoder_a(batch_b)).detach())
    plot_images2(decoder_b(encoder_b(batch_a)).detach())
    plot_images2(decoder_b(encoder_b(batch_b)).detach())
    x_a = labeled_latent_space_pca(encoder_a, dl_a)
    plot_images2(decoder_a(sample_in_pc(9, *x_a[7])).detach())
    plot_images2(
        decoder_a(
            torch.stack([mean for label, (mean, cov) in x_a.items()
                         ]).view(-1, ls)).detach())
    x_b = labeled_latent_space_pca(encoder_b, dl_b)
    plot_images2(decoder_b(sample_in_pc(9, *x_b[9])).detach())
    plot_images2(
        decoder_b(
            torch.stack([mean for label, (mean, cov) in x_b.items()
                         ]).view(-1, ls)).detach())
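latent_space_pca and sample_in_pc are helpers from this codebase; a hedged sketch of what they plausibly do (estimate the mean and covariance of the latent codes over a loader, then draw Gaussian samples from that fit), an assumption rather than the actual implementation:

import torch

def latent_space_pca_sketch(encoder, data_loader):
    # collect latent codes over the whole loader
    with torch.no_grad():
        codes = torch.cat([encoder(batch) for batch, _ in data_loader], dim=0)
    return codes.mean(dim=0), torch.cov(codes.T)

def sample_in_pc_sketch(n, mean, cov):
    # n samples from N(mean, cov); a small jitter keeps the covariance positive definite
    jitter = 1e-5 * torch.eye(cov.shape[0])
    dist = torch.distributions.MultivariateNormal(mean, covariance_matrix=cov + jitter)
    return dist.sample((n,))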
Example #10
def train_model(model_name, dataset, batch_size, lr, n_epochs, check_name, model_dir, **kwargs):
    if check_name == "default":
        check_name = f"{model_name}_{dataset}"

    model = get_model(model_name)
    train_dataloader = get_data_loader(dataset, True, 
                                       batch_size=batch_size)
    val_dataloader = get_data_loader(dataset, False, 
                                     batch_size=batch_size)

    # stuff that could be adjusted
    opt = Adam(model.parameters(), lr=lr)
    scheduler = ReduceLROnPlateau(opt, patience=3, threshold=0.1, min_lr=1e-5)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(n_epochs):
        loss = train_epoch(model, train_dataloader, criterion, opt, scheduler)
        print("Finished epoch {}, avg loss {:.3f}".format(epoch+1, loss))
        val_loss, acc = validate(model, val_dataloader, criterion, scheduler)
        print("Validation loss: {}, accuracy: {}".format(val_loss, acc))
        model.save(check_name, model_dir)

    print("Model {} has been saved to {}".format(model_name, model_dir))
Example #11
def evaluate_sparsifier(model_name,
                        dataset,
                        check_name,
                        model_dir,
                        sparse,
                        method,
                        variance_based=False,
                        **kwargs):
    """ Run a single sparsification eval and return the result.
        TODO: add a custom sparsification caller.
    """
    if check_name == "default":
        check_name = f"{model_name}_{dataset}"

    model = get_model(model_name)
    model.load(check_name, model_dir)
    train_data = get_dataset(dataset, is_train=True)
    val_data = get_dataset(dataset, is_train=False)
    val_loader = get_data_loader(dataset, is_train=False)

    if method == "corenet":  # two cases to account for different nnz parameters computation
        sparse_model = sparsify_corenet(model, train_data, s_sparse=sparse)
        pre_nnz = model.count_nnz()
        post_nnz = sparse_model.count_nnz()
    elif method == "svd":
        #print(variance_based)
        sparse_model = sparsify_svd(model, sparse, variance_based)
        pre_nnz = compute_nnz_svd(model)
        post_nnz = compute_nnz_svd(sparse_model)
    else:
        raise ValueError(f"Method {method} not available")

    max_dev = evaluate_coverage(model, sparse_model, val_data, 0.5)

    pre_acc = evaluate_val_acc(model, val_loader)
    post_acc = evaluate_val_acc(sparse_model, val_loader)

    res = {
        'sparsification': {
            'pre_nnz': pre_nnz,
            'post_nnz': post_nnz
        },
        'accuracy': {
            'pre_acc': pre_acc,
            'post_acc': post_acc
        },
        'coverage': max_dev
    }

    return res
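count_nnz is assumed to count the nonzero parameters of a model; a one-function sketch of that assumption:

def count_nnz_sketch(model):
    # number of nonzero entries across all parameter tensors
    return int(sum((p != 0).sum().item() for p in model.parameters()))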
Example #12
def run(beta=10, seed=1234):
    save_dir = os.path.join(SAVE_DIR, DATASET_NAME)
    if os.path.exists(save_dir):
        shutil.rmtree(save_dir)
    os.makedirs(save_dir)

    torch.manual_seed(seed)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    train_loader, ds = get_data_loader(DATASET_NAME, BATCH_SIZE, N_STEPS)
    model = BetaTCVAE(beta, IMG_CHANNELS, N_LATENTS, ds.size).to(device)
    optimizer = optim.Adam(model.parameters(),
                           lr=LEARNING_RATE,
                           betas=(ADAM_BETA1, ADAM_BETA2))
    ckpt_paths = train(train_loader, model, optimizer, device, save_dir)

    visual_dir = os.path.join(OUTPUT_DIR, DATASET_NAME, 'visual')
    if os.path.exists(visual_dir):
        shutil.rmtree(visual_dir)
    os.makedirs(visual_dir)

    for path in ckpt_paths:
        eval_loader, _ = get_data_loader(DATASET_NAME, 1, 100)
        eval_visual(eval_loader, model, device, path, visual_dir)
Example #13
def compute_spectral_sharpness(exp_name, model_name, dataset_name):
    """ Compute average top eigenvalue over a few batches """
    model = get_model(model_name).to(DEVICE)
    network_params = load_history(exp_name)['trajectory'][-1]
    model.load_params(network_params)
    train_loader = get_data_loader(dataset_name, "train", 100)

    # only the first batch is needed
    inputs, targets = next(iter(train_loader))
    # criterion = compute_loss(model, inputs, targets)
    eigenvalue, eigenvector = get_eigen(model, inputs, targets,
                                        nn.CrossEntropyLoss(), maxIter=10, tol=1e-2)
    return eigenvalue
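get_eigen presumably runs power iteration with Hessian-vector products; a self-contained sketch of that standard technique (not the repository's implementation):

import torch

def top_hessian_eigenvalue_sketch(model, inputs, targets, criterion, max_iter=10, tol=1e-2):
    params = [p for p in model.parameters() if p.requires_grad]
    loss = criterion(model(inputs), targets)
    grads = torch.autograd.grad(loss, params, create_graph=True)

    # random unit-norm starting direction
    v = [torch.randn_like(p) for p in params]
    norm = torch.sqrt(sum((u * u).sum() for u in v))
    v = [u / norm for u in v]

    eigenvalue = None
    for _ in range(max_iter):
        gv = sum((g * u).sum() for g, u in zip(grads, v))           # <grad, v>
        hv = torch.autograd.grad(gv, params, retain_graph=True)     # Hessian-vector product
        new_eig = sum((h * u).sum() for h, u in zip(hv, v)).item()  # Rayleigh quotient (v has unit norm)
        norm = torch.sqrt(sum((h * h).sum() for h in hv))
        v = [h / (norm + 1e-12) for h in hv]
        if eigenvalue is not None and abs(new_eig - eigenvalue) <= tol * (abs(eigenvalue) + 1e-12):
            return new_eig
        eigenvalue = new_eig
    return eigenvalue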
Example #14
def main(args):
    device = args.device if torch.cuda.is_available() else 'cpu'
    checkpoint = os.path.join(args.root, 'checkpoint.pt')
    with open(os.path.join(args.root, 'config.json')) as inp:
        config = json.load(inp)
    args.root = os.path.join(args.root, 'det_checkpoints')
    os.makedirs(args.root, exist_ok=True)
    model = get_model_from_config(config)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    model.to(device)
    det_config = config.copy()
    det_config['model_name'] = f"Det{model.__class__.__name__[3:]}"
    det_model = get_model_from_config(det_config)
    test_loader = get_data_loader(config['dataset'],
                                  args.batch_size,
                                  test_only=True)
    ece = ECELoss(args.ece_bins)
    results = []
    predictions = []
    for index in ['ones', 'mean'] + list(range(config['n_components'])):
        det_model = StoLayer.convert_deterministic(model, index, det_model)
        torch.save(det_model.state_dict(),
                   os.path.join(args.root, f'checkpoint_{index}.pt'))
        y_prob, y_true, acc, tnll, nll_miss = test_model_deterministic(
            det_model, test_loader, device)
        pred_entropy = entropy(y_prob, axis=1)
        ece_val = ece(torch.from_numpy(y_prob),
                      torch.from_numpy(y_true)).item()
        predictions.append(y_prob)
        result = {
            'checkpoint': index,
            'nll': float(tnll),
            'nll_miss': float(nll_miss),
            'ece': float(ece_val),
            'predictive_entropy': {
                'mean': float(pred_entropy.mean()),
                'std': float(pred_entropy.std())
            },
            **{f"top-{k}": float(a)
               for k, a in enumerate(acc, 1)}
        }
        results.append(result)
    results = pd.DataFrame(results)
    results.to_csv(os.path.join(args.root, 'results.csv'), index=False)
    np.save(os.path.join(args.root, 'preds.npy'), np.array(predictions))
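ECELoss is the project's calibration metric; the standard expected calibration error it presumably implements bins predictions by confidence and averages |accuracy - confidence| weighted by bin mass. A sketch of that standard definition (the details of ECELoss may differ):

import numpy as np

def expected_calibration_error(y_prob, y_true, n_bins=15):
    conf = y_prob.max(axis=1)                                  # predicted confidence
    correct = (y_prob.argmax(axis=1) == y_true).astype(float)  # per-sample correctness
    edges = np.linspace(0.0, 1.0, n_bins + 1)
    ece = 0.0
    for lo, hi in zip(edges[:-1], edges[1:]):
        mask = (conf > lo) & (conf <= hi)
        if mask.any():
            ece += mask.mean() * abs(correct[mask].mean() - conf[mask].mean())
    return ece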
Example #15
def compute_c_epsilon_flatness(exp_name, model_name, dataset_name,
                               eps=0.05, n_trials=100, break_after=-1):
    """
    Input: experiment name and parameter epsilon
    Returns: float flatness
    """
    model = get_model(model_name).to(DEVICE)
    # bs here is chosen by processing speed:
    train_loader = get_data_loader(dataset_name, "train", 100)
    network_params = load_history(exp_name)['trajectory'][-1]

    # call helper
    steps_to_border = _compute_c_epsilon_flatness(model, train_loader, network_params,
                                                  eps, n_trials, break_after)

    # Display results:
    print("Approximate radius of flatness: {:.3f} +/- {:.3f}".format(np.mean(steps_to_border),
                                                                     np.std(steps_to_border)))
    return np.mean(steps_to_border)
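_compute_c_epsilon_flatness itself is not shown here; based on how it is used (it returns per-trial step counts that are averaged into a radius), a hedged sketch of one plausible implementation, reusing the repository's compute_approx_train_loss and model.load_params, with made-up step_size and max_steps defaults:

import torch

def _sketch_c_epsilon_flatness(model, train_loader, network_params,
                               eps=0.05, n_trials=10, break_after=-1,
                               step_size=0.01, max_steps=100):
    # count steps along random directions until the approximate train loss rises by more than eps
    model.load_params(network_params)
    base_loss = compute_approx_train_loss(model, train_loader, break_after)
    steps_to_border = []
    for _ in range(n_trials):
        direction = {k: torch.randn_like(v) if v.is_floating_point() else torch.zeros_like(v)
                     for k, v in network_params.items()}
        for step in range(1, max_steps + 1):
            perturbed = {k: (v + step * step_size * direction[k]) if v.is_floating_point() else v
                         for k, v in network_params.items()}
            model.load_params(perturbed)
            if compute_approx_train_loss(model, train_loader, break_after) > base_loss + eps:
                break
        steps_to_border.append(step)
    model.load_params(network_params)  # restore the original weights
    return steps_to_border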
Example #16
def get_dataloader(batch_size, validation, validation_fraction, seed, dataset):
    return get_data_loader(dataset, batch_size, validation,
                           validation_fraction, seed)
Example #17
                                                        return_data='samples')

    # Image pre-processing: scale pixel values
    X_train_noisy_sc, X_mean, X_std = image_preprocessing(X_train_noisy,
                                                          scale_only=False)
    X_valid_noisy_sc, _, _ = image_preprocessing(X_valid_noisy,
                                                 seq_mean=X_mean,
                                                 seq_std=X_std,
                                                 scale_only=False)
    X_test_noisy_sc, _, _ = image_preprocessing(X_test_noisy,
                                                seq_mean=X_mean,
                                                seq_std=X_std,
                                                scale_only=False)

    # Dataloaders
    train_noisy_dl = get_data_loader(X_train_noisy_sc, y_train, shuffle=True)
    valid_noisy_dl = get_data_loader(X_valid_noisy_sc, y_valid, shuffle=True)
    test_noisy_dl = get_data_loader(X_test_noisy_sc, y_test, shuffle=True)

    # Writer
    writer = SummaryWriter('runs/' +
                           '{}_{}_fine_tuning'.format(baseline, dataset))

    # Fine-tuning
    print('Fine-tuning...')
    model_finetune = copy.deepcopy(model_clean)
    n_classes = len(np.unique(y_train))
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model_finetune.parameters())
    train_losses, train_accuracies, val_accuracies, val_losses, _, _ = train(
        model_finetune,
Example #18
def compute_local_entropy(exp_name, model_name, dataset_name,
                          gamma=100., n_trials=100, break_after=-1):
    model = get_model(model_name).to(DEVICE)
    network_params = load_history(exp_name)['trajectory'][-1]
    train_loader = get_data_loader(dataset_name, "train", 100)
    return _compute_local_entropy(model, train_loader, network_params, gamma, n_trials,
                                  break_after=break_after)
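_compute_local_entropy is likewise not shown; a hedged sketch of one plausible Monte Carlo estimate of local entropy, log E over w' ~ N(w, I/gamma) of exp(-L_train(w')), again reusing compute_approx_train_loss and model.load_params from this codebase (an assumption, not the actual implementation):

import math
import torch

def _sketch_local_entropy(model, train_loader, network_params,
                          gamma=100., n_trials=10, break_after=-1):
    std = 1.0 / math.sqrt(gamma)
    neg_losses = []
    for _ in range(n_trials):
        # perturb the weights with Gaussian noise of variance 1/gamma
        perturbed = {k: v + std * torch.randn_like(v) if v.is_floating_point() else v
                     for k, v in network_params.items()}
        model.load_params(perturbed)
        neg_losses.append(-compute_approx_train_loss(model, train_loader, break_after))
    model.load_params(network_params)  # restore the original weights
    # log-mean-exp of the negative losses
    return (torch.logsumexp(torch.tensor(neg_losses), dim=0) - math.log(n_trials)).item()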
Example #19
def main(args):

    # setting device
    if args.cuda and torch.cuda.is_available():
        """
        if argument is given and cuda is available
        """
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')

    dataset_name = args.dataset.lower()
    if 'mnist' not in dataset_name and 'cifar' not in dataset_name:
        raise Exception('{} dataset not yet supported'.format(dataset_name))
    else:
        dset = get_cifar10_dataset if 'cifar' in dataset_name else get_mnist_dataset
    # getting the right dataset
    train, test = dset()
    dataloader = get_data_loader(train, test, batch_size=args.batch_size)
    trainloader, testloader = dataloader['train'], dataloader['test']
    batch, labels = next(iter(trainloader))
    # converting data to tensors
    batch_var = Variable(batch).to(device)
    labels_var = Variable(one_hotify(labels).to(device))

    # getting the right model
    model_name = args.model.lower()
    model_dict = {
        'resnet18': resnet.resnet18(num_classes=10, version=2),
        'resnet34': resnet.resnet34(num_classes=10, version=2),
        'resnet50': resnet.resnet50(num_classes=10, version=2),
        'resnet101': resnet.resnet101(num_classes=10, version=2),
        'resnet152': resnet.resnet152(num_classes=10, version=2),
        'capsnet': None
    }
    if model_name not in model_dict:
        raise Exception(
            '{} not implemented, try main.py --help for additional information'
            .format(model_name))
    else:
        model = model_dict[model_name]
    # temporary
    if not model:
        raise Exception('CapsNet in progress')
    ckpt_name = os.path.join(
        args.ckpt_dir, '{}_{}'.format(model_name, dataset_name) + '.pth.tar')
    # NOTE: loading ckpt_name is deferred until after the Trainer is constructed below,
    # since base_trainer does not exist yet at this point.
    model.to(device)  #model graph is placed
    fname = os.path.join('checkpoints',
                         '{}_{}'.format('resnet18_v2', 'cifar10') + '.pth.tar')
    base_loss = nn.CrossEntropyLoss()
    base_optimizer = optim.SGD(model.parameters(), lr=args.lr)
    base_trainer = Trainer(model,
                           base_optimizer,
                           base_loss,
                           trainloader,
                           testloader,
                           use_cuda=args.cuda)
    if os.path.isfile(ckpt_name):
        base_trainer.load(filename=ckpt_name)
    base_trainer.load_checkpoint(fname)
    # base_trainer.run(epochs=1)
    #base_trainer.save_checkpoint(ckpt_name)
    net = Solver(args, model, dataloader)
    net.generate(num_sample=args.batch_size,
                 target=args.target,
                 epsilon=args.epsilon,
                 alpha=args.alpha,
                 iteration=args.iteration)
Example #20
def visualize_checkpoint_simplex(exp_names, model_name, dataset_name,
                                 cutoff=3.5, mode="grid", break_after=-1):
    """
    Given three points, plot surface over their convex combinations.
    """
    ps = []
    for exp_name in exp_names:
        last_trajectory_point = load_history(exp_name)['trajectory'][-1]
        ps.append(last_trajectory_point)

    model = get_model(model_name).to(DEVICE)
    train_loader = get_data_loader(dataset_name, "train", 100)

    # TODO: use meshgrid instead
    if mode == "triangle":
        x_simplex, y_simplex, losses = [], [], []
        for simplex_sample in generate_simplex_combs(ps, 3):
            network_params, simplex_point = simplex_sample
            model.load_params(network_params)
            loss = compute_approx_train_loss(model, train_loader)

            x_simplex.append(simplex_point[0])
            y_simplex.append(simplex_point[1])
            losses.append(loss)

        tri = mtri.Triangulation(x_simplex, y_simplex)

        fig = plt.figure()
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_trisurf(x_simplex, y_simplex, losses, triangles=tri.triangles, 
                        cmap=plt.cm.Spectral, label="loss surface interpolation")
        plt.savefig("surf_" + "_".join(s for s in exp_names))

    else:
        x = np.linspace(-0.4, 1.3, 50)
        y = np.linspace(-0.4, 1.3, 50)

        X, Y = np.meshgrid(x, y)
        Z = 1 - X - Y
        grid = simplex_grid(3, 25) / 25
        grid_val = []

        Z_ = []
        for i in tqdm(range(X.shape[0])):
            Z_ += [[]]
            for j in range(Y.shape[0]):
                weights = [X[i, j], Y[i, j], Z[i, j]]
                network_params = average_with_weights(ps, weights)
                model.load_params(network_params)
                loss = compute_approx_train_loss(model, train_loader,
                                                 break_after)
                Z_[i].append(loss)

        losses = np.array(Z_)

        # back up everything
        np.save("./data/X_" + "_".join(s for s in exp_names), X)
        np.save("./data/Y_" + "_".join(s for s in exp_names), Y)
        np.save("./data/Z_" + "_".join(s for s in exp_names), Z_)

        losses[losses > cutoff] = cutoff

        fig = plt.figure()
        cmap = matplotlib.cm.coolwarm
        cmap.set_bad('white', 1.)
        cmap.set_over('white', alpha=.1)
        ax = fig.add_subplot(111, projection='3d')
        ax.plot_surface(X, Y, losses, vmax=cutoff, rstride=1, cstride=1,
                        cmap=cmap, edgecolor='none', antialiased=True)
        ax.view_init(50, 200)  #225

        plt.savefig("surf_" + "_".join(s for s in exp_names))
        plt.clf()
Example #21
    parser.add_argument('--in_channels', '-i', type=int, default=3)
    parser.add_argument('--classes', '-c', type=int, default=10)
    parser.add_argument('--batch_size', '-b', type=int, default=64)
    parser.add_argument('--ece_bins', type=int, default=15)
    parser.add_argument('--dropout', action='store_true')
    args = parser.parse_args()

    device = args.device if torch.cuda.is_available() else 'cpu'
    num_sample = args.num_samples
    checkpoint = os.path.join(args.root, 'checkpoint.pt')
    with open(os.path.join(args.root, 'config.json')) as inp:
        config = json.load(inp)
    args.root = os.path.join(args.root, config['dataset'])
    os.makedirs(args.root, exist_ok=True)
    text_path = os.path.join(args.root, f'{"dropout_" if args.dropout else ""}result.json')
    test_loader = get_data_loader(config['dataset'], args.batch_size, test_only=True)
    model = get_model_from_config(config)
    model.load_state_dict(torch.load(checkpoint, map_location=device))
    model.to(device)
    if model.__class__.__name__.startswith('Det'):
        if args.dropout:
            y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_dropout(model, test_loader, device, args.num_samples)
        else:
            y_prob, y_true, acc, tnll, nll_miss = test_model_deterministic(model, test_loader, device)
    elif model.__class__.__name__.startswith('Sto'):
        y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_stochastic(model, test_loader, device, args.num_samples)
    elif model.__class__.__name__.startswith('Bayesian'):
        y_prob_all, y_prob, y_true, acc, tnll, nll_miss = test_bayesian(model, test_loader, device, args.num_samples)
    pred_entropy = entropy(y_prob, axis=1)
    np.save(os.path.join(args.root, f'{"dropout_" if args.dropout else ""}predictions.npy'), y_prob)
    ece = ECELoss(args.ece_bins)
Example #22
    def train_loader(self):
        if self._train_loader is None:
            self._train_loader = get_data_loader(self._dataset_name,
                                                 self.label,
                                                 split="train")
        return self._train_loader
Example #23
    def val_loader(self):
        if self._val_loader is None:
            self._val_loader = get_data_loader(self._dataset_name,
                                               self.label,
                                               split="valid")
        return self._val_loader
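These two accessors read like lazily initialized attributes on an experiment class. The same pattern can be written more compactly with functools.cached_property; a sketch with assumed attribute names:

from functools import cached_property

class ExperimentSketch:
    def __init__(self, dataset_name, label):
        self._dataset_name = dataset_name
        self.label = label

    @cached_property
    def train_loader(self):
        return get_data_loader(self._dataset_name, self.label, split="train")

    @cached_property
    def val_loader(self):
        return get_data_loader(self._dataset_name, self.label, split="valid")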
Example #24
def run_training(model_name="vgg16",
                 dataset_name="cifar10",
                 batch_size=32, lr=1e-3, n_epochs=10,
                 save_hist_period=1, verbose=False):
    """
    For now only one model (vgg-16).

    Params:
    :model_name: "vgg{11,13,16,19}" or "lenet" (or "[...]_random")
    :dataset_name: "cifar10" or "mnist"
    :batch_size: int
    :lr: float
    :n_epochs: number of training epochs
    :save_hist_period: frequency with which points are saved

    """
    # name of current checkpoint/run
    check_name = record_experiment(model_name, dataset_name, batch_size, lr)

    # setup model, optimizer and logging
    model = get_model(model_name).to(DEVICE)
    optimizer = SGD(params=model.parameters(), lr=lr)
    # scheduler = ReduceLROnPlateau(optimizer, patience=3,
    #                               threshold=0.1, min_lr=1e-5)
    scheduler = StepLR(optimizer, step_size=10, gamma=0.1)

    cross_ent = nn.CrossEntropyLoss()

    # load data
    train_loader = get_data_loader(dataset_name, "train", batch_size)
    val_loader   = get_data_loader(dataset_name, "val", batch_size)

    history = init_history()

    update_history({"train_loss": float("inf"),
                    "val_acc": 0.,
                    "weights": deepcopy(model.state_dict())},
                     history, check_name)

    for epoch in range(n_epochs):
        model.train()
        if verbose: print("Starting training epoch {}".format(epoch+1))

        running_loss = 0.
        num_batches = len(train_loader)

        for (xs, ys) in train_loader:
            xs, ys = xs.to(DEVICE), ys.to(DEVICE)
            optimizer.zero_grad()
            logits = model(xs)

            loss = cross_ent(logits, ys)
            loss.backward()
            # torch.nn.utils.clip_grad_norm_(model.parameters(), 5.)
            optimizer.step()

            running_loss += loss.item()
            if np.isnan(running_loss):
                print("Loss is nan")
                exit(0)

        avg_loss = running_loss / num_batches
        # StepLR is stepped once per epoch and takes no metric
        # (the loss argument belonged to the commented-out ReduceLROnPlateau above)
        scheduler.step()
        model.save(check_name)

        if verbose: print("Epoch {} loss: {:.3f}".format(epoch+1, avg_loss))

        if epoch % save_hist_period == 0:
            model.eval()
            accs = []
            for (xs, ys) in val_loader:
                xs, ys = xs.to(DEVICE), ys.to(DEVICE)
                logits = model(xs)
                y_pred = logits.argmax(dim=1)
                batch_acc = (y_pred == ys).float().mean().item()
                accs.append(batch_acc)

            if verbose: print("Validation accuracy: {:.3f}".format(np.mean(accs)))
            update_history({"train_loss": avg_loss,
                            "val_acc": np.mean(accs),
                            "weights": deepcopy(model.state_dict())},
                             history, check_name)

    print("Last avg loss {}, eval acc {}".format(avg_loss, np.mean(accs)))
Example #25
def train_baseline(dataset,
                   model_name,
                   noisy=False,
                   distortion_type='AWGN',
                   distortion_amount=25,
                   flatten=False,
                   verbose=True,
                   classes=None):
    """
    Train a baseline model using a specific dataset.
    :param dataset: dataset to train the model on
    :param model_name: name of the neural network model
    :param noisy: if True, train the model on the noisy (distorted) version of the dataset
    :param distortion_type: either 'blur' or 'AWGN'
    :param distortion_amount: severity of the distortion
    :param flatten: flatten the input image to use it in a FF network model
    :param verbose: add verbosity
    :param classes: optional subset of classes to keep (labels are converted accordingly)
    :return:
    """
    assert dataset in DSETS

    # Set seeds
    torch.manual_seed(RANDOM_SEED)
    torch.cuda.manual_seed_all(RANDOM_SEED)

    # Set model path
    model_path = os.path.join('baselines', model_name + '.pt')

    # Train baseline on noisy data
    if noisy:
        # all noisy arrays live in the same distortion-specific directory
        noisy_dir = os.path.join(ROOT_DIR, DATA_DIR, dataset,
                                 distortion_type + '-' + str(distortion_amount))
        X_train = np.load(os.path.join(noisy_dir, 'X_train_noisy.npy'))
        y_train = np.load(os.path.join(noisy_dir, 'y_train.npy'))
        X_test = np.load(os.path.join(noisy_dir, 'X_test_noisy.npy'))
        y_test = np.load(os.path.join(noisy_dir, 'y_test.npy'))

    # Train baseline on clean data
    else:
        if dataset == 'CIFAR_10':
            (X_train, y_train), (X_test, y_test) = load_CIFAR10()
            if classes is not None:
                X_train, y_train = select_classes(X_train,
                                                  y_train,
                                                  classes,
                                                  convert_labels=True)
                X_test, y_test = select_classes(X_test,
                                                y_test,
                                                classes,
                                                convert_labels=True)

            if len(np.unique(y_train)) > 2:
                baseline_net = SimpleBaselineNet(output_dim=len(classes))
            else:
                if model_name == 'SimpleBaselineBinaryNetTanh':
                    y_train = convert_labels(y_train, [0, 1], [-1, 1])
                    y_test = convert_labels(y_test, [0, 1], [-1, 1])
                    baseline_net = SimpleBaselineBinaryNet(activation='tanh')
                elif model_name == 'SimpleBaselineBinaryNet':
                    baseline_net = SimpleBaselineBinaryNet(
                        activation='sigmoid')
                elif model_name == 'SimplerBaselineBinaryNetTanh':
                    y_train = convert_labels(y_train, [0, 1], [-1, 1])
                    y_test = convert_labels(y_test, [0, 1], [-1, 1])
                    baseline_net = SimpleBaselineBinaryNet(activation='tanh',
                                                           num_conv=32,
                                                           num_ff=32)
        elif dataset == 'CIFAR_100':
            (X_train, y_train), (X_test, y_test) = load_CIFAR100()
            if model_name == 'SqueezeNetBaseline':
                baseline_net = squeezenet()
            else:
                baseline_net = ACNBaselineNet()

        elif dataset == 'MNIST':
            (X_train, y_train), (X_test, y_test) = load_MNIST()
            if classes is not None:
                X_train, y_train = select_classes(X_train,
                                                  y_train,
                                                  classes,
                                                  convert_labels=True)
                X_test, y_test = select_classes(X_test,
                                                y_test,
                                                classes,
                                                convert_labels=True)

            if len(np.unique(y_train)) > 2:
                baseline_net = FFSimpleNet()
            else:
                baseline_net = FFBinaryNet()
            flatten = True

        elif dataset == 'USPS':
            (X_train, y_train), (X_test, y_test) = load_USPS(resize=(28, 28))
            baseline_net = FFSimpleNet()
            flatten = True

        else:
            raise RuntimeError(
                "Dataset not in the predefined list: {}".format(DSETS))

    # Scale pixels values
    X_train, X_mean, X_std = image_preprocessing(X_train, scale_only=False)
    X_test, _, _ = image_preprocessing(X_test,
                                       seq_mean=X_mean,
                                       seq_std=X_std,
                                       scale_only=False)

    # Flatten for the dataloader
    y_train = y_train.flatten()
    y_test = y_test.flatten()

    # Stratified split of training and validation
    sss = StratifiedShuffleSplit(n_splits=1,
                                 test_size=VALIDATION_PERCENTAGE,
                                 random_state=RANDOM_SEED)
    train_idx, val_idx = next(sss.split(X_train, y_train))
    (X_train, X_valid) = X_train[train_idx], X_train[val_idx]
    (y_train, y_valid) = y_train[train_idx], y_train[val_idx]

    # Generating data loaders
    train_loader_clean = get_data_loader(X_train, y_train)
    val_loader_clean = get_data_loader(X_valid, y_valid, shuffle=False)
    test_loader_clean = get_data_loader(X_test, y_test, shuffle=False)

    # Logger
    if noisy:
        writer = SummaryWriter('runs/' + dataset + '_baseline_noisy')
    else:
        writer = SummaryWriter('runs/' + dataset + '_baseline_clean')

    # Optimizer and criterion
    optimizer = torch.optim.Adam(baseline_net.parameters())
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 60, gamma=0.02, last_epoch=-1)

    if len(np.unique(y_train)) > 2:
        criterion = nn.CrossEntropyLoss()
    elif len(np.unique(y_train)) == 2 and -1 in np.unique(y_train):
        criterion, _ = init_loss(X_train, loss='exp')
    else:
        criterion = nn.BCELoss()
    baseline_net.to(device)

    # Training and evaluation
    if verbose:
        print('Starting {} baseline training on {}'.format(
            baseline_net.__class__.__name__, dataset))

    train(model=baseline_net,
          train_loader=train_loader_clean,
          val_loader=val_loader_clean,
          test_loader=test_loader_clean,
          optimizer=optimizer,
          criterion=criterion,
          device=device,
          model_path=model_path,
          writer=writer,
          save_model=True,
          scheduler=None,
          flatten=flatten,
          early_stopping=True)
    acc = evaluate(baseline_net, test_loader_clean, device, flatten)

    if verbose:
        print('Your baseline accuracy on ' + dataset +
              ' (x_test_clean) = %.3f' % acc)
Example #26
import torch
from torch import optim

from datasets import get_mnist_dataset, get_cifar10_dataset, get_data_loader
from utils import *

from models import *

trainset, testset = get_mnist_dataset()
trainloader, testloader = get_data_loader(trainset, testset)
batch, labels = next(iter(trainloader))
plot_batch(batch)
batch_var = Variable(batch.cuda())
labels_var = Variable(one_hotify(labels).cuda())

base_model = BaselineCNN().cuda()
print(count_params(base_model))

base_loss = nn.CrossEntropyLoss()
base_optimizer = optim.Adam(base_model.parameters())
base_trainer = Trainer(base_model,
                       base_optimizer,
                       base_loss,
                       trainloader,
                       testloader,
                       use_cuda=True)

base_trainer.run(epochs=10)
base_trainer.save_checkpoint('weights/baseline_mnist.pth.tar')