# Example 1
def load_omniglot_image(root='dataset', n_validation=1345, state=123):
    """Load the Omniglot dataset, downloading it on first use.

    Args:
        root: base dataset directory.
        n_validation: number of training rows held out for validation.
        state: seed for the RNG that picks the validation rows.

    Returns:
        (train, validation, test) arrays of flattened 28x28 images,
        cast to floatX.
    """
    helpers.create(root, 'omniglot')
    droot = root+'/'+'omniglot'

    # Download on first use.
    if not os.path.exists('{}/omniglot.amat'.format(droot)):
        from downloader import download_omniglot
        download_omniglot(droot)

    def reshape_data(data):
        # Flatten each 28x28 image in column-major (Fortran) order.
        # BUG FIX: numpy only accepts 'C'/'F'/'A' for `order`; the string
        # 'fortran' is rejected by modern numpy versions.
        return data.reshape((-1, 28, 28)).reshape((-1, 28*28), order='F')

    path = '{}/omniglot.amat'.format(droot)
    omni_raw = scipy.io.loadmat(path)

    train_data = reshape_data(omni_raw['data'].T.astype(floatX))
    test_data = reshape_data(omni_raw['testdata'].T.astype(floatX))

    n = train_data.shape[0]

    # Deterministic validation split: sample without replacement,
    # then keep the complement for training.
    ind_va = np.random.RandomState(
        state).choice(n, n_validation, replace=False)

    ind_tr = np.delete(np.arange(n), ind_va)

    return train_data[ind_tr], train_data[ind_va], test_data
# Example 2
def load_mnist_image(root='dataset', n_validation=1345, state=123):
    """Load MNIST images and return the fixed 50k/10k train/validation split.

    NOTE(review): `n_validation` and `state` are accepted for signature
    parity with the other loaders but are never used — the split is the
    hard-coded 50000/10000 one. Confirm before relying on them.
    """
    helpers.create(root, 'bmnist')
    droot = '{}/{}'.format(root, 'bmnist')

    # Fetch the data on first use.
    if not os.path.exists('{}/train-images-idx3-ubyte'.format(droot)):
        from downloader import download_bmnist
        download_bmnist(droot)

    train_path = '{}/train-images-idx3-ubyte'.format(droot)
    test_path = '{}/t10k-images-idx3-ubyte'.format(droot)
    # NOTE(review): np.loadtxt expects text files; idx3-ubyte files are
    # normally binary — presumably the downloader stores them as text. Verify.
    train_images = np.loadtxt(train_path).astype(floatX)
    test_images = np.loadtxt(test_path).astype(floatX)

    return train_images[:50000], train_images[50000:], test_images
# Example 3
def load_caltech101_image(root='dataset'):
    """Load the binarized Caltech-101 Silhouettes (28x28) split.

    Shapes: train 4100x28x28, validation 2264x28x28, test 2307x28x28.
    Returns [train, validation, test] arrays cast to floatX.
    """
    helpers.create(root, 'caltech101')
    droot = root+'/'+'caltech101'
    fn = 'caltech101_silhouettes_28_split1.mat'
    mat_path = '{}/{}'.format(droot, fn)

    # Download on first use.
    if not os.path.exists(mat_path):
        from downloader import download_caltech101
        download_caltech101(droot)

    raw = scipy.io.loadmat(mat_path)
    splits = (raw['train_data'], raw['val_data'], raw['test_data'])

    return [split.astype(floatX) for split in splits]
# Example 4
def load_bmnist_image(root='dataset'):
    """Load the statically binarized MNIST splits (Larochelle 2011)."""
    helpers.create(root, 'bmnist')
    droot = root + '/' + 'bmnist'

    # Download on first use.
    if not os.path.exists('{}/binarized_mnist_train.amat'.format(droot)):
        from downloader import download_bmnist
        download_bmnist(droot)

    # One (name, row-count) pair per split; every image is 28*28 = 784 pixels.
    splits = []
    for split_name, n_rows in (('train', 50000), ('valid', 10000),
                               ('test', 10000)):
        path = '{}/binarized_mnist_{}.amat'.format(droot, split_name)
        splits.append(np.loadtxt(path).astype(floatX).reshape(n_rows, 784))

    train_x, valid_x, test_x = splits
    return train_x, valid_x, test_x
# Example 5
def load_cifar10_image(root='dataset',labels=False):
    """Load CIFAR-10 from a pickle, downloading it on first use.

    Args:
        root: base dataset directory.
        labels: when True, also return one-hot encoded labels.

    Returns:
        A generator over (tr_x, tr_y, te_x, te_y) when `labels` is True,
        otherwise over (tr_x, te_x); all elements cast to floatX.
    """
    helpers.create(root, 'cifar10')
    droot = root+'/'+'cifar10'

    if not os.path.exists('{}/cifar10.pkl'.format(droot)):
        from downloader import download_cifar10
        download_cifar10(droot)

    f = lambda d:d.astype(floatX)
    filename = '{}/cifar10.pkl'.format(droot)
    # BUG FIX: pickle data is binary — open with 'rb' (text mode 'r' breaks
    # on Python 3) and close the handle deterministically instead of leaking
    # the anonymous file object.
    with open(filename, 'rb') as fh:
        tr_x, tr_y, te_x, te_y = pickle.load(fh)

    # Rescale raw uint8 pixels into [0, 1).
    if tr_x.max() == 255:
        tr_x = tr_x / 256.
        te_x = te_x / 256.

    if labels:
        # NOTE(review): OneHotEncoder(10) relies on the deprecated/removed
        # positional `n_values` argument of older scikit-learn — confirm the
        # pinned sklearn version before upgrading.
        enc = OneHotEncoder(10)
        tr_y = enc.fit_transform(tr_y).toarray().reshape(50000,10).astype(int)
        te_y = enc.fit_transform(te_y).toarray().reshape(10000,10).astype(int)

        return (f(d) for d in [tr_x, tr_y, te_x, te_y])
    else:
        return (f(d) for d in [tr_x, te_x])
# Example 6
    import autoencoders as aes
    from torch import optim, nn
    from itertools import chain
    import utils
    import numpy as np

    nmc = 3
    lr1 = 0.0015
    lr2 = 0.0003
    batch_size = 20
    zdim = 200
    epoch = 10
    print_every = 50

    droot, sroot, spath = helpers.getpaths()
    helpers.create(droot, 'mnist')

    ds_transforms = transforms.Compose(
        [transforms.ToTensor(), transforms_.binarize()])

    train_loader = torch.utils.data.DataLoader(datasets.MNIST(
        droot + '/mnist', download=True, train=True, transform=ds_transforms),
                                               batch_size=batch_size,
                                               shuffle=True)

    enc = aes.BinaryLinear(784, zdim)
    dec = aes.BinaryLinear(zdim, 784)
    prior = aes.BinaryPrior(zdim)
    iib = nn.parameter.Parameter(torch.zeros(1) - 200)

    optim1 = optim.Adam(chain(dec.parameters(), prior.parameters(), [iib]),
# Example 7
def main(args, save_args=True, log_=True):
    """Build, train, and evaluate a causal-inference model from CLI args.

    Args:
        args: parsed argument namespace (data selection, model/training
            hyper-parameters, saveroot, train/eval flags).
        save_args: when True, dump `args` as JSON into the save directory.
        log_: forwarded to the logger (enables/disables logging).

    Returns:
        The constructed (and, depending on flags, trained/evaluated) model.

    Raises:
        Exception: if `args.model_type` is unknown or
            `args.n_hidden_layers` is negative.
    """
    # create logger
    helpers.create(*args.saveroot.split("/"))
    logger = helpers.Logging(args.saveroot, "log.txt", log_)
    logger.info(args)

    # save args
    if save_args:
        with open(os.path.join(args.saveroot, "args.txt"), "w") as file:
            file.write(json.dumps(args.__dict__, indent=4))

    # dataset
    logger.info(f"getting data: {args.data}")
    ites, ate, w, t, y = get_data(args)

    # comet logging
    if args.comet:
        exp = Experiment(project_name="causal-benchmark",
                         auto_metric_logging=False)
        exp.add_tag(args.data)
        logger.info(f"comet url: {exp.url}")
    else:
        exp = None

    logger.info(f"ate: {ate}")

    # distribution of outcome (y)
    distribution = get_distribution(args)
    logger.info(distribution)

    # training params
    training_params = TrainingParams(lr=args.lr,
                                     batch_size=args.batch_size,
                                     num_epochs=args.num_epochs)
    logger.info(training_params.__dict__)

    # initializing model
    w_transform = preprocess.Preprocess.preps[args.w_transform]
    y_transform = preprocess.Preprocess.preps[args.y_transform]
    # Normalized outcomes live in [0, 1]; otherwise leave the bounds open.
    outcome_min = 0 if args.y_transform == "Normalize" else None
    outcome_max = 1 if args.y_transform == "Normalize" else None

    # model type
    additional_args = dict()
    if args.model_type == 'tarnet':
        Model = TarNet

        logger.info('model type: tarnet')
        mlp_params = MLPParams(
            n_hidden_layers=args.n_hidden_layers,
            dim_h=args.dim_h,
            activation=getattr(torch.nn, args.activation)(),
        )
        logger.info(mlp_params.__dict__)
        # The same MLP hyper-parameters are shared by all four sub-networks.
        network_params = dict(
            mlp_params_w=mlp_params,
            mlp_params_t_w=mlp_params,
            mlp_params_y0_w=mlp_params,
            mlp_params_y1_w=mlp_params,
        )
    elif args.model_type == 'linear':
        Model = LinearModel

        logger.info('model type: linear model')
        network_params = dict()
    elif 'gp' in args.model_type:
        if args.model_type == 'gp':
            Model = GPModel
        elif args.model_type == 'targp':
            Model = TarGPModel
        else:
            raise Exception(f'model type {args.model_type} not implemented')
        # BUG FIX: this branch previously logged 'model type: linear model'
        # (copy-paste error from the branch above).
        logger.info(f'model type: {args.model_type}')

        kernel_t = gpytorch.kernels.__dict__[args.kernel_t]()
        kernel_y = gpytorch.kernels.__dict__[args.kernel_y]()
        var_dist = gpytorch.variational.__dict__[args.var_dist]
        network_params = dict(
            gp_t_w=GPParams(kernel=kernel_t, var_dist=var_dist),
            gp_y_tw=GPParams(kernel=kernel_y, var_dist=None),
        )
        # BUG FIX: the two adjacent f-strings were concatenated with no
        # separator, producing one run-together log line.
        logger.info(f'gp_t_w: {repr(network_params["gp_t_w"])} '
                    f'gp_y_tw: {repr(network_params["gp_y_tw"])}')
        additional_args['num_tasks'] = args.num_tasks
    else:
        raise Exception(f'model type {args.model_type} not implemented')

    if args.n_hidden_layers < 0:
        raise Exception(
            f'`n_hidden_layers` must be nonnegative, got {args.n_hidden_layers}'
        )

    model = Model(
        w,
        t,
        y,
        training_params=training_params,
        network_params=network_params,
        binary_treatment=True,
        outcome_distribution=distribution,
        outcome_min=outcome_min,
        outcome_max=outcome_max,
        train_prop=args.train_prop,
        val_prop=args.val_prop,
        test_prop=args.test_prop,
        seed=args.seed,
        early_stop=args.early_stop,
        patience=args.patience,
        ignore_w=args.ignore_w,
        grad_norm=args.grad_norm,
        w_transform=w_transform,
        y_transform=y_transform,  # TODO set more args
        savepath=os.path.join(args.saveroot, 'model.pt'),
        test_size=args.test_size,
        additional_args=additional_args)

    # TODO GPU support
    if args.train:
        model.train(print_=logger.info, comet_exp=exp)

    # evaluation
    if args.eval:
        summary, all_runs = evaluate(args, model)
        logger.info(summary)
        with open(os.path.join(args.saveroot, "summary.txt"), "w") as file:
            file.write(json.dumps(summary, indent=4))
        with open(os.path.join(args.saveroot, "all_runs.txt"), "w") as file:
            file.write(json.dumps(all_runs))

        model.plot_ty_dists()

    return model