Example #1
def get_dset_split(dataset, split, data_dir, return_means_stds=False):
    """Load the requested dataset split and wrap it in Datafeed train/test sets."""
    if dataset == "flights":
        X_train, X_test, _, _, y_train, y_test, y_means, y_stds = load_flight(base_dir=data_dir,
                                                                              k800=(split == "800k"))

        trainset = Datafeed(X_train, y_train, transform=None)
        testset = Datafeed(X_test, y_test, transform=None)

        N_train = X_train.shape[0]
        input_dim = X_train.shape[1]
        output_dim = y_train.shape[1]

    elif dataset in uci_names + uci_gap_names:
        gap = False
        if dataset in uci_gap_names:
            gap = True
            dataset = dataset[:-4]

        X_train, X_test, _, _, y_train, y_test, y_means, y_stds = \
            load_gap_UCI(base_dir=data_dir, dname=dataset, n_split=int(split), gap=gap)

        trainset = Datafeed(X_train, y_train, transform=None)
        testset = Datafeed(X_test, y_test, transform=None)

        N_train = X_train.shape[0]
        input_dim = X_train.shape[1]
        output_dim = y_train.shape[1]

    else:
        raise ValueError(f"Dataset '{dataset}' not implemented yet.")

    if return_means_stds:
        return trainset, testset, N_train, input_dim, output_dim, y_means, y_stds

    return trainset, testset, N_train, input_dim, output_dim
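
# Illustrative usage sketch (not from the original source): wiring the splits
# returned by get_dset_split into PyTorch DataLoaders. The dataset name, data
# directory, and batch size below are placeholder assumptions; Datafeed is
# assumed to behave like a map-style torch Dataset.
import torch

trainset, testset, N_train, input_dim, output_dim = get_dset_split(
    dataset="flights", split="800k", data_dir="data/")

trainloader = torch.utils.data.DataLoader(trainset, batch_size=128, shuffle=True)
testloader = torch.utils.data.DataLoader(testset, batch_size=128, shuffle=False)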
Example #2
        def __init__(self,
                     *args,
                     base_dir='nb_dir/data/',
                     prop_val=0.05,
                     k800=False,
                     early_stop=None,
                     **kwargs):
            super().__init__(*args,
                             network=network,
                             width=width,
                             batch_size=batch_size,
                             **kwargs)

            # setup dataset
            X_train, X_test, x_means, x_stds, y_train, y_test, y_means, y_stds = load_flight(
                base_dir, k800=k800)

            # load_flight returns standardized arrays; undo the normalization so a
            # validation split can be carved off and re-standardized with
            # train-only statistics below
            X_train = (X_train * x_stds) + x_means
            y_train = (y_train * y_stds) + y_means

            # print(X_train.shape)

            Ntrain = int(X_train.shape[0] * (1 - prop_val))
            X_val = X_train[Ntrain:]
            y_val = y_train[Ntrain:]
            X_train = X_train[:Ntrain]
            y_train = y_train[:Ntrain]

            # print(X_train.shape)

            x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)
            y_means, y_stds = y_train.mean(axis=0), y_train.std(axis=0)

            x_stds[x_stds < 1e-10] = 1.

            X_train = ((X_train - x_means) / x_stds)
            y_train = ((y_train - y_means) / y_stds)

            X_val = ((X_val - x_means) / x_stds)
            y_val = ((y_val - y_means) / y_stds)

            self.trainset = Datafeed(X_train, y_train, transform=None)
            self.valset = Datafeed(X_val, y_val, transform=None)

            self.N_train = X_train.shape[0]
            self.input_dim = X_train.shape[1]
            self.output_dim = y_train.shape[1]
            self.early_stop = early_stop

            self.regression = True
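
# Illustrative sketch (not part of the original class): the de-standardize ->
# split -> re-standardize pattern used above, factored into a standalone helper.
# All names here are hypothetical.
import numpy as np

def make_val_split(X, y, prop_val=0.05):
    # carve the last prop_val fraction off as a validation set
    n_train = int(X.shape[0] * (1 - prop_val))
    X_tr, X_val = X[:n_train], X[n_train:]
    y_tr, y_val = y[:n_train], y[n_train:]
    # standardize with train-only statistics, guarding near-constant features
    x_means, x_stds = X_tr.mean(axis=0), X_tr.std(axis=0)
    x_stds[x_stds < 1e-10] = 1.
    y_means, y_stds = y_tr.mean(axis=0), y_tr.std(axis=0)
    X_tr, X_val = (X_tr - x_means) / x_stds, (X_val - x_means) / x_stds
    y_tr, y_val = (y_tr - y_means) / y_stds, (y_val - y_means) / y_stds
    return X_tr, y_tr, X_val, y_val, y_means, y_stds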
Example #3
        def __init__(self, *args, early_stop=None, **kwargs):
            super().__init__(*args,
                             network=network,
                             width=width,
                             batch_size=batch_size,
                             **kwargs)

            # setup dataset
            X, y = gen_spirals(n_samples=2000,
                               shuffle=True,
                               noise=0.2,
                               random_state=1234,
                               n_arms=2,
                               start_angle=0,
                               stop_angle=720)

            # note: test_size=0.8 keeps only 20% of the 2000 samples for training
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.8, random_state=1234)

            x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)

            X_train = ((X_train - x_means) / x_stds).astype(np.float32)
            X_test = ((X_test - x_means) / x_stds).astype(np.float32)

            y_train = y_train.astype(np.float32)
            y_test = y_test.astype(np.float32)

            self.trainset = Datafeed(X_train, y_train, transform=None)
            self.valset = Datafeed(X_test, y_test, transform=None)

            self.N_train = X_train.shape[0]
            self.input_dim = 2
            self.output_dim = 2
            self.early_stop = early_stop

            self.regression = False
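
# Illustrative only (not from the original source): a quick scatter plot of the
# generated spirals. gen_spirals is assumed to return a 2-D feature array and
# integer class labels, matching the call above.
import matplotlib.pyplot as plt

X_vis, y_vis = gen_spirals(n_samples=2000, shuffle=True, noise=0.2,
                           random_state=1234, n_arms=2, start_angle=0,
                           stop_angle=720)
plt.scatter(X_vis[:, 0], X_vis[:, 1], c=y_vis, s=5, cmap='coolwarm')
plt.title('Two-arm spiral dataset')
plt.show()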
Example #4
if args.dataset == 'my_1d':
    X_train, y_train, X_test, y_test = load_my_1d(args.datadir)
elif args.dataset == 'matern_1d':
    X_train, y_train = load_matern_1d(args.datadir)
elif args.dataset == 'agw_1d':
    X_train, y_train = load_agw_1d(args.datadir, get_feats=False)
elif args.dataset == 'andrew_1d':
    X_train, y_train = load_andrew_1d(args.datadir)
elif args.dataset == 'axis':
    X_train, y_train = load_axis(args.datadir)
elif args.dataset == 'origin':
    X_train, y_train = load_origin(args.datadir)
elif args.dataset == 'wiggle':
    X_train, y_train = load_wiggle()

trainset = Datafeed(torch.Tensor(X_train),
                    torch.Tensor(y_train),
                    transform=None)
if args.dataset == 'my_1d':
    valset = Datafeed(torch.Tensor(X_test),
                      torch.Tensor(y_test),
                      transform=None)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)
else:
    valset = Datafeed(torch.Tensor(X_train),
                      torch.Tensor(y_train),
                      transform=None)
    print(X_train.shape, y_train.shape)

# DataLoader has no random_state argument; call torch.manual_seed() beforehand
# if reproducible shuffling is needed
trainloader = torch.utils.data.DataLoader(trainset,
                                          batch_size=batch_size,
                                          shuffle=True)

Example #5

X_train, X_test, y_train, y_test = train_test_split(X,
                                                    y,
                                                    test_size=0.9,
                                                    random_state=1234)

x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)

X_train = ((X_train - x_means) / x_stds).astype(np.float32)
X_test = ((X_test - x_means) / x_stds).astype(np.float32)

y_train = y_train.astype(np.float32)
y_test = y_test.astype(np.float32)

trainset = Datafeed(X_train, y_train, transform=None)
valset = Datafeed(X_test, y_test, transform=None)

# Declare model

for d in np.arange(0, 101):

    cuda = torch.cuda.is_available()

    input_dim = 2
    width = 20
    n_layers = d
    output_dim = 2

    # prior_probs = 0.85 ** (1 + np.arange(n_layers + 1))
    # prior_probs = prior_probs / prior_probs.sum()
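
# Illustrative sketch of the commented-out depth prior above: a geometric decay
# over layer counts, normalized to sum to one. The 0.85 factor comes from the
# comment; the helper name is made up.
def geometric_depth_prior(n_layers, decay=0.85):
    probs = decay ** (1 + np.arange(n_layers + 1))
    return probs / probs.sum()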
Example #6
        def __init__(self,
                     dname,
                     *args,
                     base_dir='nb_dir/data/',
                     prop_val=0.15,
                     n_split=0,
                     early_stop=None,
                     **kwargs):
            super().__init__(*args,
                             network=network,
                             width=width,
                             batch_size=batch_size,
                             **kwargs)

            gap = False
            if dname in [
                    'boston', 'concrete', 'energy', 'power', 'wine', 'yacht',
                    'kin8nm', 'naval', 'protein'
            ]:
                pass
            elif dname in [
                    'boston_gap', 'concrete_gap', 'energy_gap', 'power_gap',
                    'wine_gap', 'yacht_gap', 'kin8nm_gap', 'naval_gap',
                    'protein_gap'
            ]:
                gap = True
                dname = dname[:-4]

            X_train, X_test, x_means, x_stds, y_train, y_test, y_means, y_stds = \
                load_gap_UCI(base_dir=base_dir, dname=dname, n_split=n_split, gap=gap)

            # load_gap_UCI returns standardized arrays; undo the normalization so a
            # validation split can be carved off and re-standardized with
            # train-only statistics below
            X_train = (X_train * x_stds) + x_means
            y_train = (y_train * y_stds) + y_means

            # print(X_train.shape)
            Ntrain = int(X_train.shape[0] * (1 - prop_val))
            X_val = X_train[Ntrain:]
            y_val = y_train[Ntrain:]
            X_train = X_train[:Ntrain]
            y_train = y_train[:Ntrain]

            # print(X_train.shape)
            x_means, x_stds = X_train.mean(axis=0), X_train.std(axis=0)
            y_means, y_stds = y_train.mean(axis=0), y_train.std(axis=0)

            x_stds[x_stds < 1e-10] = 1.

            X_train = ((X_train - x_means) / x_stds)
            y_train = ((y_train - y_means) / y_stds)

            X_val = ((X_val - x_means) / x_stds)
            y_val = ((y_val - y_means) / y_stds)

            self.trainset = Datafeed(X_train, y_train, transform=None)
            self.valset = Datafeed(X_val, y_val, transform=None)

            self.N_train = X_train.shape[0]
            self.input_dim = X_train.shape[1]
            self.output_dim = y_train.shape[1]
            self.early_stop = early_stop

            self.regression = True
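
# Illustrative only: the uci_names / uci_gap_names lists referenced in Example #1
# presumably cover the datasets enumerated above; these exact definitions are an
# assumption, not taken from the original source.
uci_names = ['boston', 'concrete', 'energy', 'power', 'wine', 'yacht',
             'kin8nm', 'naval', 'protein']
uci_gap_names = [name + '_gap' for name in uci_names]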