Example #1
def main(name, summary_dir, seed):

    summary_path = Path(summary_dir)  # .joinpath(name)
    summary_path.mkdir(parents=True, exist_ok=True)

    r = SugiyamaKrauledatMuellerDensityRatioMarginals()

    rows = []

    # NOTE: this loop shadows the `seed` argument of `main`.
    for seed in range(num_seeds):

        # (X_train, y_train), (X_test, y_test) = r.train_test_split(X, y, seed=seed)

        (X_train, y_train), (X_test, y_test) = r.make_covariate_shift_dataset(
          num_test, num_train, class_posterior_fn=class_posterior, threshold=0.5,
          seed=seed)
        X, s = make_classification_dataset(X_test, X_train)

        # # Uniform
        # acc = metric(X_train, y_train, X_test, y_test, random_state=seed)
        # rows.append(dict(weight="uniform", acc=acc, seed=seed))

        # # Exact
        # acc = metric(X_train, y_train, X_test, y_test,
        #              sample_weight=r.ratio(X_train).numpy(), random_state=seed)
        # rows.append(dict(weight="exact", acc=acc, seed=seed))

        for epochs in [500, 1000, 1500, 2000]:

            # for sparsity_factor in sparsity_factors:

            # `sparsity_factor` is assumed to be defined at module scope,
            # since the sweep over `sparsity_factors` above is disabled.
            num_inducing_points = int(len(X) * sparsity_factor)

            # Gaussian Processes
            gpdre = GaussianProcessDensityRatioEstimator(
                input_dim=num_features,
                kernel_cls=Matern52,
                use_ard=use_ard,
                num_inducing_points=num_inducing_points,
                inducing_index_points_initializer=KMeans(X, seed=seed),
                vgp_cls=SVGP,
                whiten=True,
                jitter=jitter,
                seed=seed)
            gpdre.compile(optimizer=optimizer)
            gpdre.fit(X_test, X_train, epochs=epochs, batch_size=batch_size,
                      buffer_size=buffer_size)

            for prop_name, prop in props.items():

                r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
                acc = metric(X_train, y_train, X_test, y_test,
                             sample_weight=r_prop.numpy(), random_state=seed)
                rows.append(dict(weight=prop_name, acc=acc, seed=seed,
                                 sparsity_factor=sparsity_factor,
                                 use_ard=use_ard, epochs=epochs))

    data = pd.DataFrame(rows)
    data.to_csv(str(summary_path.joinpath(f"{name}.csv")))

    return 0
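The example above relies on module-level names that the excerpt does not define (num_seeds, num_train, num_test, class_posterior, sparsity_factor, use_ard, num_features, jitter, optimizer, batch_size, buffer_size, metric, props). A minimal sketch of part of such a configuration, with purely illustrative values (class_posterior and metric are separate helpers, also assumed):

import tensorflow_probability as tfp

tfd = tfp.distributions

# Illustrative values only; the original configuration is not shown.
num_seeds = 10
num_train = 500
num_test = 500
num_features = 2
sparsity_factor = 0.2  # fraction of samples used as inducing points
use_ard = True
jitter = 1e-6
optimizer = "adam"
batch_size = 100
buffer_size = 1000

# Candidate `convert_to_tensor_fn`s used to summarize the posterior ratio.
props = {
    "mean": tfd.Distribution.mean,
    "mode": tfd.Distribution.mode,
    "sample": tfd.Distribution.sample,
}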
Example #2
def main(name, summary_dir, seed):

    summary_path = Path(summary_dir).joinpath("sugiyama")
    summary_path.mkdir(parents=True, exist_ok=True)

    r = SugiyamaKrauledatMuellerDensityRatioMarginals()

    rows = []

    for seed in range(num_seeds):

        # (X_train, y_train), (X_test, y_test) = r.train_test_split(X, y, seed=seed)

        (X_train, y_train), (X_test, y_test) = r.make_covariate_shift_dataset(
            num_test,
            num_train,
            class_posterior_fn=class_posterior,
            threshold=0.5,
            seed=seed)
        X, s = make_classification_dataset(X_test, X_train)

        # Uniform
        acc = metric(X_train, y_train, X_test, y_test, random_state=seed)
        rows.append(
            dict(weight="uniform", acc=acc, seed=seed, dataset_seed=seed))

        # Exact
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=r.ratio(X_train).numpy(),
                     random_state=seed)
        rows.append(dict(weight="exact", acc=acc, seed=seed,
                         dataset_seed=seed))

        # RuLSIF
        r_rulsif = RuLSIFDensityRatioEstimator(alpha=1e-6)
        r_rulsif.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_rulsif.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(
            dict(weight="rulsif", acc=acc, seed=seed, dataset_seed=seed))

        # KLIEP
        # sigmas = [0.1, 0.25, 0.5, 0.75, 1.0]
        sigmas = list(np.maximum(0.25 * np.arange(5), 0.1))
        r_kliep = KLIEPDensityRatioEstimator(sigmas=sigmas, seed=seed)
        r_kliep.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kliep.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="kliep", acc=acc, seed=seed,
                         dataset_seed=seed))

        # KMM
        r_kmm = KMMDensityRatioEstimator(B=1000.0)
        r_kmm.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kmm.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="kmm", acc=acc, seed=seed, dataset_seed=seed))

        # Logistic Regression (Linear)
        r_logreg = LogisticRegressionDensityRatioEstimator(seed=seed)
        r_logreg.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_logreg.ratio(X_train).numpy())
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(
            dict(weight="logreg", acc=acc, seed=seed, dataset_seed=seed))

        # Logistic Regression (MLP)
        r_mlp = MLPDensityRatioEstimator(num_layers=1,
                                         num_units=8,
                                         activation="tanh",
                                         seed=seed)
        r_mlp.compile(optimizer=optimizer, metrics=["accuracy"])
        r_mlp.fit(X_test, X_train, epochs=epochs, batch_size=batch_size)
        sample_weight = np.maximum(1e-6, r_mlp.ratio(X_train).numpy())
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="mlp", acc=acc, seed=seed, dataset_seed=seed))

        # Gaussian Processes
        gpdre = GaussianProcessDensityRatioEstimator(
            input_dim=num_features,
            kernel_cls=Matern52,
            num_inducing_points=num_inducing_points,
            inducing_index_points_initializer=KMeans(X, seed=seed),
            vgp_cls=SVGP,
            whiten=True,
            jitter=jitter,
            seed=seed)
        gpdre.compile(optimizer=optimizer)
        gpdre.fit(X_test,
                  X_train,
                  epochs=epochs,
                  batch_size=batch_size,
                  buffer_size=buffer_size)

        for prop_name, prop in props.items():

            r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
            acc = metric(X_train,
                         y_train,
                         X_test,
                         y_test,
                         sample_weight=r_prop.numpy(),
                         random_state=seed)
            rows.append(
                dict(weight=prop_name, acc=acc, seed=seed, dataset_seed=seed))

    data = pd.DataFrame(rows)
    data.to_csv(str(summary_path.joinpath(f"{name}.csv")))

    return 0
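The evaluation helper `metric` is likewise not shown. A hypothetical sketch consistent with how it is called above: fit a downstream classifier with importance weights and report held-out accuracy.

from sklearn.linear_model import LogisticRegression

def metric(X_train, y_train, X_test, y_test, sample_weight=None,
           random_state=None):
    # Hypothetical helper, not from the original source: train an
    # importance-weighted downstream classifier and score it on the
    # test split.
    model = LogisticRegression(random_state=random_state)
    model.fit(X_train, y_train, sample_weight=sample_weight)
    return model.score(X_test, y_test)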
Example #3
ax.set_yticks([0, 1])
ax.set_yticklabels([r"$x_q \sim q(x)$", r"$x_p \sim p(x)$"])
ax.set_xlabel('$x$')

ax.legend()

plt.show()
# %%

gpdre = GaussianProcessDensityRatioEstimator(
    input_dim=num_features,
    num_inducing_points=num_inducing_points,
    inducing_index_points_initializer=KMeans(X_train, seed=seed),
    kernel_cls=kernel_cls, vgp_cls=SVGP, jitter=jitter, seed=seed)
gpdre.compile(optimizer=optimizer)
gpdre.fit(X_p, X_q, epochs=num_epochs, batch_size=batch_size,
          buffer_size=shuffle_buffer_size)
# %%

log_ratio_mean = gpdre.logit(X_grid, convert_to_tensor_fn=tfd.Distribution.mean)
log_ratio_stddev = gpdre.logit(X_grid, convert_to_tensor_fn=tfd.Distribution.stddev)

# %%

fig, ax = plt.subplots()

ax.plot(X_grid, r.logit(X_grid), c='k',
        label=r"$f(x) = \log p(x) - \log q(x)$")

ax.plot(X_grid, log_ratio_mean.numpy().T,
        label="posterior mean")
fill_between_stddev(X_grid.squeeze(),
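The final `fill_between_stddev` call is cut off in this excerpt. A hypothetical reconstruction of such a helper, assuming the semantics its name and arguments suggest (shade a band of n standard deviations around the mean):

import matplotlib.pyplot as plt

def fill_between_stddev(x, mean, stddev, n=1, ax=None, **kwargs):
    # Hypothetical reconstruction, not the original helper: shade the
    # band mean +/- n * stddev.
    if ax is None:
        ax = plt.gca()
    ax.fill_between(x, mean - n * stddev, mean + n * stddev, **kwargs)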
Example #4
def main(name, summary_dir, seed):

    summary_path = Path(summary_dir).joinpath("two_moons")
    summary_path.mkdir(parents=True, exist_ok=True)

    X, y = make_moons(num_samples, noise=0.05, random_state=dataset_seed)

    test = tfd.MultivariateNormalDiag(loc=[0.5, 0.25], scale_diag=[0.5, 0.5])
    train = tfd.MixtureSameFamily(
        mixture_distribution=tfd.Categorical(probs=[0.5, 0.5]),
        components_distribution=tfd.MultivariateNormalDiag(
            loc=[[-1., -0.5], [2., 1.0]], scale_diag=[0.5, 1.5]))
    r = DensityRatioMarginals(top=test, bot=train)

    rows = []

    for seed in trange(num_seeds):

        (X_train, y_train), (X_test, y_test) = r.train_test_split(X,
                                                                  y,
                                                                  seed=seed)

        # Uniform
        acc = metric(X_train, y_train, X_test, y_test, random_state=seed)
        rows.append(dict(weight="uniform", acc=acc, seed=seed))

        # Exact
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=r.ratio(X_train).numpy(),
                     random_state=seed)
        rows.append(dict(weight="exact", acc=acc, seed=seed))

        # RuLSIF
        r_rulsif = RuLSIFDensityRatioEstimator(alpha=1e-6)
        r_rulsif.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_rulsif.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="rulsif", acc=acc, seed=seed))

        # KLIEP
        # sigmas = [0.1, 0.25, 0.5, 0.75, 1.0]
        sigmas = list(np.maximum(0.25 * np.arange(5), 0.1))
        r_kliep = KLIEPDensityRatioEstimator(sigmas=sigmas, seed=seed)
        r_kliep.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kliep.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="kliep", acc=acc, seed=seed))

        # KMM
        r_kmm = KMMDensityRatioEstimator(B=1000.0)
        r_kmm.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kmm.ratio(X_train))
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="kmm", acc=acc, seed=seed, dataset_seed=seed))

        # Logistic Regression (Linear)
        r_logreg = LogisticRegressionDensityRatioEstimator(seed=seed)
        r_logreg.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_logreg.ratio(X_train).numpy())
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="logreg", acc=acc, seed=seed))

        # Logistic Regression (MLP)
        r_mlp = MLPDensityRatioEstimator(num_layers=1,
                                         num_units=8,
                                         activation="tanh",
                                         seed=seed)
        r_mlp.compile(optimizer=optimizer, metrics=["accuracy"])
        r_mlp.fit(X_test, X_train, epochs=epochs, batch_size=batch_size)
        sample_weight = np.maximum(1e-6, r_mlp.ratio(X_train).numpy())
        acc = metric(X_train,
                     y_train,
                     X_test,
                     y_test,
                     sample_weight=sample_weight,
                     random_state=seed)
        rows.append(dict(weight="mlp", acc=acc, seed=seed))

        # Gaussian Processes
        gpdre = GaussianProcessDensityRatioEstimator(
            input_dim=num_features,
            kernel_cls=Matern52,
            num_inducing_points=num_inducing_points,
            inducing_index_points_initializer=KMeans(X, seed=seed),
            vgp_cls=SVGP,
            whiten=True,
            jitter=jitter,
            seed=seed)
        gpdre.compile(optimizer=optimizer)
        gpdre.fit(X_test,
                  X_train,
                  epochs=epochs,
                  batch_size=batch_size,
                  buffer_size=buffer_size)

        for prop_name, prop in props.items():

            r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
            acc = metric(X_train,
                         y_train,
                         X_test,
                         y_test,
                         sample_weight=r_prop.numpy(),
                         random_state=seed)
            rows.append(dict(weight=prop_name, acc=acc, seed=seed))

    data = pd.DataFrame(rows)
    data.to_csv(str(summary_path.joinpath(f"{name}.csv")))

    return 0
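For reference, the imports these scripts assume look roughly as follows; the density-ratio estimator classes come from the gpdre package used throughout, but their exact module paths are a guess here:

from pathlib import Path

import numpy as np
import pandas as pd
import tensorflow_probability as tfp
from scipy.io import loadmat
from sklearn.datasets import make_moons
from tqdm import trange

# from gpdre import GaussianProcessDensityRatioEstimator, ...  # path assumed

tfd = tfp.distributions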
Example #5
def main(name, summary_dir, seed):

    summary_path = Path(summary_dir).joinpath("shimodaira")
    summary_path.mkdir(parents=True, exist_ok=True)

    test = tfd.Normal(loc=0.0, scale=0.3)
    train = tfd.Normal(loc=0.5, scale=0.5)
    r = DensityRatioMarginals(top=test, bot=train)

    rows = []

    # for dataset_seed in range(9):

    # `dataset_seed` is recorded in the rows below, but its loop above is
    # disabled, so it is assumed to be defined at module scope.
    for seed in range(num_seeds):

        (X_train,
         y_train), (X_test,
                    y_test) = r.make_regression_dataset(num_test,
                                                        num_train,
                                                        latent_fn=poly,
                                                        noise_scale=0.3,
                                                        seed=seed)

        # Uniform
        error = metric(X_train, y_train, X_test, y_test, random_state=seed)
        rows.append(
            dict(weight="uniform",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # Exact
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=r.ratio(X_train).numpy().squeeze(),
                       random_state=seed)
        rows.append(
            dict(weight="exact",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # RuLSIF
        r_rulsif = RuLSIFDensityRatioEstimator(alpha=1e-6)
        r_rulsif.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_rulsif.ratio(X_train))
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=sample_weight,
                       random_state=seed)
        rows.append(
            dict(weight="rulsif",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # KLIEP
        # sigmas = [0.1, 0.25, 0.5, 0.75, 1.0]
        sigmas = list(np.maximum(0.25 * np.arange(5), 0.1))
        r_kliep = KLIEPDensityRatioEstimator(sigmas=sigmas, seed=seed)
        r_kliep.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kliep.ratio(X_train))
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=sample_weight,
                       random_state=seed)
        rows.append(
            dict(weight="kliep",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # KMM
        r_kmm = KMMDensityRatioEstimator(B=1000.0)
        r_kmm.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_kmm.ratio(X_train))
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=sample_weight,
                       random_state=seed)
        rows.append(
            dict(weight="kmm",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # Logistic Regression (Linear)
        r_logreg = LogisticRegressionDensityRatioEstimator(seed=seed)
        r_logreg.fit(X_test, X_train)
        sample_weight = np.maximum(1e-6, r_logreg.ratio(X_train).numpy())
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=sample_weight,
                       random_state=seed)
        rows.append(
            dict(weight="logreg",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))
        # print("Found optimal C={}".format(r_logreg.model.C_))

        # Logistic Regression (MLP)
        r_mlp = MLPDensityRatioEstimator(num_layers=2,
                                         num_units=16,
                                         activation="relu",
                                         seed=seed)
        r_mlp.compile(optimizer="adam", metrics=["accuracy"])
        r_mlp.fit(X_test, X_train, epochs=200, batch_size=64)
        sample_weight = np.maximum(1e-6, r_mlp.ratio(X_train).numpy())
        error = metric(X_train,
                       y_train,
                       X_test,
                       y_test,
                       sample_weight=sample_weight,
                       random_state=seed)
        rows.append(
            dict(weight="mlp",
                 error=error,
                 dataset_seed=dataset_seed,
                 seed=seed))

        # Gaussian Processes
        gpdre = GaussianProcessDensityRatioEstimator(input_dim=num_features,
                                                     kernel_cls=kernel_cls,
                                                     vgp_cls=VGP,
                                                     jitter=jitter,
                                                     seed=seed)
        gpdre.compile(optimizer=optimizer)
        gpdre.fit(X_test, X_train)

        for prop_name, prop in props.items():

            r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
            error = metric(X_train,
                           y_train,
                           X_test,
                           y_test,
                           sample_weight=r_prop.numpy(),
                           random_state=seed)
            rows.append(
                dict(weight=prop_name,
                     error=error,
                     dataset_seed=dataset_seed,
                     seed=seed))

    data = pd.DataFrame(rows)
    data.to_csv(str(summary_path.joinpath(f"{name}.csv")))

    return 0
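The test marginal N(0, 0.3^2), training marginal N(0.5, 0.5^2), and noise scale 0.3 match Shimodaira's classic toy regression problem, so `poly` is presumably its cubic latent function; a sketch under that assumption:

def poly(x):
    # Assumed latent function (Shimodaira, 2000); `poly` is not defined
    # in this excerpt.
    return -x + x ** 3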
Example #6
def main(myname, summary_dir, seed):

    summary_path = Path(summary_dir).joinpath("liacc")
    summary_path.mkdir(parents=True, exist_ok=True)

    names = [
        "abalone", "ailerons", "bank32", "bank8", "cali", "cpuact",
        "elevators", "puma8"
    ][4:]  # restrict to the last four datasets ("cali" onward)

    rows = []

    for name in names:

        output_path = summary_path.joinpath(f"{name}")
        output_path.mkdir(parents=True, exist_ok=True)

        data_path = get_data_path(name)
        data_mat = loadmat(data_path)

        results_path = get_results_path(name)
        results_mat = loadmat(results_path)

        X_trains = get_splits(data_mat, key="X")
        y_trains = get_splits(data_mat, key="Y")

        X_tests = get_splits(data_mat, key="Xtest")
        y_tests = get_splits(data_mat, key="Ytest")

        weights = get_splits(data_mat, key="ideal")

        # X_train = X_trains[-1]
        # Y_train = y_trains[-1]

        # X_test = X_tests[-1]
        # Y_test = y_tests[-1]

        # weight = weights[-1]

        # proj = results_mat["W"]

        # X_train_low = X_train.dot(proj)
        # X_test_low = X_test.dot(proj)

        for split, (X_train, Y_train, X_test, Y_test, weight) in \
                enumerate(zip(X_trains, y_trains, X_tests, y_tests, weights)):

            X, _ = make_classification_dataset(X_test, X_train)
            # X_low, _ = make_classification_dataset(X_test_low, X_train_low)
            num_features = X.shape[-1]
            # num_features_low = X_low.shape[-1]

            y_train = Y_train.squeeze(axis=-1)
            y_test = Y_test.squeeze(axis=-1)
            # sample_weight = weight.squeeze(axis=-1)

            # error = regression_metric(X_train, y_train, X_test, y_test)
            # rows.append(dict(weight="uniform", name=name, error=error, projection="none"))
            # # rows.append(dict(weight="uniform", name=name, split=split, error=error))

            # error = regression_metric(X_train, y_train, X_test, y_test,
            #                           sample_weight=sample_weight)
            # rows.append(dict(weight="exact", name=name, error=error, projection="none"))
            # rows.append(dict(weight="exact", name=name, split=split, error=error))

            #     # # RuLSIF
            #     # r_rulsif = RuLSIFDensityRatioEstimator(alpha=1e-6)
            #     # r_rulsif.fit(X_test, X_train)
            #     # sample_weight = np.maximum(1e-6, r_rulsif.ratio(X_train))
            #     # error = regression_metric(X_train, y_train, X_test, y_test,
            #     #                           sample_weight=sample_weight)
            #     # rows.append(dict(weight="rulsif", name=name, split=split, error=error))

            #     # # # KLIEP
            #     # # r_kliep = KLIEPDensityRatioEstimator(seed=seed)
            #     # # r_kliep.fit(X_test, X_train)
            #     # # sample_weight = np.maximum(1e-6, r_kliep.ratio(X_train))
            #     # # error = regression_metric(X_train, y_train, X_test, y_test,
            #     # #                           sample_weight=sample_weight)
            #     # # rows.append(dict(weight="kliep", name=name, split=split, error=error))

            #     # # KMM
            #     # r_kmm = KMMDensityRatioEstimator(B=1000.0)
            #     # r_kmm.fit(X_test, X_train)
            #     # sample_weight = np.maximum(1e-6, r_kmm.ratio(X_train))
            #     # error = regression_metric(X_train, y_train, X_test, y_test,
            #     #                           sample_weight=sample_weight)
            #     # rows.append(dict(weight="kmm", name=name, split=split, error=error))

            #     # # Logistic Regression (Linear)
            #     # r_logreg = LogisticRegressionDensityRatioEstimator(C=1.0, seed=seed)
            #     # r_logreg.fit(X_test, X_train)
            #     # sample_weight = np.maximum(1e-6, r_logreg.ratio(X_train).numpy())
            #     # error = regression_metric(X_train, y_train, X_test, y_test,
            #     #                           sample_weight=sample_weight)
            #     # rows.append(dict(weight="logreg", name=name, split=split, error=error))

            #     # # Logistic Regression (MLP)
            #     # r_mlp = MLPDensityRatioEstimator(num_layers=2, num_units=32,
            #     #                                  activation="relu", seed=seed)
            #     # r_mlp.compile(optimizer=optimizer, metrics=["accuracy"])
            #     # r_mlp.fit(X_test, X_train, epochs=epochs, batch_size=batch_size)
            #     # sample_weight = np.maximum(1e-6, r_mlp.ratio(X_train).numpy())
            #     # error = regression_metric(X_train, y_train, X_test, y_test,
            #     #                           sample_weight=sample_weight)
            #     # rows.append(dict(weight="mlp", name=name, split=split, error=error))

            #     for whiten in [True]:

            for kernel_name, kernel_cls in kernels.items():

                for num_inducing_points in [100, 300, 500]:

                    for use_ard in [False, True]:

                        # # Gaussian Processes (low-dimensional)
                        # gpdre = GaussianProcessDensityRatioEstimator(
                        #     input_dim=num_features_low,
                        #     kernel_cls=kernel_cls,
                        #     num_inducing_points=num_inducing_points,
                        #     inducing_index_points_initializer=KMeans(X_low, seed=9),
                        #     vgp_cls=SVGP,
                        #     whiten=True,
                        #     jitter=jitter,
                        #     seed=9)
                        # gpdre.compile(optimizer=optimizer)
                        # gpdre.fit(X_test_low, X_train_low,
                        #           epochs=epochs,
                        #           batch_size=batch_size,
                        #           buffer_size=buffer_size)

                        # for prop_name, prop in props.items():

                        #     r_prop = gpdre.ratio(X_train_low, convert_to_tensor_fn=prop)
                        #     error = regression_metric(X_train, y_train, X_test, y_test,
                        #                               sample_weight=r_prop.numpy())
                        #     rows.append(dict(name=name, error=error, projection="low",
                        #                      kernel_name=kernel_name,
                        #                      # whiten=whiten,
                        #                      weight=prop_name))

                        # Gaussian Processes
                        gpdre = GaussianProcessDensityRatioEstimator(
                            input_dim=num_features,
                            kernel_cls=kernel_cls,
                            use_ard=use_ard,
                            num_inducing_points=num_inducing_points,
                            inducing_index_points_initializer=KMeans(
                                X, seed=split),
                            vgp_cls=SVGP,
                            whiten=True,
                            jitter=jitter,
                            seed=split)
                        gpdre.compile(optimizer=optimizer)
                        gpdre.fit(X_test,
                                  X_train,
                                  epochs=epochs,
                                  batch_size=batch_size,
                                  buffer_size=buffer_size)

                        for prop_name, prop in props.items():

                            r_prop = gpdre.ratio(X_train,
                                                 convert_to_tensor_fn=prop)
                            error = regression_metric(
                                X_train,
                                y_train,
                                X_test,
                                y_test,
                                sample_weight=r_prop.numpy())
                            rows.append(
                                dict(
                                    name=name,
                                    error=error,
                                    projection="none",
                                    kernel_name=kernel_name,
                                    split=split,
                                    # whiten=whiten,
                                    use_ard=use_ard,
                                    epochs=epochs,
                                    num_inducing_points=num_inducing_points,
                                    weight=prop_name))

                    # Checkpoint partial results after each inducing-point
                    # setting rather than only once at the end.
                    data = pd.DataFrame(rows)
                    data.to_csv(str(summary_path.joinpath(f"{myname}.csv")))

    # data = pd.DataFrame(rows)
    # data.to_csv(str(summary_path.joinpath(f"{myname}.csv")))

    return 0
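`get_splits` is not shown in the listing. A hypothetical sketch consistent with its use here, where each field of the MATLAB file holds one array per train/test split:

import numpy as np

def get_splits(mat, key):
    # Hypothetical helper, not from the original source: `loadmat`
    # returns MATLAB cell arrays as object arrays, so flatten and
    # yield one dense array per split.
    return [np.asarray(cell) for cell in np.ravel(mat[key])]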
Example #7
def main(myname, summary_dir, seed):

    summary_path = Path(summary_dir).joinpath("liacc")
    summary_path.mkdir(parents=True, exist_ok=True)

    names = [
        "abalone", "ailerons", "bank32", "bank8", "cali", "cpuact",
        "elevators", "puma8"
    ]

    rows = []

    for name in names:

        output_path = summary_path.joinpath(f"{name}")
        output_path.mkdir(parents=True, exist_ok=True)

        data_path = get_path(name, kind="data", data_home="results/")
        data_mat = loadmat(data_path)

        X_trains = get_splits(data_mat, key="X")
        Y_trains = get_splits(data_mat, key="Y")

        X_tests = get_splits(data_mat, key="Xtest")
        Y_tests = get_splits(data_mat, key="Ytest")

        sample_weights = get_splits(data_mat, key="ideal")

        result_path = get_path(name,
                               kind="results_CV",
                               data_home="results/20200530/")
        results_mat = loadmat(result_path)

        projs = get_splits(results_mat, key="all_W")

        for split, (X_train, Y_train, X_test, Y_test, proj, sample_weight) in \
                enumerate(zip(X_trains, Y_trains, X_tests, Y_tests, projs, sample_weights)):

            y_train = Y_train.squeeze(axis=-1)
            y_test = Y_test.squeeze(axis=-1)

            # X, _ = make_classification_dataset(X_test, X_train)
            # num_features = X.shape[-1]

            X_train_low = X_train.dot(proj)
            X_test_low = X_test.dot(proj)
            X_low, _ = make_classification_dataset(X_test_low, X_train_low)
            num_features_low = X_low.shape[-1]

            error = regression_metric(X_train_low, y_train, X_test_low, y_test)
            rows.append(
                dict(weight="uniform",
                     name=name,
                     split=split,
                     error=error,
                     projection="low"))

            error = regression_metric(
                X_train_low,
                y_train,
                X_test_low,
                y_test,
                sample_weight=sample_weight.squeeze(axis=-1))
            rows.append(
                dict(weight="exact",
                     name=name,
                     split=split,
                     error=error,
                     projection="low"))

            # # Gaussian Processes (full-dimensional)
            # gpdre = GaussianProcessDensityRatioEstimator(
            #     input_dim=num_features, kernel_cls=kernels["sqr_exp"],
            #     num_inducing_points=num_inducing_points,
            #     inducing_index_points_initializer=KMeans(X, seed=split),
            #     vgp_cls=SVGP, whiten=True, jitter=jitter, seed=split)
            # gpdre.compile(optimizer=optimizer)
            # gpdre.fit(X_test, X_train,
            #           epochs=epochs,
            #           batch_size=batch_size,
            #           buffer_size=buffer_size)

            # for prop_name, prop in props.items():
            #     r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
            #     error = regression_metric(X_train, y_train, X_test, y_test,
            #                               sample_weight=r_prop.numpy())
            #     rows.append(dict(name=name, split=split, error=error,
            #                      projection="none", kernel_name="sqr_exp",
            #                      use_ard=True, epochs=epochs, weight=prop_name,
            #                      num_inducing_points=num_inducing_points,
            #                      num_features=num_features))

            # Gaussian Processes (low-dimensional)
            gpdre = GaussianProcessDensityRatioEstimator(
                input_dim=num_features_low,
                kernel_cls=kernels["sqr_exp"],
                num_inducing_points=num_inducing_points,
                inducing_index_points_initializer=KMeans(X_low, seed=split),
                vgp_cls=SVGP,
                whiten=True,
                jitter=jitter,
                seed=split)
            gpdre.compile(optimizer=optimizer)
            gpdre.fit(X_test_low,
                      X_train_low,
                      epochs=epochs,
                      batch_size=batch_size,
                      buffer_size=buffer_size)

            for prop_name, prop in props.items():
                r_prop = gpdre.ratio(X_train_low, convert_to_tensor_fn=prop)
                error = regression_metric(X_train_low,
                                          y_train,
                                          X_test_low,
                                          y_test,
                                          sample_weight=r_prop.numpy())
                rows.append(
                    dict(name=name,
                         split=split,
                         error=error,
                         projection="low",
                         kernel_name="sqr_exp",
                         use_ard=True,
                         epochs=epochs,
                         weight=prop_name,
                         num_inducing_points=num_inducing_points,
                         num_features=num_features_low))

            # Checkpoint partial results after each split.
            data = pd.DataFrame(rows)
            data.to_csv(str(summary_path.joinpath(f"{myname}.csv")))

    # data = pd.DataFrame(rows)
    # data.to_csv(str(summary_path.joinpath(f"{myname}.csv")))

    return 0
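`regression_metric` is the regression counterpart of `metric`. A hypothetical sketch consistent with its call sites: fit an importance-weighted downstream regressor and report test error.

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

def regression_metric(X_train, y_train, X_test, y_test, sample_weight=None):
    # Hypothetical helper, not from the original source: fit an
    # importance-weighted regressor and report test MSE.
    model = LinearRegression()
    model.fit(X_train, y_train, sample_weight=sample_weight)
    return mean_squared_error(y_test, model.predict(X_test))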
Example #8
    rows.append(dict(weight="logreg_deep", acc=acc, seed=seed))

    # Gaussian Processes
    gpdre = GaussianProcessDensityRatioEstimator(
        input_dim=num_features,
        kernel_cls=Matern52,
        num_inducing_points=num_inducing_points,
        inducing_index_points_initializer=KMeans(X, seed=seed),
        vgp_cls=SVGP,
        whiten=True,
        jitter=jitter,
        seed=seed)
    gpdre.compile(optimizer=optimizer)
    gpdre.fit(X_val,
              X_train,
              epochs=epochs,
              batch_size=batch_size,
              buffer_size=buffer_size)

    for prop_name, prop in props.items():

        r_prop = gpdre.ratio(X_train, convert_to_tensor_fn=prop)
        acc = metric(X_train,
                     y_train,
                     X_val,
                     y_val,
                     sample_weight=r_prop.numpy(),
                     random_state=seed)
        rows.append(dict(weight=f"gp_{prop_name}", acc=acc, seed=seed))
# %%
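A short follow-up sketch, not part of the original fragment, showing how the collected rows might be summarized once the benchmark loop finishes:

import pandas as pd

# Compare mean accuracy (and spread) per weighting scheme across seeds.
data = pd.DataFrame(rows)
print(data.groupby("weight")["acc"].agg(["mean", "std"]))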