Example #1
def test_normal_impute(make_missing_data, stats):
    """Test the fixed normal random imputation."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    mean_array = 2. * stats[0]
    std_array = np.sqrt(0.001) * stats[1]
    impute = ab.NormalImpute(data_layer, mask_layer, mean_array, std_array)

    F, KL = impute(X=X, M=m)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        imputed_data = X_imputed[1, m]
        correct = np.array([1.94, 1.97, 1.93, 2.03, 2.02])
        assert np.allclose(imputed_data[-5:], correct, atol=0.1)
        assert KL.eval() == 0.0
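The `make_missing_data` and `stats` fixtures are not shown on this page. A minimal sketch of what they might look like, with shapes inferred from how the tests index the returned values (the concrete sizes, the sentinel value, and the `stats` contents are assumptions):

import numpy as np
import pytest


@pytest.fixture
def make_missing_data():
    # Shapes inferred from the tests: X is (n_samples, N, D), and the tests
    # index it as X[:, m] and X[1, m], so m must be a boolean (N, D) mask.
    n, N, D = 3, 50, 5
    rnd = np.random.RandomState(100)
    X = rnd.randn(n, N, D).astype(np.float32)
    m = rnd.rand(N, D) < 0.2          # True marks a missing entry
    X[:, m] = 666.                    # sentinel in the missing positions
    return None, m, X, None           # the tests unpack `_, m, X, _`


@pytest.fixture
def stats():
    # Per-column statistics; ones are consistent with the expected imputed
    # values near 2.0 above (mean_array = 2. * stats[0]).
    D = 5
    return np.ones(D, dtype=np.float32), np.ones(D, dtype=np.float32)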
Example #2
def test_fixed_gaussian_impute(make_missing_data):
    """Test the impute_mean."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    mean_array = 2 * np.ones(D).astype(np.float32)
    std_array = np.sqrt(0.001) * np.ones(D).astype(np.float32)
    impute = ab.FixedNormalImpute(data_layer, mask_layer, mean_array,
                                  std_array)

    F, KL = impute(X=X, M=m)

    tc = tf.test.TestCase()
    with tc.test_session():
        X_imputed = F.eval()
        imputed_data = X_imputed[1, m]
        correct = np.array([1.94, 1.97, 1.93, 2.03, 2.02])
        assert np.allclose(imputed_data[-5:], correct, atol=0.1)
        assert KL.eval() == 0.0
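In the tests, `data_layer` and `mask_layer` are plain functions that replicate the input-layer behaviour. In a real model they would be actual input layers; a minimal sketch of that composition, assuming `MaskInputLayer` and `DenseVariational` behave as in Aboleth's documentation (none of this comes from the test above):

import numpy as np
import aboleth as ab

D = 5
mean_array = 2. * np.ones(D, dtype=np.float32)   # per-column impute means
std_array = .1 * np.ones(D, dtype=np.float32)    # per-column impute stddevs

net = ab.stack(
    ab.FixedNormalImpute(
        ab.InputLayer(name='X', n_samples=5),    # data input
        ab.MaskInputLayer(name='M'),             # boolean missing-value mask
        mean_array,
        std_array
    ),
    ab.DenseVariational(output_dim=1)            # any downstream layer(s)
)

# net is then called with the same (tensor, KL) contract as in the tests:
# F, KL = net(X=X_batch, M=M_batch)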
Example #3
def test_extra_category_impute(make_missing_categories):
    """Test the impute that learns a scalar value to impute for each col."""
    ab.set_hyperseed(100)
    X, m, ncats = make_missing_categories
    X_true = np.copy(X)
    X_true[:, m[:, 0], 0] = ncats[0]
    X_true[:, m[:, 1], 1] = ncats[1]

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.ExtraCategoryImpute(data_layer, mask_layer, ncats)

    F, KL = impute(X=X, M=m)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert np.all(X_imputed == X_true)
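The `make_missing_categories` fixture is not shown here either. A sketch consistent with the indexing above, where `ncats[j]` is the number of categories in column `j` and the impute assigns that value as a brand-new category (all concrete sizes are assumptions):

import numpy as np
import pytest


@pytest.fixture
def make_missing_categories():
    n, N = 3, 50
    ncats = [3, 5]                    # categories in each of the two columns
    rnd = np.random.RandomState(100)
    X = np.stack([rnd.randint(c, size=(n, N)) for c in ncats], axis=2)
    m = rnd.rand(N, 2) < 0.2          # boolean per-column missing mask
    return X.astype(np.float32), m, ncats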
Example #4
def test_dropout(random, indep):
    """Test dropout layer."""
    samples, rows, cols = 3, 5, 1000
    keep_prob = 0.9
    X = (random.randn(samples, rows, cols) + 1).astype(np.float32)
    ab.set_hyperseed(666)
    drop = ab.DropOut(keep_prob, independent=indep)

    F, KL = drop(X)

    tc = tf.test.TestCase()
    with tc.test_session():
        f = F.eval()
        dropped = np.where(f == 0)

        # Check we dropout whole columns
        if not indep:
            for s, _, c in zip(*dropped):
                assert np.allclose(f[s, :, c], 0.)

        # Check the dropout proportions are approximately correct
        active = 1 - np.sum(f[:, 0, :] == 0) / (samples * cols)

        assert f.shape == X.shape
        assert (active >= keep_prob - 0.05) and (active <= keep_prob + 0.05)
        assert KL == 0
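The `random` and `indep` arguments are pytest fixtures; something like the following would reproduce the behaviour the test relies on (the exact definitions are assumptions):

import numpy as np
import pytest


@pytest.fixture
def random():
    return np.random.RandomState(666)   # seeded RNG for repeatable data


@pytest.fixture(params=[True, False])
def indep(request):
    return request.param                # exercises both noise modes of DropOut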
Example #5
def test_dropout(random):
    """Test dropout layer."""
    X = np.repeat(random.randn(1, 30, 20), 3, axis=0)
    ab.set_hyperseed(666)
    drop = ab.DropOut(0.5)

    F, KL = drop(X)

    tc = tf.test.TestCase()
    with tc.test_session():
        f = F.eval()
        prop_zero = np.sum(f == 0) / np.prod(f.shape)

        assert f.shape == X.shape
        assert (prop_zero >= 0.4) and (prop_zero <= 0.6)
        assert KL == 0
Example #6
def test_learned_normal_impute(make_missing_data):
    """Test the learned normal impute function."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.LearnedNormalImpute(data_layer, mask_layer)

    F, KL = impute(X=X, M=m)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert KL.eval() == 0.0  # Might want to change this in the future
        assert (X_imputed.shape == X.shape)
Example #7
def test_scalar_impute(make_missing_data, scalar):
    """Test the impute that uses a scalar value to impute for each col."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.ScalarImpute(data_layer, mask_layer, scalar)

    F, KL = impute(X=X, M=m)

    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert KL.eval() == 0.0
        assert (X_imputed.shape == X.shape)
        assert np.allclose(X_imputed[:, m], np.pi)
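The final assertion pins down the `scalar` fixture: every imputed cell must equal np.pi, so a minimal definition would be (hypothetical, but implied by the test):

import numpy as np
import pytest


@pytest.fixture
def scalar():
    return np.pi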
Example #8
# Data properties
COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
           "marital_status", "occupation", "relationship", "race", "gender",
           "capital_gain", "capital_loss", "hours_per_week", "native_country",
           "income_bracket"]
CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status",
                       "occupation", "relationship", "race", "gender",
                       "native_country"]
CONTINUOUS_COLUMNS = ["age", "education_num", "capital_gain", "capital_loss",
                      "hours_per_week"]
LABEL_COLUMN = "label"


# Algorithm properties
RSEED = 666
ab.set_hyperseed(RSEED)

# Sample width of net
T_SAMPLES = 1  # Number of random samples to get from an Aboleth net
EMBED_DIMS = 5   # Number of dimensions to embed the categorical columns into

BSIZE = 50  # Mini batch size
NITER = 60000  # Number of iterations (mini-batch views)
P_SAMPLES = 50  # Number of samples to use for prediction

CONFIG = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU only (no GPUs)


def main():
    """Run the demo."""
    # Get Continuous and categorical data
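The data-loading step might use the column lists above along the following lines; the pandas loader, the file name, and the label encoding are assumptions, not the demo's own code:

import numpy as np
import pandas as pd

df = pd.read_csv("adult.data", names=COLUMNS, skipinitialspace=True)
df[LABEL_COLUMN] = (df["income_bracket"] == ">50K").astype(np.float32)
X_con = df[CONTINUOUS_COLUMNS].values.astype(np.float32)  # continuous block
X_cat = df[CATEGORICAL_COLUMNS].values                    # strings, to embed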
Example #9
#! /usr/bin/env python3
import tensorflow as tf
import numpy as np

import aboleth as ab

tf.logging.set_verbosity(tf.logging.INFO)

rseed = 100
ab.set_hyperseed(rseed)

# Optimization
n_epochs = 50
batch_size = 100
config = tf.ConfigProto(device_count={'GPU': 0})  # Run on CPU only (no GPUs)

reg = 0.1

l_samples = 5
p_samples = 5

# Network architecture
net = ab.stack(
    ab.InputLayer(name='X', n_samples=l_samples),  # LSAMPLES, BATCH_SIZE, 28*28
    ab.Conv2D(filters=32, kernel_size=(5, 5),
              l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 28, 28, 32
    ab.Activation(h=tf.nn.relu),
    ab.MaxPool2D(pool_size=(2, 2),
                 strides=(2, 2)),  # LSAMPLES, BATCH_SIZE, 14, 14, 32
    ab.Conv2D(filters=64, kernel_size=(5, 5),
              l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 14, 14, 64