import numpy as np
import pytest
import tensorflow as tf

import aboleth as ab


def test_normal_impute(make_missing_data, stats):
    """Test the fixed normal random imputation."""
    # `make_missing_data` and `stats` are assumed to be pytest fixtures
    # provided by this suite's conftest.py.
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    mean_array = 2. * stats[0]
    std_array = np.sqrt(0.001) * stats[1]
    impute = ab.NormalImpute(data_layer, mask_layer, mean_array, std_array)

    F, KL = impute(X=X, M=m)
    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        imputed_data = X_imputed[1, m]
        correct = np.array([1.94, 1.97, 1.93, 2.03, 2.02])
        assert np.allclose(imputed_data[-5:], correct, atol=0.1)
        assert KL.eval() == 0.0
def test_fixed_gaussian_impute(make_missing_data):
    """Test fixed normal random imputation with constant column statistics."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    mean_array = 2 * np.ones(D).astype(np.float32)
    std_array = np.sqrt(0.001) * np.ones(D).astype(np.float32)
    impute = ab.FixedNormalImpute(data_layer, mask_layer, mean_array, std_array)

    F, KL = impute(X=X, M=m)
    tc = tf.test.TestCase()
    with tc.test_session():
        X_imputed = F.eval()
        imputed_data = X_imputed[1, m]
        correct = np.array([1.94, 1.97, 1.93, 2.03, 2.02])
        assert np.allclose(imputed_data[-5:], correct, atol=0.1)
        assert KL.eval() == 0.0
def test_extra_category_impute(make_missing_categories):
    """Test the impute that adds an extra category for missing values in each col."""
    ab.set_hyperseed(100)
    X, m, ncats = make_missing_categories
    X_true = np.copy(X)
    X_true[:, m[:, 0], 0] = ncats[0]
    X_true[:, m[:, 1], 1] = ncats[1]

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.ExtraCategoryImpute(data_layer, mask_layer, ncats)

    F, KL = impute(X=X, M=m)
    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert np.all(X_imputed == X_true)
@pytest.mark.parametrize('indep', [True, False])  # assumed parametrization
def test_dropout(random, indep):
    """Test dropout layer."""
    samples, rows, cols = 3, 5, 1000
    keep_prob = 0.9
    X = (random.randn(samples, rows, cols) + 1).astype(np.float32)
    ab.set_hyperseed(666)
    drop = ab.DropOut(keep_prob, independent=indep)

    F, KL = drop(X)
    tc = tf.test.TestCase()
    with tc.test_session():
        f = F.eval()
        dropped = np.where(f == 0)

        # Check we drop out whole columns when the noise is not independent
        if not indep:
            for s, _, c in zip(*dropped):
                assert np.allclose(f[s, :, c], 0.)

        # Check the dropout proportions are approximately correct
        active = 1 - np.sum(f[:, 0, :] == 0) / (samples * cols)
        assert f.shape == X.shape
        assert (active >= keep_prob - 0.05) and (active <= keep_prob + 0.05)
        assert KL == 0
def test_dropout_proportion(random):
    """Test dropout zeroes approximately the right proportion of units."""
    X = np.repeat(random.randn(1, 30, 20), 3, axis=0)
    ab.set_hyperseed(666)
    drop = ab.DropOut(0.5)

    F, KL = drop(X)
    tc = tf.test.TestCase()
    with tc.test_session():
        f = F.eval()
        prop_zero = np.sum(f == 0) / np.prod(f.shape)
        assert f.shape == X.shape
        assert (prop_zero >= 0.4) and (prop_zero <= 0.6)
        assert KL == 0
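

# A minimal usage sketch for ``ab.DropOut`` (not a test): dropout layers
# compose with other layers via ``ab.stack``. The sample width and the keep
# probability below are illustrative assumptions only; every constructor used
# here also appears elsewhere in this repo.
def _dropout_usage_sketch(l_samples=5):
    net = ab.stack(
        ab.InputLayer(name='X', n_samples=l_samples),
        ab.DropOut(0.9),  # keep ~90% of units; zeroes whole columns unless independent=True
    )
    return net  # calling net(X=...) yields (output tensor, KL penalty)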
def test_learned_normal_impute(make_missing_data):
    """Test the learned normal impute function."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.LearnedNormalImpute(data_layer, mask_layer)

    F, KL = impute(X=X, M=m)
    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert KL.eval() == 0.0  # Might want to change this in the future
        assert X_imputed.shape == X.shape
@pytest.mark.parametrize('scalar', [np.pi])  # assumed: the check below expects pi
def test_scalar_impute(make_missing_data, scalar):
    """Test the impute that uses a scalar value to impute for each col."""
    ab.set_hyperseed(100)
    _, m, X, _ = make_missing_data

    # This replicates the input layer behaviour
    def data_layer(**kwargs):
        return kwargs['X'], 0.0

    def mask_layer(**kwargs):
        return kwargs['M'], 0.0

    n, N, D = X.shape
    impute = ab.ScalarImpute(data_layer, mask_layer, scalar)

    F, KL = impute(X=X, M=m)
    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()
        X_imputed = F.eval()
        assert KL.eval() == 0.0
        assert X_imputed.shape == X.shape
        assert np.allclose(X_imputed[:, m], np.pi)
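

# A usage sketch for the impute layers exercised above (not a test): in a real
# model the ``data_layer``/``mask_layer`` stand-ins are replaced by actual
# input layers. ``ab.MaskInputLayer`` is assumed here as the mask-side
# counterpart of ``ab.InputLayer``; the call signature matches these tests.
def _impute_usage_sketch(l_samples=5):
    data_input = ab.InputLayer(name='X', n_samples=l_samples)
    mask_input = ab.MaskInputLayer(name='M')  # assumed mask input layer
    impute = ab.LearnedNormalImpute(data_input, mask_input)
    return impute  # calling impute(X=X, M=M) yields (imputed tensor, KL)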
# Data properties
COLUMNS = ["age", "workclass", "fnlwgt", "education", "education_num",
           "marital_status", "occupation", "relationship", "race", "gender",
           "capital_gain", "capital_loss", "hours_per_week", "native_country",
           "income_bracket"]
CATEGORICAL_COLUMNS = ["workclass", "education", "marital_status",
                       "occupation", "relationship", "race", "gender",
                       "native_country"]
CONTINUOUS_COLUMNS = ["age", "education_num", "capital_gain", "capital_loss",
                      "hours_per_week"]
LABEL_COLUMN = "label"

# Algorithm properties
RSEED = 666
ab.set_hyperseed(RSEED)

# Sample width of net
T_SAMPLES = 1  # Number of random samples to get from an Aboleth net
EMBED_DIMS = 5  # Number of dimensions to embed the categorical columns into
BSIZE = 50  # Mini-batch size
NITER = 60000  # Number of iterations (mini-batch views)
P_SAMPLES = 50  # Number of samples to use for prediction

CONFIG = tf.ConfigProto(device_count={'GPU': 0})  # Use a GPU? ('GPU': 0 disables it)


def main():
    """Run the demo."""
    # Get continuous and categorical data
#! /usr/bin/env python3
import tensorflow as tf
import numpy as np
import aboleth as ab

tf.logging.set_verbosity(tf.logging.INFO)

rseed = 100
ab.set_hyperseed(rseed)

# Optimization
n_epochs = 50
batch_size = 100
config = tf.ConfigProto(device_count={'GPU': 0})  # Use a GPU? ('GPU': 0 disables it)

reg = 0.1

l_samples = 5
p_samples = 5

# Network architecture
net = ab.stack(
    ab.InputLayer(name='X', n_samples=l_samples),            # LSAMPLES, BATCH_SIZE, 28*28
    ab.Conv2D(filters=32, kernel_size=(5, 5), l2_reg=reg),   # LSAMPLES, BATCH_SIZE, 28, 28, 32
    ab.Activation(h=tf.nn.relu),
    ab.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),          # LSAMPLES, BATCH_SIZE, 14, 14, 32
    ab.Conv2D(filters=64, kernel_size=(5, 5), l2_reg=reg),   # LSAMPLES, BATCH_SIZE, 14, 14, 64