def nnet(X, Y):
    """Neural net with regularization."""
    lambda_ = 1e-4  # Weight regularizer
    noise = .5  # Likelihood st. dev.

    net = (
        ab.InputLayer(name="X", n_samples=1) >>
        ab.Dense(output_dim=40, l2_reg=lambda_) >>
        ab.Activation(tf.tanh) >>
        ab.Dense(output_dim=20, l2_reg=lambda_) >>
        ab.Activation(tf.tanh) >>
        ab.Dense(output_dim=10, l2_reg=lambda_) >>
        ab.Activation(tf.tanh) >>
        ab.Dense(output_dim=1, l2_reg=lambda_)
    )

    f, reg = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    loss = ab.max_posterior(lkhood, reg)
    return f, loss
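# A minimal training sketch for model builders like nnet above, which return
# a prediction tensor and a scalar loss. The placeholder names (X_, Y_), the
# numpy arrays (x_train, y_train) and the iteration count are assumptions for
# illustration only, not part of the demo code.
X_ = tf.placeholder(tf.float32, [None, x_train.shape[1]])
Y_ = tf.placeholder(tf.float32, [None, 1])

f, loss = nnet(X_, Y_)
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    # ab.batch generates feed dicts of mini-batches, as in the demos below
    batches = ab.batch({X_: x_train, Y_: y_train}, batch_size=32, n_iter=5000)
    for data in batches:
        sess.run(train_op, feed_dict=data)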
def my_model(features, labels, mode, params):
    N = params["N"]
    n_samples = NSAMPLES if mode == tf.estimator.ModeKeys.TRAIN \
        else NPREDICTSAMPLES

    X = tf.feature_column.input_layer(features, params['feature_columns'])

    kernel = ab.RBF(LENSCALE, learn_lenscale=True)
    net = (
        ab.InputLayer(name="X", n_samples=n_samples) >>
        ab.RandomFourier(n_features=NFEATURES, kernel=kernel) >>
        ab.Dense(output_dim=64, init_fn="autonorm") >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1, full=False, prior_std=1.0,
                            learn_prior=True)
    )

    phi, kl = net(X=X)
    std = ab.pos_variable(NOISE, name="noise")
    ll_f = tf.distributions.Normal(loc=phi, scale=std)
    predict_mean = ab.sample_mean(phi)

    # Compute predictions.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predictions = {'predictions': predict_mean, 'samples': phi}
        return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    ll = ll_f.log_prob(labels)
    loss = ab.elbo(ll, kl, N)
    tf.summary.scalar('loss', loss)

    # Compute evaluation metrics.
    mse = tf.metrics.mean_squared_error(labels=labels,
                                        predictions=predict_mean,
                                        name='mse_op')
    r2 = r2_metric(labels, predict_mean)
    metrics = {'mse': mse, 'r2': r2}

    if mode == tf.estimator.ModeKeys.EVAL:
        return tf.estimator.EstimatorSpec(
            mode, loss=loss, eval_metric_ops=metrics)

    # Create training op.
    assert mode == tf.estimator.ModeKeys.TRAIN

    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op)
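# A minimal sketch of wiring the model function above into the Estimator API.
# The feature column, input_fn, D, N_train, x_train and y_train are
# assumptions for illustration; the module constants referenced in my_model
# (NSAMPLES, NPREDICTSAMPLES, LENSCALE, NFEATURES, NOISE) are assumed to be
# defined elsewhere in the demo.
feature_columns = [tf.feature_column.numeric_column("x", shape=[D])]

estimator = tf.estimator.Estimator(
    model_fn=my_model,
    params={"N": N_train, "feature_columns": feature_columns})


def train_input_fn():
    # Assumed numpy arrays x_train (N_train x D) and y_train (N_train x 1)
    dataset = tf.data.Dataset.from_tensor_slices(({"x": x_train}, y_train))
    return dataset.shuffle(N_train).repeat().batch(32)


estimator.train(train_input_fn, steps=10000)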
def nnet_dropout(X, Y):
    """Neural net with dropout."""
    lambda_ = 1e-3  # Weight prior
    noise = .5  # Likelihood st. dev.

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.Dense(output_dim=32, l2_reg=lambda_) >>
        ab.Activation(tf.nn.selu) >>
        ab.DropOut(keep_prob=0.9, independent=True) >>
        ab.Dense(output_dim=16, l2_reg=lambda_) >>
        ab.Activation(tf.nn.selu) >>
        ab.DropOut(keep_prob=0.95, independent=True) >>
        ab.Dense(output_dim=8, l2_reg=lambda_) >>
        ab.Activation(tf.nn.selu) >>
        ab.Dense(output_dim=1, l2_reg=lambda_)
    )

    f, reg = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    loss = ab.max_posterior(lkhood, reg)
    return f, loss
def nnet_ncp(X, Y):
    """Noise contrastive prior network."""
    noise = ab.pos_variable(.5)
    lstd = 1.
    perturb_noise = 10.

    net = (
        ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.NCPContinuousPerturb(input_noise=perturb_noise) >>
        ab.Dense(output_dim=32) >>
        ab.Activation(tf.nn.selu) >>
        ab.Dense(output_dim=16) >>
        ab.Activation(tf.nn.selu) >>
        ab.Dense(output_dim=8) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseNCP(output_dim=1, prior_std=.1, latent_std=lstd)
    )

    f, kl = net(X=X)
    lkhood = tf.distributions.Normal(loc=f, scale=noise).log_prob(Y)
    # N (the number of training points) and n_samples_ are defined at module
    # level in this demo.
    loss = ab.elbo(lkhood, kl, N)
    return f, loss
def linear(X, Y):
    """Linear regression with l2 regularization."""
    lambda_ = 1e-4  # Weight regularizer
    noise = 1.  # Likelihood st. dev.

    net = (
        ab.InputLayer(name="X") >>
        ab.Dense(output_dim=1, l2_reg=lambda_)
    )

    Xw, reg = net(X=X)
    lkhood = tf.distributions.Normal(loc=Xw, scale=noise).log_prob(Y)
    loss = ab.max_posterior(lkhood, reg)
    # With noise = 1, this MAP objective matches a regularized squared-error
    # loss up to an additive constant:
    # loss = 0.5 * tf.reduce_mean((Y - Xw)**2) + reg
    return Xw, loss
def test_categorical_likelihood(make_data, likelihood):
    """Test aboleth with discrete likelihoods.

    Since these are kind of corner cases...
    """
    x, y, _ = make_data
    like, K = likelihood
    N, _ = x.shape

    # Make two classes (K = 2)
    Y = np.zeros(len(y), dtype=np.int32)
    Y[y[:, 0] > 0] = 1

    if K == 1:
        Y = Y[:, np.newaxis]

    X_ = tf.placeholder(tf.float32, x.shape)
    Y_ = tf.placeholder(tf.int32, Y.shape)
    n_samples_ = tf.placeholder(tf.int32)

    layers = ab.stack(
        ab.InputLayer(name='X', n_samples=n_samples_),
        ab.Dense(output_dim=K)
    )

    nn, reg = layers(X=X_)
    like = like(logits=nn)
    log_like = like.log_prob(Y_)
    prob = like.prob(Y_)

    ELBO = ab.elbo(log_like, reg, N)
    MAP = ab.max_posterior(log_like, reg)

    fd = {X_: x, Y_: Y, n_samples_: 10}
    tc = tf.test.TestCase()
    with tc.test_session():
        tf.global_variables_initializer().run()

        assert like.probs.eval(feed_dict=fd).shape == (10, N, K)
        assert prob.eval(feed_dict=fd).shape == (10,) + Y.shape

        # Check that both objectives evaluate to scalars.
        L = ELBO.eval(feed_dict=fd)
        assert np.isscalar(L)
        L = MAP.eval(feed_dict=fd)
        assert np.isscalar(L)
def svr(X, Y):
    """Support vector regressor, kind of..."""
    lambda_ = 1e-4
    eps = 0.01
    lenscale = 1.

    # Specify which kernel to approximate with the random Fourier features
    kern = ab.RBF(lenscale=lenscale)

    net = (
        # ab.InputLayer(name="X", n_samples=n_samples_) >>
        ab.InputLayer(name="X", n_samples=1) >>
        ab.RandomFourier(n_features=50, kernel=kern) >>
        # ab.DropOut(keep_prob=0.9, independent=True) >>
        ab.Dense(output_dim=1, l2_reg=lambda_)
    )

    f, reg = net(X=X)
    loss = tf.reduce_mean(tf.nn.relu(tf.abs(Y - f) - eps)) + reg
    return f, loss
def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Define the continuous layers
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Now define the categorical layers, which we embed
    # Note every Embed call can be different, this is just "lazy"
    cat_layer_list = [ab.Embed(EMBED_DIMS, i, init_fn="autonorm")
                      for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*cat_layer_list) >>  # Assign columns to embedding layers
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Now we can feed the initial continuous and categorical layers to
    # further "joint" layers after we concatenate them
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Make model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're going to just use a feed_dict to feed in batches, which we
        # generate here
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)

        for i, data in enumerate(batches):
            train.run(feed_dict=data)

            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = prob.eval(feed_dict=test_dict)
        Ey = Ep > 0.5  # Max probability assignment

        acc = accuracy_score(Ys.flatten(), Ey.flatten())
        logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))
        print("Accuracy = {}, log loss = {}".format(acc, logloss))
# Network architecture
net = ab.stack(
    ab.InputLayer(name='X', n_samples=l_samples),  # LSAMPLES, BATCH_SIZE, 28*28

    ab.Conv2D(filters=32, kernel_size=(5, 5), l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 28, 28, 32
    ab.Activation(h=tf.nn.relu),
    ab.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),  # LSAMPLES, BATCH_SIZE, 14, 14, 32

    ab.Conv2D(filters=64, kernel_size=(5, 5), l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 14, 14, 64
    ab.Activation(h=tf.nn.relu),
    ab.MaxPool2D(pool_size=(2, 2), strides=(2, 2)),  # LSAMPLES, BATCH_SIZE, 7, 7, 64

    ab.Flatten(),  # LSAMPLES, BATCH_SIZE, 7*7*64

    ab.Dense(output_dim=1024, l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 1024
    ab.Activation(h=tf.nn.relu),
    ab.DropOut(0.5),

    ab.Dense(output_dim=10, l2_reg=reg),  # LSAMPLES, BATCH_SIZE, 10
)


def main():
    # Dataset
    mnist_data = tf.contrib.learn.datasets.mnist.read_data_sets(
        './mnist_demo', reshape=False)
    N = mnist_data.train.images.shape[0]

    X, Y = tf.data.Dataset.from_tensor_slices(
# Optimization
NITER = 20000  # Training iterations per fold
BSIZE = 10  # Mini-batch size
CONFIG = tf.ConfigProto(device_count={'GPU': 0})  # GPU count 0 = CPU only
LSAMPLES = 1  # We're only using 1 dropout "sample" for learning to be more
              # like a MAP network
PSAMPLES = 50  # Number of samples for prediction
REG = 0.001  # Weight regularizer

# Network structure
n_samples_ = tf.placeholder_with_default(LSAMPLES, [])
net = ab.stack(
    ab.InputLayer(name='X', n_samples=n_samples_),
    ab.DropOut(0.95, alpha=True),

    ab.Dense(output_dim=128, l2_reg=REG, init_fn="autonorm"),
    ab.Activation(h=tf.nn.selu),
    ab.DropOut(0.9, alpha=True),

    ab.Dense(output_dim=64, l2_reg=REG, init_fn="autonorm"),
    ab.Activation(h=tf.nn.selu),
    ab.DropOut(0.9, alpha=True),

    ab.Dense(output_dim=32, l2_reg=REG, init_fn="autonorm"),
    ab.Activation(h=tf.nn.selu),
    ab.DropOut(0.9, alpha=True),

    ab.Dense(output_dim=1, l2_reg=REG, init_fn="autonorm"),
)


def main():
    """Run the demo."""
    data = load_breast_cancer()
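# A minimal sketch of how the n_samples_ placeholder above is typically used:
# train with the LSAMPLES default (one dropout sample, MAP-like), then
# override it at prediction time to average PSAMPLES stochastic forward
# passes. The placeholders and the numpy arrays (x_train, y_train, x_test)
# are assumptions for illustration, not the rest of the demo's main().
X_ = tf.placeholder(tf.float32, [None, x_train.shape[1]])
Y_ = tf.placeholder(tf.float32, [None, 1])

nn, kl = net(X=X_)
likelihood = tf.distributions.Bernoulli(logits=nn)
loss = ab.elbo(likelihood.log_prob(Y_), kl, len(x_train))
prob = ab.sample_mean(likelihood.probs)
train_op = tf.train.AdamOptimizer().minimize(loss)

with tf.Session(config=CONFIG) as sess:
    sess.run(tf.global_variables_initializer())
    for data in ab.batch({X_: x_train, Y_: y_train}, batch_size=BSIZE,
                         n_iter=NITER):
        sess.run(train_op, feed_dict=data)

    # Override the sample count for a smoother predictive average
    p_test = prob.eval(feed_dict={X_: x_test, n_samples_: PSAMPLES},
                       session=sess)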