import numpy as np
import aboleth as ab
from types import GeneratorType


def test_batch():
    """Test the batch feed dict generator."""
    X = np.arange(100)
    fd = {'X': X}

    data = ab.batch(fd, batch_size=10, n_iter=10)

    # Make sure this is a generator
    assert isinstance(data, GeneratorType)

    # Make sure we get a dict back of a length we expect
    d = next(data)
    assert isinstance(d, dict)
    assert 'X' in d
    assert len(d['X']) == 10

    # Test we get all of X back in one sweep of the data
    accum = list(d['X'])
    for ds in data:
        assert len(ds['X']) == 10
        accum.extend(list(ds['X']))

    assert len(accum) == len(X)
    assert set(X) == set(accum)
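# Usage sketch (not from the original tests): the contract verified above is
# that ab.batch yields `n_iter` feed dicts of `batch_size` rows each, so it
# can drive a plain feed_dict training loop, as the demos below do. `sess`,
# `train_op`, and the placeholders here are hypothetical stand-ins.
def run_batches(sess, train_op, X_, Y_, X, Y):
    """Illustrative only: step an optimiser over mini-batches from ab.batch."""
    for fd in ab.batch({X_: X, Y_: Y}, batch_size=10, n_iter=1000):
        sess.run(train_op, feed_dict=fd)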
def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Define the continuous layers
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Now define the categorical layers, which we embed.
    # Note: every Embed call can be different; this is just the "lazy" option
    cat_layer_list = [ab.Embed(EMBED_DIMS, i, init_fn="autonorm")
                      for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*cat_layer_list) >>  # Assign columns to embedding layers
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Now we can feed the initial continuous and categorical layers to
    # further "joint" layers after we concatenate them
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Make model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're going to just use a feed_dict to feed in batches, which we
        # generate here
        batches = ab.batch(train_dict, batch_size=BSIZE, n_iter=NITER)

        for i, data in enumerate(batches):
            train.run(feed_dict=data)
            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = prob.eval(feed_dict=test_dict)

    Ey = Ep > 0.5  # Max probability assignment

    acc = accuracy_score(Ys.flatten(), Ey.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))
    print("Accuracy = {}, log loss = {}".format(acc, logloss))
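# The helpers `fetch_data` and `input_fn`, and the constants (T_SAMPLES,
# P_SAMPLES, EMBED_DIMS, BSIZE, NITER, CONFIG), are defined elsewhere in the
# demo script. Below is a rough sketch of the contract `input_fn` satisfies,
# inferred from how main() consumes its outputs; the column selection and the
# 'label' column name are illustrative assumptions, not the demo's own code.
def input_fn_sketch(df):
    """Return (X_con, X_cat, n_cats, Y) in the shapes main() expects."""
    con_cols = df.select_dtypes(include=[np.number]).columns.drop('label')
    cat_cols = df.columns.difference(con_cols).drop('label')
    # Continuous features as a float matrix
    X_con = df[con_cols].values.astype(np.float32)
    # Integer-encode each categorical column for the Embed layers
    X_cat = np.stack([pd.factorize(df[c])[0] for c in cat_cols],
                     axis=1).astype(np.int32)
    n_cats = [df[c].nunique() for c in cat_cols]  # vocabulary size per column
    Y = df[['label']].values.astype(np.float32)
    return X_con, X_cat, n_cats, Y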
def main():
    """Run the demo."""
    data = load_breast_cancer()
    X = data.data.astype(np.float32)
    y = data.target.astype(np.int32)[:, np.newaxis]
    X = StandardScaler().fit_transform(X).astype(np.float32)
    N, D = X.shape

    # Benchmark classifier
    bcl = RandomForestClassifier(random_state=RSEED)

    # Data
    with tf.name_scope("Input"):
        X_ = tf.placeholder(dtype=tf.float32, shape=(None, D))
        Y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1))

    with tf.name_scope("Deepnet"):
        nn, reg = net(X=X_)
        lkhood = tf.distributions.Bernoulli(logits=nn)
        loss = ab.max_posterior(lkhood.log_prob(Y_), reg)
        prob = ab.sample_mean(lkhood.probs)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train = optimizer.minimize(loss)

    kfold = KFold(n_splits=FOLDS, shuffle=True, random_state=RSEED)

    # Launch the graph.
    acc, acc_o, ll, ll_o = [], [], [], []
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        for k, (r_ind, s_ind) in enumerate(kfold.split(X)):
            init.run()

            Xr, Yr = X[r_ind], y[r_ind]
            Xs, Ys = X[s_ind], y[s_ind]

            batches = ab.batch({X_: Xr, Y_: Yr}, batch_size=BSIZE,
                               n_iter=NITER)

            for i, data in enumerate(batches):
                train.run(feed_dict=data)
                if i % 1000 == 0:
                    loss_val = loss.eval(feed_dict=data)
                    print("Iteration {}, loss = {}".format(i, loss_val))

            # Predict. NOTE: we use the mean of the likelihood to get the
            # probabilities
            ps = prob.eval(feed_dict={X_: Xs, n_samples_: PSAMPLES})

            print("Fold {}:".format(k))
            Ep = np.hstack((1. - ps, ps))

            print_k_result(Ys, Ep, ll, acc, "BNN")

            bcl.fit(Xr, Yr.flatten())
            Ep_o = bcl.predict_proba(Xs)
            print_k_result(Ys, Ep_o, ll_o, acc_o, "RF")
            print("-----")

    print_final_result(acc, ll, "BNN")
    print_final_result(acc_o, ll_o, "RF")
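# `net`, `n_samples_`, and the constants (FOLDS, RSEED, PSAMPLES, BSIZE,
# NITER, CONFIG), plus the print_k_result/print_final_result helpers, live at
# module level in the demo script. Below is a plausible sketch of that model
# setup, assuming the same aboleth composition API used in the demo above;
# the layer sizes, keep_prob, and regularisation strengths are illustrative
# values, not the demo's actual configuration.
T_SAMPLES = 5   # stochastic forward passes per training step (assumed value)
PSAMPLES = 50   # forward passes averaged at prediction time (assumed value)
n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])
net = (
    ab.InputLayer(name='X', n_samples=n_samples_) >>
    ab.DropOut(keep_prob=0.9) >>           # makes predictions stochastic
    ab.Dense(output_dim=32, l2_reg=1e-3) >>  # l2 penalties feed `reg` above
    ab.Activation(tf.nn.relu) >>
    ab.Dense(output_dim=1, l2_reg=1e-3)
)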