Code Example #1
from types import GeneratorType

import numpy as np
import aboleth as ab


def test_batch():
    """Test the batch feed dict generator."""
    X = np.arange(100)
    fd = {'X': X}

    data = ab.batch(fd, batch_size=10, n_iter=10)

    # Make sure this is a generator
    assert isinstance(data, GeneratorType)

    # Make sure we get a dict back with the length we expect
    d = next(data)
    assert isinstance(d, dict)
    assert 'X' in d
    assert len(d['X']) == 10

    # Test that we get all of X back in one sweep of the data
    accum = list(d['X'])
    for ds in data:
        assert len(ds['X']) == 10
        accum.extend(list(ds['X']))

    assert len(accum) == len(X)
    assert set(X) == set(accum)
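
In the demos below, ab.batch is keyed directly on TensorFlow placeholders so that each generated dict can be passed straight to feed_dict. A minimal usage sketch along those lines, assuming TensorFlow 1.x graph mode and aboleth imported as ab:

import numpy as np
import tensorflow as tf
import aboleth as ab

X_ = tf.placeholder(tf.float32, [None, 1])
X = np.random.randn(100, 1).astype(np.float32)

# The keys of the input dict are reused as the keys of each yielded dict, so
# placeholders work directly; n_iter bounds the number of batches yielded.
for fd in ab.batch({X_: X}, batch_size=10, n_iter=30):
    pass  # e.g. sess.run(train_op, feed_dict=fd)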
Code Example #2
File: multi_input.py Project: vishalbelsare/aboleth
import numpy as np
import pandas as pd
import tensorflow as tf
import aboleth as ab
from sklearn.metrics import accuracy_score, log_loss

# The constants (T_SAMPLES, P_SAMPLES, EMBED_DIMS, BSIZE, NITER, CONFIG) and
# the data helpers fetch_data/input_fn are defined elsewhere in multi_input.py.


def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])

    # Define the continuous layers
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Now define the categorical layers, which we embed
    # Note each Embed layer can be configured differently; this is just "lazy"
    cat_layer_list = [ab.Embed(EMBED_DIMS, i, init_fn="autonorm")
                      for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*cat_layer_list) >>  # Assign columns to embedding layers
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Now we can feed the initial continuous and categorical layers to further
    # "joint" layers after we concatenate them
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Make model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're just going to use a feed_dict to feed in batches, which we
        # generate here
        batches = ab.batch(
            train_dict,
            batch_size=BSIZE,
            n_iter=NITER)

        for i, data in enumerate(batches):
            train.run(feed_dict=data)
            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = prob.eval(feed_dict=test_dict)

    Ey = Ep > 0.5  # Max probability assignment

    acc = accuracy_score(Ys.flatten(), Ey.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))

    print("Accuracy = {}, log loss = {}".format(acc, logloss))
Code Example #3
import numpy as np
import tensorflow as tf
import aboleth as ab
from sklearn.datasets import load_breast_cancer
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import KFold
from sklearn.preprocessing import StandardScaler

# The net definition, the n_samples_ placeholder, the constants (RSEED, FOLDS,
# BSIZE, NITER, PSAMPLES, CONFIG) and the print_* helpers are defined elsewhere
# in the demo script.


def main():
    """Run the demo."""
    data = load_breast_cancer()
    X = StandardScaler().fit_transform(data.data).astype(np.float32)
    y = data.target.astype(np.int32)[:, np.newaxis]
    N, D = X.shape

    # Benchmark classifier
    bcl = RandomForestClassifier(random_state=RSEED)

    # Data
    with tf.name_scope("Input"):
        X_ = tf.placeholder(dtype=tf.float32, shape=(None, D))
        Y_ = tf.placeholder(dtype=tf.float32, shape=(None, 1))

    with tf.name_scope("Deepnet"):
        nn, reg = net(X=X_)
        lkhood = tf.distributions.Bernoulli(logits=nn)
        loss = ab.max_posterior(lkhood.log_prob(Y_), reg)
        prob = ab.sample_mean(lkhood.probs)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
        train = optimizer.minimize(loss)

    kfold = KFold(n_splits=FOLDS, shuffle=True, random_state=RSEED)

    # Launch the graph.
    acc, acc_o, ll, ll_o = [], [], [], []
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):

        for k, (r_ind, s_ind) in enumerate(kfold.split(X)):
            init.run()

            Xr, Yr = X[r_ind], y[r_ind]
            Xs, Ys = X[s_ind], y[s_ind]

            batches = ab.batch(
                {X_: Xr, Y_: Yr},
                batch_size=BSIZE,
                n_iter=NITER)
            for i, data in enumerate(batches):
                train.run(feed_dict=data)
                if i % 1000 == 0:
                    loss_val = loss.eval(feed_dict=data)
                    print("Iteration {}, loss = {}".format(i, loss_val))

            # Predict. NOTE: we use the mean of the likelihood to get the
            # probabilities
            ps = prob.eval(feed_dict={X_: Xs, n_samples_: PSAMPLES})

            print("Fold {}:".format(k))
            Ep = np.hstack((1. - ps, ps))

            print_k_result(Ys, Ep, ll, acc, "BNN")

            bcl.fit(Xr, Yr.flatten())
            Ep_o = bcl.predict_proba(Xs)
            print_k_result(Ys, Ep_o, ll_o, acc_o, "RF")
            print("-----")

        print_final_result(acc, ll, "BNN")
        print_final_result(acc_o, ll_o, "RF")
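
The print_k_result and print_final_result helpers are not shown in this snippet. A hypothetical sketch consistent with the call sites above (the ll and acc lists accumulate per-fold log loss and accuracy, and Ep is an (n, 2) array of class probabilities) might look like:

import numpy as np
from sklearn.metrics import accuracy_score, log_loss


def print_k_result(ys, Ep, ll, acc, name):
    # Hypothetical: accumulate this fold's metrics, then report them.
    ys = ys.flatten()
    acc.append(accuracy_score(ys, Ep.argmax(axis=1)))
    ll.append(log_loss(ys, Ep))
    print("{}: accuracy = {:.4g}, log loss = {:.4g}"
          .format(name, acc[-1], ll[-1]))


def print_final_result(acc, ll, name):
    # Hypothetical: mean and standard deviation over all folds.
    print("{} overall: accuracy = {:.4g} ({:.4g}), log loss = {:.4g} ({:.4g})"
          .format(name, np.mean(acc), np.std(acc), np.mean(ll), np.std(ll)))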