Example 1
def test_concat(make_data):
    """Test concatenation layer."""
    x, _, X = make_data

    # This replicates the input layer behaviour
    f = ab.InputLayer('X', n_samples=3)
    g = ab.InputLayer('Y', n_samples=3)

    catlayer = ab.Concat(f, g)

    F, KL = catlayer(X=x, Y=x)

    tc = tf.test.TestCase()
    with tc.test_session():
        forked = F.eval()
        orig = X.eval()
        assert forked.shape == orig.shape[0:2] + (2 * orig.shape[2], )
        assert np.all(forked == np.dstack((orig, orig)))
        assert KL.eval() == 0.0
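
The test assumes a make_data pytest fixture and the usual imports. A minimal sketch of such a fixture, with the shapes and random values chosen purely as assumptions that satisfy the assertions above, could be:

import numpy as np
import pytest
import tensorflow as tf
import aboleth as ab


@pytest.fixture
def make_data():
    """Toy (N, D) inputs plus the replicated (n_samples, N, D) tensor."""
    N, D, n_samples = 10, 5, 3
    rnd = np.random.RandomState(0)
    x = rnd.randn(N, D).astype(np.float32)
    y = rnd.randn(N, 1).astype(np.float32)
    # An InputLayer with n_samples=3 tiles its input along a new leading
    # axis, so this is the tensor the Concat output is compared against
    X = tf.tile(tf.expand_dims(x, 0), [n_samples, 1, 1])
    return x, y, X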
Example 2
def main():
    """Run the demo."""
    # Get continuous and categorical data
    df_train, df_test = fetch_data()
    df = pd.concat((df_train, df_test))
    X_con, X_cat, n_cats, Y = input_fn(df)

    n_samples_ = tf.placeholder_with_default(T_SAMPLES, [])
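    # The placeholder above sets how many posterior samples are drawn through
    # the network; it defaults to T_SAMPLES for training and is overridden
    # with P_SAMPLES in the prediction feed dict below.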

    # Define the continuous layers
    con_layer = (
        ab.InputLayer(name='con', n_samples=n_samples_) >>
        ab.RandomFourier(100, kernel=ab.RBF(learn_lenscale=True)) >>
        ab.Dense(output_dim=16, init_fn="autonorm")
    )

    # Now define the categorical layers, which we embed
    # Note that every Embed call can be different; this is just the "lazy" way
    cat_layer_list = [ab.Embed(EMBED_DIMS, i, init_fn="autonorm")
                      for i in n_cats]
    cat_layer = (
        ab.InputLayer(name='cat', n_samples=n_samples_) >>
        ab.PerFeature(*cat_layer_list) >>  # Assign columns to embedding layers
        ab.Activation(tf.nn.selu) >>
        ab.Dense(16, init_fn="autonorm")
    )

    # Now we can feed the initial continuous and categorical layers to further
    # "joint" layers after we concatenate them
    net = (
        ab.Concat(con_layer, cat_layer) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=1)
    )

    # Split data into training and testing
    Xt_con, Xs_con = np.split(X_con, [len(df_train)], axis=0)
    Xt_cat, Xs_cat = np.split(X_cat, [len(df_train)], axis=0)
    Yt, Ys = np.split(Y, [len(df_train)], axis=0)

    # Graph placeholders
    X_con_ = tf.placeholder(tf.float32, [None, Xt_con.shape[1]])
    X_cat_ = tf.placeholder(tf.int32, [None, Xt_cat.shape[1]])
    Y_ = tf.placeholder(tf.float32, [None, 1])

    # Feed dicts
    train_dict = {X_con_: Xt_con, X_cat_: Xt_cat, Y_: Yt}
    test_dict = {X_con_: Xs_con, X_cat_: Xs_cat, n_samples_: P_SAMPLES}

    # Make model
    N = len(Xt_con)
    nn, kl = net(con=X_con_, cat=X_cat_)
    likelihood = tf.distributions.Bernoulli(logits=nn)
    prob = ab.sample_mean(likelihood.probs)

    loss = ab.elbo(likelihood.log_prob(Y_), kl, N)
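    # ab.elbo builds the variational objective: the expected log-likelihood is
    # rescaled to the full dataset size N and penalised by the aggregated KL
    # term from the variational layers, giving a loss suitable for minimisation.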
    optimizer = tf.train.AdamOptimizer()
    train = optimizer.minimize(loss)
    init = tf.global_variables_initializer()

    with tf.Session(config=CONFIG):
        init.run()

        # We're going to just use a feed_dict to feed in batches, which we
        # generate here
        batches = ab.batch(
            train_dict,
            batch_size=BSIZE,
            n_iter=NITER)

        for i, data in enumerate(batches):
            train.run(feed_dict=data)
            if i % 1000 == 0:
                loss_val = loss.eval(feed_dict=data)
                print("Iteration {}, loss = {}".format(i, loss_val))

        # Predict
        Ep = prob.eval(feed_dict=test_dict)

    Ey = Ep > 0.5  # Max probability assignment

    acc = accuracy_score(Ys.flatten(), Ey.flatten())
    logloss = log_loss(Ys.flatten(), np.hstack((1 - Ep, Ep)))

    print("Accuracy = {}, log loss = {}".format(acc, logloss))
Example 3
def main():
    """Run the imputation demo."""
    # Fetch the data, zero-index the targets and standardise the features
    data = fetch_covtype()
    Xo = data.data[:, :10]
    Xc = data.data[:, 10:]
    Y = (data.target - 1)
    Xo[:, :10] = StandardScaler().fit_transform(Xo[:, :10])

    # Network construction
    n_samples_ = tf.placeholder_with_default(LSAMPLES, [])
    data_input = ab.InputLayer(name='Xo', n_samples=n_samples_)  # Data input

    # Run this with imputation
    if METHOD is not None:
        print("Imputation method {}.".format(METHOD))

        # Fake some missing data
        rnd = np.random.RandomState(RSEED)
        mask = rnd.rand(*Xo.shape) < FRAC_MISSING
        Xo[mask] = MISSING_VAL

        # Use Aboleth to impute the missing values
        mask_input = ab.MaskInputLayer(name='M')  # Missing data mask input
        xm = np.ma.array(Xo, mask=mask)
        if METHOD == "LearnedNormalImpute":
            mean = tf.Variable(np.ma.mean(xm, axis=0).data.astype(np.float32))
            std = ab.pos_variable(np.ma.std(xm, axis=0)
                                  .data.astype(np.float32))
            input_layer = ab.NormalImpute(data_input, mask_input, mean, std)
        elif METHOD == "LearnedScalarImpute":
            scalar = tf.Variable(tf.zeros(Xo.shape[-1]))
            input_layer = ab.ScalarImpute(data_input, mask_input, scalar)
        elif METHOD == "FixedNormalImpute":
            mean = np.ma.mean(xm, axis=0).data.astype(np.float32)
            std = np.ma.std(xm, axis=0).data.astype(np.float32)
            input_layer = ab.NormalImpute(data_input, mask_input, mean, std)
        elif METHOD == "FixedScalarImpute":
            mean = np.ma.mean(xm, axis=0).data.astype(np.float32)
            input_layer = ab.ScalarImpute(data_input, mask_input, mean)
        elif METHOD == "MeanImpute":
            input_layer = ab.MeanImpute(data_input, mask_input)
        else:
            raise ValueError("Invalid method!")

    # Run this without imputation
    else:
        print("No missing data")
        input_layer = data_input
        mask = np.zeros_like(Xo)

    cat_layers = (
        ab.InputLayer(name='Xc', n_samples=n_samples_) >>
        ab.DenseVariational(output_dim=8)
    )

    con_layers = (
        input_layer >>
        ab.DenseVariational(output_dim=8)
    )

    net = (
        ab.Concat(cat_layers, con_layers) >>
        ab.Activation(tf.nn.selu) >>
        ab.DenseVariational(output_dim=NCLASSES)
    )

    # Split the training and testing data
    Xo_tr, Xo_ts, Xc_tr, Xc_ts, Y_tr, Y_ts, M_tr, M_ts = train_test_split(
        Xo.astype(np.float32),
        Xc.astype(np.float32),
        Y.astype(np.int32),
        mask,
        test_size=FRAC_TEST,
        random_state=RSEED
    )
    N_tr, Do = Xo_tr.shape
    _, Dc = Xc_tr.shape

    # Data
    with tf.name_scope("Input"):
        Xob, Xcb, Yb, Mb = batch_training(Xo_tr, Xc_tr, Y_tr, M_tr,
                                          n_epochs=NEPOCHS, batch_size=BSIZE)
        Xo_ = tf.placeholder_with_default(Xob, shape=(None, Do))
        Xc_ = tf.placeholder_with_default(Xcb, shape=(None, Dc))
        # Y_ has to be this shape for compatibility with Categorical
        Y_ = tf.placeholder_with_default(Yb, shape=(None,))
        if METHOD is not None:
            M_ = tf.placeholder_with_default(Mb, shape=(None, Do))

    with tf.name_scope("Deepnet"):
        if METHOD is not None:
            nn, kl = net(Xo=Xo_, Xc=Xc_, M=M_)
        else:
            nn, kl = net(Xo=Xo_, Xc=Xc_)

        lkhood = tf.distributions.Categorical(logits=nn)
        loss = ab.elbo(lkhood.log_prob(Y_), kl, N_tr)
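        # Predictive class probabilities, averaged over the posterior samples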
        prob = ab.sample_mean(lkhood.probs)

    with tf.name_scope("Train"):
        optimizer = tf.train.AdamOptimizer()
        global_step = tf.train.create_global_step()
        train = optimizer.minimize(loss, global_step=global_step)

    # Logging learning progress
    log = tf.train.LoggingTensorHook(
        {'step': global_step, 'loss': loss},
        every_n_iter=1000
    )

    # This is the main training "loop"
    with tf.train.MonitoredTrainingSession(
            config=CONFIG,
            save_summaries_steps=None,
            save_checkpoint_secs=None,
            hooks=[log]
    ) as sess:
        try:
            while not sess.should_stop():
                sess.run(train)
        except tf.errors.OutOfRangeError:
            print('Input queues have been exhausted!')

        # Prediction
        feed_dict = {Xo_: Xo_ts, Xc_: Xc_ts, Y_: [0], n_samples_: PSAMPLES}
        if METHOD is not None:
            feed_dict[M_] = M_ts

        p = sess.run(prob, feed_dict=feed_dict)

    # Get mean of samples for prediction, and max probability assignments
    Ey = p.argmax(axis=1)

    # Score results
    acc = accuracy_score(Y_ts, Ey)
    ll = log_loss(Y_ts, p)
    conf = confusion_matrix(Y_ts, Ey)
    print("Final scores: {}".format(METHOD))
    print("\tAccuracy = {}\n\tLog loss = {}\n\tConfusion =\n{}".
          format(acc, ll, conf))
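
This demo likewise assumes module-level imports and constants (LSAMPLES, PSAMPLES, METHOD, FRAC_MISSING, MISSING_VAL, RSEED, FRAC_TEST, NCLASSES, NEPOCHS, BSIZE and CONFIG) defined elsewhere in the original source, together with a batch_training helper. A minimal sketch of that helper, with the signature inferred from the call above rather than taken from the original, could use the tf.data API:

import numpy as np
import tensorflow as tf
import aboleth as ab
from sklearn.datasets import fetch_covtype
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, log_loss, confusion_matrix


def batch_training(Xo, Xc, Y, M, n_epochs, batch_size):
    """Cycle through the training arrays in shuffled mini-batches."""
    data = tf.data.Dataset.from_tensor_slices((Xo, Xc, Y, M))
    data = data.shuffle(buffer_size=1000).repeat(n_epochs).batch(batch_size)
    # The one-shot iterator raises OutOfRangeError once the epochs are
    # exhausted, which is what ends the MonitoredTrainingSession loop above
    return data.make_one_shot_iterator().get_next()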