Code Example #1
import numpy as np
import tensorflow as tf
import edward as ed
import matplotlib.pyplot as plt
from edward.models import Bernoulli, Normal

# Project-specific helpers assumed to be defined/imported elsewhere in the repo:
# get_fc_layer_fn, batch_generator, matching_estimate, evalaute_effect_estimate.


def learn_separated(args, train_set, test_set, anlysis_flag=False):

    # Parameters
    n_hidd = 1000  # number of hidden units per layer
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128

    hidden_layer = get_fc_layer_fn(l2_reg_scale=1e-4, depth=1)
    out_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']

    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]

    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#
    tf.reset_default_graph()
    # ------ Define Inputs ---------------------#
    # define placeholder which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])

    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------ Define generative model /decoder-----------------------#

    if anlysis_flag:
        z_t_dim = 1
        z_y_dim = 1
    else:
        # z_x_dim = 1
        z_t_dim = 2
        z_y_dim = 3
    # latent_dims = (z_x_dim, z_t_dim, z_y_dim)
    latent_dims = (z_t_dim, z_y_dim)
    # prior over latent variables:
    # p(zx) -
    # zx = Normal(loc=tf.zeros([n_ph, z_x_dim]), scale=tf.ones([n_ph, z_x_dim]))
    # p(zt) -
    zt = Normal(loc=tf.zeros([n_ph, z_t_dim]), scale=tf.ones([n_ph, z_t_dim]))
    # p(zy) -
    zy = Normal(loc=tf.zeros([n_ph, z_y_dim]), scale=tf.ones([n_ph, z_y_dim]))
    z = tf.concat([zt, zy], axis=1)

    # p(x|z) - likelihood of proxy X
    # z = tf.concat([zx, zt, zy], axis=1)

    hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    x = Normal(loc=out_layer(hidden, x_dim, None),
               scale=out_layer(hidden, x_dim, tf.nn.softplus),
               name='gaussian_px_z')

    # p(t|zt)
    if args.model_type == 'separated_with_confounder':
        hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    else:
        hidden = hidden_layer(zt, n_hidd, tf.nn.elu)
    probs = out_layer(hidden, 1, tf.nn.sigmoid)  # output in [0,1]
    t = Bernoulli(probs=probs, dtype=tf.float32, name='bernoulli_pt_z')

    # p(y|t,zy)
    hidden = hidden_layer(zy, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_y_t0 = out_layer(hidden, 1, None)
    mu_y_t1 = out_layer(hidden, 1, None)
    # y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=tf.ones_like(mu_y_t0))
    sigma_y_t0 = out_layer(hidden, 1, tf.nn.softplus)
    sigma_y_t1 = out_layer(hidden, 1, tf.nn.softplus)
    y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0,
               scale=t * sigma_y_t1 + (1. - t) * sigma_y_t0)

    # ------ Define inference model - CEVAE variational approximation (encoder)

    # q(t|x)
    hqt = hidden_layer(x_ph, n_hidd, tf.nn.elu)
    probs_t = out_layer(hqt, 1, tf.nn.sigmoid)  # output in [0,1]
    qt = Bernoulli(probs=probs_t, dtype=tf.float32)

    # q(y|x,t)
    hqy = hidden_layer(x_ph, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_qy_t0 = out_layer(hqy, 1, None)
    mu_qy_t1 = out_layer(hqy, 1, None)  # no activation on the mean, matching mu_qy_t0
    sigma_qy_t1 = out_layer(hqy, 1, tf.nn.softplus)
    sigma_qy_t0 = out_layer(hqy, 1, tf.nn.softplus)
    # qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0))
    qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0,
                scale=qt * sigma_qy_t1 + (1. - qt) * sigma_qy_t0)

    # # q(z_x|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    # muq_t0 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t0 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # muq_t1 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t1 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # qzx = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
    #             scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # shared hidden layer
    inpt2 = tf.concat([x_ph, qy], axis=1)
    hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)

    # q(zt|x,t,y)
    muq_t0 = out_layer(hqz, z_t_dim, None)
    sigmaq_t0 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_t_dim, None)
    sigmaq_t1 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    qzt = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # q(zy|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    muq_t0 = out_layer(hqz, z_y_dim, None)
    sigmaq_t0 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_y_dim, None)
    sigmaq_t1 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    qzy = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # end graph def

    # ------ Criticism / evaluation graph:
    zy_learned = ed.copy(qzy, {x: x_ph})
    zt_learned = ed.copy(qzt, {x: x_ph})

    # sample posterior predictive for p(y|z_y,t)
    y_post = ed.copy(y, {zy: qzy, t: t_ph}, scope='y_post')
    # crude approximation of the above
    y_post_mean = ed.copy(y, {zy: qzy.mean(), t: t_ph}, scope='y_post_mean')

    # ------ Training -  Run variational inference

    # Create data dictionary for edward
    data = {x: x_ph, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}

    n_iter_per_epoch = n_train // batch_size

    inference = ed.KLqp({zt: qzt, zy: qzy}, data=data)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    data_scaling = n_train / batch_size  # to scale the likelihood against the prior
    inference.initialize(optimizer=optimizer,
                         n_samples=5,
                         n_iter=n_iter_per_epoch * n_epoch,
                         scale={
                             x: data_scaling,
                             t: data_scaling,
                             y: data_scaling
                         })

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                info_dict = inference.update(feed_dict={
                    x_ph: x_b,
                    t_ph: t_b,
                    y_ph: y_b
                })
                inference.print_progress(info_dict)
                avg_loss += info_dict['loss']
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            # print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -
        x_test = test_set['X']
        H_test = test_set['H']

        z_y_test = sess.run(zy_learned.mean(), feed_dict={x_ph: x_test})
        z_t_test = sess.run(zt_learned.mean(), feed_dict={x_ph: x_test})
        z_y_train = sess.run(zy_learned.mean(), feed_dict={x_ph: x_train})

        if args.show_plots:
            treat_probs = sess.run(qt.mean(), feed_dict={x_ph: x_test})
            plt.scatter(z_t_test.flatten(),
                        treat_probs.flatten(),
                        label='Estimated Treatment Probability')
            plt.legend()
            plt.xlabel(r'$z_t$')
            plt.ylabel('Probability')
            plt.show()

            # plt.scatter(x_test[:, 1].flatten(), z_y_test.flatten())
            # plt.xlabel('X_1')
            # plt.ylabel('z_y')
            # plt.show()
            #
            plt.scatter(H_test.flatten(), z_y_test.flatten())
            plt.xlabel('H')
            plt.ylabel(r'$z_y$', fontsize=16)
            plt.show()

            plt.scatter(test_set['W'].flatten(), z_t_test.flatten())
            plt.xlabel('W')
            plt.ylabel(r'$z_t$')
            plt.show()

        # CATE estimation:
        if args.estimation_type == 'approx_posterior':
            forced_t = np.ones((args.n_test, 1))
            est_y0 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: 0 * forced_t
                              })
            est_y1 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: forced_t
                              })
            # std_y1 = sess.run(y_post.stddev(), feed_dict={x_ph: x_test, t_ph: forced_t})
        elif args.estimation_type == 'latent_matching':
            est_y0, est_y1 = matching_estimate(z_y_train, t_train, y_train,
                                               z_y_test, args.n_neighbours)
        else:
            raise ValueError('Unrecognised estimation_type')

        return evalaute_effect_estimate(
            est_y0,
            est_y1,
            test_set,
            args,
            model_name='Separated CEVAE - Latent dims:  ' + str(latent_dims),
            estimation_type=args.estimation_type)
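
A rough usage sketch follows. The batch_generator below is reconstructed only from how it is called inside learn_separated, and make_toy_set, the argparse.Namespace values, and the data shapes are assumptions for illustration, not part of the original project. Running it also assumes the remaining repo helpers (get_fc_layer_fn, matching_estimate, evalaute_effect_estimate) are importable, and the evaluation helper may expect extra ground-truth keys in test_set that are not reconstructed here.

import argparse
import numpy as np


def batch_generator(indices, batch_size):
    # Hypothetical stand-in for the project's batch_generator: yields successive
    # fixed-size chunks of the given index permutation, matching the
    # `idx = next(train_generator)` usage inside learn_separated.
    for start in range(0, len(indices), batch_size):
        yield indices[start:start + batch_size]


def make_toy_set(n, x_dim=10):
    # Synthetic data with the dict keys the function reads ('X', 'T', 'Y'),
    # plus 'H' and 'W', which the plotting branch expects in test_set.
    return {
        'X': np.random.randn(n, x_dim).astype(np.float32),
        'T': np.random.binomial(1, 0.5, size=(n, 1)).astype(np.float32),
        'Y': np.random.randn(n, 1).astype(np.float32),
        'H': np.random.randn(n, 1).astype(np.float32),
        'W': np.random.randn(n, 1).astype(np.float32),
    }


args = argparse.Namespace(
    n_epoch=100,
    n_test=500,
    n_neighbours=5,
    model_type='separated',              # anything except 'separated_with_confounder'
    estimation_type='approx_posterior',  # or 'latent_matching'
    show_plots=False,
)
train_set = make_toy_set(1000)
test_set = make_toy_set(args.n_test)
result = learn_separated(args, train_set, test_set)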
Code Example #2
inference.finalize()

# CRITICISM
Z_pred = qZ.mean().eval().argmax(axis=1)  # most probable membership per node
pi_pred = qPi.mean().eval()               # posterior mean of the mixing proportions

# print("Actual")
# print(np.asarray(phi))
# print(membership_act)

# print("predicted")
# print(pi_pred)
# print(qgamma.mean().eval())

# Threshold the model's link probabilities to get a predicted adjacency matrix
# and compare it entry-wise with the observed one.
X_pred = np.array(X.mean().eval() > 0.5, dtype=int)
cnt = N * N
correct = np.sum(X_data == X_pred)

plt.subplot(211)
plt.imshow(X_data, cmap='Greys')

plt.subplot(212)
plt.imshow(X_pred, cmap='Greys')
plt.show()

print("Correctly predicted: ", correct)
print("Total entries: ", cnt)

print("Train Accuracy: ", correct / cnt)