Example #1
    def test(self):
        self.model.eval()
        test_loss = 0
        test_loader = batch_generator(self.x_valid, self.y_valid,
                                      self.PopulationSize)
        for data, target in test_loader:
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output = self.model.forward(data)
            loss = calculateLoss(output,
                                 target,
                                 lossFunction=self.loss_function)
            test_loss += loss.data[0] * data.size()[0]

        test_loss /= self.x_valid.size()[0]
        print('Test Loss: {}'.format(test_loss))
        return test_loss
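
The batch_generator and calculateLoss helpers that Examples #1-#5 rely on are not shown. A minimal sketch of what these snippets appear to assume (batches come back as a list of (data, target) tensor pairs, so that len() also works as in Example #5, and the loss helper dispatches to a standard criterion; the exact signatures are assumptions):

import torch.nn.functional as F

def batch_generator(x, y, batch_size):
    # Assumed helper: split two aligned tensors into (data, target) mini-batches.
    # Returned as a list so that both iteration and len() work.
    n = x.size(0)
    return [(x[s:s + batch_size], y[s:s + batch_size])
            for s in range(0, n, batch_size)]

def calculateLoss(output, target, lossFunction='cross_entropy'):
    # Assumed helper: apply the configured criterion to one batch.
    if callable(lossFunction):
        return lossFunction(output, target)
    if lossFunction == 'cross_entropy':
        return F.cross_entropy(output, target)
    if lossFunction == 'mse':
        return F.mse_loss(output, target)
    raise ValueError('Unknown loss function: {}'.format(lossFunction))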
Example #2
def train(epoch):
    model.train()
    train_loader = batch_generator(x_train, y_train, args.batch_size)

    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data),
                x_train.size()[0],
                100. * batch_idx * len(data) / x_train.size()[0],
                loss.data[0]))
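
Examples #1-#5 target the pre-0.4 PyTorch API: Variable, volatile=True, and loss.data[0] were all deprecated or removed in PyTorch 0.4. A minimal sketch of the same training step on a current PyTorch, assuming the same model, optimizer, args, x_train, y_train, and batch_generator as above:

import torch
import torch.nn.functional as F

def train_modern(epoch):
    # Same loop for PyTorch >= 0.4: tensors carry autograd state directly,
    # .to(device) replaces the manual .cuda() calls, and loss.item() replaces loss.data[0].
    model.train()
    device = torch.device('cuda' if args.cuda else 'cpu')
    model.to(device)
    for batch_idx, (data, target) in enumerate(
            batch_generator(x_train, y_train, args.batch_size)):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), loss.item()))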
Example #3
    def updateOutput(self, inputData, targets):
        self.model.eval()
        total_loss = 0
        data_loader = batch_generator(inputData, targets)
        for data, target in data_loader:
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data, volatile=True), Variable(target)
            output = self.model.forward(data)
            loss = calculateLoss(output,
                                 target,
                                 lossFunction=self.loss_function)
            total_loss += loss.data[0] * data.size()[0]

        self.output = output

        self.loss = total_loss / inputData.size()[0]

        return [
            self.loss,
        ]
Example #4
    def train(self, epoch):
        self.model.train()
        train_loader = batch_generator(self.x_train, self.y_train,
                                       self.PopulationSize)
        for batch_idx, (data, target) in enumerate(train_loader):
            if torch.cuda.is_available():
                data, target = data.cuda(), target.cuda()
            data, target = Variable(data), Variable(target)
            self.optim.zero_grad()
            output = self.model.forward(data)
            loss = calculateLoss(output,
                                 target,
                                 lossFunction=self.loss_function)
            loss.backward()
            self.optim.step()
            if batch_idx % 10 == 0:
                print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                    epoch, batch_idx * len(data),
                    self.x_train.size()[0],
                    100. * batch_idx * len(data) / self.x_train.size()[0],
                    loss.data[0]))
Example #5
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    test_loader = batch_generator(x_valid, y_valid, args.batch_size)
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()

    test_loss /= len(test_loader)  # loss function already averages over batch size
    print(
        '\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct,
            x_valid.size()[0], 100. * correct / x_valid.size()[0]))
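
On PyTorch 0.4 or later, this evaluation loop would wrap the forward passes in torch.no_grad() instead of constructing Variable(..., volatile=True). A sketch under the same assumptions as the training sketch above, accumulating a summed loss and dividing by the number of samples rather than the number of batches:

import torch
import torch.nn.functional as F

def test_modern():
    # Evaluation loop for PyTorch >= 0.4; torch.no_grad() disables autograd tracking.
    model.eval()
    device = torch.device('cuda' if args.cuda else 'cpu')
    test_loss, correct, n_samples = 0.0, 0, x_valid.size(0)
    with torch.no_grad():
        for data, target in batch_generator(x_valid, y_valid, args.batch_size):
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1)  # index of the max log-probability
            correct += pred.eq(target).sum().item()
    test_loss /= n_samples
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, n_samples, 100. * correct / n_samples))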
Example #6
def learn_supervised(args, train_set, test_set):

    # Parameters
    n_hidd = 512  # number of hidden units
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128

    fc_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']

    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]

    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#

    # ------ Define Inputs
    # define placeholder which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])

    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------  regression with a neural-network model y=NN(x,t)
    input = tf.concat([x_ph, t_ph], axis=1)
    hidden_layer = fc_layer(input, n_hidd, tf.nn.elu)
    hidden_layer = fc_layer(hidden_layer, n_hidd, tf.nn.elu)
    net_out = fc_layer(hidden_layer, 1, None)
    cost = tf.reduce_mean((net_out - y_ph)**2)

    # ------ Training

    batch_size = min(batch_size, n_train)
    n_iter_per_epoch = n_train // batch_size

    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)

    # end graph def

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                feed_dict = {x_ph: x_b, t_ph: t_b, y_ph: y_b}
                _, curr_cost = sess.run([optimizer, cost], feed_dict=feed_dict)

                avg_loss += curr_cost
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            if epoch % 50 == 0:
                print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -

        x_test = test_set['X']

        # estimate CATE per sample:
        forced_t = np.ones((args.n_test, 1))
        est_y1 = sess.run([net_out], feed_dict={
            x_ph: x_test,
            t_ph: forced_t
        })[0]
        est_y0 = sess.run([net_out],
                          feed_dict={
                              x_ph: x_test,
                              t_ph: 0 * forced_t
                          })[0]

        return evalaute_effect_estimate(est_y0,
                                        est_y1,
                                        test_set,
                                        args,
                                        model_name='supervised',
                                        estimation_type='')
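
Note that batch_generator is used differently here than in Examples #1-#5: it receives a permuted index array and a batch size, and index batches are drawn with next(). A minimal sketch of that assumed variant:

def batch_generator(indices, batch_size):
    # Assumed helper for Examples #6 and #7: yield successive index batches
    # from an already-permuted index array, for use with next().
    for start in range(0, len(indices), batch_size):
        yield indices[start:start + batch_size]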
Example #7
def learn_separated(args, train_set, test_set, anlysis_flag=False):

    # Parameters
    n_hidd = 1000  # number of hidden units per layer
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128

    hidden_layer = get_fc_layer_fn(l2_reg_scale=1e-4, depth=1)
    out_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']

    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]

    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#
    tf.reset_default_graph()
    # ------ Define Inputs ---------------------#
    # define placeholder which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])

    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------ Define generative model /decoder-----------------------#

    if anlysis_flag:
        z_t_dim = 1
        z_y_dim = 1
    else:
        # z_x_dim = 1
        z_t_dim = 2
        z_y_dim = 3
    # latent_dims = (z_x_dim, z_t_dim, z_y_dim)
    latent_dims = (z_t_dim, z_y_dim)
    # prior over latent variables:
    # p(zx) -
    # zx = Normal(loc=tf.zeros([n_ph, z_x_dim]), scale=tf.ones([n_ph, z_x_dim]))
    # p(zt) -
    zt = Normal(loc=tf.zeros([n_ph, z_t_dim]), scale=tf.ones([n_ph, z_t_dim]))
    # p(zy) -
    zy = Normal(loc=tf.zeros([n_ph, z_y_dim]), scale=tf.ones([n_ph, z_y_dim]))
    z = tf.concat([zt, zy], axis=1)

    # p(x|z) - likelihood of proxy X
    # z = tf.concat([zx, zt, zy], axis=1)

    hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    x = Normal(loc=out_layer(hidden, x_dim, None),
               scale=out_layer(hidden, x_dim, tf.nn.softplus),
               name='gaussian_px_z')

    # p(t|zt)
    if args.model_type == 'separated_with_confounder':
        hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    else:
        hidden = hidden_layer(zt, n_hidd, tf.nn.elu)
    probs = out_layer(hidden, 1, tf.nn.sigmoid)  # output in [0,1]
    t = Bernoulli(probs=probs, dtype=tf.float32, name='bernoulli_pt_z')

    # p(y|t,zy)
    hidden = hidden_layer(zy, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_y_t0 = out_layer(hidden, 1, None)
    mu_y_t1 = out_layer(hidden, 1, None)
    # y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=tf.ones_like(mu_y_t0))
    sigma_y_t0 = out_layer(hidden, 1, tf.nn.softplus)
    sigma_y_t1 = out_layer(hidden, 1, tf.nn.softplus)
    y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0,
               scale=t * sigma_y_t1 + (1. - t) * sigma_y_t0)

    # ------ Define inference model - CEVAE variational approximation (encoder)

    # q(t|x)
    hqt = hidden_layer(x_ph, n_hidd, tf.nn.elu)
    probs_t = out_layer(hqt, 1, tf.nn.sigmoid)  # output in [0,1]
    qt = Bernoulli(probs=probs_t, dtype=tf.float32)

    # q(y|x,t)
    hqy = hidden_layer(x_ph, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_qy_t0 = out_layer(hqy, 1, None)
    mu_qy_t1 = out_layer(hqy, 1, tf.nn.elu)
    sigma_qy_t1 = out_layer(hqy, 1, tf.nn.softplus)
    sigma_qy_t0 = out_layer(hqy, 1, tf.nn.softplus)
    # qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0))
    qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0,
                scale=qt * sigma_qy_t1 + (1. - qt) * sigma_qy_t0)

    # # q(z_x|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    # muq_t0 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t0 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # muq_t1 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t1 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # qzx = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
    #             scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # shared hidden layer
    inpt2 = tf.concat([x_ph, qy], axis=1)
    hqz = out_layer(inpt2, n_hidd, tf.nn.elu)

    # q(zt|x,t,y)
    muq_t0 = out_layer(hqz, z_t_dim, None)
    sigmaq_t0 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_t_dim, None)
    sigmaq_t1 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    qzt = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # q(zy|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    muq_t0 = out_layer(hqz, z_y_dim, None)
    sigmaq_t0 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_y_dim, None)
    sigmaq_t1 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    qzy = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # end graph def

    # ------ Criticism / evaluation graph:
    zy_learned = ed.copy(qzy, {x: x_ph})
    zt_learned = ed.copy(qzt, {x: x_ph})

    # sample posterior predictive for p(y|z_y,t)
    y_post = ed.copy(y, {zy: qzy, t: t_ph}, scope='y_post')
    # crude approximation of the above
    y_post_mean = ed.copy(y, {zy: qzy.mean(), t: t_ph}, scope='y_post_mean')

    # ------ Training -  Run variational inference

    # Create data dictionary for edward
    data = {x: x_ph, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}

    batch_size = min(batch_size, n_train)
    n_iter_per_epoch = n_train // batch_size

    inference = ed.KLqp({zt: qzt, zy: qzy}, data=data)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    data_scaling = n_train / batch_size  # to scale likelihood against the prior
    inference.initialize(optimizer=optimizer,
                         n_samples=5,
                         n_iter=n_iter_per_epoch * n_epoch,
                         scale={
                             x: data_scaling,
                             t: data_scaling,
                             y: data_scaling
                         })

    with tf.Session() as sess:
        tf.global_variables_initializer().run()

        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                info_dict = inference.update(feed_dict={
                    x_ph: x_b,
                    t_ph: t_b,
                    y_ph: y_b
                })
                inference.print_progress(info_dict)
                avg_loss += info_dict['loss']
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            # print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -
        x_test = test_set['X']
        H_test = test_set['H']

        z_y_test = sess.run(zy_learned.mean(), feed_dict={x_ph: x_test})
        z_t_test = sess.run(zt_learned.mean(), feed_dict={x_ph: x_test})
        z_y_train = sess.run(zy_learned.mean(), feed_dict={x_ph: x_train})

        if args.show_plots:
            treat_probs = sess.run(qt.mean(), feed_dict={x_ph: x_test})
            plt.scatter(z_t_test.flatten(),
                        treat_probs.flatten(),
                        label='Estimated Treatment Probability')
            plt.legend()
            plt.xlabel(r'$z_t$')
            plt.ylabel('Probability')
            plt.show()

            # plt.scatter(x_test[:, 1].flatten(), z_y_test.flatten())
            # plt.xlabel('X_1')
            # plt.ylabel('z_y')
            # plt.show()
            #
            plt.scatter(H_test.flatten(), z_y_test.flatten())
            plt.xlabel('H')
            plt.ylabel(r'$z_y$', fontsize=16)
            plt.show()

            plt.scatter(test_set['W'].flatten(), z_t_test.flatten())
            plt.xlabel('W')
            plt.ylabel(r'$z_t$')
            plt.show()

        # CATE estimation:
        if args.estimation_type == 'approx_posterior':
            forced_t = np.ones((args.n_test, 1))
            est_y0 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: 0 * forced_t
                              })
            est_y1 = sess.run(y_post.mean(),
                              feed_dict={
                                  x_ph: x_test,
                                  t_ph: forced_t
                              })
            # std_y1 = sess.run(y_post.stddev(), feed_dict={x_ph: x_test, t_ph: forced_t})
        elif args.estimation_type == 'latent_matching':
            est_y0, est_y1 = matching_estimate(z_y_train, t_train, y_train,
                                               z_y_test, args.n_neighbours)
        else:
            raise ValueError('Unrecognised estimation_type')

        return evalaute_effect_estimate(
            est_y0,
            est_y1,
            test_set,
            args,
            model_name='Separated CEVAE - Latent dims:  ' + str(latent_dims),
            estimation_type=args.estimation_type)
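
get_fc_layer_fn, matching_estimate, and evalaute_effect_estimate come from elsewhere in the repository. Judging only from how it is called above, get_fc_layer_fn returns a builder for fully connected layers with L2 weight regularization; the handling of the depth argument and the exact regularizer below are assumptions (TF 1.x API):

import tensorflow as tf

def get_fc_layer_fn(l2_reg_scale=1e-4, depth=1):
    # Assumed helper: return a function that stacks `depth` dense layers,
    # each with L2 kernel regularization at the given scale.
    regularizer = tf.contrib.layers.l2_regularizer(scale=l2_reg_scale)

    def fc_layer(inputs, units, activation):
        out = inputs
        for _ in range(depth):
            out = tf.layers.dense(out, units, activation=activation,
                                  kernel_regularizer=regularizer)
        return out

    return fc_layer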