def test(self):
    self.model.eval()
    test_loss = 0
    test_loader = batch_generator(self.x_valid, self.y_valid,
                                  self.PopulationSize)
    for data, target in test_loader:
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = self.model.forward(data)
        loss = calculateLoss(output, target, lossFunction=self.loss_function)
        test_loss += loss.data[0] * data.size()[0]
    test_loss /= self.x_valid.size()[0]
    print('Test Loss: {}'.format(test_loss))
    return test_loss
def train(epoch):
    model.train()
    train_loader = batch_generator(x_train, y_train, args.batch_size)
    for batch_idx, (data, target) in enumerate(train_loader):
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        optimizer.zero_grad()
        output = model(data)
        loss = F.cross_entropy(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), x_train.size()[0],
                100. * batch_idx * len(data) / x_train.size()[0],
                loss.data[0]))
def updateOutput(self, inputData, targets):
    self.model.eval()
    total_loss = 0
    data_loader = batch_generator(inputData, targets)
    for data, target in data_loader:
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = self.model.forward(data)
        loss = calculateLoss(output, target, lossFunction=self.loss_function)
        total_loss += loss.data[0] * data.size()[0]
    self.output = output
    self.loss = total_loss / inputData.size()[0]
    return [self.loss, ]
def train(self, epoch):
    self.model.train()
    train_loader = batch_generator(self.x_train, self.y_train,
                                   self.PopulationSize)
    for batch_idx, (data, target) in enumerate(train_loader):
        if torch.cuda.is_available():
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data), Variable(target)
        self.optim.zero_grad()
        output = self.model.forward(data)
        loss = calculateLoss(output, target, lossFunction=self.loss_function)
        loss.backward()
        self.optim.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), self.x_train.size()[0],
                100. * batch_idx * len(data) / self.x_train.size()[0],
                loss.data[0]))
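# `calculateLoss`, used by the train/test methods above, is not defined in this
# file. A minimal sketch follows, assuming `lossFunction` is either a callable
# criterion or a string naming one; the string names and default are illustrative
# assumptions, not taken from the original code.
import torch.nn.functional as F

def calculateLoss(output, target, lossFunction=F.cross_entropy):
    """Apply the configured loss function to a batch of predictions."""
    if callable(lossFunction):
        return lossFunction(output, target)
    # fall back to a small lookup table when a string name is given (assumed)
    losses = {'cross_entropy': F.cross_entropy,
              'nll': F.nll_loss,
              'mse': F.mse_loss}
    return losses[lossFunction](output, target)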
def test(epoch):
    model.eval()
    test_loss = 0
    correct = 0
    test_loader = batch_generator(x_valid, y_valid, args.batch_size)
    for data, target in test_loader:
        if args.cuda:
            data, target = data.cuda(), target.cuda()
        data, target = Variable(data, volatile=True), Variable(target)
        output = model(data)
        test_loss += F.nll_loss(output, target).data[0]
        pred = output.data.max(1)[1]  # get the index of the max log-probability
        correct += pred.eq(target.data).cpu().sum()
    test_loss /= len(test_loader)  # loss function already averages over batch size
    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, x_valid.size()[0],
        100. * correct / x_valid.size()[0]))
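# The PyTorch loops above iterate over `batch_generator(inputs, targets, batch_size)`,
# which is not shown in this file. A minimal sketch follows, assuming it simply
# slices two aligned tensors into consecutive mini-batches; the real helper may
# shuffle, drop the last partial batch, or differ in other ways.
def batch_generator(inputs, targets, batch_size=None):
    """Yield (data, target) mini-batches from aligned input/target tensors."""
    n = inputs.size()[0]
    if batch_size is None:
        batch_size = n  # fall back to a single full batch when no size is given
    for start in range(0, n, batch_size):
        end = min(start + batch_size, n)
        yield inputs[start:end], targets[start:end]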
def learn_supervised(args, train_set, test_set):
    # Parameters
    n_hidd = 512  # number of hidden units
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128
    fc_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']
    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]
    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#
    # ------ Define Inputs
    # define placeholders which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])
    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------ regression with a neural-network model y=NN(x,t)
    input = tf.concat([x_ph, t_ph], axis=1)
    hidden_layer = fc_layer(input, n_hidd, tf.nn.elu)
    hidden_layer = fc_layer(hidden_layer, n_hidd, tf.nn.elu)
    net_out = fc_layer(hidden_layer, 1, None)
    cost = tf.reduce_mean((net_out - y_ph)**2)

    # ------ Training
    batch_size = min(batch_size, n_train)
    n_iter_per_epoch = n_train // batch_size
    optimizer = tf.train.AdamOptimizer(
        learning_rate=learning_rate).minimize(cost)
    # end graph def

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                feed_dict = {x_ph: x_b, t_ph: t_b, y_ph: y_b}
                _, curr_cost = sess.run([optimizer, cost], feed_dict=feed_dict)
                avg_loss += curr_cost
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            if epoch % 50 == 0:
                print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -
        x_test = test_set['X']
        # estimate CATE per sample:
        forced_t = np.ones((args.n_test, 1))
        est_y1 = sess.run([net_out], feed_dict={x_ph: x_test,
                                                t_ph: forced_t})[0]
        est_y0 = sess.run([net_out], feed_dict={x_ph: x_test,
                                                t_ph: 0 * forced_t})[0]
        return evalaute_effect_estimate(est_y0, est_y1, test_set, args,
                                        model_name='supervised',
                                        estimation_type='')
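# The TensorFlow functions in this file (learn_supervised above and learn_separated
# below) rely on two helpers that are not defined here: `get_fc_layer_fn`, which
# returns a fully-connected layer builder, and an index-based `batch_generator`.
# The sketches below show one plausible TF1-style implementation of each; the use
# of `tf.contrib.layers.l2_regularizer`, `tf.layers.dense`, the meaning of `depth`,
# and the wrap-around behaviour are assumptions, not taken from the original code.
import tensorflow as tf

def get_fc_layer_fn(l2_reg_scale=1e-4, depth=1):
    """Return a fully-connected layer function with L2 weight regularization."""
    regularizer = tf.contrib.layers.l2_regularizer(scale=l2_reg_scale)

    def fc_layer(inputs, n_units, activation):
        out = inputs
        for _ in range(depth):  # depth > 1 stacks several dense layers (assumed)
            out = tf.layers.dense(out, n_units, activation=activation,
                                  kernel_regularizer=regularizer)
        return out

    return fc_layer


def batch_generator(indices, batch_size):
    """Yield successive slices of a (typically permuted) index array.

    Note: this index-based signature differs from the (inputs, targets, batch_size)
    form used by the PyTorch loops above, so the project presumably defines two
    separate helpers.
    """
    start = 0
    while True:
        yield indices[start:start + batch_size]
        start += batch_size
        if start >= len(indices):
            start = 0  # wrap around if more batches are requested than available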
def learn_separated(args, train_set, test_set, anlysis_flag=False):
    # Parameters
    n_hidd = 1000  # number of hidden units per layer
    n_epoch = args.n_epoch
    learning_rate = 0.001
    batch_size = 128
    hidden_layer = get_fc_layer_fn(l2_reg_scale=1e-4, depth=1)
    out_layer = get_fc_layer_fn(l2_reg_scale=1e-4)

    x_train, t_train, y_train = train_set['X'], train_set['T'], train_set['Y']
    n_train = x_train.shape[0]
    x_dim = x_train.shape[1]
    batch_size = min(batch_size, n_train)

    # ------ Define Graph ---------------------#
    tf.reset_default_graph()

    # ------ Define Inputs ---------------------#
    # define placeholders which will receive data batches
    x_ph = tf.placeholder(tf.float32, [None, x_dim])
    t_ph = tf.placeholder(tf.float32, [None, 1])
    y_ph = tf.placeholder(tf.float32, [None, 1])
    n_ph = tf.shape(x_ph)[0]  # number of samples fed to placeholders

    # ------ Define generative model / decoder -----------------------#
    if anlysis_flag:
        z_t_dim = 1
        z_y_dim = 1
    else:
        # z_x_dim = 1
        z_t_dim = 2
        z_y_dim = 3
    # latent_dims = (z_x_dim, z_t_dim, z_y_dim)
    latent_dims = (z_t_dim, z_y_dim)

    # prior over latent variables:
    # p(zx) -
    # zx = Normal(loc=tf.zeros([n_ph, z_x_dim]), scale=tf.ones([n_ph, z_x_dim]))
    # p(zt) -
    zt = Normal(loc=tf.zeros([n_ph, z_t_dim]), scale=tf.ones([n_ph, z_t_dim]))
    # p(zy) -
    zy = Normal(loc=tf.zeros([n_ph, z_y_dim]), scale=tf.ones([n_ph, z_y_dim]))
    z = tf.concat([zt, zy], axis=1)

    # p(x|z) - likelihood of proxy X
    # z = tf.concat([zx, zt, zy], axis=1)
    hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    x = Normal(loc=out_layer(hidden, x_dim, None),
               scale=out_layer(hidden, x_dim, tf.nn.softplus),
               name='gaussian_px_z')

    # p(t|zt)
    if args.model_type == 'separated_with_confounder':
        hidden = hidden_layer(z, n_hidd, tf.nn.elu)
    else:
        hidden = hidden_layer(zt, n_hidd, tf.nn.elu)
    probs = out_layer(hidden, 1, tf.nn.sigmoid)  # output in [0,1]
    t = Bernoulli(probs=probs, dtype=tf.float32, name='bernoulli_pt_z')

    # p(y|t,zy)
    hidden = hidden_layer(zy, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_y_t0 = out_layer(hidden, 1, None)
    mu_y_t1 = out_layer(hidden, 1, None)
    # y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0, scale=tf.ones_like(mu_y_t0))
    sigma_y_t0 = out_layer(hidden, 1, tf.nn.softplus)
    sigma_y_t1 = out_layer(hidden, 1, tf.nn.softplus)
    y = Normal(loc=t * mu_y_t1 + (1. - t) * mu_y_t0,
               scale=t * sigma_y_t1 + (1. - t) * sigma_y_t0)

    # ------ Define inference model - CEVAE variational approximation (encoder)
    # q(t|x)
    hqt = hidden_layer(x_ph, n_hidd, tf.nn.elu)
    probs_t = out_layer(hqt, 1, tf.nn.sigmoid)  # output in [0,1]
    qt = Bernoulli(probs=probs_t, dtype=tf.float32)

    # q(y|x,t)
    hqy = hidden_layer(x_ph, n_hidd, tf.nn.elu)  # shared hidden layer
    mu_qy_t0 = out_layer(hqy, 1, None)
    mu_qy_t1 = out_layer(hqy, 1, tf.nn.elu)
    sigma_qy_t1 = out_layer(hqy, 1, tf.nn.softplus)
    sigma_qy_t0 = out_layer(hqy, 1, tf.nn.softplus)
    # qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0, scale=tf.ones_like(mu_qy_t0))
    qy = Normal(loc=qt * mu_qy_t1 + (1. - qt) * mu_qy_t0,
                scale=qt * sigma_qy_t1 + (1. - qt) * sigma_qy_t0)

    # # q(z_x|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    # muq_t0 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t0 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # muq_t1 = out_layer(hqz, z_x_dim, None)
    # sigmaq_t1 = out_layer(hqz, z_x_dim, tf.nn.softplus)
    # qzx = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
    #              scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # shared hidden layer
    inpt2 = tf.concat([x_ph, qy], axis=1)
    hqz = out_layer(inpt2, n_hidd, tf.nn.elu)

    # q(zt|x,t,y)
    muq_t0 = out_layer(hqz, z_t_dim, None)
    sigmaq_t0 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_t_dim, None)
    sigmaq_t1 = out_layer(hqz, z_t_dim, tf.nn.softplus)
    qzt = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)

    # q(zy|x,t,y)
    # inpt2 = tf.concat([x_ph, qy], axis=1)
    # hqz = hidden_layer(inpt2, n_hidd, tf.nn.elu)  # shared hidden layer
    muq_t0 = out_layer(hqz, z_y_dim, None)
    sigmaq_t0 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    muq_t1 = out_layer(hqz, z_y_dim, None)
    sigmaq_t1 = out_layer(hqz, z_y_dim, tf.nn.softplus)
    qzy = Normal(loc=qt * muq_t1 + (1. - qt) * muq_t0,
                 scale=qt * sigmaq_t1 + (1. - qt) * sigmaq_t0)
    # end graph def

    # ------ Criticism / evaluation graph:
    zy_learned = ed.copy(qzy, {x: x_ph})
    zt_learned = ed.copy(qzt, {x: x_ph})
    # sample posterior predictive for p(y|z_y,t)
    y_post = ed.copy(y, {zy: qzy, t: t_ph}, scope='y_post')
    # crude approximation of the above
    y_post_mean = ed.copy(y, {zy: qzy.mean(), t: t_ph}, scope='y_post_mean')

    # ------ Training - Run variational inference
    # Create data dictionary for edward
    data = {x: x_ph, y: y_ph, qt: t_ph, t: t_ph, qy: y_ph}
    batch_size = min(batch_size, n_train)
    n_iter_per_epoch = n_train // batch_size
    inference = ed.KLqp({zt: qzt, zy: qzy}, data=data)
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    data_scaling = n_train / batch_size  # to scale likelihood against prior
    inference.initialize(optimizer=optimizer,
                         n_samples=5,
                         n_iter=n_iter_per_epoch * n_epoch,
                         scale={x: data_scaling,
                                t: data_scaling,
                                y: data_scaling})

    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for epoch in range(n_epoch):
            train_generator = batch_generator(np.random.permutation(n_train),
                                              batch_size)
            avg_loss = 0.0
            for j in range(n_iter_per_epoch):
                # Take batch:
                idx = next(train_generator)
                x_b, t_b, y_b = x_train[idx], t_train[idx], y_train[idx]
                info_dict = inference.update(feed_dict={x_ph: x_b,
                                                        t_ph: t_b,
                                                        y_ph: y_b})
                inference.print_progress(info_dict)
                avg_loss += info_dict['loss']
            avg_loss = avg_loss / n_iter_per_epoch
            avg_loss = avg_loss / batch_size
            # print('Epoch {}, avg loss {}'.format(epoch, avg_loss))

        # ------ Evaluation -
        x_test = test_set['X']
        H_test = test_set['H']
        z_y_test = sess.run(zy_learned.mean(), feed_dict={x_ph: x_test})
        z_t_test = sess.run(zt_learned.mean(), feed_dict={x_ph: x_test})
        z_y_train = sess.run(zy_learned.mean(), feed_dict={x_ph: x_train})

        if args.show_plots:
            treat_probs = sess.run(qt.mean(), feed_dict={x_ph: x_test})
            plt.scatter(z_t_test.flatten(), treat_probs.flatten(),
                        label='Estimated Treatment Probability')
            plt.legend()
            plt.xlabel(r'$z_t$')
            plt.ylabel('Probability')
            plt.show()

            # plt.scatter(x_test[:, 1].flatten(), z_y_test.flatten())
            # plt.xlabel('X_1')
            # plt.ylabel('z_y')
            # plt.show()

            plt.scatter(H_test.flatten(), z_y_test.flatten())
            plt.xlabel('H')
            plt.ylabel(r'$z_y$', fontsize=16)
            plt.show()

            plt.scatter(test_set['W'].flatten(), z_t_test.flatten())
            plt.xlabel('W')
            plt.ylabel(r'$z_t$')
            plt.show()

        # CATE estimation:
        if args.estimation_type == 'approx_posterior':
            forced_t = np.ones((args.n_test, 1))
            est_y0 = sess.run(y_post.mean(), feed_dict={x_ph: x_test,
                                                        t_ph: 0 * forced_t})
            est_y1 = sess.run(y_post.mean(), feed_dict={x_ph: x_test,
                                                        t_ph: forced_t})
            # std_y1 = sess.run(y_post.stddev(), feed_dict={x_ph: x_test, t_ph: forced_t})
        elif args.estimation_type == 'latent_matching':
            est_y0, est_y1 = matching_estimate(z_y_train, t_train, y_train,
                                               z_y_test, args.n_neighbours)
        else:
            raise ValueError('Unrecognised estimation_type')

        return evalaute_effect_estimate(
            est_y0, est_y1, test_set, args,
            model_name='Separated CEVAE - Latent dims: ' + str(latent_dims),
            estimation_type=args.estimation_type)
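# `matching_estimate` is called above but not defined in this file. Below is a
# plausible k-nearest-neighbour matching sketch consistent with the call site
# (latent representations of the training and test sets, training treatments and
# outcomes, and a neighbour count); the real implementation may differ.
import numpy as np
from sklearn.neighbors import NearestNeighbors

def matching_estimate(z_train, t_train, y_train, z_test, n_neighbours):
    """Estimate potential outcomes by matching test points to training points
    in the learned latent space z_y."""
    treated = t_train.flatten() > 0.5
    control = ~treated
    estimates = {}
    for key, mask in (('y1', treated), ('y0', control)):
        # find the nearest treated (or control) training points for each test point
        nn = NearestNeighbors(n_neighbors=n_neighbours).fit(z_train[mask])
        _, idx = nn.kneighbors(z_test)
        # average the observed outcomes of the matched neighbours
        estimates[key] = y_train[mask][idx].mean(axis=1)
    return estimates['y0'], estimates['y1']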