import edward as ed
import tensorflow as tf
from edward.models import Bernoulli, Normal
from edward.util import Progbar
from tensorflow.examples.tutorials.mnist import input_data

# DATA_DIR, M (batch size), d (latent dimension), generative_network,
# and inference_network are defined earlier in the script.

# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
z = Normal(mu=tf.zeros([M, d]), sigma=tf.ones([M, d]))
logits = generative_network(z)
x = Bernoulli(logits=logits)

# INFERENCE
x_ph = tf.placeholder(tf.float32, [M, 28 * 28])
mu, sigma = inference_network(x_ph)
qz = Normal(mu=mu, sigma=sigma)

# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {x: x_ph}
inference = ed.ReparameterizationKLKLqp({z: qz}, data)
optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
inference.initialize(optimizer=optimizer, use_prettytensor=True)

hidden_rep = tf.sigmoid(logits)

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']

    # Report a lower bound on the average per-image log-likelihood.
    avg_loss = avg_loss / n_iter_per_epoch / M
    print "log p(x) >= %.3f" % avg_loss
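# Sketch, not part of the original script: `hidden_rep` above decodes a
# fresh draw of z from the prior, so evaluating it yields M generated
# digits; the reshape to 28x28 is for plotting.
images = hidden_rep.eval()  # shape (M, 28 * 28), values in [0, 1]
images = images.reshape(-1, 28, 28)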
def calSNP(self, train, testdata, load_path, n_sample1=100,
           traindir=None, task=None):
    from sklearn.metrics import roc_curve, auc
    import time
    startt = time.time()
    startc = time.clock()
    sess = ed.get_session()
    result = self.net(self.inputs, calsnp=True)
    fine_tune = self.fine_tune(result, self.cnnoutput, task)
    fine_tune_ = Bernoulli(logits=fine_tune)
    inference = ed.ReparameterizationKLKLqp(
        self.diction,
        data={fine_tune_: tf.reshape(tf.cast(self.y, tf.int32), [-1])})
    inference.initialize(n_samples=1, optimizer=self.optimizer,
                         n_iter=5000, kl_scaling=self.scaldict)
    # Posterior-predictive copy of the fine-tuned logits.
    y_copy = ed.copy(fine_tune, self.diction)
    # Restore everything except the task-specific 'ww' weights.
    collection = [item for item in tf.global_variables()
                  if 'ww' not in item.name]
    res = []
    # Out-of-range or unspecified tasks map to the extra fine-tuning task.
    if task is None or task > self.n_task:
        task = self.n_task + 1
    test = testdata
    print "START FINE-TUNING"
    sess.run(tf.global_variables_initializer())
    saver = tf.train.Saver(collection)
    saver.restore(sess, load_path)
    random.shuffle(train)
    saver = tf.train.Saver()  # full saver for checkpointing below
    for k in range(len(train) * 15 / 150 + 1):
        klist = self.random_sample(len(train), 150)
        temptrain = [train[w] for w in klist]
        # Each pair is trained in both orders, (a, b) and (b, a).
        trainseq = ([item[0] for item in temptrain] +
                    [item[1] for item in temptrain])
        traintag = [max(item[-1], 0) for item in temptrain]
        ttask = [task for item in trainseq]
        info_dict = inference.update({self.inputs: trainseq,
                                      self.y: traintag,
                                      self.task: ttask})
        trainseq = ([item[1] for item in temptrain] +
                    [item[0] for item in temptrain])
        traintag = [max(item[-1], 0) for item in temptrain]
        ttask = [task for item in trainseq]
        info_dict = inference.update({self.inputs: trainseq,
                                      self.y: traintag,
                                      self.task: ttask})
        if traindir is not None and k % 200 == 0:
            iterr = sess.run(self.iter)
            sess.run(tf.assign(self.iter, iterr + 1 + k * 5 + 1))
            saver.save(sess, traindir + 'model.ckpt', global_step=iterr)
    # Score the test pairs in batches of 150.
    tempn = len(test)
    tw = 0
    while tw < tempn:
        temp = test[tw:tw + 150]
        inputs = [item[0] for item in temp] + [item[1] for item in temp]
        label = [item[-1] for item in temp]
        ttask = [task for item in inputs]
        # Monte Carlo estimate of the posterior predictive:
        # n_sample1 forward passes per batch.
        pred = np.array([sess.run(y_copy, {self.inputs: inputs,
                                           self.task: ttask})
                         for i in range(n_sample1)])
        mean = np.mean(pred, 0).tolist()
        var = np.var(pred, 0).tolist()
        res.extend([[v1, v2, v3] for v1, v2, v3 in zip(mean, var, label)])
        del pred, mean, var
        tw += 150
    endt = time.time()
    endc = time.clock()
    r = [item[0] for item in res]
    l = [item[-1] for item in res]
    fpr, tpr, t = roc_curve(l, r, pos_label=1)
    print "RUNNING TIME %f, AUC %f" % (endt - startt, auc(fpr, tpr))
    sys.stdout.flush()
    return res
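# Toy sketch of consuming calSNP's return value; each row of `res` is
# [predictive mean, predictive variance, label], as built above. The
# three rows here are made-up numbers for illustration only.
from sklearn.metrics import roc_curve, auc

res = [[0.9, 0.01, 1], [0.2, 0.05, 0], [0.7, 0.02, 1]]
scores = [row[0] for row in res]
labels = [row[-1] for row in res]
fpr, tpr, _ = roc_curve(labels, scores, pos_label=1)
print "AUC %.3f" % auc(fpr, tpr)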
import os

import edward as ed
import tensorflow as tf
from edward.models import Normal
from edward.util import Progbar
from tensorflow.examples.tutorials.mnist import input_data

# IMG_DIR, DATA_DIR, M (batch size), d (latent dimension), the
# NormalBernoulli model wrapper, and inference_network are defined
# earlier in the script.
if not os.path.exists(IMG_DIR):
    os.makedirs(IMG_DIR)

# DATA. MNIST batches are fed at training time.
mnist = input_data.read_data_sets(DATA_DIR, one_hot=True)

# MODEL
model = NormalBernoulli(d)

# INFERENCE
x_ph = tf.placeholder(tf.float32, [M, 28 * 28])
mu, sigma = inference_network(x_ph)
qz = Normal(mu=mu, sigma=sigma)

# Bind p(x, z) and q(z | x) to the same placeholder for x.
data = {'x': x_ph}
inference = ed.ReparameterizationKLKLqp({'z': qz}, data, model)
with tf.variable_scope("model"):
    optimizer = tf.train.AdamOptimizer(0.01, epsilon=1.0)
    inference.initialize(optimizer=optimizer, use_prettytensor=True)

# Reuse the model's variables to decode samples from the prior.
with tf.variable_scope("model", reuse=True):
    p_rep = tf.sigmoid(model.sample_prior(M))

init = tf.global_variables_initializer()
init.run()

n_epoch = 100
n_iter_per_epoch = 1000
for epoch in range(n_epoch):
    avg_loss = 0.0

    # Training loop as in the previous snippet: feed MNIST batches and
    # accumulate the variational loss.
    pbar = Progbar(n_iter_per_epoch)
    for t in range(1, n_iter_per_epoch + 1):
        pbar.update(t)
        x_train, _ = mnist.train.next_batch(M)
        info_dict = inference.update(feed_dict={x_ph: x_train})
        avg_loss += info_dict['loss']
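# Sketch, assuming matplotlib is installed (not shown in the original):
# `p_rep` decodes M prior draws, so its evaluation can be written into
# IMG_DIR as individual digit images.
import matplotlib.pyplot as plt

images = p_rep.eval().reshape(-1, 28, 28)
for m in range(M):
    plt.imsave(os.path.join(IMG_DIR, '%d.png' % m), images[m], cmap='gray')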
def train(self, traindata, iteration, batch_size, traindir,
          save_step=50, random_neg=True):
    import time
    start = time.time()
    start_c = time.clock()
    result = self.net(self.inputs)
    result = tf.matmul(result, self.w12)
    # Logistic loss via an observed Bernoulli: the logits are the
    # negated score times the +/-1 label, and the observed value is 0.
    lossvalue = -result * tf.cast(tf.reshape(self.y, [-1, 1]), tf.float32)
    lossvalue = Bernoulli(logits=lossvalue)
    resultv = tf.zeros(tf.shape(lossvalue), dtype=tf.int32)
    inference = ed.ReparameterizationKLKLqp(self.diction,
                                            data={lossvalue: resultv})
    random.shuffle(traindata)
    self.sess = ed.get_session()
    inference.initialize(n_samples=1, optimizer=self.optimizer,
                         n_iter=5000, logdir="./log",
                         kl_scaling=self.scaldict)
    saver = tf.train.Saver(max_to_keep=500)
    self.sess.run(tf.global_variables_initializer())
    if tf.train.get_checkpoint_state(traindir):
        saver.restore(self.sess, tf.train.latest_checkpoint(traindir))
        print "loading from train dir"
    else:
        print "building new model"
    # Split each task's examples into positives and negatives.
    trainpos = [[item for item in jtem if item[-1] > 0]
                for jtem in traindata]
    negset = [[item for item in jtem if item[-1] < 1]
              for jtem in traindata]
    sys.stdout.flush()
    for i in range(iteration):
        trainseq = []
        traintag = []
        task = []
        for j in range(self.n_task):
            if random_neg:
                # Cycle deterministically through positives and negatives
                # so every minibatch is class-balanced.
                ilist = [(i * batch_size / 2 + w) % len(trainpos[j])
                         for w in range(batch_size / 2)]
                temptrain = [trainpos[j][w] for w in ilist]
                ilist = [(i * batch_size / 2 + w) % len(negset[j])
                         for w in range(batch_size / 2)]
                temptrain += [negset[j][w] for w in ilist]
            else:
                ilist = self.random_sample(len(traindata[j]), batch_size)
                temptrain = [traindata[j][w] for w in ilist]
            tseq = [item[0] for item in temptrain]
            # Map labels from {0, 1} to {-1, +1}.
            ttag = [(item[-1] - 0.5) * 2 for item in temptrain]
            trainseq += tseq
            traintag += ttag
            task += [j for item in tseq]
        info_dict = inference.update({self.inputs: trainseq,
                                      self.y: traintag,
                                      self.task: task})
        del trainseq, traintag
        iterr = self.sess.run(self.iter)
        self.sess.run(tf.assign(self.iter, iterr + 1))
        if iterr % save_step == 0 or iterr == iteration - 1:
            saver.save(self.sess,
                       os.path.join(traindir, 'model.ckpt'),
                       global_step=iterr)
            end = time.time()
            end_c = time.clock()
            print "ITER %d TIME %f SAVING MODEL" % (iterr, end - start)
            sys.stdout.flush()
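# Standalone sketch verifying the loss construction in train(): observing
# 0 under Bernoulli(logits = -y * f) contributes log(1 - sigmoid(-y * f))
# = log(sigmoid(y * f)), i.e. the logistic log-likelihood of the +/-1
# label y under raw score f. The numbers below are arbitrary.
import numpy as np

def _sigmoid(v):
    return 1.0 / (1.0 + np.exp(-v))

y, f = 1.0, 2.5
print np.log(1.0 - _sigmoid(-y * f)), np.log(_sigmoid(y * f))  # identical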