def padX(paths, max_d, max_h, max_w):
    X_pads = []
    y_pads = []
    shapes = []
    for Xpath, ypath in paths:
        print(Xpath)
        # .npy files must be opened in binary mode for np.load
        with open(Xpath, 'rb') as Xin, open(ypath, 'rb') as yin:
            X = np.load(Xin)
            y = np.load(yin)
            assert list(X.shape) == list(y.shape)
            d, h, w = X.shape
            shapes.append(X.shape)
            print('before pad:', X.shape)
            # zero-pad each volume up to the common (max_d, max_h, max_w) shape
            X_pad = np.lib.pad(X, ((0, max_d - d), (0, max_h - h), (0, max_w - w)),
                               'constant', constant_values=0)
            X_pads.append(np.expand_dims(X_pad, -1))
            # X_pads += rotations6(X_pad)
            y_pad = np.lib.pad(y, ((0, max_d - d), (0, max_h - h), (0, max_w - w)),
                               'constant', constant_values=0)
            y_pads.append(np.expand_dims(y_pad, -1))
            print('after pad:', X_pad.shape)
            print('\n')
    return tg.SequentialIterator(X_pads, y_pads, shapes, batchsize=1)
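The commented-out `rotations6` call above is not defined in this snippet; a minimal sketch of what such a 3D augmentation helper might look like, matching the `X_pads += rotations6(X_pad)` usage (the six orientations chosen here are an assumption):

# Hypothetical sketch of rotations6: six axis-aligned orientations of a
# 3D volume, each returned with a trailing channel axis to match X_pads.
import numpy as np

def rotations6(vol):
    views = [
        vol,                            # original (d, h, w)
        np.rot90(vol, 1, axes=(0, 1)),  # 90 deg in the d-h plane
        np.rot90(vol, 2, axes=(0, 1)),  # 180 deg in the d-h plane
        np.rot90(vol, 3, axes=(0, 1)),  # 270 deg in the d-h plane
        np.rot90(vol, 1, axes=(0, 2)),  # 90 deg in the d-w plane
        np.rot90(vol, 1, axes=(1, 2)),  # 90 deg in the h-w plane
    ]
    return [np.expand_dims(v, -1) for v in views]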
def train(n_exp, h, w, c, nclass, batch_size=100, tgmodel=True):
    graph = tf.Graph()
    with graph.as_default():
        # nr_train = data(n_exp, h, w, c, nclass, batch_size)
        X_data = np.random.rand(n_exp, h, w, c)
        y_data = np.random.rand(n_exp, nclass)
        data_iter = tg.SequentialIterator(X_data, y_data, batchsize=batch_size)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        y_ph = tf.placeholder('float32', [None, nclass])

        if tgmodel:
            # tensorgraph model
            print('..using graph model')
            seq = TGModel(h, w, c, nclass)
            y_train_sb = seq.train_fprop(X_ph)
        else:
            # tensorflow model
            print('..using tensorflow model')
            y_train_sb = TFModel(X_ph, h, w, c, nclass)

        loss_train_sb = tg.cost.mse(y_train_sb, y_ph)
        accu_train_sb = tg.cost.accuracy(y_train_sb, y_ph)

        opt = tf.train.RMSPropOptimizer(0.001)
        # required for BatchNormalization layer
        update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
        with ops.control_dependencies(update_ops):
            train_op = opt.minimize(loss_train_sb)

        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with tf.Session(graph=graph, config=config) as sess:
        sess.run(init_op)
        for epoch in range(2):
            pbar = tg.ProgressBar(n_exp)
            ttl_train_loss = 0
            # for i in range(0, n_exp, batch_size):
            i = 0
            for X_batch, y_batch in data_iter:
                pbar.update(i)
                i += len(X_batch)
                _, loss_train = sess.run([train_op, loss_train_sb],
                                         feed_dict={X_ph: X_batch, y_ph: y_batch})
                ttl_train_loss += loss_train * batch_size
            pbar.update(n_exp)
            ttl_train_loss /= n_exp
            print('epoch {}, train loss {}'.format(epoch, ttl_train_loss))
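`TGModel` is not defined in this snippet; a minimal sketch of what it could look like as a tensorgraph Sequential (the layer stack here is an assumption, not the original model):

# Hypothetical stand-in for the TGModel used above: a small
# Conv -> ReLU -> Flatten -> Linear -> Softmax tensorgraph model.
import tensorgraph as tg
from tensorgraph.layers import Conv2D, RELU, Flatten, Linear, Softmax

def TGModel(h, w, c, nclass):
    seq = tg.Sequential()
    seq.add(Conv2D(input_channels=c, num_filters=8,
                   kernel_size=(3, 3), stride=(1, 1), padding='SAME'))
    seq.add(RELU())
    seq.add(Flatten())
    # SAME padding with stride 1 keeps the spatial size at h x w
    seq.add(Linear(prev_dim=h * w * 8, this_dim=nclass))
    seq.add(Softmax())
    return seq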
def make_data(self):
    test_num = ['01', '02', '05', '06', '09']
    train_num = ['21', '22', '25', '26', '29']
    test_labels_path = ['{}/{}_manual1.gif'.format(self.data_dir, num) for num in test_num]
    test_images_path = ['{}/{}_test.tif'.format(self.data_dir, num) for num in test_num]
    train_labels_path = ['{}/{}_manual1.gif'.format(self.data_dir, num) for num in train_num]
    train_images_path = ['{}/{}_training.tif'.format(self.data_dir, num) for num in train_num]

    train_patches, train_lbl = self.extract_patches(train_images_path, train_labels_path)
    test_patches, test_lbl = self.extract_patches(test_images_path, test_labels_path)

    if self.shuffle:
        print('..shuffling')
        np.random.seed(1012)
        shf_idx = np.arange(len(train_patches))
        np.random.shuffle(shf_idx)
        train_patches = train_patches[shf_idx]
        train_lbl = train_lbl[shf_idx]

    # num_train = float(self.train_valid[0]) / sum(self.train_valid) * len(img_patches)
    # num_train = int(num_train)
    train = tg.SequentialIterator(train_patches, train_lbl, batchsize=self.batchsize)
    valid = tg.SequentialIterator(test_patches, test_lbl, batchsize=self.batchsize)
    return train, valid
def run_inference(s2s, decoder_infer_tar, decoder_infer_pin, summary, news_con,
                  vocab_inv, best_valid_path, best_epoch_path, train_dir):
    with s2s.graph.as_default():
        s2s.build_graph()
        saver = tf.train.Saver()
        data_infer = tg.SequentialIterator(decoder_infer_tar, decoder_infer_pin,
                                           batchsize=500)

        def prediction(path, name):
            saver.restore(s2s.sess, path)
            for news_batch, newsc in data_infer:
                encoder_input, encoder_len = batch(news_batch)
                predict = s2s.run_inference(encoder_input, encoder_len)
                write_prediction(predict, summary, s2s, vocab_inv, train_dir, name)

        print("==== Restore model from best valid path")
        prediction(best_valid_path, "valid")
        print("==== Restore model from best epoch path")
        prediction(best_epoch_path, "epoch")
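The `batch()` helper used throughout these seq2seq snippets is not shown; a minimal sketch under the assumption that it zero-pads token-id sequences into a time-major array and returns the true lengths (the zero PAD id and time-major layout are inferred from how `feed_dict[de_out].T` is consumed elsewhere):

# Hypothetical sketch of batch(): pad a list of token-id sequences to a
# common length; return a [time, batch] int array plus sequence lengths.
import numpy as np

def batch(sequences, max_sequence_length=None):
    lengths = [len(seq) for seq in sequences]
    max_len = max_sequence_length or max(lengths)
    padded = np.zeros((max_len, len(sequences)), dtype=np.int32)  # assumed PAD id 0
    for j, seq in enumerate(sequences):
        for i, tok in enumerate(seq[:max_len]):
            padded[i, j] = tok
    return padded, np.asarray(lengths, dtype=np.int32)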
def CNN_Classifier(X_train, y_train, X_valid, y_valid):
    batchsize = 64
    learning_rate = 0.001
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_phs = []
    for comp in [nclass]:
        y_phs.append(tf.placeholder('float32', [None, comp]))

    start = tg.StartNode(input_vars=[X_ph])
    h, w = same(in_height=h, in_width=w, strides=(1, 1), filters=(2, 2))
    h, w = same(in_height=h, in_width=w, strides=(2, 2), filters=(2, 2))
    # h1, w1 = valid(ch_embed_dim, word_len, strides=(1,1), filters=(ch_embed_dim,4))
    dim = int(h * w * c * 10)
    h1_Node = tg.HiddenNode(prev=[start],
                            layers=[Conv2D(input_channels=c, num_filters=10, padding='SAME',
                                           kernel_size=(2, 2), stride=(1, 1)),
                                    MaxPooling(poolsize=(2, 2), stride=(2, 2), padding='SAME'),
                                    Reshape(shape=(-1, dim))])
    h2_Node = tg.HiddenNode(prev=[h1_Node],
                            layers=[Linear(prev_dim=dim, this_dim=nclass),
                                    Softmax()])
    end_nodes = [tg.EndNode(prev=[h2_Node])]
    graph = Graph(start=[start], end=end_nodes)
    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    ttl_mse = []
    for y_ph, out in zip(y_phs, train_outs_sb):
        # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
        ttl_mse.append(tf.reduce_mean((y_ph - out)**2))
    mse = sum(ttl_mse)
    # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    saver = tf.train.Saver()
    vardir = './var/1'
    if not os.path.exists(vardir):
        os.makedirs(vardir)

    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        max_epoch = 100
        temp_acc = []
        for epoch in range(max_epoch):
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_train:
                feed_dict = {X_ph: X_batch}
                for y_ph, y_batch in zip(y_phs, [ys]):
                    feed_dict[y_ph] = y_batch
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, [ys])[0]
                train_accuracy += total_accuracy(train_outs, [ys])[0]
                ttl_examples += len(X_batch)
            # print('train mse', train_error / float(ttl_examples))
            # print('train accuracy', train_accuracy / float(ttl_examples))

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_valid:
                feed_dict = {X_ph: X_batch}
                for y_ph, y_batch in zip(y_phs, [ys]):
                    feed_dict[y_ph] = y_batch
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, [ys])[0]
                valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                ttl_examples += len(X_batch)
            # print('valid mse', valid_error / float(ttl_examples))
            # print('valid accuracy', valid_accuracy / float(ttl_examples))
            temp_acc.append(valid_accuracy / float(ttl_examples))

            save_path = saver.save(sess, vardir + "/model.ckpt")
            print("Model saved in file: %s" % save_path)
    print('max accuracy is:\t', max(temp_acc))
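The `same()` and `valid()` output-shape helpers used above are not defined in these snippets, and their keyword names vary between callers; a sketch of the standard TF output-size formulas they presumably compute, using the signature from this caller:

# Hypothetical sketch of the same()/valid() shape helpers: output
# height/width of a conv or pool layer under SAME and VALID padding.
import math

def same(in_height, in_width, strides, filters):
    # SAME padding: output = ceil(input / stride), independent of kernel size
    return (int(math.ceil(in_height / float(strides[0]))),
            int(math.ceil(in_width / float(strides[1]))))

def valid(in_height, in_width, strides, filters):
    # VALID padding: output = ceil((input - kernel + 1) / stride)
    return (int(math.ceil((in_height - filters[0] + 1) / float(strides[0]))),
            int(math.ceil((in_width - filters[1] + 1) / float(strides[1]))))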
def make_data(self):
    X_patches = []
    y_patches = []
    print('creating patches')
    for X_path, y_path in self.paths:
        # .npy files must be opened in binary mode for np.load
        with open(X_path, 'rb') as Xin, open(y_path, 'rb') as yin:
            X_npy = np.expand_dims(np.load(Xin), -1)
            print('before shrink:', X_npy.shape)
            x_f = x_b = y_f = y_b = z_f = z_b = 0
            # strip empty 10-voxel slabs from each face of the volume
            while True:
                shrinked = False
                if X_npy[:10, :, :, :].sum() == 0:
                    X_npy = X_npy[10:, :, :, :]
                    shrinked = True
                    x_f += 10
                if X_npy[-10:, :, :, :].sum() == 0:
                    X_npy = X_npy[:-10, :, :, :]
                    shrinked = True
                    x_b += 10
                if X_npy[:, :10, :, :].sum() == 0:
                    X_npy = X_npy[:, 10:, :, :]
                    shrinked = True
                    y_f += 10
                if X_npy[:, -10:, :, :].sum() == 0:
                    X_npy = X_npy[:, :-10, :, :]
                    shrinked = True
                    y_b += 10
                if X_npy[:, :, :10, :].sum() == 0:
                    X_npy = X_npy[:, :, 10:, :]
                    shrinked = True
                    z_f += 10
                if X_npy[:, :, -10:, :].sum() == 0:
                    X_npy = X_npy[:, :, :-10, :]
                    shrinked = True
                    z_b += 10
                if not shrinked:
                    break
            print('after shrink:', X_npy.shape)
            X_npy = X_npy / 255.0

            # crop the labels by the same amounts to stay aligned with X
            y_npy = np.expand_dims(np.load(yin), -1)
            y_npy = y_npy[x_f:, y_f:, z_f:, :]
            if x_b > 0:
                y_npy = y_npy[:-x_b, :, :, :]
            if y_b > 0:
                y_npy = y_npy[:, :-y_b, :, :]
            if z_b > 0:
                y_npy = y_npy[:, :, :-z_b, :]
            print('y_npy after shrink:', y_npy.shape)
            y_npy = y_npy / 100.0

            X_patch, y_patch = self.extract_patches(X_npy, y_npy)
            del X_npy, y_npy
            if len(X_patch) > 0:
                X_patches.append(X_patch)
                y_patches.append(y_patch)
            print("{} done! of {}".format(X_path, len(self.paths)))
            print('---------')

    X_patches = np.concatenate(X_patches)
    y_patches = np.concatenate(y_patches)
    ridx = np.arange(len(X_patches))
    np.random.shuffle(ridx)
    X_patches = X_patches[ridx]
    y_patches = y_patches[ridx]
    print('X patch shape', X_patches.shape)
    print('y patch shape', y_patches.shape)
    return tg.SequentialIterator(X_patches, y_patches, batchsize=self.batchsize)
def train():
    learning_rate = 0.001
    batchsize = 32
    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=3,
                         percent_decrease=0)

    seq = model()
    X_train, y_train, X_test, y_test = Mnist(flatten=False, onehot=True,
                                             binary=True, datadir='.')
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, 28, 28, 1])
    y_ph = tf.placeholder('float32', [None, 10])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    # required for BatchNormalization layer
    optimizer = tf.train.AdamOptimizer(learning_rate)
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb], feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)
            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break
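`model()` is not shown here; a minimal sketch of a tensorgraph Sequential that fits the 28x28x1 placeholders and 10-class output above (the exact layers are an assumption, chosen so the Linear dimension is consistent):

# Hypothetical sketch of the model() builder assumed above: a small
# tensorgraph Sequential for 28x28x1 MNIST images with 10 output classes.
import tensorgraph as tg
from tensorgraph.layers import Conv2D, RELU, MaxPooling, Flatten, Linear, Softmax

def model():
    seq = tg.Sequential()
    seq.add(Conv2D(input_channels=1, num_filters=32,
                   kernel_size=(3, 3), stride=(1, 1), padding='SAME'))
    seq.add(RELU())
    seq.add(MaxPooling(poolsize=(2, 2), stride=(2, 2), padding='SAME'))  # 28 -> 14
    seq.add(Flatten())
    seq.add(Linear(prev_dim=14 * 14 * 32, this_dim=10))
    seq.add(Softmax())
    return seq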
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 1000
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.05
    max_outputs = 10
    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    # X_train, y_train, X_valid, y_valid = Cifar10()
    X_train, y_train, X_valid, y_valid = data_char()
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    # train_embed, test_embed = text_embed(ch_embed_dim, sent_len, word_len)
    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    # gan = AuGan(h, w, nclass, bottleneck_dim)
    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        # gen_train_img = put_kernels_on_grid(G_train_sb, batchsize)
        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        # gen_optimizer = tf.train.RMSPropOptimizer(gen_learning_rate).minimize(gen_train_cost_sb, var_list=gen_var_list)
        # dis_optimizer = tf.train.RMSPropOptimizer(dis_learning_rate).minimize(dis_train_cost_sb, var_list=dis_var_list)
        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        # WGAN-style weight clipping on the discriminator
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)

        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:
                # train the discriminator 3 steps for every generator step
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)

            # log one batch of generated images per epoch
            for X_batch, y_batch in data_train:
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg], feed_dict=feed_dict)
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                train_writer.add_summary(G_img)
                train_writer.flush()
                train_writer.close()
                break
    return save_path
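`generator_cost` and `discriminator_cost` are not defined in these GAN snippets; a hedged sketch assuming the AC-GAN-style split implied by the `(class, judge)` pairs unpacked from `fake_train` (the loss form below is an assumption, not the original definition):

# Hypothetical sketch of the cost helpers: real_train and fake_train are
# (class_probs, judge) pairs from the discriminator.
import tensorflow as tf

def discriminator_cost(y_ph, real_train, fake_train):
    real_clss, real_judge = real_train
    fake_clss, fake_judge = fake_train
    # classify real images correctly
    class_loss = tf.reduce_mean((y_ph - real_clss) ** 2)
    # push judge -> 1 on real images and -> 0 on generated ones
    judge_loss = tf.reduce_mean((real_judge - 1) ** 2) + tf.reduce_mean(fake_judge ** 2)
    return class_loss + judge_loss

def generator_cost(y_ph, real_train, fake_train):
    fake_clss, fake_judge = fake_train
    # generator wants fakes judged real and classified as the conditioning label
    return tf.reduce_mean((y_ph - fake_clss) ** 2) + tf.reduce_mean((fake_judge - 1) ** 2)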
def run_training(s2s, train_encoder_in, train_decoder_tar, train_lead,
                 valid_encoder_in, valid_decoder_tar, vocab_inv, max_twee_len,
                 train_dir, summary, news_con):
    with s2s.graph.as_default():
        encoder_input, en_in_len, encoder_output, encoder_final_state = s2s.encoder()
        de_out, de_out_len, decoder_logits, decoder_prediction, loss_weight, \
            decoder_target, decoder_prediction_inference, lead, lead_out_len, decoder_lead, \
            decoder_outputs_train, decoder_state_train = s2s.decoder_adv(max_twee_len)
        vocab_size = len(vocab_inv)
        # loss, train_op = simple_cost(decoder_target, decoder_logits, vocab_size)
        loss, train_op, decoder_mask = s2s.multi_cost(args)
        train_cost = tf.summary.scalar('train_cost', loss)
        train_cost_his = tf.summary.histogram("histogram_cost", loss)
        summ_op = tf.summary.merge_all()
        saver = tf.train.Saver()

        if args.model_restore:
            saver.restore(s2s.sess, './log2_gru_embed21/pre_check_point-49')
        else:
            tf.set_random_seed(1)
            init = tf.global_variables_initializer()
            s2s.sess.run(init)

        cost_dir = train_dir + '/cost'
        summary_writer = tf.summary.FileWriter(cost_dir, s2s.sess.graph)
        data_train = tg.SequentialIterator(train_encoder_in, train_decoder_tar,
                                           batchsize=int(args.batch_size))
        data_valid = tg.SequentialIterator(valid_encoder_in, valid_decoder_tar,
                                           batchsize=500)

        steps = 0
        min_epoch_loss = float("inf")
        min_epoch = 0
        best_valid_path = train_dir
        best_epoch_path = train_dir
        best_t3_path = train_dir
        best_t1_path = train_dir
        best_cat_path = train_dir
        max_pred = {"max_pred_cat": [], "max_pred_t1": [], "max_pred_t3": []}
        max_prf = {"max_rf_cat": 0.0, "max_rp_cat": 0.0, "max_rr_cat": 0.0,
                   "max_rf_t1": 0.0, "max_rp_t1": 0.0, "max_rr_t1": 0.0,
                   "max_rf_t3": 0.0, "max_rp_t3": 0.0, "max_rr_t3": 0.0,
                   "max_epoch_cat": 0, "max_epoch_t1": 0, "max_epoch_t3": 0}

        for epoch in range(int(args.max_epoch)):
            print('epoch: ', epoch)
            print('..training')
            loss_epoch = []
            for news_batch, tweet_batch in data_train:  # , lead_batch
                encoder_in, encoder_len, _ = batch(news_batch)
                decoder_target, decoder_len, de_mask = batch(
                    tweet_batch, max_sequence_length=(max_twee_len + 3))
                # lead_target, lead_len = batch(lead_batch, max_twee_len + 3)
                feed_dict = {encoder_input: encoder_in, en_in_len: encoder_len,
                             de_out: decoder_target, de_out_len: decoder_len,
                             decoder_mask: de_mask}
                # lead: lead_target, lead_out_len: lead_len
                loss_value, train_op_value, summ = s2s.sess.run([loss, train_op, summ_op],
                                                                feed_dict)
                loss_epoch.append(loss_value)
                summary_writer.add_summary(summ)
                steps += 1
                if steps % 100 == 0 and steps != 0:
                    summary_writer.flush()
                    predict = s2s.sess.run(decoder_prediction, feed_dict)
                    for i, (inp, pred) in enumerate(zip(feed_dict[de_out].T, predict.T)):
                        if i < 10:
                            print('sample {}'.format(i + 1))
                            inp_text = to_text(inp, vocab_inv)
                            print('input > {}'.format(inp_text))
                            pred_text = to_text(pred, vocab_inv)
                            print('predict > {}'.format(pred_text))
                    max_prf, best_cat_path, best_t1_path, best_t3_path = run_inference(
                        s2s, data_valid, encoder_input, en_in_len,
                        decoder_prediction_inference, summary, news_con, vocab_inv,
                        max_prf, max_pred, epoch, saver, train_dir, steps)

            print('epoch loss: {}'.format(sum(loss_epoch)))
            if sum(loss_epoch) < min_epoch_loss:
                min_epoch_loss = sum(loss_epoch)
                min_epoch = epoch
                """
                saver.save(s2s.sess, train_dir + '/best_epoch_checkpoint', global_step=epoch)
                print('Saving Epoch model')
                best_epoch_path = train_dir + '/best_epoch_checkpoint-' + str(epoch)
                """
            if (epoch - min_epoch) >= 2:
                break

        summary_writer.close()
        # print("*** The best model tested by batch_loss achieved at {} epoch and min_loss is {}! ***"
        #       .format(min_epoch, min_epoch_loss))
        print("*** The best rouge results are ", max_prf)
        return best_epoch_path, best_t1_path, best_t3_path
def train():
    batchsize = 64
    learning_rate = 0.001
    max_epoch = 10

    X_train = np.random.rand(1000, 32, 32, 3)
    M_train = np.random.rand(1000, 32, 32, 1)
    X_valid = np.random.rand(1000, 32, 32, 3)
    M_valid = np.random.rand(1000, 32, 32, 1)

    X_ph = tf.placeholder('float32', [None, 32, 32, 3])
    M_ph = tf.placeholder('float32', [None, 32, 32, 1])

    h, w = 32, 32
    model = tg.Sequential()
    model.add(Conv2D(input_channels=3, num_filters=8, kernel_size=(5, 5),
                     stride=(2, 2), padding='SAME'))
    h1, w1 = same(h, w, kernel_size=(5, 5), stride=(2, 2))
    model.add(RELU())
    model.add(Conv2D(input_channels=8, num_filters=16, kernel_size=(5, 5),
                     stride=(2, 2), padding='SAME'))
    h2, w2 = same(h1, w1, kernel_size=(5, 5), stride=(2, 2))
    model.add(RELU())
    model.add(Conv2D_Transpose(input_channels=16, num_filters=8, output_shape=(h1, w1),
                               kernel_size=(5, 5), stride=(2, 2), padding='SAME'))
    model.add(RELU())
    model.add(Conv2D_Transpose(input_channels=8, num_filters=1, output_shape=(h, w),
                               kernel_size=(5, 5), stride=(2, 2), padding='SAME'))
    model.add(RELU())

    iter_model = tg.Sequential()
    iter_model.add(Conv2D(input_channels=1, num_filters=8, kernel_size=(5, 5),
                          stride=(2, 2), padding='SAME'))
    iter_model.add(RELU())
    iter_model.add(Conv2D_Transpose(input_channels=8, num_filters=1, output_shape=(h, w),
                                    kernel_size=(5, 5), stride=(2, 2), padding='SAME'))
    model.add(Iterative(sequential=iter_model, num_iter=10))

    M_train_s = model.train_fprop(X_ph)
    M_valid_s = model.test_fprop(X_ph)

    train_mse = tf.reduce_mean((M_ph - M_train_s)**2)
    valid_mse = tf.reduce_mean((M_ph - M_valid_s)**2)

    data_train = tg.SequentialIterator(X_train, M_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, M_valid, batchsize=batchsize)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_mse)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            print('..training')
            for X_batch, M_batch in data_train:
                sess.run(optimizer, feed_dict={X_ph: X_batch, M_ph: M_batch})
            print('..validating')
            valid_mse_score = sess.run(valid_mse,
                                       feed_dict={X_ph: X_valid, M_ph: M_valid})
            print('valid mse score:', valid_mse_score)
def Vanilla_Classifier(X_train, y_train, X_valid, y_valid, restore):
    batchsize = 100
    learning_rate = 0.001
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        # y_ph = tf.placeholder('float32', [None, nclass])
        y_phs = []
        for comp in [nclass]:
            y_phs.append(tf.placeholder('float32', [None, comp]))

        dim = int(h * w * c)
        scope = 'encoder'
        start = tg.StartNode(input_vars=[X_ph])
        h1_Node = tg.HiddenNode(prev=[start],
                                layers=[Sigmoid(),
                                        TFBatchNormalization(name=scope + '/vanilla1'),
                                        RELU(),
                                        Flatten(),
                                        Sigmoid(),
                                        TFBatchNormalization(name=scope + '/vanilla2')])
        h2_Node = tg.HiddenNode(prev=[h1_Node],
                                layers=[Linear(prev_dim=dim, this_dim=nclass),
                                        Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]
        graph = Graph(start=[start], end=end_nodes)
        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        ttl_mse = []
        for y_ph, out in zip(y_phs, train_outs_sb):
            # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
            ttl_mse.append(tf.reduce_mean((y_ph - out)**2))
        mse = sum(ttl_mse)
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        saver = tf.train.Saver()
        vardir = './var/2'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        tf.set_random_seed(1)
        init = tf.global_variables_initializer()
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")

            max_epoch = 100
            temp_acc = []
            for epoch in range(max_epoch):
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                save_path = saver.save(sess, vardir + "/model.ckpt")
                # print("Model saved in file: %s" % save_path)
                temp_acc.append(valid_accuracy / float(ttl_examples))
            print('max accuracy is:\t', max(temp_acc))
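`total_mse` and `total_accuracy`, used by several of these classifier loops, are not defined here; a sketch assuming each returns per-output batch sums that the callers later divide by the example count:

# Hypothetical sketch of the metric helpers: outs and targets are lists of
# (batch, nclass) arrays; each result is summed over the batch.
import numpy as np

def total_mse(outs, targets):
    return [((np.asarray(out) - np.asarray(tar)) ** 2).mean(axis=1).sum()
            for out, tar in zip(outs, targets)]

def total_accuracy(outs, targets):
    # count of examples whose argmax prediction matches the one-hot target
    return [(np.argmax(out, axis=1) == np.argmax(tar, axis=1)).sum()
            for out, tar in zip(outs, targets)]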
def make_data(self, img_augment=False):
    images = glob.glob(self.input_dir + '/Image/*.jpg')
    X = []
    y = []
    _IMG_INPUT_DIM_ = (3, 128, 128)
    c, h, w = _IMG_INPUT_DIM_
    if self.load_data:
        print('..loading data')
        X = np.load('../data/skin_X.npy')
        y = np.load('../data/skin_y.npy')
    else:
        for imgpath in images:
            X_img = []
            y_img = []
            imgx = Image.open(imgpath)
            imgx = imgx.resize((w, h))
            imgx = np.asarray(imgx) / 255.0

            lblpath = imgpath.replace('Image', 'Label')
            lblpath = lblpath.replace('.jpg', '_Segmentation.png')
            imgy = Image.open(lblpath)
            imgy = imgy.resize((w, h))
            imgy = np.asarray(imgy)[:, :, np.newaxis] / 255.0

            X_img.append(imgx)
            y_img.append(imgy)

            if self.img_augment:
                imh, imw, imc = imgx.shape
                # (commented-out perspective-warp augmentation)
                # scales = [0.5, 0.6, 0.7, 0.8]
                # scales = [0.5, 0.8]
                # pts2s = []
                # for scale in scales:
                #     pts21 = np.float32([[0,0],[scale*imw,imh],[imw,0],[imw,imh]])
                #     pts22 = np.float32([[0,0],[0,scale*imh],[imw,0],[imw,imh]])
                #     pts23 = np.float32([[0,0],[0,imh],[scale*imw,0],[imw,imh]])
                #     pts24 = np.float32([[0,0],[0,imh],[imw,scale*imh],[imw,imh]])
                #     pts25 = np.float32([[0,0],[0,imh],[imw,0],[scale*imw,imh]])
                #     pts26 = np.float32([[0,0],[0,imh],[imw,0],[imw,scale*imh]])
                #     pts27 = np.float32([[scale*imw,0],[scale*imw,imh],[imw,0],[imw,imh]])
                #     pts28 = np.float32([[0,scale*imh],[scale*imw,imh],[imw,0],[imw,imh]])
                #     pts2s += [pts21, pts22, pts23, pts24, pts25, pts26, pts27, pts28]
                #
                # pts1 = np.float32([[0,0],[0,imh],[imw,0],[imw,imh]])
                # for pts2 in pts2s:
                #     m = cv2.getPerspectiveTransform(pts1, pts2)
                #     dstx = cv2.warpPerspective(imgx, m, (imw, imh))
                #     dsty = cv2.warpPerspective(imgy, m, (imw, imh))
                #     X_img.append(cv2.resize(dstx, (w, h)))
                #     y_img.append(cv2.resize(dsty, (w, h)))

                # rotation augmentation: 90 / 180 / 270 degrees
                rotx = []
                roty = []
                for imgx, imgy in zip(X_img, y_img):
                    for angle in [90, 180, 270]:
                        m = cv2.getRotationMatrix2D((w / 2, h / 2), angle, 1)
                        dstx = cv2.warpAffine(imgx, m, (w, h))
                        dsty = cv2.warpAffine(imgy, m, (w, h))
                        rotx.append(dstx)
                        roty.append(dsty)
                X_img += rotx
                y_img += roty

            X += X_img
            y += y_img
            print('lenX', len(X))
            print('leny', len(y))

        X = np.asarray(X)
        # X = np.rollaxis(X, 3, 1)
        y = np.asarray(y)
        # y = np.rollaxis(y, 3, 1)
        with open('../data/skin_X.npy', 'wb') as Xout, \
             open('../data/skin_y.npy', 'wb') as yout:
            np.save(Xout, X)
            np.save(yout, y)

    num_train = float(self.train_valid[0]) / sum(self.train_valid) * len(X)
    num_train = int(num_train)
    if self.shuffle:
        print('..shuffling')
        np.random.seed(1012)
        shf_idx = np.arange(len(X))
        np.random.shuffle(shf_idx)
        X = X[shf_idx]
        y = y[shf_idx]

    train = tg.SequentialIterator(X[:num_train], y[:num_train], batchsize=self.batchsize)
    valid = tg.SequentialIterator(X[num_train:], y[num_train:], batchsize=self.batchsize)
    return train, valid
batchsize = 1
# size = 3

#######
# Just to train 0 & 1, ignore 2=Other Pathology. Assign 2-->0
# dataY[dataY == 2] = 0
#######
X_train, y_train = dataset.NextBatch3D(10, dataset='train')
X_test, y_test = dataset.NextBatch3D(10, dataset='validation')

def Rotate3D(data, angle, axis):
    axis_ = [(1, 2), (0, 2), (0, 1)]
    return rotate(data, angle, axes=axis_[axis], reshape=False)

# rotate each training volume; the fixed 90-degree angle and axis here are
# illustrative assumptions (the original call passed no arguments)
X_train = np.array([Rotate3D(x, angle=90, axis=0) for x in X_train])

iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

best_valid_accu = 0
for epoch in range(max_epoch):
    print('epoch:', epoch)
    pbar = tg.ProgressBar(len(iter_train))
    ttl_train_cost = 0
    ttl_examples = 0
    print('..training')
    # for i in range(10):
    for XX, yy in iter_train:
        # X_tr = X_train[i]
        # y_tr = y_train[i]
        # y_tr = np.array(y_tr, dtype='int8')
def weight_calculation(path):
    saver.restore(s2s.sess, path)
    data_tlt = tg.SequentialIterator(encoder_in, decoder_tar, batchsize=128)
    attn_weight_tlt = []
    for news_batch, tweet_batch in data_tlt:
        encoder_input, encoder_len = batch(news_batch)
        decoder_target, decoder_len = batch(tweet_batch)  # , max_twee_len
        attn_weight = s2s.run_attn_weight(encoder_input, encoder_len,
                                          decoder_target, decoder_len)
        attn_weight_tlt += attn_weight.tolist()

    # sum the per-token attention weights within each sentence
    attn = []
    if len(clu_sen_len) == len(attn_weight_tlt):
        for i in range(len(encoder_in)):
            sen_attn = []
            for j in range(len(clu_sen_len[i])):
                snum = sum(clu_sen_len[i][:j])
                enum = snum + clu_sen_len[i][j]
                sen_attn.append(sum(attn_weight_tlt[i][snum:enum]))
            attn.append(sen_attn)

    weight_dict = {}
    news_idx_clu = {}
    assert len(data_info) == len(attn)
    for i in range(len(attn)):
        key = list(data_info[i].keys())[0]
        if key in weight_dict:
            weight_dict[key].append(attn[i])
        else:
            weight_dict[key] = [attn[i]]
            news_idx_clu[key] = data_info[i][key]

    top3 = 0
    base = 0
    tlt = 0
    avg_weight_dict = {}
    twee_top_news = {}
    for key in weight_dict.keys():
        value = np.asarray(weight_dict[key])
        clu_twee_num, clu_news_num = value.shape
        news_idx = news_idx_clu[key]
        if clu_twee_num > 0:
            clu_avg_weight = np.divide(np.sum(value, axis=0), clu_twee_num * 1.0)
            avg_weight_dict[key] = clu_avg_weight
            news_id = [m[0] for m in sorted(enumerate(clu_avg_weight),
                                            key=lambda x: x[1], reverse=True)]
            top_id_idx = news_id[:3] if len(news_id) > 3 else news_id
            top_id = [x for i, x in enumerate(news_idx) if i in top_id_idx]
            twee_top_news[key] = top_id
            doc_id, _ = key.split(",")
            for ele in top_id:
                rk = min(news_grd_sim_rank[doc_id][ele])
                if rk < 3:
                    top3 += 1
            tlt += len(top_id)
            base += 3
        else:
            twee_top_news[key] = []
    print("== Precision for tweets vote top 3 news is ", (top3 * 1.0) / tlt)
    print("== Recall for tweets vote top 3 news is ", (top3 * 1.0) / base)
    return twee_top_news
def run_training(s2s, train_encoder_in, train_decoder_tar, valid_encoder_in,
                 valid_decoder_tar, vocab_inv, max_twee_len):
    with s2s.graph.as_default():
        encoder_input, en_in_len, encoder_output, encoder_final_state = s2s.encoder()
        de_out, de_out_len, title_out, first_out, decoder_logits, decoder_prediction, loss_weight, \
            decoder_target, decoder_title, decoder_first, decoder_prediction_inference, \
            attention_values, attention_keys, decoder_state_train, decoder_context_state_train \
            = s2s.decoder_adv(max_twee_len)

        train_dir = args.log_root + '_' + args.opt + '_plain'
        if not os.path.exists(train_dir):
            os.makedirs(train_dir)

        s2s.build_graph()
        saver = tf.train.Saver()
        if args.model_restore:
            saver.restore(s2s.sess, './log2_gru_embed21/pre_check_point-49')
        else:
            tf.set_random_seed(1)
            init = tf.global_variables_initializer()
            s2s.sess.run(init)
        summary_writer = tf.summary.FileWriter(train_dir + '/cost', s2s.sess.graph)

        data_train = tg.SequentialIterator(train_encoder_in, train_decoder_tar,
                                           batchsize=int(args.batch_size))
        if args.valid:
            data_valid = tg.SequentialIterator(valid_encoder_in, valid_decoder_tar,
                                               batchsize=int(args.batch_size))

        steps = 0
        min_valid_loss = float("inf")
        optimal_step = 0
        opt = False
        min_epoch_loss = float("inf")
        min_epoch = 0
        best_valid_path = train_dir
        best_epoch_path = train_dir

        for epoch in range(int(args.max_epoch)):
            print('epoch: ', epoch)
            print('..training')
            loss_epoch = []
            for news_batch, tweet_batch in data_train:
                encoder_input, encoder_len = batch(news_batch)
                decoder_target, decoder_len = batch(tweet_batch)  # , max_twee_len
                loss, train_op, summ, global_step = s2s.run_train_step(
                    encoder_input, encoder_len, decoder_target, decoder_len)
                loss_epoch.append(loss)
                summary_writer.add_summary(summ, global_step)
                steps += 1
                if steps % 100 == 0 and steps != 0:
                    summary_writer.flush()
                    s2s.run_train_result(encoder_input, encoder_len,
                                         decoder_target, decoder_len, vocab_inv)
                    valid_loss = []
                    print('Step {} for validation '.format(steps))
                    if args.valid:
                        for news_valid, tweet_valid in data_valid:
                            encoder_input, encoder_len = batch(news_valid)
                            decoder_target, decoder_len = batch(tweet_valid, max_twee_len)
                            valid_loss.append(s2s.run_valid_step(encoder_input, encoder_len,
                                                                 decoder_target, decoder_len))
                        s2s.run_valid_result(encoder_input, encoder_len,
                                             decoder_target, decoder_len, vocab_inv)
                        if sum(valid_loss) < min_valid_loss:
                            min_valid_loss = sum(valid_loss)
                            optimal_step = steps
                            saver.save(s2s.sess, train_dir + '/best_valid_checkpoint',
                                       global_step=global_step)
                            print('Saving Valid model')
                            best_valid_path = (train_dir + '/best_valid_checkpoint-'
                                               + str(global_step))
                        if (steps - optimal_step) % 100 > 10:
                            opt = True
                            break
            # if opt:
            #     break
            if opt:
                break

            print('epoch loss: {}'.format(sum(loss_epoch)))
            if sum(loss_epoch) < min_epoch_loss:
                min_epoch_loss = sum(loss_epoch)
                min_epoch = epoch
                saver.save(s2s.sess, train_dir + '/best_epoch_checkpoint', global_step=epoch)
                print('Saving Epoch model')
                best_epoch_path = train_dir + '/best_epoch_checkpoint-' + str(epoch)

        summary_writer.close()
        if args.valid:
            print("Best running step is ", optimal_step)
            print("Minimum validation loss is ", min_valid_loss)
        print("*** Running end after {} epochs and {} iterations!!! ***".format(epoch, steps))
        print("*** The best model tested by batch_loss achieved at {} epoch ! ***".format(min_epoch))
        return best_valid_path, best_epoch_path, train_dir
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 2
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.1  # 20170616_1459: 0.05  20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    AuX_train = X_train
    Auy_train = y_train
    aux = np.empty((0, 28, 28, 1), 'float32')
    auy = np.empty((0, 10), 'int32')
    # 0617_1346: 0.05  0619_1033: 0.01  0619_1528: 0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)
    print('\n====== Before augment data size ', X_train.shape, ' ======\n')

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()
    print('..using model:', gan.__class__.__name__)
    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)

        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path and epoch == max_epoch - 1:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))

                # collect generated samples that the discriminator judges real
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                for X_batch, y_batch in data_train:
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    G_train, G_img, fake_dis = gan.sess.run(
                        [G_train_sb, gen_train_mg, fake_train], feed_dict=feed_dict)
                    fake_class_dis, fake_judge_dis = fake_dis
                    idx = [i for i, v in enumerate(fake_judge_dis) if v > 0.5]
                    aux = np.concatenate((aux, G_train[idx]), axis=0)
                    auy = np.concatenate((auy, fake_class_dis[idx]), axis=0)
                    AuX_train = np.concatenate((G_train, AuX_train), axis=0)
                    Auy_train = np.concatenate((y_batch, Auy_train), axis=0)
                    # temp_data = zip(G_img, y_batch)
                    # aug_data.append(temp_data)
                    train_writer.add_summary(G_img)
                train_writer.flush()
                train_writer.close()

                xname = 'genx.npy'
                yname = 'geny.npy'
                np.save('{}/{}'.format(logdir, xname), aux)
                np.save('{}/{}'.format(logdir, yname), auy)

    print('\n====== Augment data size ', AuX_train.shape, ' ======\n')
    print('\n====== Augment data size ', Auy_train.shape, ' ======\n')
    return save_path, X_train, y_train, X_valid, y_valid, AuX_train, Auy_train, aux, auy
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    enc_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.1  # 20170616_1459: 0.05  20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    # X_train, y_train, X_valid, y_valid = X_train[0:10000], y_train[0:10000], X_valid[0:10000], y_valid[0:10000]
    # 0617_1346: 0.05  0619_1033: 0.01  0619_1528: 0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list, G_train_enc, G_test_enc, \
        G_train_embed, G_test_embed = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()
    print('..using model:', gan.__class__.__name__)
    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)
        enc_train_cost_sb = encoder_cost(y_ph, G_train_enc)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        enc_train_cost_sm = tf.summary.scalar('enc_cost', enc_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm,
                                          enc_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        enc_optimizer = tf.train.AdamOptimizer(enc_learning_rate).minimize(enc_train_cost_sb)
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        # embedding_var = tf.Variable(tf.zeros([60000, 300]), trainable=False, name="embedding")
        # prepare projector config
        # summary_writer = tf.summary.FileWriter(logdir)
        # saver = tf.train.Saver([embedding_var])

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        # es = tg.EarlyStopper(max_epoch=max_epoch,
        #                      epoch_look_back=epoch_look_back,
        #                      percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()
        # embed = gan.sess.graph.get_tensor_by_name('Generator/genc4')

        temp_acc = []
        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            ttl_enc_cost = 0
            error_writer.reopen()
            if epoch == max_epoch - 1:
                output = np.empty([0, 300], 'float32')
                labels = np.empty([0, 10], 'int32')

            for X_batch, y_batch in data_train:
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([enc_optimizer, gen_optimizer], feed_dict=feed_dict)

                fake_judge_v, cost_train, enc_cost, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, enc_train_cost_sb,
                     gen_train_cost_sb, dis_train_cost_sb], feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                ttl_enc_cost += enc_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
                error_writer.flush()
                if epoch == max_epoch - 1:
                    # collect bottleneck embeddings and labels on the last epoch
                    results = gan.sess.run(G_train_embed,
                                           feed_dict={real_ph: X_batch, y_ph: y_batch})
                    output = np.concatenate((output, results), axis=0)
                    labels = np.concatenate((labels, y_batch), axis=0)
            error_writer.close()
            # saver.save(gan.sess, os.path.join(embeddir, 'model.ckpt'))
            # config = projector.ProjectorConfig()
            # embedding = config.embeddings.add()
            # embedding.tensor_name = embedding_var.name
            # embedding.metadata_path = metadata_path
            # projector.visualize_embeddings(summary_writer, config)
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            mean_enc_cost = ttl_enc_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)
            print('mean train enc cost:', mean_enc_cost)

            if epoch == max_epoch - 1:
                embeddir = './genData/3'
                if not os.path.exists(embeddir):
                    os.makedirs(embeddir)
                lab = np.nonzero(labels)[1]
                np.save(embeddir + '/embed.npy', output)
                np.save(embeddir + '/label.npy', lab)

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in data_valid:
                feed_dict = {real_ph: X_batch, y_ph: ys}
                valid_outs = gan.sess.run(G_test_enc, feed_dict=feed_dict)
                valid_error += total_mse([valid_outs], [ys])[0]
                valid_accuracy += total_accuracy([valid_outs], [ys])[0]
                ttl_examples += len(X_batch)
            temp_acc.append(valid_accuracy / float(ttl_examples))
        print('max accuracy is:\t', max(temp_acc))
    return save_path
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.3  # 20170616_1459: 0.05  20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'

    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    # 0617_1346: 0.05  0619_1033: 0.01  0619_1528: 0.1  0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()
    print('..using model:', gan.__class__.__name__)
    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb, max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)

        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            batch_iter = 1
            for X_batch, y_batch in data_train:
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                for i in range(1):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                # log generated images for the first batch of every epoch
                if batch_iter == 1:
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                                  feed_dict=feed_dict)
                    gen_writer = tf.summary.FileWriter('{}/generator/{}'.format(logdir, epoch))
                    gen_writer.add_summary(G_img)
                    gen_writer.flush()
                    gen_writer.close()
                    batch_iter = 0

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)
                for X_batch, y_batch in data_train:
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                                  feed_dict=feed_dict)
                    train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                    train_writer.add_summary(G_img)
                    train_writer.flush()
                    train_writer.close()
                    break
    return save_path
def train():
    batchsize = 64
    learning_rate = 0.001
    max_epoch = 100

    # batch x depth x height x width x channel
    X_train = np.random.rand(1000, 20, 32, 32, 1)
    M_train = np.random.rand(1000, 20, 32, 32, 1)
    X_valid = np.random.rand(1000, 20, 32, 32, 1)
    M_valid = np.random.rand(1000, 20, 32, 32, 1)

    X_ph = tf.placeholder('float32', [None, 20, 32, 32, 1])
    M_ph = tf.placeholder('float32', [None, 20, 32, 32, 1])

    h, w = 32, 32
    model = tg.Sequential()
    # iter_model = tg.Sequential()
    model.add(Conv3D(input_channels=1, num_filters=8, kernel_size=(5, 5, 5),
                     stride=(1, 1, 1), padding='SAME'))
    model.add(RELU())
    model.add(Conv3D(input_channels=8, num_filters=1, kernel_size=(5, 5, 5),
                     stride=(1, 1, 1), padding='SAME'))
    # iter_model.add(RELU())
    # model.add(Iterative(sequential=iter_model, num_iter=1))
    model.add(Sigmoid())

    M_train_s = model.train_fprop(X_ph)
    M_valid_s = model.test_fprop(X_ph)

    train_mse = tf.reduce_mean((M_ph - M_train_s)**2)
    valid_mse = tf.reduce_mean((M_ph - M_valid_s)**2)
    # train_mse = entropy(M_ph, M_train_s)
    # valid_mse = entropy(M_ph, M_valid_s)
    valid_f1 = image_f1(tf.to_int32(M_ph), tf.to_int32(M_valid_s >= 0.5))

    data_train = tg.SequentialIterator(X_train, M_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, M_valid, batchsize=batchsize)

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_mse)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            print('..training')
            pbar = ProgressBar(len(data_train))
            n_exp = 0
            for X_batch, M_batch in data_train:
                pbar.update(n_exp)
                sess.run(optimizer, feed_dict={X_ph: X_batch, M_ph: M_batch})
                n_exp += len(X_batch)
            print('..validating')
            valid_f1_score, valid_mse_score = sess.run(
                [valid_f1, valid_mse], feed_dict={X_ph: X_valid, M_ph: M_valid})
            print('valid mse score:', valid_mse_score)
            print('valid f1 score:', valid_f1_score)
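`image_f1` is not defined in this snippet; a sketch of a batch-level F1 over binary masks that matches how it is called above (the epsilon guard against empty masks is an assumption):

# Hypothetical sketch of image_f1: F1 between binary masks, computed from
# precision and recall over all pixels in the batch.
import tensorflow as tf

def image_f1(y_true, y_pred, eps=1e-7):
    y_true = tf.to_float(y_true)
    y_pred = tf.to_float(y_pred)
    tp = tf.reduce_sum(y_true * y_pred)            # true positives
    precision = tp / (tf.reduce_sum(y_pred) + eps)
    recall = tp / (tf.reduce_sum(y_true) + eps)
    return 2 * precision * recall / (precision + recall + eps)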
def Encoder_Classifier(X_train, y_train, X_valid, y_valid, restore):
    batchsize = 100
    learning_rate = 0.001
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    g = tf.Graph()
    with g.as_default():
        data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
        data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

        X_ph = tf.placeholder('float32', [None, h, w, c])
        y_phs = []
        for comp in [nclass]:
            y_phs.append(tf.placeholder('float32', [None, comp]))

        start = tg.StartNode(input_vars=[X_ph])

        # spatial dims after each 'VALID' convolution
        h1, w1 = valid(h, w, filters=(5, 5), strides=(1, 1))
        h2, w2 = valid(h1, w1, filters=(5, 5), strides=(2, 2))
        h3, w3 = valid(h2, w2, filters=(5, 5), strides=(2, 2))
        flat_dim = int(h3 * w3 * 32)

        scope = 'encoder'
        bottleneck_dim = 300
        enc_hn = tg.HiddenNode(
            prev=[start],
            layers=[Conv2D(input_channels=c, num_filters=32, kernel_size=(5, 5),
                           stride=(1, 1), padding='VALID'),
                    TFBatchNormalization(name=scope + '/genc1'),
                    RELU(),
                    Conv2D(input_channels=32, num_filters=32, kernel_size=(5, 5),
                           stride=(2, 2), padding='VALID'),
                    TFBatchNormalization(name=scope + '/genc2'),
                    RELU(),
                    Conv2D(input_channels=32, num_filters=32, kernel_size=(5, 5),
                           stride=(2, 2), padding='VALID'),
                    TFBatchNormalization(name=scope + '/genc3'),
                    RELU(),
                    Flatten(),
                    Linear(flat_dim, 300),
                    TFBatchNormalization(name=scope + '/genc4'),
                    RELU(),
                    Linear(300, bottleneck_dim),
                    Tanh()])

        h2_Node = tg.HiddenNode(prev=[enc_hn],
                                layers=[Linear(prev_dim=bottleneck_dim, this_dim=nclass),
                                        Softmax()])
        end_nodes = [tg.EndNode(prev=[h2_Node])]

        graph = Graph(start=[start], end=end_nodes)
        train_outs_sb = graph.train_fprop()
        test_outs = graph.test_fprop()

        ttl_mse = []
        for y_ph, out in zip(y_phs, train_outs_sb):
            # ttl_mse.append(tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y_ph, out)))
            ttl_mse.append(tf.reduce_mean((y_ph - out)**2))
        mse = sum(ttl_mse)

        # optimizer = tf.train.GradientDescentOptimizer(learning_rate).minimize(mse)
        optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        saver = tf.train.Saver()
        vardir = './var/1'
        if not os.path.exists(vardir):
            os.makedirs(vardir)

        tf.set_random_seed(1)
        init = tf.global_variables_initializer()
        with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
            sess.run(init)
            if restore == 1:
                re_saver = tf.train.Saver()
                re_saver.restore(sess, vardir + "/model.ckpt")
                print("Model restored.")

            max_epoch = 2
            temp_acc = []
            for epoch in range(max_epoch):
                train_error = 0
                train_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_train:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    sess.run(optimizer, feed_dict=feed_dict)
                    train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                    train_error += total_mse(train_outs, [ys])[0]
                    train_accuracy += total_accuracy(train_outs, [ys])[0]
                    ttl_examples += len(X_batch)

                valid_error = 0
                valid_accuracy = 0
                ttl_examples = 0
                for X_batch, ys in data_valid:
                    feed_dict = {X_ph: X_batch}
                    for y_ph, y_batch in zip(y_phs, [ys]):
                        feed_dict[y_ph] = y_batch
                    valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                    valid_error += total_mse(valid_outs, [ys])[0]
                    valid_accuracy += total_accuracy(valid_outs, [ys])[0]
                    ttl_examples += len(X_batch)
                temp_acc.append(valid_accuracy / float(ttl_examples))

                save_path = saver.save(sess, vardir + "/model.ckpt")
                print("Model saved in file: %s" % save_path)

            print('max accuracy is:\t', max(temp_acc))
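# The valid() helper used above to track spatial dimensions through the encoder
# is assumed, not shown. A minimal sketch consistent with TensorFlow's 'VALID'
# padding arithmetic, out = ceil((in - filter + 1) / stride):
import math

def valid_sketch(h, w, filters, strides):
    fh, fw = filters
    sh, sw = strides
    out_h = int(math.ceil(float(h - fh + 1) / sh))
    out_w = int(math.ceil(float(w - fw + 1) / sw))
    return out_h, out_w

# e.g. valid_sketch(28, 28, filters=(5, 5), strides=(1, 1)) -> (24, 24)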
    # plot(X_gen_val[10,:,:,0])
    val = np.mean(z_val ** 2)
    print('mean z val:', np.sqrt(val))
    # if z_grad.length < epsilon:
    #     break
    X_gen_val = sess.run(X_gen_sb, feed_dict={z_ph: z_val})
    X_data, X_gen_data, y_data = extract_pos_neg_patches(X_val, X_gen_val,
                                                         patch_size=patch_size,
                                                         max_patches=max_patches)
    X_train, X_valid = tg.utils.split_arr(X_data, train_valid_ratio=[5, 1])
    X_gen_train, X_gen_valid = tg.utils.split_arr(X_gen_data, train_valid_ratio=[5, 1])
    y_train, y_valid = tg.utils.split_arr(y_data, train_valid_ratio=[5, 1])

    train_iter = tg.SequentialIterator(X_train, X_gen_train, y_train, batchsize=32)
    valid_iter = tg.SequentialIterator(X_valid, X_gen_valid, y_valid, batchsize=32)

    print('training classifier')
    n_exp = 0
    ttl_cls_train_cost = 0
    for X1_batch, X2_batch, y_batch in train_iter:
        # X1_ph, X2_ph and y_ph are assumed names for the classifier's input
        # placeholders, defined in the omitted surrounding code
        _, cls_train_cost = sess.run([train_op, cls_train_cost_sb],
                                     feed_dict={X1_ph: X1_batch,
                                                X2_ph: X2_batch,
                                                y_ph: y_batch})
        ttl_cls_train_cost += cls_train_cost * len(X1_batch)
        n_exp += len(X1_batch)
    ttl_cls_train_cost /= float(n_exp)
    # run_train_cost is assumed to be initialised earlier in the omitted code
    run_train_cost = 0.1 * ttl_cls_train_cost + 0.9 * run_train_cost

    print('validating classifier')
    n_exp = 0
    for X1_batch, X2_batch, y_batch in valid_iter:
        # the snippet ends here; the validation pass would mirror the training
        # loop above, running the validation cost without the train_op
        ...
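# The run_train_cost update above is an exponential moving average of the
# per-epoch training cost. As a standalone sketch of that smoothing step,
# with the 0.9 decay taken from the snippet:
def ema_update(new_cost, running_cost, decay=0.9):
    # running_cost is assumed to be initialised before the first update,
    # e.g. to the first epoch's cost
    return (1.0 - decay) * new_cost + decay * running_cost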
def train():
    learning_rate = 0.001
    batchsize = 64
    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=None,
                         percent_decrease=0)

    X_train, y_train, X_test, y_test = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    seq = model(nclass=nclass, h=h, w=w, c=c)
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_ph = tf.placeholder('float32', [None, nclass])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    # required for BatchNormalization layer
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)

        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb], feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)

            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break
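# The entropy cost used above is presumably imported from tensorgraph's cost
# module alongside accuracy. For reference, a minimal binary cross-entropy
# with the same call signature might look like this (the library's own
# implementation may differ):
def entropy_sketch(ytrue, ypred):
    eps = 1e-7
    ypred = tf.clip_by_value(ypred, eps, 1.0 - eps)
    return -tf.reduce_mean(ytrue * tf.log(ypred) + (1.0 - ytrue) * tf.log(1.0 - ypred))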