import logging
import os

import numpy as np
import tensorflow as tf
import tensorflow.contrib as tc
import tensorgraph as tg
from tensorflow.python.framework import ops

import model  # local module holding all model definitions

logger = logging.getLogger(__name__)

if __name__ == '__main__':
    learning_rate = 0.001
    max_epoch = 150
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=4,
                         percent_decrease=0)

    # HVSMRdataset comes from a local data module
    dataset = HVSMRdataset('./datasetHVSMR16Heart')
    assert dataset.AbleToRetrieveData(), 'not able to locate the directory of dataset'
    dataset.InitDataset(splitRatio=1.0, shuffle=True)  # take everything (100%)

    X_ph = tf.placeholder('float32', [None, 181, 239, 165, 1])
    y_ph = tf.placeholder('uint8', [None, 181, 239, 165, 1])
    # variable-size alternative:
    # X_ph = tf.placeholder('float32', [None, None, None, None, 1])
    # y_ph = tf.placeholder('uint8', [None, None, None, None, 1])

    # one-hot encode the 3 segmentation classes: [?, 181, 239, 165, 1, 3]
    y_ph_cat = tf.one_hot(y_ph, 3)
    # drop the singleton channel axis: [?, 181, 239, 165, 3]
    y_ph_cat = y_ph_cat[:, :, :, :, 0, :]
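# Note on the slice above: `tf.one_hot` appends the class axis, so the
# [N, 181, 239, 165, 1] uint8 label volume becomes [N, 181, 239, 165, 1, 3]
# and the singleton channel axis must be dropped before use. A small,
# self-contained shape check (illustrative dummy shape, not part of the
# training script):
def _one_hot_shape_demo():
    labels = tf.zeros([2, 4, 4, 4, 1], dtype='uint8')
    cat = tf.one_hot(labels, 3)        # -> [2, 4, 4, 4, 1, 3]
    cat = cat[:, :, :, :, 0, :]        # -> [2, 4, 4, 4, 3]
    return cat.get_shape().as_list()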
def train():
    # hyperparameters
    sent_len = 50
    word_len = 20
    ch_embed_dim = 100
    unicode_size = 128
    tfidf_dim = 1000
    tfidf_embed_dim = 1000
    fc_dim = 1000
    batchsize = 32
    train_valid_ratio = [5, 1]
    learning_rate = 0.001

    num_train = 10000
    components = [65]
    train_X, valid_X, train_ys, valid_ys = infodocs(num_train, word_len,
                                                    sent_len, components)
    # alternative datasets:
    # train_X, valid_X, train_ys, valid_ys, components = BASF_char(num_train, word_len, sent_len)
    # train_X, valid_X, train_ys, valid_ys = twenty_newsgroup(num_train, word_len, sent_len, components, use_sean=True)

    print('num train', len(train_X))
    print('num valid', len(valid_X))

    trainset_X = SequentialIterator(train_X, batchsize=batchsize)
    trainset_y = SequentialIterator(*train_ys, batchsize=batchsize)
    validset_X = SequentialIterator(valid_X, batchsize=batchsize)
    validset_y = SequentialIterator(*valid_ys, batchsize=batchsize)

    # define placeholders
    X_ph = tf.placeholder('int32', [None, sent_len, word_len])
    y_phs = []
    for comp in components:
        y_phs.append(tf.placeholder('float32', [None, comp]))

    # define the graph model structure: character CNN
    start = StartNode(input_vars=[X_ph])
    embed_n = HiddenNode(prev=[start],
                         layers=[Reshape(shape=(-1, word_len)),
                                 Embedding(cat_dim=unicode_size,
                                           encode_dim=ch_embed_dim,
                                           zero_pad=True),
                                 Reshape(shape=(-1, ch_embed_dim, word_len, 1))])
    # output height/width of the VALID convolution below
    h1, w1 = valid(ch_embed_dim, word_len, strides=(1, 1),
                   filters=(ch_embed_dim, 4))
    conv1_n = HiddenNode(prev=[embed_n],
                         layers=[Conv2D(input_channels=1, num_filters=10,
                                        padding='VALID',
                                        kernel_size=(ch_embed_dim, 4),
                                        stride=(1, 1)),
                                 RELU(),
                                 Flatten(),
                                 Linear(int(h1 * w1 * 10), 1000),
                                 RELU(),
                                 Reshape((-1, sent_len, 1000)),
                                 ReduceSum(1),
                                 BatchNormalization(layer_type='fc', dim=1000,
                                                    short_memory=0.01)])
    # (disabled variants explored earlier: extra conv branches with kernel
    #  widths 2/3/4 merged by Concat, a fully-connected layer into a summed
    #  TF-IDF embedding via WordsCombined, and a hierarchical softmax chain
    #  over the remaining components)
    out_n = HiddenNode(prev=[conv1_n],
                       layers=[Linear(prev_dim=1000, this_dim=components[0]),
                               Softmax()])
    end_nodes = [EndNode(prev=[out_n])]
    graph = Graph(start=[start], end=end_nodes)

    train_outs_sb = graph.train_fprop()
    test_outs = graph.test_fprop()

    ttl_mse = []
    accus = []
    for y_ph, out in zip(y_phs, train_outs_sb):
        ttl_mse.append(tf.reduce_mean((y_ph - out) ** 2))
        accus.append(tf.reduce_sum(y_ph * out))
    mse = sum(ttl_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(mse)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        max_epoch = 50
        es = tg.EarlyStopper(max_epoch=max_epoch, epoch_look_back=3,
                             percent_decrease=0.1)
        temp_acc = []
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            train_error = 0
            train_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(trainset_X, trainset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch
                sess.run(optimizer, feed_dict=feed_dict)
                train_outs = sess.run(train_outs_sb, feed_dict=feed_dict)
                train_error += total_mse(train_outs, ys)[0]
                train_accuracy += total_accuracy(train_outs, ys)[0]
                ttl_examples += len(X_batch[0])
            print('train mse', train_error / float(ttl_examples))
            print('train accuracy', train_accuracy / float(ttl_examples))

            valid_error = 0
            valid_accuracy = 0
            ttl_examples = 0
            for X_batch, ys in zip(validset_X, validset_y):
                feed_dict = {X_ph: X_batch[0]}
                for y_ph, y_batch in zip(y_phs, ys):
                    feed_dict[y_ph] = y_batch
                valid_outs = sess.run(test_outs, feed_dict=feed_dict)
                valid_error += total_mse(valid_outs, ys)[0]
                valid_accuracy += total_accuracy(valid_outs, ys)[0]
                ttl_examples += len(X_batch[0])
            print('valid mse', valid_error / float(ttl_examples))
            print('valid accuracy', valid_accuracy / float(ttl_examples))
            temp_acc.append(valid_accuracy / float(ttl_examples))
        print('average accuracy is:\t', sum(temp_acc) / len(temp_acc))
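# The loop above relies on `total_mse` and `total_accuracy` helpers defined
# elsewhere. A minimal sketch of what they are assumed to compute, given
# lists of predicted and target one-hot arrays; the names match the call
# sites above, but the implementations and the one-element-list return
# convention are assumptions, not the original code:
def total_mse(outs, ys):
    """Assumed helper: summed squared error over all output heads,
    so that dividing by the example count yields a mean."""
    ttl = sum(np.sum((out - y) ** 2) for out, y in zip(outs, ys))
    return [ttl]

def total_accuracy(outs, ys):
    """Assumed helper: number of argmax matches over all output heads."""
    ttl = sum(np.sum(out.argmax(axis=1) == y.argmax(axis=1))
              for out, y in zip(outs, ys))
    return [ttl]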
def train(model, data, epoch_look_back=5, max_epoch=100, percent_decrease=0,
          batch_size=64, learning_rate=0.001, weight_regularize=True,
          save_dir=None, restore=False):
    # NOTE: the logging and saving below assume save_dir is provided
    if save_dir:
        logdir = '{}/log'.format(save_dir)
        if not os.path.exists(logdir):
            os.makedirs(logdir)
        model_dir = '{}/model'.format(save_dir)
        if not os.path.exists(model_dir):
            os.makedirs(model_dir)

    train_tf, n_train, valid_tf, n_valid = data(create_tfrecords=True,
                                                batch_size=batch_size)

    y_train_sb = model._train_fprop(train_tf['X'])
    y_valid_sb = model._test_fprop(valid_tf['X'])

    loss_train_sb = tg.cost.mse(y_train_sb, train_tf['y'])
    if weight_regularize:
        # L2-regularize every variable under the model's variable scope
        loss_reg = tc.layers.apply_regularization(
            tc.layers.l2_regularizer(2.5e-5),
            weights_list=[var for var in tf.global_variables()
                          if __MODEL_VARSCOPE__ in var.name])
        loss_train_sb = loss_train_sb + loss_reg

    accu_train_sb = tg.cost.accuracy(y_train_sb, train_tf['y'])
    accu_valid_sb = tg.cost.accuracy(y_valid_sb, valid_tf['y'])
    tf.summary.scalar('train', accu_train_sb)

    if save_dir:
        sav_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
                                     scope=__MODEL_VARSCOPE__ + '/TemplateModel')
        saver = tf.train.Saver(sav_vars)

    # opt = tf.train.RMSPropOptimizer(learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = hvd.DistributedOptimizer(opt)

    # required for BatchNormalization layer
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_op = opt.minimize(loss_train_sb)

    init_op = tf.group(tf.global_variables_initializer(),
                       tf.local_variables_initializer())
    # bcast = hvd.broadcast_global_variables(0)

    # Pin GPU to be used to process local rank (one GPU per process)
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    # config.gpu_options.visible_device_list = str(hvd.local_rank())

    with tf.Session(config=config) as sess:
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run(init_op)
        if restore:
            logger.info('restoring model')
            saver.restore(sess, restore)

        train_writer = tf.summary.FileWriter('{}/train'.format(logdir),
                                             sess.graph)
        # bcast.run()

        es = tg.EarlyStopper(max_epoch, epoch_look_back, percent_decrease)
        epoch = 0
        best_valid_accu = 0
        while True:
            epoch += 1

            pbar = tg.ProgressBar(n_train)
            ttl_train_loss = 0
            for i in range(0, n_train, batch_size):
                pbar.update(i)
                _, loss_train = sess.run([train_op, loss_train_sb])
                ttl_train_loss += loss_train * batch_size
            pbar.update(n_train)
            ttl_train_loss /= n_train
            print('')
            logger.info('epoch {}, train loss {}'.format(epoch, ttl_train_loss))

            pbar = tg.ProgressBar(n_valid)
            ttl_valid_accu = 0
            for i in range(0, n_valid, batch_size):
                pbar.update(i)
                loss_accu = sess.run(accu_valid_sb)
                ttl_valid_accu += loss_accu * batch_size
            pbar.update(n_valid)
            ttl_valid_accu /= n_valid
            print('')
            logger.info('epoch {}, valid accuracy {}'.format(epoch, ttl_valid_accu))

            # early stopping monitors the negative accuracy as an error
            if es.continue_learning(-ttl_valid_accu, epoch=epoch):
                logger.info('best epoch last update: {}'.format(es.best_epoch_last_update))
                logger.info('best valid last update: {}'.format(es.best_valid_last_update))
                if ttl_valid_accu > best_valid_accu:
                    best_valid_accu = ttl_valid_accu
                    if save_dir:
                        save_path = saver.save(sess, model_dir + '/model.tf')
                        print("Best model saved in file: %s" % save_path)
            else:
                logger.info('training done!')
                break

        coord.request_stop()
        coord.join(threads)
def train(dt):
    batchsize = 32
    learning_rate = 0.005
    max_epoch = 1000
    epoch_look_back = 3
    percent_decrease = 0.0
    min_density = 0.01
    num_patch_per_img = 200
    threshold = 0.6

    dt = './save/' + dt
    if not os.path.exists(dt):
        os.makedirs(dt)
    save_path = dt + '/model.tf'

    # patch-based alternative:
    # blks_train, blks_valid, valid_paths = datablks(depth, height, width, batchsize, min_density, num_patch_per_img)
    train_iter, valid_iter, max_shape = fullimage()
    depth, height, width = max_shape

    X_ph = tf.placeholder('float32', [None, depth, height, width, 1])
    M_ph = tf.placeholder('float32', [None, depth, height, width, 1])

    seq = model()
    M_train_s = seq.train_fprop(X_ph)
    M_valid_s = seq.test_fprop(X_ph)

    # mse cost plus thresholded iou/f1 metrics
    train_cost = tf.reduce_mean((M_ph - M_train_s) ** 2)
    train_iou = iou(M_ph, tf.to_float(M_train_s > threshold))
    train_f1 = tg.cost.image_f1(M_ph, tf.to_float(M_train_s > threshold))
    valid_cost = tf.reduce_mean((M_ph - M_valid_s) ** 2)
    valid_iou = iou(M_ph, tf.to_float(M_valid_s > threshold))
    valid_f1 = tg.cost.image_f1(M_ph, tf.to_float(M_valid_s > threshold))

    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_cost)

    with tf.Session() as sess:
        saver = tf.train.Saver()
        init = tf.global_variables_initializer()
        sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)
        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            pbar = ProgressBar(len(train_iter))
            n_exp = 0
            train_mse_score = 0
            train_iou_score = 0
            train_f1_score = 0
            for X_batch, M_batch, shapes in train_iter:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                sess.run(optimizer, feed_dict=feed_dict)
                train_mse_score += sess.run(train_cost, feed_dict=feed_dict) * len(X_batch)
                train_iou_score += sess.run(train_iou, feed_dict=feed_dict) * len(X_batch)
                train_f1_score += sess.run(train_f1, feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            train_mse_score /= n_exp
            print('average patch train mse:', train_mse_score)
            train_iou_score /= n_exp
            print('average patch train iou:', train_iou_score)
            train_f1_score /= n_exp
            print('average patch train f1:', train_f1_score)

            print('..validating')
            pbar = ProgressBar(len(valid_iter))
            n_exp = 0
            valid_mse_score = 0
            valid_iou_score = 0
            valid_f1_score = 0
            for X_batch, M_batch, shapes in valid_iter:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                valid_mse_score += sess.run(valid_cost, feed_dict=feed_dict) * len(X_batch)
                valid_iou_score += sess.run(valid_iou, feed_dict=feed_dict) * len(X_batch)
                valid_f1_score += sess.run(valid_f1, feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            valid_mse_score /= n_exp
            print('average patch valid mse:', valid_mse_score)
            valid_iou_score /= n_exp
            print('average patch valid iou:', valid_iou_score)
            valid_f1_score /= n_exp
            print('average patch valid f1:', valid_f1_score)

            ############################[ Testing ]#############################
            # if epoch % 10 == 0:
            #     print('full image testing')
            #     test(valid_paths, depth, height, width, M_valid_s, sess, threshold)

            if es.continue_learning(valid_error=valid_mse_score):
                print('epoch', epoch)
                print('valid error so far:', valid_mse_score)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                saver.save(sess, save_path)
                print('model saved to:', save_path)
            else:
                print('training done!')
                break
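# The segmentation metrics above call an `iou` helper defined elsewhere.
# A minimal sketch of an intersection-over-union op for binary masks,
# assuming both arguments are float tensors of 0s and 1s with identical
# shape (the name matches the call sites; the implementation is an
# assumption, not the original):
def iou(ytrue, ypred, epsilon=1e-7):
    """Soft IoU: |A * B| / (|A| + |B| - |A * B|), with epsilon for stability."""
    intersection = tf.reduce_sum(ytrue * ypred)
    union = tf.reduce_sum(ytrue) + tf.reduce_sum(ypred) - intersection
    return intersection / (union + epsilon)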
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 1000
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.05
    max_outputs = 10
    noise_type = 'normal'
    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    # X_train, y_train, X_valid, y_valid = Cifar10()
    X_train, y_train, X_valid, y_valid = data_char()
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb,
                                        max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        # gen_optimizer = tf.train.RMSPropOptimizer(gen_learning_rate).minimize(gen_train_cost_sb, var_list=gen_var_list)
        # dis_optimizer = tf.train.RMSPropOptimizer(dis_learning_rate).minimize(dis_train_cost_sb, var_list=dis_var_list)
        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        # WGAN-style weight clipping on the discriminator
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:
                # 3 discriminator updates per generator update
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                # one generator update
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)

            # write one batch of generated images per epoch
            for X_batch, y_batch in data_train:
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                              feed_dict=feed_dict)
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                train_writer.add_summary(G_img)
                train_writer.flush()
                train_writer.close()
                break

    return save_path
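# `generator_cost` and `discriminator_cost` are imported from elsewhere.
# Given that `fake_train` and `real_train` unpack into (class, judge) pairs,
# the costs are assumed to follow an auxiliary-classifier GAN recipe; below
# is a minimal sketch with the same call signatures. The names match the
# call sites above, but the cross-entropy formulation and the assumption
# that both heads output probabilities are mine, not the original code:
def discriminator_cost(y_ph, real_train, fake_train, epsilon=1e-7):
    """Discriminator: classify real images correctly, judge real as 1, fake as 0."""
    real_clss, real_judge = real_train
    fake_clss, fake_judge = fake_train
    class_cost = -tf.reduce_mean(y_ph * tf.log(real_clss + epsilon))
    judge_cost = -tf.reduce_mean(tf.log(real_judge + epsilon)) \
                 - tf.reduce_mean(tf.log(1 - fake_judge + epsilon))
    return class_cost + judge_cost

def generator_cost(y_ph, real_train, fake_train, epsilon=1e-7):
    """Generator: fool the judge and match the conditioning label on fakes."""
    fake_clss, fake_judge = fake_train
    class_cost = -tf.reduce_mean(y_ph * tf.log(fake_clss + epsilon))
    judge_cost = -tf.reduce_mean(tf.log(fake_judge + epsilon))
    return class_cost + judge_cost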
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.3  # 20170616_1459: 0.05, 20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'
    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    # earlier noise_factor runs: 0617_1346: 0.05, 0619_1033: 0.01,
    # 0619_1528: 0.1, 0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()
    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb,
                                        max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        # WGAN-style weight clipping on the discriminator
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            batch_iter = 1
            for X_batch, y_batch in data_train:
                # 3 discriminator updates per generator update
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                # one generator update
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                # snapshot generated images on the first batch of each epoch
                if batch_iter == 1:
                    G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                                  feed_dict=feed_dict)
                    gen_writer = tf.summary.FileWriter('{}/generator/{}'.format(logdir, epoch))
                    gen_writer.add_summary(G_img)
                    gen_writer.flush()
                    gen_writer.close()
                    batch_iter = 0

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                # gan.save(save_path)

            # write one batch of generated images per epoch
            for X_batch, y_batch in data_train:
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                G_train, G_img = gan.sess.run([G_train_sb, gen_train_mg],
                                              feed_dict=feed_dict)
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                train_writer.add_summary(G_img)
                train_writer.flush()
                train_writer.close()
                break

    return save_path
def train(modelclass, dt=None):
    batchsize = 64
    gen_learning_rate = 0.001
    dis_learning_rate = 0.001
    bottleneck_dim = 300
    max_epoch = 2
    epoch_look_back = 3
    percent_decrease = 0
    noise_factor = 0.1  # 20170616_1459: 0.05, 20170616_1951: 0.01
    max_outputs = 10
    noise_type = 'normal'
    print('gen_learning_rate:', gen_learning_rate)
    print('dis_learning_rate:', dis_learning_rate)
    print('noise_factor:', noise_factor)
    print('noise_type:', noise_type)

    if dt is None:
        timestamp = tg.utils.ts()
    else:
        timestamp = dt
    save_path = './save/{}/model'.format(timestamp)
    logdir = './log/{}'.format(timestamp)

    X_train, y_train, X_valid, y_valid = Mnist()
    AuX_train = X_train
    Auy_train = y_train
    aux = np.empty((0, 28, 28, 1), 'float32')  # harvested generated images
    auy = np.empty((0, 10), 'int32')           # harvested generated labels
    # earlier noise_factor runs: 0617_1346: 0.05, 0619_1033: 0.01,
    # 0619_1528: 0.1, 0619_1944: 0.3
    # X_train, y_train, X_valid, y_valid = Cifar100()
    # X_train, y_train, X_valid, y_valid = Cifar10(contrast_normalize=False, whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape

    data_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    data_valid = tg.SequentialIterator(X_valid, y_valid, batchsize=batchsize)

    print('\n====== Before augment data size', X_train.shape, '======\n')

    gan = getattr(model, modelclass)(h, w, c, nclass, bottleneck_dim)
    y_ph, noise_ph, G_train_sb, G_test_sb, gen_var_list = gan.generator()
    real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator()
    # real_ph, real_train, real_valid, fake_train, fake_valid, dis_var_list = gan.discriminator_allconv()
    print('..using model:', gan.__class__.__name__)

    print('Generator Variables')
    for var in gen_var_list:
        print(var.name)
    print('\nDiscriminator Variables')
    for var in dis_var_list:
        print(var.name)

    with gan.tf_graph.as_default():
        gen_train_cost_sb = generator_cost(y_ph, real_train, fake_train)
        fake_clss, fake_judge = fake_train
        dis_train_cost_sb = discriminator_cost(y_ph, real_train, fake_train)

        gen_train_sm = tf.summary.image('gen_train_img', G_train_sb,
                                        max_outputs=max_outputs)
        gen_train_mg = tf.summary.merge([gen_train_sm])
        gen_train_cost_sm = tf.summary.scalar('gen_cost', gen_train_cost_sb)
        dis_train_cost_sm = tf.summary.scalar('dis_cost', dis_train_cost_sb)
        cost_train_mg = tf.summary.merge([gen_train_cost_sm, dis_train_cost_sm])

        gen_optimizer = tf.train.AdamOptimizer(gen_learning_rate).minimize(
            gen_train_cost_sb, var_list=gen_var_list)
        dis_optimizer = tf.train.AdamOptimizer(dis_learning_rate).minimize(
            dis_train_cost_sb, var_list=dis_var_list)
        # WGAN-style weight clipping on the discriminator
        clip_D = [p.assign(tf.clip_by_value(p, -0.01, 0.01)) for p in dis_var_list]

        init = tf.global_variables_initializer()
        gan.sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)

        ttl_iter = 0
        error_writer = tf.summary.FileWriter(logdir + '/experiment', gan.sess.graph)
        img_writer = tf.summary.FileWriter('{}/orig_img'.format(logdir))
        orig_sm = tf.summary.image('orig_img', real_ph, max_outputs=max_outputs)
        img_writer.add_summary(orig_sm.eval(session=gan.sess,
                                            feed_dict={real_ph: data_train[:100].data[0]}))
        img_writer.flush()
        img_writer.close()

        for epoch in range(1, max_epoch):
            print('epoch:', epoch)
            print('..training')
            print('..logdir', logdir)
            pbar = tg.ProgressBar(len(data_train))
            n_exp = 0
            ttl_gen_cost = 0
            ttl_dis_cost = 0
            error_writer.reopen()
            for X_batch, y_batch in data_train:
                # 3 discriminator updates per generator update
                for i in range(3):
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    gan.sess.run([dis_optimizer, clip_D], feed_dict=feed_dict)
                # one generator update
                if noise_type == 'normal':
                    noise = np.random.normal(loc=0, scale=noise_factor,
                                             size=(len(X_batch), bottleneck_dim))
                else:
                    noise = np.random.uniform(-1, 1,
                                              size=(len(X_batch), bottleneck_dim)) * noise_factor
                feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                gan.sess.run(gen_optimizer, feed_dict=feed_dict)

                fake_judge_v, cost_train, gen_cost, dis_cost = gan.sess.run(
                    [fake_judge, cost_train_mg, gen_train_cost_sb, dis_train_cost_sb],
                    feed_dict=feed_dict)
                ttl_gen_cost += gen_cost * len(X_batch)
                ttl_dis_cost += dis_cost * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
                error_writer.add_summary(cost_train, n_exp + ttl_iter)
            error_writer.flush()
            error_writer.close()
            ttl_iter += n_exp

            mean_gan_cost = ttl_gen_cost / n_exp
            mean_dis_cost = ttl_dis_cost / n_exp
            print('\nmean train gen cost:', mean_gan_cost)
            print('mean train dis cost:', mean_dis_cost)

            if save_path and epoch == max_epoch - 1:
                dname = os.path.dirname(save_path)
                if not os.path.exists(dname):
                    os.makedirs(dname)
                print('saved to {}'.format(dname))
                train_writer = tf.summary.FileWriter('{}/experiment/{}'.format(logdir, epoch))
                # harvest generated samples over the whole training set
                for X_batch, y_batch in data_train:
                    if noise_type == 'normal':
                        noise = np.random.normal(loc=0, scale=noise_factor,
                                                 size=(len(X_batch), bottleneck_dim))
                    else:
                        noise = np.random.uniform(-1, 1,
                                                  size=(len(X_batch), bottleneck_dim)) * noise_factor
                    feed_dict = {noise_ph: noise, real_ph: X_batch, y_ph: y_batch}
                    G_train, G_img, fake_dis = gan.sess.run(
                        [G_train_sb, gen_train_mg, fake_train], feed_dict=feed_dict)
                    fake_class_dis, fake_judge_dis = fake_dis
                    # keep only the fakes the judge scores above 0.5
                    idx = [i for i, v in enumerate(fake_judge_dis) if v > 0.5]
                    aux = np.concatenate((aux, G_train[idx]), axis=0)
                    auy = np.concatenate((auy, fake_class_dis[idx]), axis=0)
                    # prepend every generated batch to the real data
                    AuX_train = np.concatenate((G_train, AuX_train), axis=0)
                    Auy_train = np.concatenate((y_batch, Auy_train), axis=0)
                    train_writer.add_summary(G_img)
                train_writer.flush()
                train_writer.close()
                xname = 'genx.npy'
                yname = 'geny.npy'
                np.save('{}/{}'.format(logdir, xname), aux)
                np.save('{}/{}'.format(logdir, yname), auy)

    print('\n====== Augment data size', AuX_train.shape, '======\n')
    print('\n====== Augment data size', Auy_train.shape, '======\n')
    return save_path, X_train, y_train, X_valid, y_valid, AuX_train, Auy_train, aux, auy
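# The harvested (aux, auy) arrays hold only the generated samples whose
# judge score exceeded 0.5, while (AuX_train, Auy_train) prepend every
# generated batch to the real data. A typical follow-up, illustrative only
# and not part of the original script, is to wrap the augmented set in the
# same iterator the other scripts use and train a classifier on it:
def augmented_iter(AuX_train, Auy_train, batchsize=64):
    """Iterate over real-plus-generated training data."""
    return tg.SequentialIterator(AuX_train, Auy_train, batchsize=batchsize)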
def train(model_name, data_name, fout):
    batchsize = 32
    learning_rate = 0.001
    max_epoch = 100
    epoch_look_back = 3
    percent_decrease = 0.0
    train_valid = [5, 1]

    case = data_name
    model = model_name
    if case == 'skin':
        patch_size = [128, 128]
        data = Skin(batchsize=batchsize, train_valid=train_valid)
    if case == 'iris':
        patch_size = [64, 64]
        data = Iris(batchsize=batchsize, patch_size=patch_size,
                    train_valid=train_valid)

    X_ph = tf.placeholder('float32', [None] + patch_size + [3])
    M_ph = tf.placeholder('float32', [None] + patch_size + [1])

    if model == 'fcn':
        model = fcn()
        M_train_s = model.train_fprop(X_ph)
        M_valid_s = model.test_fprop(X_ph)
    if model == 'resnet':
        model = resnet()
        M_train_s = model.train_fprop(X_ph)
        M_valid_s = model.test_fprop(X_ph)
    if model == 'crf_rnn':
        M_train_s, M_valid_s = crf_rnn(X_ph)
    if model == 'resnet_crf_rnn':
        M_train_s, M_valid_s = resnet_crf_rnn(X_ph)

    h, w = patch_size
    train_mse = tf.reduce_mean((M_ph - M_train_s) ** 2)
    valid_mse = tf.reduce_mean((M_ph - M_valid_s) ** 2)
    train_entropy = tg.cost.entropy(M_ph, M_train_s)

    data_train, data_valid = data.make_data()

    # optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_mse)
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(train_entropy)

    with tf.Session() as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        es = tg.EarlyStopper(max_epoch=max_epoch,
                             epoch_look_back=epoch_look_back,
                             percent_decrease=percent_decrease)
        best_valid_f1 = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            print('..training')
            pbar = ProgressBar(len(data_train))
            n_exp = 0
            train_mse_score = 0
            for X_batch, M_batch in data_train:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                sess.run(optimizer, feed_dict=feed_dict)
                train_mse_score += sess.run(train_mse, feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            train_mse_score /= n_exp
            print('\nmean train mse:', train_mse_score)

            print('..validating')
            pbar = ProgressBar(len(data_valid))
            n_exp = 0
            valid_mse_score = 0
            valid_f1_score = 0
            best_th = 0
            for X_batch, M_batch in data_valid:
                feed_dict = {X_ph: X_batch, M_ph: M_batch}
                ypred_valid = sess.run(M_valid_s, feed_dict=feed_dict)
                if model in ['crf_rnn', 'resnet_crf_rnn']:
                    ypred_valid = ypred_valid[0]
                # fixed threshold of 0.25 for both the skin and iris cases
                th = 0.25
                ypvalid = ypred_valid > th
                max_fscore = f1_score(M_batch.flatten(), ypvalid.flatten())
                # alternative: sweep th over [0, 0.7) in steps of 0.01 and
                # keep the best f1 score (disabled)
                valid_f1_score += max_fscore * len(X_batch)
                valid_mse_score += sess.run(valid_mse, feed_dict=feed_dict) * len(X_batch)
                n_exp += len(X_batch)
                pbar.update(n_exp)
            valid_f1_score /= n_exp
            valid_mse_score /= n_exp
            print('\nmean valid f1:', valid_f1_score)
            print('mean valid mse:', valid_mse_score)

            if valid_f1_score > best_valid_f1:
                best_valid_f1 = valid_f1_score
                print('best valid f1:', best_valid_f1)

            if es.continue_learning(valid_error=valid_mse_score):
                print('epoch', epoch)
                print('valid error so far:', valid_mse_score)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
            else:
                print('training done for {model} on {data}'.format(
                    model=model_name, data=data_name))
                fout.write('{model},{data},f1_score:{f1}\n'.format(
                    model=model_name, data=data_name, f1=best_valid_f1))
                fout.write('{model},{data},valid_cost:{valid_cost}\n'.format(
                    model=model_name, data=data_name,
                    valid_cost=es.best_valid_last_update))
                break
def train():
    learning_rate = 0.001
    batchsize = 32
    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=3,
                         percent_decrease=0)
    seq = model()
    X_train, y_train, X_test, y_test = Mnist(flatten=False, onehot=True,
                                             binary=True, datadir='.')
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, 28, 28, 1])
    y_ph = tf.placeholder('float32', [None, 10])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    # required for BatchNormalization layer
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb],
                                         feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)
            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break
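# `entropy` and `accuracy` above are assumed to be the cross-entropy cost
# and argmax-match accuracy from tensorgraph's cost module. A minimal
# sketch of ops with the same call signatures; the binary cross-entropy
# formulation is an assumption, not necessarily the library's exact code:
def entropy(ytrue, ypred, epsilon=1e-7):
    """Mean binary cross-entropy between target and predicted probabilities."""
    return -tf.reduce_mean(ytrue * tf.log(ypred + epsilon)
                           + (1 - ytrue) * tf.log(1 - ypred + epsilon))

def accuracy(ytrue, ypred):
    """Fraction of examples whose predicted argmax matches the label argmax."""
    match = tf.equal(tf.argmax(ytrue, axis=1), tf.argmax(ypred, axis=1))
    return tf.reduce_mean(tf.to_float(match))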
def train():
    learning_rate = 0.001
    batchsize = 64
    max_epoch = 300
    es = tg.EarlyStopper(max_epoch=max_epoch,
                         epoch_look_back=None,
                         percent_decrease=0)
    X_train, y_train, X_test, y_test = Cifar10(contrast_normalize=False,
                                               whiten=False)
    _, h, w, c = X_train.shape
    _, nclass = y_train.shape
    seq = model(nclass=nclass, h=h, w=w, c=c)
    iter_train = tg.SequentialIterator(X_train, y_train, batchsize=batchsize)
    iter_test = tg.SequentialIterator(X_test, y_test, batchsize=batchsize)

    X_ph = tf.placeholder('float32', [None, h, w, c])
    y_ph = tf.placeholder('float32', [None, nclass])

    y_train_sb = seq.train_fprop(X_ph)
    y_test_sb = seq.test_fprop(X_ph)

    train_cost_sb = entropy(y_ph, y_train_sb)
    test_cost_sb = entropy(y_ph, y_test_sb)
    test_accu_sb = accuracy(y_ph, y_test_sb)

    optimizer = tf.train.AdamOptimizer(learning_rate)
    # required for BatchNormalization layer
    update_ops = ops.get_collection(ops.GraphKeys.UPDATE_OPS)
    with ops.control_dependencies(update_ops):
        train_ops = optimizer.minimize(train_cost_sb)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        init = tf.global_variables_initializer()
        sess.run(init)
        best_valid_accu = 0
        for epoch in range(max_epoch):
            print('epoch:', epoch)
            pbar = tg.ProgressBar(len(iter_train))
            ttl_train_cost = 0
            ttl_examples = 0
            print('..training')
            for X_batch, y_batch in iter_train:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                _, train_cost = sess.run([train_ops, train_cost_sb],
                                         feed_dict=feed_dict)
                ttl_train_cost += len(X_batch) * train_cost
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_train_cost = ttl_train_cost / float(ttl_examples)
            print('\ntrain cost', mean_train_cost)

            ttl_valid_cost = 0
            ttl_valid_accu = 0
            ttl_examples = 0
            pbar = tg.ProgressBar(len(iter_test))
            print('..validating')
            for X_batch, y_batch in iter_test:
                feed_dict = {X_ph: X_batch, y_ph: y_batch}
                valid_cost, valid_accu = sess.run([test_cost_sb, test_accu_sb],
                                                  feed_dict=feed_dict)
                ttl_valid_cost += len(X_batch) * valid_cost
                ttl_valid_accu += len(X_batch) * valid_accu
                ttl_examples += len(X_batch)
                pbar.update(ttl_examples)
            mean_valid_cost = ttl_valid_cost / float(ttl_examples)
            mean_valid_accu = ttl_valid_accu / float(ttl_examples)
            print('\nvalid cost', mean_valid_cost)
            print('valid accu', mean_valid_accu)
            if best_valid_accu < mean_valid_accu:
                best_valid_accu = mean_valid_accu

            if es.continue_learning(valid_error=mean_valid_cost, epoch=epoch):
                print('epoch', epoch)
                print('best epoch last update:', es.best_epoch_last_update)
                print('best valid last update:', es.best_valid_last_update)
                print('best valid accuracy:', best_valid_accu)
            else:
                print('training done!')
                break