offset = 0
i = 0  # mini-batch counter (initialization assumed; not shown in the fragment)
while offset < training_ins_sz:
    # Every 50 mini batches, score the eval set with the current embeddings.
    if i % 50 == 0:
        [_w_embedding] = sess.run([w_embedding])
        eval_ins = data_helper.get_eval_ins_embedding(_w_embedding, EMBEDDING_SIZE)
        eval_res = sess.run(eval_output, feed_dict={eval_x: eval_ins})
        eval_auc(eval_res, data_helper.eval_label)
        sys.stdout.write("Mini batch trained:")
        sys.stdout.flush()

    [label, ins] = data_helper.get_next_batch(MINI_BATCH_SIZE, offset=offset)
    offset += MINI_BATCH_SIZE
    _feed_dict = {y: label}
    for k in range(len(x)):
        _feed_dict[x[k]] = ins[k]

    if NORMALIZE_LAYER == 1:
        # Also fetch the batch-norm parameters and intermediate activations
        # for inspection.
        (_, _loss, _w_embedding, _scale, _beta, _pop_mean, _pop_var,
         _h_embedding, _h_embedding_norm, _h_fc1, _output) = sess.run(
            [train_step, loss, w_embedding, embedding_scale, embedding_beta,
             embedding_pop_mean, embedding_pop_var, h_embedding,
             h_embedding_norm, h_fc1, output],
            feed_dict=_feed_dict)
    else:
        _, _loss = sess.run([train_step, loss], feed_dict=_feed_dict)

    sys.stdout.write(" %d" % i)
    sys.stdout.flush()
    i += 1
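# A minimal sketch of the eval_auc helper the loop above assumes; the repo's
# actual implementation may differ, and the imports and body here are
# assumptions layered on the fragment, not taken from the source.
import sys

import numpy as np
from sklearn.metrics import roc_auc_score

def eval_auc(eval_res, eval_label):
    # Flatten model outputs to a 1-D score vector and report ROC AUC
    # against the binary labels.
    scores = np.asarray(eval_res).reshape(-1)
    auc = roc_auc_score(np.asarray(eval_label).reshape(-1), scores)
    sys.stdout.write(" AUC: %.4f\n" % auc)
    sys.stdout.flush()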
# Restore from a checkpoint when one is given, otherwise initialize.
if restore_file:
    saver.restore(sess, restore_file)
else:
    sess.run(init)

trainWriter = tf.summary.FileWriter('../report/tf-log/train', graph=sess.graph)
testWriter = tf.summary.FileWriter('../report/tf-log/test', graph=sess.graph)

tf.logging.info(green(SimpleNet.print_total_params()))

while iters.eval() <= iter_limit:
    current_iter = iters.eval()
    if current_iter % 5 == 0:
        tf.logging.info(f'Iter:{current_iter}...')

    batch_xs, batch_ys = dh.get_next_batch(iter_based=True, split_channels=True)
    sess.run(update, feed_dict={x: batch_xs, y: batch_ys, keep_prob: dropout_keep})

    if current_iter % 10 == 0:
        evals = evaluate.evaluate(sess, infer, x, y, keep_prob, batch_xs,
                                  batch_ys, dropout_keep,
                                  '../report/train_progress.csv')
        # keep_prob of 1.0 disables dropout while recording summaries.
        s = sess.run(summaries, feed_dict={x: batch_xs, y: batch_ys,
                                           keep_prob: 1.0})
        trainWriter.add_summary(s, current_iter)

    if current_iter % 50 == 0:
        tf.logging.info(yellow('Testing...'))
        batch_xs, batch_ys = dh.get_next_batch(iter_based=True, force_test=True,
                                               split_channels=True)
        evals = evaluate.evaluate(sess, infer, x, y, keep_prob, batch_xs,
                                  batch_ys, dropout_keep,
                                  '../report/test_progress.csv')
        # run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # run_metadata = tf.RunMetadata()
        # s = sess.run(summaries,
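# For context on the keep_prob feeds above: a typical TF1 dropout wiring,
# shown as a hedged sketch (the placeholder and activation names here are
# illustrative, not this repo's actual graph). Feeding keep_prob=1.0 turns the
# dropout layer into an identity, which is why evaluation and summary runs
# feed 1.0 while training steps feed dropout_keep.
import tensorflow as tf

keep_prob = tf.placeholder(tf.float32, name='keep_prob')
h_fc = tf.placeholder(tf.float32, shape=[None, 128], name='h_fc')  # stand-in activation
h_fc_drop = tf.nn.dropout(h_fc, keep_prob=keep_prob)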
print(sess.run(W_conv1), sess.run(b_conv1), sess.run(W_conv2), sess.run(b_conv2))

test_data, test_labels = dh.get_test_data(test_size)

epoch = 1
train_start = time.time()
while epoch <= epochs:
    epoch_start = time.time()
    print('Training Epoch {}...'.format(epoch))

    # Rebuild the data helper each epoch; test_idx = 19000 gives roughly an
    # 83% train/test split.
    dh = DataHelper(batch_size, test_idx=test_start)

    step = 1
    # Train until step * batch_size covers the training images.
    while step * batch_size < test_start:
        # TODO: get data in proper format.
        batch_xs, batch_ys = dh.get_next_batch()
        sess.run(optimizer, feed_dict={x: batch_xs,
                                       y_: batch_ys,
                                       keep_prob: dropout_prob})

        if step % display_step == 0:
            # Evaluate on the current batch with dropout disabled.
            acc = sess.run(rmse, feed_dict={x: batch_xs,
                                            y_: batch_ys,
                                            keep_prob: 1.0})
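# The rmse tensor evaluated above is not defined in this fragment. A common
# TF1 definition, shown as a hedged sketch (y_conv stands in for the model's
# prediction tensor; shapes are illustrative only):
import tensorflow as tf

y_conv = tf.placeholder(tf.float32, shape=[None, 30])  # stand-in predictions
y_ = tf.placeholder(tf.float32, shape=[None, 30])      # stand-in targets
rmse = tf.sqrt(tf.reduce_mean(tf.square(y_conv - y_)))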
    for k in range(len(x)):
        eval_feed_dict[x[k]] = _eval_ins[k]
    _eval_res = sess.run(output, feed_dict=eval_feed_dict)
    eval_offset += eval_batch_sz
    eval_res.extend(_eval_res)
    eval_label.extend(_eval_label)
    sys.stdout.write(" %d" % j)
    sys.stdout.flush()
    j += 1
eval_auc(eval_res, eval_label)

[label, ins] = data_helper.get_next_batch(MINI_BATCH_SIZE, offset=offset)
offset += MINI_BATCH_SIZE
_feed_dict = {y: label}
for k in range(len(x)):
    _feed_dict[x[k]] = ins[k]
_, _loss = sess.run([train_step, loss], feed_dict=_feed_dict)
sys.stdout.write(" %d" % i)
sys.stdout.flush()
training_loss += _loss * len(ins)
i += 1
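# Once the enclosing training loop completes, the running sums accumulated
# above can be reported. A minimal sketch, assuming training_ins_sz is the
# total instance count (the name is taken from the companion fragment and is
# not defined here):
avg_loss = training_loss / float(training_ins_sz)
sys.stdout.write("\nAvg training loss: %.6f over %d mini batches\n" % (avg_loss, i))
sys.stdout.flush()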
def _train_network(net, eval_net):
    global params
    global x
    global y

    iters = tf.Variable(1, trainable=False)

    if params['decay_steps']:
        learning_rate = tf.train.exponential_decay(params['start_learning_rate'],
                                                   iters,
                                                   params['decay_steps'],
                                                   params['decay_base'])
    else:
        learning_rate = tf.Variable(params['start_learning_rate'],
                                    trainable=False)

    with tf.name_scope('loss'):
        #loss_weights = 1.003 - tf.reduce_max(y, axis=1)
        # Alternative losses kept from experimentation (currently unused).
        kl = lambda p, q: tf.losses.softmax_cross_entropy(
            p, q, reduction=tf.losses.Reduction.MEAN)
        hs_kl = lambda p, q: tf.multiply(0.5, tf.square(kl(p, q)))

        loss = tf.losses.softmax_cross_entropy(
            y, net, weights=1.0, reduction=tf.losses.Reduction.MEAN)
        #loss = tf.nn.softmax_cross_entropy_with_logits(logits=net,
        #                                               labels=y,
        #                                               weights=loss_weights,
        #                                               reduction=tf.losses.Reduction.MEAN)

    #optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
    #                                       momentum=params['momentum'])
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    grads = optimizer.compute_gradients(loss)

    with tf.name_scope('clipping'):
        grads = [(tf.clip_by_value(grad, -1.5, 1.5), var) for grad, var in grads]

    update = optimizer.apply_gradients(grads, global_step=iters)

    # with tf.name_scope('grads'):
    #     for grad, var in grads:
    #         tf.summary.histogram(f"{var.name.split(':')[0]}", grad)
    # with tf.name_scope('weights'):
    #     for grad, var in grads:
    #         tf.summary.histogram(f"{var.name.split(':')[0]}", var)

    learning_rate_reduce = params['learning_rate_reduce']

    # Accuracy grows as the model improves, so 0.0 is a safe starting value;
    # a more general implementation would parameterize this.
    top_result = 0.0

    # transform_func is given by name in the config and resolved with eval().
    dh = DataHelper(batch_size=params['batch_size'],
                    train_size=params['train_size'],
                    label_noise=params['label_noise'],
                    bands=params['bands'],
                    transform_func=(eval(params['trans_func'])
                                    if params['trans_func'] else None))

    with tf.name_scope('metrics'):
        evaluate.evaluate_tensorboard(eval_net, y)

    summaries = tf.summary.merge_all()
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        if params['restore']:
            saver.restore(sess, tf.train.latest_checkpoint(params['model_dir']))
        else:
            sess.run(init)

        trainWriter = tf.summary.FileWriter(params['tf_train_dir'], graph=sess.graph)
        testWriter = tf.summary.FileWriter(params['tf_test_dir'], graph=sess.graph)

        # run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
        # run_metadata = tf.RunMetadata()
        run_options = None
        run_metadata = None

        while iters.eval() < params['iter_limit']:
            current_iter = iters.eval()

            if learning_rate_reduce and current_iter in learning_rate_reduce:
                sess.run(learning_rate.assign(learning_rate.eval() / 10))

            if params['print']:
                tf.logging.info(f"Training iter:{current_iter}")

            batch_xs, batch_ys = dh.get_next_batch(iter_based=True)
            batch = {x: batch_xs, y: batch_ys}
            sess.run(update, feed_dict=batch)

            if current_iter % 10 == 0:
                if params['print']:
                    tf.logging.info("Evaluating")
                s = sess.run(summaries, feed_dict=batch)
                trainWriter.add_summary(s, current_iter)

            if current_iter % 100 == 0:
                if params['print']:
                    tf.logging.info('Testing')
                batch_xs, batch_ys = dh.get_next_batch(force_test=True)
                batch[x] = batch_xs
                batch[y] = batch_ys
                s = sess.run(summaries, feed_dict=batch)
                testWriter.add_summary(s, current_iter)
                evals = evaluate.evaluate(sess, eval_net, x, y, batch_xs,
                                          batch_ys, params['test_progress'])

                if params['save_progress'] and evals[0] > top_result:
                    if params['print']:
                        tf.logging.info('Saving checkpoint')
                    model_path = os.path.join(params['model_dir'], 'res-net.ckpt')
                    saver.save(sess, model_path, global_step=iters)
                    top_result = evals[0]

    # This needs to be printed so that the async trainer can see the result.
    if params['rtrn_eval']:
        print(top_result)
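# A hedged sketch of the params dict _train_network reads; the keys are taken
# from the function body above, the values are illustrative defaults only.
params = {
    'start_learning_rate': 1e-3,
    'decay_steps': 10000,              # falsy selects a fixed learning rate
    'decay_base': 0.96,
    'learning_rate_reduce': None,      # or a list of iters at which lr /= 10
    'momentum': 0.9,
    'batch_size': 64,
    'train_size': 0.8,
    'label_noise': 0.0,
    'bands': None,
    'trans_func': None,                # name of a transform, resolved via eval()
    'restore': False,
    'model_dir': '../models',
    'tf_train_dir': '../report/tf-log/train',
    'tf_test_dir': '../report/tf-log/test',
    'iter_limit': 50000,
    'print': True,
    'save_progress': True,
    'test_progress': '../report/test_progress.csv',
    'rtrn_eval': True,
}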
def _train_network(net):
    global params
    global x
    global y

    iters = tf.Variable(0, trainable=False)

    if params['decay_steps']:
        learning_rate = tf.train.exponential_decay(params['start_learning_rate'],
                                                   iters,
                                                   params['decay_steps'],
                                                   params['decay_base'])
    else:
        learning_rate = tf.Variable(params['start_learning_rate'])

    cost = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=net, labels=y))

    # TODO: find a way to parameterize the optimizer.
    # MomentumOptimizer's third positional argument is use_locking, so
    # Nesterov momentum must be passed by keyword.
    optimizer = tf.train.MomentumOptimizer(learning_rate,
                                           params['momentum'],
                                           use_nesterov=params['nesterov'])
    optimize = optimizer.minimize(cost, global_step=iters)

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()

    learning_rate_reduce = params['learning_rate_reduce']
    start = time.time()

    # Accuracy grows as the model improves, so 0.0 is a safe starting value;
    # a more general implementation would parameterize this.
    top_result = 0.0

    with tf.Session() as sess:
        sess.run(init)
        for epoch in range(1, params['epoch_limit'] + 1):
            if params['print']:
                print(epoch)

            epoch_start = time.time()
            # transform_func is given by name in the config and resolved
            # with eval().
            dh = DataHelper(batch_size=params['batch_size'],
                            train_size=params['train_size'],
                            label_noise=params['label_noise'],
                            bands=params['bands'],
                            transform_func=(eval(params['trans_func'])
                                            if params['trans_func'] else None))

            if learning_rate_reduce and epoch in learning_rate_reduce:
                sess.run(learning_rate.assign(learning_rate.eval() / 10.0))

            while dh.training:
                batch_xs, batch_ys = dh.get_next_batch()
                sess.run(optimize, feed_dict={x: batch_xs, y: batch_ys})

                if iters.eval() % 20 == 0:
                    evaluate.evaluate(sess, net, x, y, batch_xs, batch_ys,
                                      params['train_progress'])

            # Testing
            batch_xs, batch_ys = dh.get_next_batch()
            results = evaluate.evaluate(sess, net, x, y, batch_xs, batch_ys,
                                        params['test_progress'])

            if params['save_progress'] and results[0] > top_result:
                if params['print']:
                    print('Saving checkpoint')
                saver.save(sess, params['model_dir'], global_step=iters)
                top_result = results[0]

            if params['print']:
                print('Epoch took {} seconds'.format(time.time() - epoch_start))

    if params['rtrn_eval']:
        print(top_result)
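# Both versions of _train_network rely on tf.train.exponential_decay, which
# computes lr(step) = start_learning_rate * decay_base ** (step / decay_steps).
# A quick standalone check with illustrative values:
import tensorflow as tf

step = tf.Variable(1000, trainable=False)
lr = tf.train.exponential_decay(0.001, step, decay_steps=1000, decay_rate=0.96)
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    print(sess.run(lr))  # 0.001 * 0.96 ** (1000 / 1000) = 0.00096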