def evaluate(network):
    """Build the evaluation graph for one of two CIFAR-10 models and run it.

    Args:
        network: Model selector. ``1`` builds the graph from the ``cifar10``
            module; any other value uses ``cifar10_2``.

    ``eval_once`` is called in a loop. The loop ends after the first pass
    when ``FLAGS.run_once`` is set; otherwise it sleeps
    ``FLAGS.eval_interval_secs`` seconds between passes.
    """
    net = cifar10 if network == 1 else cifar10_2

    with tf.Graph().as_default() as graph:
        use_test_split = FLAGS.eval_data == 'test'
        images, labels = net.inputs(eval_data=use_test_split)
        # create_model returns five values; only the logits are used here.
        logits, _w1, _b1, _w2, _b2 = net.create_model(images)

        # Per-example check that the label is the top-1 prediction.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # The decay constant is always read from ``cifar10``, whichever
        # network was selected.
        ema = tf.train.ExponentialMovingAverage(cifar10.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Eval model for a number of steps.

    Builds the inference graph in a fresh ``tf.Graph``, creates a saver over
    the moving-average versions of the learned variables, and calls
    ``eval_once`` in a loop. The loop ends after one pass when
    ``FLAGS.run_once`` is set; otherwise it sleeps
    ``FLAGS.eval_interval_secs`` seconds between passes.
    """
    with tf.Graph().as_default() as graph:
        # Inputs: the test split is used when FLAGS.eval_data == 'test'.
        images, labels = model.inputs(eval_data=(FLAGS.eval_data == 'test'))

        # Logits from the inference model, then the top-1 correctness op.
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate(network, checkpoint_dir):
    """Eval CIFAR-10 once and return the result of ``eval_once``.

    Args:
        network: Model selector. ``1`` uses the ``cifar10`` module; any other
            value uses ``cifar10_2``.
        checkpoint_dir: Forwarded unchanged to ``eval_once``.

    Returns:
        Whatever ``eval_once`` returns for this single evaluation pass.
    """
    net = cifar10 if network == 1 else cifar10_2

    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = net.inputs(eval_data=eval_data)
        # inference returns five values; only the logits are needed here.
        logits, _, _, _, _ = net.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for
        # eval. The decay constant is read from ``cifar10`` for both networks.
        variable_averages = tf.train.ExponentialMovingAverage(
            cifar10.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        # Single pass only: the former periodic re-evaluation loop was
        # disabled and has been removed.
        return eval_once(network, saver, summary_writer, top_k_op,
                         summary_op, checkpoint_dir)
def evaluate():
    """Evaluate the model repeatedly using the legacy summary API.

    Builds inputs, logits and a top-1 correctness op in a fresh graph,
    creates a saver over
    ``ExponentialMovingAverage(...).variables_to_restore()``, and calls
    ``eval_once`` in a loop. The loop stops after the first pass when
    ``FLAGS.run_once`` is set; otherwise it sleeps
    ``FLAGS.eval_interval_secs`` seconds between passes.
    """
    with tf.Graph().as_default():
        use_test_split = FLAGS.eval_data == 'test'
        images, labels = model.inputs(eval_data=use_test_split)
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        summary_op = tf.merge_all_summaries()
        # The writer is given the serialized GraphDef of the default graph.
        summary_writer = tf.train.SummaryWriter(
            FLAGS.eval_dir,
            graph_def=tf.get_default_graph().as_graph_def())

        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Evaluate the model with inference built in non-training mode.

    Prints a confirmation message when the test split is selected
    (``FLAGS.eval_data == 'test'``), builds the graph with
    ``is_training=False``, and calls ``eval_once`` in a loop. The loop stops
    after the first pass when ``FLAGS.run_once`` is set; otherwise it sleeps
    ``FLAGS.eval_interval_secs`` seconds between passes.
    """
    with tf.Graph().as_default() as graph:
        use_test_split = FLAGS.eval_data == 'test'
        if use_test_split:
            print("Everything OK. Testing...")

        images, labels = model.inputs(eval_data=use_test_split)
        logits = model.inference(images, is_training=False)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Run periodic evaluation of the model (legacy summary API).

    The graph holds the inputs, the logits, and an ``in_top_k`` op with
    ``k=1``. Variables are restored through the names produced by
    ``ExponentialMovingAverage.variables_to_restore``. ``eval_once`` runs at
    least once and repeats every ``FLAGS.eval_interval_secs`` seconds unless
    ``FLAGS.run_once`` is set.
    """
    with tf.Graph().as_default():
        images, labels = model.inputs(eval_data=(FLAGS.eval_data == 'test'))
        top_k_op = tf.nn.in_top_k(model.inference(images), labels, 1)

        averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        restore_map = averages.variables_to_restore()
        saver = tf.train.Saver(restore_map)

        summary_op = tf.merge_all_summaries()
        graph_def = tf.get_default_graph().as_graph_def()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
                                                graph_def=graph_def)

        keep_going = True
        while keep_going:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            keep_going = not FLAGS.run_once
            if keep_going:
                time.sleep(FLAGS.eval_interval_secs)
def alt_loader(batch_size):
    """Load a training batch from a fixed, hard-coded dataset directory.

    Args:
        batch_size: Batch size forwarded to ``inputs``.

    Returns:
        The ``(images, labels)`` pair produced by ``inputs`` with one-hot
        labels, in training mode, and ``None`` as the third positional
        argument.
    """
    # NOTE(review): machine-specific absolute path; consider making it
    # configurable.
    data_dir = '/home/owen/projects/akira/build/single-threaded'
    images, labels = inputs(
        data_dir,
        batch_size,
        None,
        one_hot_labels=True,
        train=True,
    )
    return images, labels
def evaluate():
    """Eval model for a number of steps.

    Builds the inference graph in a fresh ``tf.Graph``, creates a saver over
    the moving-average versions of the learned variables, and calls
    ``eval_once`` in a loop. The loop ends after one pass when
    ``FLAGS.run_once`` is set; otherwise it sleeps
    ``FLAGS.eval_interval_secs`` seconds between passes.
    """
    # ``tf.Graph`` is not itself a context manager; ``as_default()`` returns
    # the context manager that installs the graph as the default one.
    with tf.Graph().as_default() as g:
        # Get images and labels for model.
        eval_data = FLAGS.eval_data == 'test'
        images, labels = model.inputs(eval_data=eval_data)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for
        # eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if FLAGS.run_once:
                break
            time.sleep(FLAGS.eval_interval_secs)
def train():
    """Train SUN3D for a number of steps.

    Builds the training graph pinned to ``/gpu:1`` and runs it in a
    ``MonitoredTrainingSession`` that checkpoints to ``TRAIN_LOG``. Training
    stops at global step ``NUM_ITER`` or when the loss becomes NaN. The loss
    of every training step is printed; every 10th session run the logger
    hook also prints throughput.
    """
    with tf.Graph().as_default(), tf.device('/gpu:1'):
        global_step = tf.contrib.framework.get_or_create_global_step()

        # Get images and labels for SUN3D.
        images, depths = model.inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        phase_train = True
        scores = model.inference(images, phase_train)

        # Calculate loss.
        loss = model.loss(scores, depths)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = model.train(loss, global_step)

        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.96)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1

            def before_run(self, run_context):
                self._step += 1
                self._start_time = time.time()
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                duration = time.time() - self._start_time
                loss_value = run_values.results
                if self._step % 10 == 0:
                    num_examples_per_step = BATCH_SIZE
                    examples_per_sec = num_examples_per_step / duration
                    sec_per_batch = float(duration)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=TRAIN_LOG,
                hooks=[
                    tf.train.StopAtStepHook(last_step=NUM_ITER),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(
                    allow_soft_placement=True,
                    gpu_options=gpu_options,
                    log_device_placement=LOG_DEVICE_PLACEMENT)) as mon_sess:
            while not mon_sess.should_stop():
                # Fetch the loss in the same run as the training op. A
                # separate run just for the loss is a second session call per
                # step, which makes the logger hook count two steps per
                # update.
                _, loss_value = mon_sess.run([train_op, loss])
                print(loss_value)
def train():
    """Train CIFAR-100 for a number of steps.

    Builds inputs (pinned to CPU), inference, loss and the training op, then
    runs the training op inside a ``MonitoredTrainingSession`` that
    checkpoints to ``FLAGS.train_dir``. Training stops at global step
    ``FLAGS.max_steps`` or when the loss becomes NaN. Every
    ``FLAGS.log_frequency`` runs the loss and throughput are printed.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Keep the input pipeline on CPU:0 so its ops do not sometimes land
        # on the GPU and slow training down.
        with tf.device('/cpu:0'):
            images, labels = model.inputs()

        logits = model.inference(images)
        loss = model.loss(logits, labels)
        train_op = model.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                loss_value = run_values.results
                examples_per_sec = (
                    FLAGS.log_frequency * FLAGS.batch_size / elapsed)
                sec_per_batch = float(elapsed / FLAGS.log_frequency)

                print('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)' % (datetime.now(), self._step, loss_value,
                                      examples_per_sec, sec_per_batch))

        hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=hooks,
                config=session_config) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def main(_):
    """Evaluate saved checkpoints ``model-0, model-2, ..., model-98``.

    Configures file logging under ``FLAGS.dataset_dir``. For every even
    checkpoint index below 100 it rebuilds the graph, restores that
    checkpoint, runs ``FLAGS.sample_size`` batches from ``test.tfrecord``,
    and logs the loss, F1 and AUC computed by ``f1score`` / ``aucscore``.

    Args:
        _: Unused positional argument.
    """
    stamp = time.strftime("%m%d-%H_%M_%S", time.localtime())
    logging_path = FLAGS.dataset_dir + "output/log/eval_{}.log".format(stamp)
    logging.basicConfig(filename=logging_path,
                        level=logging.INFO,
                        format='%(asctime)s %(message)s\t',
                        datefmt='%Y-%m-%d %H:%M:%S')

    # NOTE(review): machine-specific absolute path to one training log.
    train_log = "/Users/jianbinlin/DDNN/project/stru2vec_data/garbage_account/output/log/train_1221-17_32_10.log"
    logging.info(train_log)
    logging.info(get_para(train_log))

    for epoch in np.arange(0, 100, 2):
        with tf.Graph().as_default():
            label, fea, neig_id, neig_value = model.inputs(
                FLAGS.dataset_dir + "test.tfrecord")
            logit, loss_op = model.inference(label, fea, neig_id, neig_value)
            saver = tf.train.Saver()

            # One [label, prediction, batch loss] row per evaluated example.
            reslist = []
            with tf.Session() as sess:
                sess.run(tf.global_variables_initializer())
                sess.run(tf.local_variables_initializer())
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(coord=coord)

                checkpoint = (FLAGS.dataset_dir +
                              'output/model/model-{}'.format(epoch))
                saver.restore(sess, checkpoint)

                for _batch in xrange(FLAGS.sample_size):
                    res_y, res_y_, res_loss = sess.run(
                        [label, logit, loss_op])
                    for row in xrange(len(res_y)):
                        reslist.append([res_y[row], res_y_[row], res_loss])

                coord.request_stop()
                coord.join(threads)

            eval_loss, f1 = f1score(reslist)
            auc = aucscore(reslist)
            logging.info(
                "epoch: {}, loss:{:.6f}, f1:{:.6f}, auc:{:.6f}".format(
                    epoch, eval_loss, f1, auc))

    logging.info('done')
def train():
    """Train the model inside a ``MonitoredTrainingSession``.

    Inputs are built on CPU:0 with ``model.inputs(False)``. The session
    checkpoints to ``FLAGS.train_dir``, stops at global step
    ``FLAGS.max_steps`` or when the loss becomes NaN, and prints the loss and
    throughput every ``FLAGS.log_frequency`` runs.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        with tf.device('/cpu:0'):
            images, labels = model.inputs(False)

        loss = model.loss(model.inference(images), labels)
        train_op = model.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Prints the loss and timing every ``FLAGS.log_frequency`` runs."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                tick = time.time()
                window = tick - self._start_time
                self._start_time = tick

                examples_per_sec = (
                    FLAGS.log_frequency * FLAGS.batch_size / window)
                sec_per_batch = float(window / FLAGS.log_frequency)
                message = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                           '%.3f sec/batch)')
                print(message % (datetime.now(), self._step,
                                 run_values.results, examples_per_sec,
                                 sec_per_batch))

        session_kwargs = dict(
            checkpoint_dir=FLAGS.train_dir,
            hooks=[
                tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                tf.train.NanTensorHook(loss),
                _LoggerHook(),
            ],
            config=tf.ConfigProto(
                log_device_placement=FLAGS.log_device_placement),
        )
        with tf.train.MonitoredTrainingSession(**session_kwargs) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
def train():
    """Train CIFAR-10 for a number of steps.

    Runs ``FLAGS.max_steps`` training steps in a plain ``tf.Session``. The
    merged summaries are written to ``FLAGS.log_dir`` on every step. Every
    20 steps the loss is printed and a checkpoint is saved. The queue runner
    threads and the summary writer are shut down even when a step raises.
    """
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        images, labels = model.inputs(is_train=True)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images)

        # Calculate loss.
        loss = model.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = model.train(loss, global_step)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            tf.group(tf.global_variables_initializer(),
                     tf.local_variables_initializer()).run()
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
            merged_summary = tf.summary.merge_all()

            try:
                for step in range(FLAGS.max_steps):
                    _, total_loss, summary = sess.run(
                        [train_op, loss, merged_summary])
                    writer.add_summary(summary, global_step=step)

                    if step % 20 == 0:
                        log_format = '%s: step %d, loss %.2f'
                        print(log_format % (datetime.now(), step, total_loss))
                        # NOTE(review): the flattened source does not show
                        # whether the checkpoint was saved every 20 steps or
                        # once after the loop. Periodic saving is assumed
                        # here; confirm.
                        saver.save(sess, FLAGS.log_dir + '/model.ckpt',
                                   global_step=step)
            finally:
                # Always stop the input threads and flush pending summaries,
                # including when a training step raises.
                coord.request_stop()
                coord.join(threads)
                writer.close()
def evaluate():
    """Evaluate the model repeatedly without restoring through a saver here.

    Builds inputs, logits, a summary ``FileWriter`` on ``FLAGS.eval_dir`` and
    a top-1 correctness op, then passes the writer and the op to
    ``eval_once``. ``eval_once`` runs once when ``FLAGS.run_once`` is set;
    otherwise it repeats every ``FLAGS.eval_interval_secs`` seconds.
    """
    with tf.Graph().as_default():
        use_test_split = FLAGS.eval_data == 'test'
        if use_test_split:
            print("Everything OK. Testing...")

        images, labels = model.inputs(eval_data=use_test_split)
        logits = model.inference(images)

        summary_saver = tf.summary.FileWriter(FLAGS.eval_dir)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        eval_once(summary_saver, top_k_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(summary_saver, top_k_op)
def train(train_dir, batch_size, num_batches, log_dir):
    """Train the ``simple`` network with TF-Slim.

    Args:
        train_dir: Data location forwarded to ``inputs``.
        batch_size: Batch size forwarded to ``inputs``.
        num_batches: Forwarded to ``inputs`` as its third argument.
        log_dir: Directory handed to ``slim.learning.train``.
    """
    images, labels = inputs(
        train_dir,
        batch_size,
        num_batches,
        one_hot_labels=True,
        train=True,
    )
    predictions = simple(images)

    # Registers the cross-entropy loss with slim's loss collection; the
    # total is read back from that collection below.
    slim.losses.softmax_cross_entropy(predictions, labels)
    raw_total = slim.losses.get_total_loss()
    total_loss = tf.clip_by_value(raw_total, 1e-10, 1000000.0)
    tf.scalar_summary('loss', total_loss)

    # Plain SGD; an RMSProp(0.001, 0.9) alternative was previously tried.
    optimizer = tf.train.GradientDescentOptimizer(0.001)
    train_op = slim.learning.create_train_op(
        total_loss, optimizer, summarize_gradients=True)

    slim.learning.train(train_op, log_dir, save_summaries_secs=20)
def evaluate():
    """Evaluate the model, repeating until ``FLAGS.run_once`` is set.

    Builds inputs, logits and a top-1 correctness op, creates a saver over
    ``ExponentialMovingAverage(...).variables_to_restore()``, and calls
    ``eval_once`` in a loop with no pause between passes.
    """
    with tf.Graph().as_default() as graph:
        # NOTE(review): the string 'test' is passed as ``is_train``. If
        # ``model.inputs`` treats the argument as a boolean this selects the
        # training path; confirm against ``model.inputs``.
        eval_data = 'test'
        images, labels = model.inputs(is_train=eval_data)
        logits = model.inference(images)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        # NOTE(review): there is no sleep between passes, so with
        # ``run_once`` unset this re-evaluates back to back.
        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Eval SUN3D for a number of steps.

    Builds the inference graph in evaluation mode, adds image summaries for
    the prediction and the ground truth, and builds three error tensors with
    ``evalfunct``. ``eval_once`` runs once when ``EVAL_RUN_ONCE`` is set;
    otherwise it repeats every ``EVAL_INTERVAL_SECS`` seconds.
    """
    with tf.Graph().as_default() as graph:
        # Get images and labels for SUN3D.
        images, depths = model.inputs(eval_data=True)

        # Build the prediction with the training phase switched off.
        phase_train = False
        result = model.inference(images, phase_train=phase_train)

        tf.summary.image('result', result)
        tf.summary.image('gt', depths)

        # Error ops; the prints below show the objects returned at
        # graph-construction time.
        scale_inv_error = evalfunct.scinv(result, depths)
        L1_relative_error = evalfunct.L1rel(result, depths)
        L1_inverse_error = evalfunct.L1inv(result, depths)
        print('scale_inv', scale_inv_error)
        print('L1_relative_error', L1_relative_error)
        print('L1_inverse_error', L1_inverse_error)

        # Restore the moving average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(model.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(TEST_LOG, graph)

        while True:
            print('Start evaluation')
            eval_once(saver, summary_writer, scale_inv_error,
                      L1_relative_error, L1_inverse_error, summary_op)
            if not EVAL_RUN_ONCE:
                time.sleep(EVAL_INTERVAL_SECS)
                continue
            print('end of evaluation')
            break
def tower_loss(scope):
    """Calculate the total loss on a single tower running the MNIST model.

    Args:
        scope: unique prefix string identifying the MNIST tower, e.g.
            'tower_0'.

    Returns:
        Tensor of shape [] containing the total loss for a batch of data.
    """
    # Inputs and inference graph for this tower.
    images, labels = model.inputs(FLAGS.batch_size)
    logits = model.inference(images, keep_prob=0.5)

    # model.loss is called for its side effects; the per-tower losses are
    # then read back from the 'losses' collection, filtered by scope.
    _ = model.loss(logits, labels)
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    if FLAGS.tb_logging:
        # Strip 'tower_[0-9]/' from the names so multi-GPU runs show up
        # under one name per loss on TensorBoard.
        tower_prefix = '%s_[0-9]*/' % model.TOWER_NAME
        for loss_tensor in tower_losses + [total_loss]:
            display_name = re.sub(tower_prefix, '', loss_tensor.op.name)
            tf.summary.scalar(display_name, loss_tensor)

    return total_loss
def train():
    """Train the two-image model and periodically report training accuracy.

    The training op is wrapped in a ``tf.cond``. When the global step is
    positive and divisible by ``constants.TRAIN_ACCURACY_FREQUENCY`` the run
    returns ``(True, in_top_k results)``; otherwise it returns
    ``(False, training op result)``. Accuracy is printed and written as a
    ``train_accuracy`` summary under ``FLAGS.train_dir + '/train_accuracy'``.
    Training stops at global step ``FLAGS.max_steps`` or on a NaN loss.
    """
    with tf.Graph().as_default() as g:
        global_step = tf.contrib.framework.get_or_create_global_step()

        images1, images2, labels = model.inputs(eval_data=False)

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images1, images2)

        # Calculate loss.
        loss = model.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_no_accuracy_op = model.train(loss, global_step)

        def train_accuracy_op():
            # Calculate predictions only after the training op has run.
            with tf.control_dependencies([train_no_accuracy_op]):
                return (True, tf.nn.in_top_k(logits, labels, 1))

        def cond_train_accuracy():
            return tf.logical_and(
                tf.greater(global_step, 0),
                tf.equal(
                    tf.truncatemod(global_step,
                                   constants.TRAIN_ACCURACY_FREQUENCY), 0))

        train_op = tf.cond(cond_train_accuracy(), train_accuracy_op,
                           lambda: (False, train_no_accuracy_op))

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = (FLAGS.log_frequency *
                                        FLAGS.batch_size / duration)
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %4d, loss = %2.2f (%3.1f examples/sec; '
                        '%.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step,
                                        loss_value, examples_per_sec,
                                        sec_per_batch))

        summary_train_acc_writer = tf.summary.FileWriter(
            FLAGS.train_dir + '/train_accuracy')

        kwargs = {
            'checkpoint_dir': FLAGS.train_dir,
            'hooks': [
                tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                tf.train.NanTensorHook(loss),
                _LoggerHook()
            ],
            'config': tf.ConfigProto(
                log_device_placement=FLAGS.log_device_placement),
        }

        try:
            with tf.train.MonitoredTrainingSession(**kwargs) as mon_sess:
                while not mon_sess.should_stop():
                    accuracy, results = mon_sess.run(train_op)
                    if accuracy:
                        true_count = np.sum(results)
                        # Force true division: an integer count divided by an
                        # integer batch size truncates to 0 or 1 on Python 2.
                        accuracy = float(true_count) / constants.BATCH_SIZE

                        format_str = ('%s: Training Accuracy = %.3f')
                        print(format_str % (datetime.now(), accuracy))

                        summary = tf.Summary()
                        summary.value.add(tag='train_accuracy',
                                          simple_value=accuracy)
                        # NOTE(review): this extra run presumably also goes
                        # through the session hooks and may be rejected once
                        # a stop has been requested; confirm.
                        summary_train_acc_writer.add_summary(
                            summary, mon_sess.run(global_step))
        finally:
            # Flush and release the accuracy event file on every exit path.
            summary_train_acc_writer.close()
def evaluate():
    """Evaluate a checkpoint and report per-attribute binary metrics.

    Attribute names are the second whitespace-separated field of each line
    of ``DATASET_ATTRIBUTE_PATH``. The latest checkpoint in
    ``FLAGS.checkpoint_dir`` is restored and the model is run over the
    selected split (training when ``FLAGS.train_data`` is set, test
    otherwise). Predictions are the rounded model outputs. TP/FP/TN/FN are
    counted per attribute, then accuracy, precision, recall and F1 are
    printed and written to ``FLAGS.output``. The "(Total)" row holds the
    summed counts and the unweighted mean of the per-attribute metrics.

    Returns early, after printing a message, when no checkpoint is found.
    The session is closed on every exit path.
    """
    print('[Testing Configuration]')
    print('\tCheckpoint Dir: %s' % FLAGS.checkpoint_dir)
    print('\tDataset: %s data' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tOutput: %s' % FLAGS.output)

    with open(DATASET_ATTRIBUTE_PATH, 'r') as fd:
        attribute_list = [line.strip().split()[1] for line in fd]

    batch_size = FLAGS.batch_size
    num_attr = model.NUM_ATTRS

    with tf.Graph().as_default():
        test_images, test_labels = model.inputs(
            'train' if FLAGS.train_data else 'test', False)
        test_probs = model.inference(test_images)

        if FLAGS.train_data:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
        else:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_TEST

        # Start running operations on the Graph.
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=False))
        try:
            sess.run(init)

            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Open IPython shell, if flag set.
            if FLAGS.embed:
                embed()

            # Start the queue runners.
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)

            # One row per attribute; columns are tp, fp, tn, fn.
            counts = np.zeros((num_attr, 4), dtype=np.int64)
            idx = 0
            while idx < total_cnt:
                print('Current Idx: ' + str(idx))
                end = min(idx + batch_size, total_cnt)
                # The last batch may be only partly needed to reach
                # total_cnt, so count just its first rows.
                this_batch_size = end - idx
                probs_val, labels_val = sess.run([test_probs, test_labels])
                preds = np.around(probs_val)[:this_batch_size, :num_attr]
                truth = np.asarray(labels_val)[:this_batch_size, :num_attr]

                is_pos, is_neg = truth == 1, truth == 0
                said_pos, said_neg = preds == 1, preds == 0
                counts[:, 0] += np.sum(is_pos & said_pos, axis=0)  # tp
                counts[:, 1] += np.sum(is_neg & said_pos, axis=0)  # fp
                counts[:, 2] += np.sum(is_neg & said_neg, axis=0)  # tn
                counts[:, 3] += np.sum(is_pos & said_neg, axis=0)  # fn
                idx = end

            coord.request_stop()
            coord.wait_for_stop(1)
        finally:
            sess.close()

    result_ll = counts.tolist()

    accuracy, precision, recall, f1 = ([], [], [], [])
    for tp, fp, tn, fn in result_ll:
        assert total_cnt == (tp + fp + tn + fn)
        accuracy.append((tp + tn) / float(total_cnt))
        # Undefined ratios (no predicted / no actual positives) report 0.0.
        precision.append(tp / float(tp + fp) if tp + fp else 0.0)
        recall.append(tp / float(tp + fn) if tp + fn else 0.0)
        if precision[-1] == .0 or np.isnan(precision[-1]) or recall[-1] == .0:
            f1.append(0.0)
        else:
            f1.append(2 / (1 / precision[-1] + 1 / recall[-1]))

    tp_t, fp_t, tn_t, fn_t = counts.sum(axis=0).tolist()
    # Total metrics are macro averages over attributes.
    accuracy_t = np.average(accuracy)
    precision_t = np.average(precision)
    recall_t = np.average(recall)
    f1_t = np.average(f1)

    header = 'Attribute\tTP\tFP\tTN\tFN\tAcc.\tPrec.\tRecall\tF1-score'
    format_str = '%-15s %7d %7d %7d %7d %.5f %.5f %.5f %.5f'
    rows = []
    for i in range(num_attr):
        tp, fp, tn, fn = result_ll[i]
        rows.append(format_str % (attribute_list[i].replace(' ', '-'),
                                  tp, fp, tn, fn, accuracy[i], precision[i],
                                  recall[i], f1[i]))
    rows.append(format_str % ('(Total)', tp_t, fp_t, tn_t, fn_t, accuracy_t,
                              precision_t, recall_t, f1_t))

    print(header)
    for row in rows:
        print(row)

    with open(FLAGS.output, 'w') as fd:
        fd.write(header + '\n')
        for row in rows:
            fd.write(row + '\n')
def run_train():
    """Train CAPTCHA for a number of steps.

    Builds two input pipelines and two networks, then trains them according
    to ``TRAIN_METHOD`` ("M1" runs the two training ops in separate session
    calls, "M2" runs the combined op). Status is printed and a summary is
    written every 100 steps. Checkpoints are saved every 5000 steps and when
    an exception interrupts training.

    Returns:
        ``0`` once step 60000 is reached; ``None`` if the loop ends otherwise.
    """
    with tf.Graph().as_default():
        # Read images and labels from the tfrecords files.
        image_one, label_one = captcha.inputs(True, FLAGS.batch_size,
                                              "train_one")
        image_two, label_two = captcha.inputs(True, FLAGS.batch_size,
                                              "train_two")

        # Flatten each image to one row, then transpose so that every
        # column holds one image. Labels are reshaped to 10 columns and
        # transposed the same way.
        flat_shape = [-1, IMAGE_HEIGHT * IMAGE_WIDTH]
        image_one = tf.reshape(image_one, flat_shape)
        image_two = tf.reshape(image_two, flat_shape)
        image_one = tf.transpose(image_one)
        image_two = tf.transpose(image_two)
        label_one = tf.reshape(label_one, [-1, 10])
        label_two = tf.reshape(label_two, [-1, 10])
        label_one = tf.transpose(label_one)
        label_two = tf.transpose(label_two)

        # logits_one and logits_two are the outputs before softmax.
        logits_one, logits_two = captcha.build_net(
            image_one,
            image_two,
            dropout_rate=DROPOUT_RATE,
            regularization_rate=REGULARIZATION_RATE)

        reg_loss_1, reg_loss_2 = captcha.reg_loss()
        loss_one, loss_two = captcha.loss(logits_one, logits_two,
                                          label_one, label_two)
        eval_one = captcha.correct_rate(logits_one, label_one)
        eval_two = captcha.correct_rate(logits_two, label_two)

        tf.summary.scalar('loss_one', loss_one)
        tf.summary.scalar('loss_two', loss_two)
        tf.summary.scalar('eval_one', eval_one)
        tf.summary.scalar('eval_two', eval_two)
        sum_merged = tf.summary.merge_all()

        # Attach the optimizers to the two nets.
        train_op_one, train_op_two, train_op_both = captcha.training(
            loss_one, loss_two)

        saver = tf.train.Saver(tf.global_variables())
        init_op = tf.group(tf.global_variables_initializer(),
                           tf.local_variables_initializer())
        sess = tf.Session()
        sess.run(init_op)

        # Writer used to visualize the compute graph and the summaries.
        train_summary_writer = tf.summary.FileWriter(VISUAL_DIR,
                                                     tf.get_default_graph())

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        def _save_both(at_step):
            # Announce and write both checkpoints for the given step.
            print('>> %s Saving in %s' % (datetime.now(), CHECKPOINT_ONE))
            print('>> %s Saving in %s' % (datetime.now(), CHECKPOINT_TWO))
            saver.save(sess, CHECKPOINT_ONE, global_step=at_step)
            saver.save(sess, CHECKPOINT_TWO, global_step=at_step)

        try:
            step = 0
            while not coord.should_stop():
                start_time = time.time()
                summary_scl = None

                if TRAIN_METHOD == "M1":
                    if step % 1000 == 0:
                        # Every 1000 steps, also record runtime metadata.
                        run_options = tf.RunOptions(
                            trace_level=tf.RunOptions.FULL_TRACE)
                        run_meta = tf.RunMetadata()
                        summary_scl, _, loss_value_one = sess.run(
                            [sum_merged, train_op_one, loss_one],
                            options=run_options,
                            run_metadata=run_meta)
                        summary_scl, _, loss_value_two = sess.run(
                            [sum_merged, train_op_two, loss_two],
                            options=run_options,
                            run_metadata=run_meta)
                        train_summary_writer.add_run_metadata(
                            run_meta, 'step%06d' % step, global_step=step)
                    else:
                        summary_scl, _, loss_value_one = sess.run(
                            [sum_merged, train_op_one, loss_one])
                        summary_scl, _, loss_value_two = sess.run(
                            [sum_merged, train_op_two, loss_two])
                elif TRAIN_METHOD == "M2":
                    summary_scl, _, loss_value_one, loss_value_two = sess.run(
                        [sum_merged, train_op_both, loss_one, loss_two])

                duration = time.time() - start_time

                # Output the status every 100 batches.
                if step % 100 == 0:
                    print(
                        '>> Step %d run_train: loss_one = %.2f, loss_two = %.2f (%.3f sec)'
                        % (step, loss_value_one, loss_value_two, duration))
                    reg_loss_value_1, reg_loss_value_2 = sess.run(
                        [reg_loss_1, reg_loss_2])
                    print(
                        '>> Step %d run_train: reg_loss_1 = %.2f, reg_loss_2 = %.2f (%.3f sec)'
                        % (step, reg_loss_value_1, reg_loss_value_2,
                           duration))
                    summary_scl, eval_value_one = sess.run(
                        [sum_merged, eval_one])
                    summary_scl, eval_value_two = sess.run(
                        [sum_merged, eval_two])
                    train_summary_writer.add_summary(summary_scl,
                                                     global_step=step)
                    print(
                        '>> Step %d run_train: accr_one = %.2f, accr_two = %.2f (%.3f sec)'
                        % (step, eval_value_one, eval_value_two, duration))

                # Save a checkpoint every 5000 batches.
                if step % 5000 == 0:
                    _save_both(step)

                if step == 60000:
                    coord.request_stop()
                    coord.join(threads)
                    sess.close()
                    return 0

                step += 1

        except Exception as e:
            # Keep the progress made so far, then hand the error to the
            # coordinator.
            _save_both(step)
            coord.request_stop(e)
        finally:
            coord.request_stop()
            coord.join(threads)
            train_summary_writer.close()
            sess.close()
def train():
    """Train aPascal for a number of steps.

    Prints the configuration and builds the model on placeholders that are
    fed from the training and test input pipelines. Training resumes from
    the latest checkpoint in ``FLAGS.train_dir`` when one exists. Otherwise
    it starts from scratch, and when ``FLAGS.finetune`` is set it first
    loads all trainable variables except the last ``2 * model.NUM_ATTRS``
    from ``FLAGS.pretrained_dir``. Training, test and checkpoint actions
    run at the intervals given by ``FLAGS.display``, ``FLAGS.test_interval``
    and ``FLAGS.checkpoint_interval``.
    """
    print('[Training Configuration]')
    print('\tTrain dir: %s' % FLAGS.train_dir)
    print('\tTraining max steps: %d' % FLAGS.max_steps)
    print('\tSteps per displaying info: %d' % FLAGS.display)
    print('\tSteps per testing: %d' % FLAGS.test_interval)
    print('\tSteps per saving checkpoints: %d' % FLAGS.checkpoint_interval)
    print('\tGPU memory fraction: %f' % FLAGS.gpu_fraction)

    with tf.Graph().as_default():
        init_step = 0
        global_step = tf.Variable(0, trainable=False)

        # Input pipelines for aPascal.
        train_images, train_labels = model.distorted_inputs('train')
        test_images, test_labels = model.inputs('eval')

        # The model reads from placeholders so the same graph serves both
        # the training and the test batches.
        images = tf.placeholder(
            tf.float32,
            [FLAGS.batch_size, model.IMAGE_WIDTH, model.IMAGE_WIDTH, 3])
        labels = tf.placeholder(tf.int32,
                                [FLAGS.batch_size, model.NUM_ATTRS])
        probs = model.inference(images)

        loss, acc = model.loss_acc(probs, labels)
        tf.scalar_summary("accuracy", acc)

        train_op, lr = model.train(loss, global_step)

        # Merge the training summaries before the test summaries are
        # created, so the test ones are not part of the training merge.
        train_summary_op = tf.merge_all_summaries()
        loss_summary = tf.scalar_summary("test/loss", loss)
        acc_summary = tf.scalar_summary("test/accuracy", acc)
        test_summary_op = tf.merge_summary([loss_summary, acc_summary])

        init = tf.initialize_all_variables()

        session_config = tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_config)
        sess.run(init)

        saver = tf.train.Saver(tf.all_variables(), max_to_keep=10000)
        ckpt = tf.train.get_checkpoint_state(FLAGS.train_dir)
        if ckpt and ckpt.model_checkpoint_path:
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)
            # The step number is the suffix after the last '-' in the
            # checkpoint file name.
            init_step = int(
                ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        else:
            print('No checkpoint file found. Start from the scratch.')
            # NOTE(review): the fine-tune branch is assumed to sit inside
            # this else; the flattened source does not show the nesting.
            if FLAGS.finetune:
                base_variables = (
                    tf.trainable_variables()[:-2 * model.NUM_ATTRS])
                base_saver = tf.train.Saver(base_variables,
                                            max_to_keep=10000)
                ckpt = tf.train.get_checkpoint_state(FLAGS.pretrained_dir)
                print('Initial checkpoint: ' + ckpt.model_checkpoint_path)
                base_saver.restore(sess, ckpt.model_checkpoint_path)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)

        if not os.path.exists(FLAGS.train_dir):
            os.mkdir(FLAGS.train_dir)
        summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)

        last_step = FLAGS.max_steps - 1
        for step in xrange(init_step, FLAGS.max_steps):
            start_time = time.time()
            try:
                train_images_val, train_labels_val = sess.run(
                    [train_images, train_labels])
                (_, lr_value, loss_value, acc_value,
                 train_summary_str) = sess.run(
                     [train_op, lr, loss, acc, train_summary_op],
                     feed_dict={images: train_images_val,
                                labels: train_labels_val})
            except tf.python.framework.errors.InvalidArgumentError:
                # Debugging aid: drop into an interactive shell.
                embed()
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % FLAGS.display == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: (Training) step %d, loss=%.4f, acc=%.4f, lr=%f (%.1f examples/sec; %.3f '
                              'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    acc_value, lr_value, examples_per_sec,
                                    sec_per_batch))
                summary_writer.add_summary(train_summary_str, step)

            if step % FLAGS.test_interval == 0:
                test_images_val, test_labels_val = sess.run(
                    [test_images, test_labels])
                loss_value, acc_value, test_summary_str = sess.run(
                    [loss, acc, test_summary_op],
                    feed_dict={images: test_images_val,
                               labels: test_labels_val})
                format_str = ('%s: (Test) step %d, loss=%.4f, acc=%.4f')
                print(format_str % (datetime.now(), step, loss_value,
                                    acc_value))
                summary_writer.add_summary(test_summary_str, step)

            # Save the model checkpoint periodically and on the final step.
            if step % FLAGS.checkpoint_interval == 0 or step == last_step:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def evaluate():
    """Evaluate the latest checkpoint and report per-class accuracy.

    Reads the class names from ``DATASET_CLASS_PATH``, builds the inference
    graph through ``model``, restores the most recent checkpoint found in
    ``FLAGS.checkpoint_dir`` and pushes ``total_cnt`` examples through the
    network in batches of ``FLAGS.batch_size``.  Correct / incorrect counts
    are tallied per class, printed as a table and written to
    ``FLAGS.output``.

    Returns:
        None.  When no checkpoint is available a message is printed and the
        function returns before any result is printed or written.
    """
    print('[Testing Configuration]')
    print('\tCheckpoint Dir: %s' % FLAGS.checkpoint_dir)
    print('\tDataset: %s data' % ('Training' if FLAGS.train_data else 'Test'))
    print('\tOutput: %s' % FLAGS.output)

    with open(DATASET_CLASS_PATH, 'r') as fd:
        # Only characters 10..39 of every stripped line are kept as the
        # class name.
        class_list = [line.strip()[10:40] for line in fd]

    batch_size = FLAGS.batch_size
    num_class = model.NUM_CLASSES

    # One [correct, incorrect] counter pair per class.
    result_ll = [[0, 0] for _ in range(num_class)]

    with tf.Graph().as_default():
        test_images, test_labels = model.inputs(
            'train' if FLAGS.train_data else 'eval', False)
        test_logits = model.inference(test_images)

        if FLAGS.train_data:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN
        else:
            total_cnt = data_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL

        # Start running operations on the Graph.
        init = tf.initialize_all_variables()
        sess = tf.Session(config=tf.ConfigProto(
            gpu_options=tf.GPUOptions(
                per_process_gpu_memory_fraction=FLAGS.gpu_fraction),
            log_device_placement=False))
        # The session is closed on every exit path: normal completion, the
        # early "no checkpoint" return and any exception.
        try:
            sess.run(init)

            saver = tf.train.Saver(tf.all_variables())
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return
            print('\tRestore from %s' % ckpt.model_checkpoint_path)
            saver.restore(sess, ckpt.model_checkpoint_path)

            # Open IPython shell, if flag set.
            if FLAGS.embed:
                embed()

            # Start the queue runners.
            coord = tf.train.Coordinator()
            tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                idx = 0
                while idx < total_cnt:
                    print('Current Idx: ' + str(idx))
                    end = min(idx + batch_size, total_cnt)
                    # Only the first `this_batch_size` rows are counted, so
                    # the final batch does not push the tally past
                    # total_cnt.
                    this_batch_size = end - idx
                    logits_val, labels_val = sess.run(
                        [test_logits, test_labels])
                    preds = np.argmax(logits_val, axis=1)
                    for i in range(this_batch_size):
                        # Slot 0 counts correct predictions, slot 1 wrong
                        # ones.
                        slot = 0 if labels_val[i] == preds[i] else 1
                        result_ll[labels_val[i] % num_class][slot] += 1
                    idx = end
            finally:
                coord.request_stop()
                coord.wait_for_stop(1)
        finally:
            sess.close()

    # A class without any counted example reports 0.0 instead of raising
    # ZeroDivisionError.
    accuracy = [
        float(hit) / float(hit + miss) if (hit + miss) else 0.0
        for hit, miss in result_ll
    ]
    correct_t = np.sum([hit for hit, _ in result_ll])
    incorrect_t = np.sum([miss for _, miss in result_ll])
    total_t = correct_t + incorrect_t
    accuracy_t = float(correct_t) / float(total_t) if total_t else 0.0

    print('Class \t\t\tT\tF\tAcc.')
    format_str = '%-31s %7d %7d %.5f'
    for i in range(num_class):
        print(format_str % (class_list[i], result_ll[i][0], result_ll[i][1],
                            accuracy[i]))
    print(format_str % ('(Total)', correct_t, incorrect_t, accuracy_t))

    with open(FLAGS.output, 'w') as fd:
        fd.write('Class \t\t\tT\tF\tAcc.\n')
        format_str = '%-31s %7d %7d %.5f\n'
        for i in range(num_class):
            t, f = result_ll[i]
            # Spaces in class names are replaced by '-' in the output file.
            fd.write(format_str % (class_list[i].replace(' ', '-'), t, f,
                                   accuracy[i]))
        fd.write(format_str % ('(Total)', correct_t, incorrect_t, accuracy_t))
def run_eval():
    """Evaluate both captcha networks on their validation records.

    Builds the input pipelines ``"valid_one"`` and ``"valid_two"``, the two
    networks returned by ``captcha.build_net`` and the correct-prediction
    counters from ``captcha.correct_num``.  The latest checkpoints from
    ``TRAIN_DIR_ONE`` and ``TRAIN_DIR_TWO`` are restored, then
    ``ceil(FLAGS.num_examples / FLAGS.batch_size)`` batches are evaluated and
    the per-step and overall precision of each network is printed.

    Returns:
        None.
    """
    with tf.Graph().as_default(), tf.device('/cpu:0'):
        # Read images and labels in from the tfrecords files.
        image_one, label_one = captcha.inputs(False, FLAGS.batch_size, "valid_one")
        image_two, label_two = captcha.inputs(False, FLAGS.batch_size, "valid_two")

        # Flatten every image into one row of IMAGE_HEIGHT * IMAGE_WIDTH
        # values and transpose, so that each column holds one example.
        image_one = tf.transpose(
            tf.reshape(image_one, [-1, IMAGE_HEIGHT * IMAGE_WIDTH]))
        image_two = tf.transpose(
            tf.reshape(image_two, [-1, IMAGE_HEIGHT * IMAGE_WIDTH]))

        # Labels get the same treatment: rows of 10 values, then transposed.
        label_one = tf.transpose(tf.reshape(label_one, [-1, 10]))
        label_two = tf.transpose(tf.reshape(label_two, [-1, 10]))

        # Build the nets; logits_one / logits_two are the outputs before
        # softmax.
        logits_one, logits_two = captcha.build_net(image_one, image_two)

        # The loss ops are built as in training; their values are not
        # fetched during evaluation.
        loss_one, loss_two = captcha.loss(logits_one, logits_two, label_one, label_two)

        # Ops that count the correct predictions of each net.
        eval_correct_one = captcha.correct_num(logits_one, label_one)
        eval_correct_two = captcha.correct_num(logits_two, label_two)

        # The context manager closes the session even when restoring a
        # checkpoint fails.
        with tf.Session() as sess:
            saver = tf.train.Saver()
            # NOTE(review): both checkpoints are restored through the same
            # saver, so the second restore may overwrite values loaded by
            # the first - confirm this is intended.
            saver.restore(sess, tf.train.latest_checkpoint(TRAIN_DIR_ONE))
            saver.restore(sess, tf.train.latest_checkpoint(TRAIN_DIR_TWO))

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                # float() forces true division, so ceil() and the precision
                # values are right under Python 2 integer division as well.
                num_iter = int(math.ceil(
                    float(FLAGS.num_examples) / FLAGS.batch_size))
                total_true_count_one = 0
                total_true_count_two = 0
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0
                print('>> loop: %d, total_sample_count: %d' % (num_iter, total_sample_count))
                while step < num_iter and not coord.should_stop():
                    # NOTE(review): the two counters are fetched by separate
                    # run() calls; whether both nets therefore see different
                    # batches depends on captcha.build_net - confirm.
                    true_count_one = sess.run(eval_correct_one)
                    true_count_two = sess.run(eval_correct_two)
                    total_true_count_one += true_count_one
                    total_true_count_two += true_count_two
                    precision_one = float(true_count_one) / FLAGS.batch_size
                    precision_two = float(true_count_two) / FLAGS.batch_size
                    print('>> %s Step %d Net 1: true/total: %d/%d precision @ 1 = %.3f'
                          % (datetime.now(), step, true_count_one,
                             FLAGS.batch_size, precision_one))
                    print('>> %s Step %d Net 2: true/total: %d/%d precision @ 1 = %.3f'
                          % (datetime.now(), step, true_count_two,
                             FLAGS.batch_size, precision_two))
                    step += 1

                precision_one = float(total_true_count_one) / total_sample_count
                precision_two = float(total_true_count_two) / total_sample_count
                print('>> %s Net 1 true/total: %d/%d precision @ 1 = %.3f'
                      % (datetime.now(), total_true_count_one,
                         total_sample_count, precision_one))
                print('>> %s Net 2 true/total: %d/%d precision @ 1 = %.3f'
                      % (datetime.now(), total_true_count_two,
                         total_sample_count, precision_two))
            except Exception as e:
                # Hand the error to the coordinator; join() re-raises it.
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)
def main(_):
    """Train the model and write checkpoints under ``FLAGS.dataset_dir``.

    Sets up file logging, logs every flag, builds the input pipeline, the
    model and an Adagrad optimizer, optionally restores the checkpoint
    numbered ``FLAGS.init_iter`` and then runs ``FLAGS.num_epochs`` epochs of
    ``int(FLAGS.sample_size / FLAGS.batch_size)`` steps each.  A checkpoint is
    written after every even-numbered epoch and once more after the last
    step.

    Args:
        _: Unused positional argument.

    Returns:
        None.
    """
    # A single strftime call is enough; the former outer call only
    # re-formatted an already formatted string.
    logging_path = FLAGS.dataset_dir + "output/log/train_{}.log".format(
        time.strftime("%m%d-%H_%M_%S", time.localtime()))
    logging.basicConfig(filename=logging_path,
                        level=logging.INFO,
                        format='%(asctime)s %(message)s\t',
                        datefmt='%Y-%m-%d %H:%M:%S')
    for k, v in FLAGS.__flags.iteritems():
        logging.info('{}, {}'.format(k, v))

    global_step = tf.train.get_or_create_global_step()
    label, fea, neig_id, neig_value = model.inputs(FLAGS.dataset_dir + "train.tfrecord")
    logit, loss = model.inference(label, fea, neig_id, neig_value)
    lr = model.configure_lr(global_step)
    optimizer = tf.train.AdagradOptimizer(learning_rate=lr)
    # optimizer = tf.train.RMSPropOptimizer(learning_rate=FLAGS.lr)
    train_op = optimizer.minimize(loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=FLAGS.num_epochs + 1)

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        # try/finally guarantees the queue-runner threads are stopped and
        # joined even when restoring or a training step raises.
        try:
            if FLAGS.init_iter > 0:
                saver.restore(
                    sess,
                    FLAGS.dataset_dir + 'output/model/model-{}'.format(FLAGS.init_iter))

            steps_num_per_epoch = int(FLAGS.sample_size / FLAGS.batch_size)
            total_steps = steps_num_per_epoch * FLAGS.num_epochs
            epoch_cnt = FLAGS.init_iter + 1
            for i in xrange(total_steps):
                _, _, res_loss = sess.run([train_op, logit, loss])
                if (i + 1) % steps_num_per_epoch == 0:
                    # End of an epoch: checkpoint every second epoch.
                    if epoch_cnt % 2 == 0:
                        saver.save(sess, FLAGS.dataset_dir + 'output/model/model', epoch_cnt)
                    epoch_cnt += 1
                if i % 20000 == 0:
                    # The learning rate is only needed for the log line, so
                    # it is fetched here instead of on every step.
                    res_lr = sess.run(lr)
                    print(i, epoch_cnt, res_loss)
                    logging.info(
                        'batch count: {}, epoch count: {}, lr: {:.6f}, loss: {:.6f}'.
                        format(i, epoch_cnt, res_lr, res_loss))
            # NOTE(review): epoch_cnt has already been advanced past the last
            # finished epoch here, so this final checkpoint is numbered one
            # higher than that epoch - confirm this is intended.
            saver.save(sess, FLAGS.dataset_dir + 'output/model/model', epoch_cnt)
        finally:
            coord.request_stop()
            coord.join(threads)
    logging.info('done')
def getPrediction():
    """Restore the latest checkpoint and print the model's predictions.

    Builds the input pipeline and the inference graph through ``model``,
    restores the moving-average variables from the newest checkpoint in
    ``FLAGS.checkpoint_dir`` and evaluates
    ``ceil(FLAGS.num_examples / FLAGS.batch_size)`` batches.  For every batch
    the softmax of the restored model's logits is printed and the number of
    top-1 hits is accumulated; the resulting precision is printed and written
    as a summary to ``FLAGS.eval_dir``.

    The predictions come from the restored network itself.  Earlier code fed
    the ``images`` tensor through ``feed_dict`` into a separately created,
    never initialised softmax layer; ``Session.run`` rejects tensors as feed
    values, and the hit counter was never updated.

    Returns:
        None.  When no checkpoint is found a message is printed and the
        function returns without evaluating anything.
    """
    with tf.Graph().as_default() as g:
        eval_data = 'test'
        # NOTE(review): a string is passed as `is_train`; confirm that this
        # is what model.inputs expects.
        images, labels = model.inputs(is_train=eval_data)
        logits = model.inference(images)

        # Class probabilities and top-1 hits of the restored model.
        probabilities = tf.nn.softmax(logits)
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess, ckpt.model_checkpoint_path)
                # The step number is the text after the last '-' of the
                # checkpoint file name.
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
            else:
                print('No checkpoint file found')
                return

            # Start the queue runners.
            coord = tf.train.Coordinator()
            # Bound before the try block so the join below always has a
            # list to work with.
            threads = []
            try:
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess, coord=coord, daemon=True,
                                          start=True))

                # float() forces true division under Python 2 as well.
                num_iter = int(math.ceil(
                    float(FLAGS.num_examples) / FLAGS.batch_size))
                true_count = 0  # Counts the number of correct predictions.
                total_sample_count = num_iter * FLAGS.batch_size
                step = 0
                while step < num_iter and not coord.should_stop():
                    result, hits = sess.run([probabilities, top_k_op])
                    print(result)
                    true_count += int(hits.sum())
                    step += 1

                # Compute precision @ 1; an empty evaluation reports 0.0.
                if total_sample_count:
                    precision = float(true_count) / total_sample_count
                else:
                    precision = 0.0
                print('%s: prediction @ 1 = %.3f' % (datetime.now(), precision))

                summary = tf.Summary()
                # merge_all() returns None when the graph has no summaries.
                if summary_op is not None:
                    summary.ParseFromString(sess.run(summary_op))
                summary.value.add(tag='Precision @ 1', simple_value=precision)
                summary_writer.add_summary(summary, global_step)
            except Exception as e:  # pylint: disable=broad-except
                # Hand the error to the coordinator; join() re-raises it.
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)