def evaluate():
    """Eval CIFAR-10 for a number of steps."""
    with tf.Graph().as_default() as g:
        # Get images and labels for CIFAR-10.
        images, labels = model.input()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images)

        # Calculate predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving average version of the learned variables for eval.
        variable_averages = tf.train.ExponentialMovingAverage(
            model.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(eval_dir, g)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            if run_once:
                break
            time.sleep(eval_interval_secs)
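# --- Sketch (assumption, not in the original file): a possible eval_once() ---
# --- as called by evaluate() above.                                        ---
# eval_once() is not defined in this file; the version below follows the
# standard TF CIFAR-10 tutorial pattern.  `checkpoint_dir`, `num_examples`
# and `batch_size` are assumed module-level settings, like `eval_dir` above.
def eval_once(saver, summary_writer, top_k_op, summary_op):
    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Extract the global step from a path like .../model.ckpt-1000.
        global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

        # Start the queue runners that feed the input pipeline.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            num_iter = int(np.ceil(num_examples / float(batch_size)))
            true_count = 0
            step = 0
            while step < num_iter and not coord.should_stop():
                predictions = sess.run([top_k_op])
                true_count += np.sum(predictions)
                step += 1

            precision = true_count / float(num_iter * batch_size)
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

            summary = tf.Summary()
            summary.ParseFromString(sess.run(summary_op))
            summary.value.add(tag='Precision @ 1', simple_value=precision)
            summary_writer.add_summary(summary, global_step)
        finally:
            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)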
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()

        # NOTE: these placeholders are currently unused because constant keep
        # probabilities are passed to model.inference() below; see the sketch
        # after this function for one way to feed them instead.
        keep_prob3 = tf.placeholder(tf.float32)
        keep_prob4 = tf.placeholder(tf.float32)

        # Get images and labels for CIFAR-10.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up
        # on GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = model.input()
            #images_eval, labels_eval = model.input_eval()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits = model.inference(images, keep_prob3=0.7, keep_prob4=0.5)

        # Calculate loss.
        loss = model.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op = model.train(loss, global_step)

        tf.summary.scalar("cost_value_loss", loss)
        summary_op = tf.summary.merge_all()

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = log_frequency * batch_size / duration
                    sec_per_batch = float(duration / log_frequency)

                    format_str = ('%s: step %d, loss = %.2f '
                                  '(%.1f examples/sec; %.3f sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=logdir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook(),
                    tf.train.SummarySaverHook(save_steps=log_frequency,
                                              output_dir=logdir,
                                              summary_op=summary_op)
                ],
                config=tf.ConfigProto(
                    log_device_placement=log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                #summary_writer = tf.summary.FileWriter('/tmp/uniqlo_1', mon_sess.graph)
                mon_sess.run(train_op)
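# --- Sketch (assumption, not in the original file): feeding the dropout ---
# --- placeholders that train() above defines but never uses.            ---
# train() passes constant keep probabilities to model.inference(), so
# keep_prob3/keep_prob4 stay dead.  If model.inference() accepts tensors for
# the keep probabilities (an assumption, not verified here), the training
# step could feed them instead:
def train_with_fed_dropout():
    with tf.Graph().as_default():
        global_step = tf.contrib.framework.get_or_create_global_step()
        keep_prob3 = tf.placeholder(tf.float32, name='keep_prob3')
        keep_prob4 = tf.placeholder(tf.float32, name='keep_prob4')
        with tf.device('/cpu:0'):
            images, labels = model.input()
        logits = model.inference(images, keep_prob3=keep_prob3, keep_prob4=keep_prob4)
        loss = model.loss(logits, labels)
        train_op = model.train(loss, global_step)
        with tf.train.MonitoredTrainingSession(checkpoint_dir=logdir) as mon_sess:
            while not mon_sess.should_stop():
                # Dropout only applies during training; feed 1.0 at eval time.
                mon_sess.run(train_op, feed_dict={keep_prob3: 0.7, keep_prob4: 0.5})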
def train(dataset_train, dataset_val, ckptfile='', caffemodel=''):
    print 'train() called'
    is_finetune = bool(ckptfile)
    batch_size = FLAGS.batch_size

    data_size = dataset_train.size()
    print 'training size:', data_size

    with tf.Graph().as_default():
        startstep = 0 if not is_finetune else int(ckptfile.split('-')[-1])
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # These must be created after merge_all so they stay out of summary_op.
        validation_loss = tf.placeholder('float32', shape=(), name='validation_loss')
        validation_summary = tf.summary.scalar('validation_loss', validation_loss)
        validation_acc = tf.placeholder('float32', shape=(), name='validation_accuracy')
        validation_acc_summary = tf.summary.scalar('validation_accuracy', validation_acc)

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
        init_op = tf.global_variables_initializer()

        # gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.333)
        # sess = tf.Session(config=tf.ConfigProto(log_device_placement=FLAGS.log_device_placement,
        #                                         gpu_options=gpu_options))
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if is_finetune:
            saver.restore(sess, ckptfile)
            print 'restore variables done'
        elif caffemodel:
            sess.run(init_op)
            model.load_alexnet(sess, caffemodel)
            print 'loaded pretrained caffemodel:', caffemodel
        else:
            # From scratch.
            sess.run(init_op)
            print 'init_op done'

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)

        step = startstep
        for epoch in xrange(100):
            print 'epoch:', epoch

            dataset_train.shuffle()
            # dataset_val.shuffle()

            for batch_x, batch_y in dataset_train.batches(batch_size):
                if step >= FLAGS.max_steps:
                    break
                step += 1

                start_time = time.time()
                feed_dict = {image_: batch_x,
                             y_: batch_y,
                             keep_prob_: 0.5,
                             phase_train_: True}

                _, loss_value, logitsyo, _ = sess.run(
                    [train_op, loss, logits, print_op],
                    feed_dict=feed_dict)

                duration = time.time() - start_time

                assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

                if step % 10 == 0 or step < 30:
                    sec_per_batch = float(duration)
                    print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                        % (datetime.now(), step, loss_value,
                           FLAGS.batch_size / duration, sec_per_batch)

                # Validation.
                if step % 100 == 0:  # and step > 0:
                    val_losses = []
                    val_logits = []
                    predictions = np.array([])
                    val_y = []

                    for val_step, (val_batch_x, val_batch_y) in \
                            enumerate(dataset_val.sample_batches(batch_size, g_.VAL_SAMPLE_SIZE)):
                            # enumerate(dataset_val.batches(batch_size)):
                        val_feed_dict = {image_: val_batch_x,
                                         y_: val_batch_y,
                                         keep_prob_: 1.0,
                                         phase_train_: False}
                        val_loss, pred, val_logit, _ = sess.run(
                            [loss, prediction, logits, print_op],
                            feed_dict=val_feed_dict)

                        val_losses.append(val_loss)
                        val_logits.extend(val_logit.tolist())
                        predictions = np.hstack((predictions, pred))
                        val_y.extend(val_batch_y)

                    val_logits = np.array(val_logits)
                    # val_logits.dump('val_logits.npy')
                    # predictions.dump('predictions.npy')
                    # np.array(val_y).dump('val_y.npy')

                    val_loss = np.mean(val_losses)
                    acc = metrics.accuracy_score(val_y[:predictions.size],
                                                 np.array(predictions))
                    print '%s: step %d, validation loss=%.4f, acc=%f' % \
                        (datetime.now(), step, val_loss, acc * 100.)

                    # Validation summaries.
                    val_loss_summ = sess.run(validation_summary,
                                             feed_dict={validation_loss: val_loss})
                    val_acc_summ = sess.run(validation_acc_summary,
                                            feed_dict={validation_acc: acc})
                    summary_writer.add_summary(val_loss_summ, step)
                    summary_writer.add_summary(val_acc_summ, step)
                    summary_writer.flush()

                # Training summaries.
                if step % 100 == 0:
                    summary_str = sess.run(summary_op, feed_dict=feed_dict)
                    summary_writer.add_summary(summary_str, step)
                    summary_writer.flush()

                # Note: `and` binds tighter than `or`, so this saves every 200
                # steps, or at the final step provided step > startstep.
                if step % 200 == 0 or ((step + 1) == FLAGS.max_steps and step > startstep):
                    checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                    saver.save(sess, checkpoint_path, global_step=step)
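# --- Usage sketch (assumption, not in the original file): resuming the ---
# --- training above from a checkpoint it has written.                  ---
# train() derives startstep from the numeric suffix of the checkpoint path
# ('model.ckpt-200' -> 200); the helper and step value below are illustrative.
def resume_training(dataset_train, dataset_val, step):
    ckptfile = os.path.join(FLAGS.train_dir, 'model.ckpt-%d' % step)
    train(dataset_train, dataset_val, ckptfile=ckptfile)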
def test(dataset, ckptfile):
    print 'test() called'
    batch_size = FLAGS.batch_size

    data_size = dataset.size()
    print 'test size:', data_size

    with tf.Graph().as_default():
        startstep = 0
        global_step = tf.Variable(startstep, trainable=False)

        image_, y_ = model.input()
        keep_prob_ = tf.placeholder('float32', name='keep_prob')
        phase_train_ = tf.placeholder(tf.bool, name='phase_train')

        logits = model.inference(image_, keep_prob_, phase_train_)
        prediction = model.classify(logits)
        loss, print_op = model.loss(logits, y_)
        train_op = model.train(loss, global_step, data_size)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        saver = tf.train.Saver(tf.global_variables(), max_to_keep=1000)
        init_op = tf.global_variables_initializer()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))

        if FLAGS.caffemodel:
            caffemodel = FLAGS.caffemodel
            # sess.run(init_op)
            model.load_model(sess, caffemodel, fc8=True)
            print 'loaded pretrained caffemodel:', caffemodel
        else:
            saver.restore(sess, ckptfile)
            print 'restore variables done'

        summary_writer = tf.summary.FileWriter(FLAGS.train_dir, graph=sess.graph)

        step = startstep
        predictions = []
        labels = []

        for batch_x, batch_y in dataset.batches(batch_size):
            if step >= FLAGS.max_steps:
                break
            step += 1

            if step == 1:
                img = batch_x[0, ...]
                cv2.imwrite('img0.jpg', img)

            start_time = time.time()
            # phase_train_ must be fed as well, since inference() depends on it.
            feed_dict = {image_: batch_x,
                         y_: batch_y,
                         keep_prob_: 1.0,
                         phase_train_: False}

            pred, loss_value = sess.run([prediction, loss],
                                        feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                sec_per_batch = float(duration)
                print '%s: step %d, loss=%.2f (%.1f examples/sec; %.3f sec/batch)' \
                    % (datetime.now(), step, loss_value,
                       FLAGS.batch_size / duration, sec_per_batch)

            predictions.extend(pred.tolist())
            labels.extend(batch_y.tolist())

        print labels
        print predictions

        acc = metrics.accuracy_score(labels, predictions)
        print 'acc:', acc * 100
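# --- Sketch (assumption, not in the original file): a per-class breakdown ---
# --- to complement the overall accuracy printed by test() above.         ---
# Uses the same sklearn module already referenced as `metrics` in this file.
def report(labels, predictions):
    print metrics.confusion_matrix(labels, predictions)
    print metrics.classification_report(labels, predictions)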
def train():
    sess = tf.InteractiveSession()
    global_step = tf.contrib.framework.get_or_create_global_step()

    # Load data.
    train_sets = PRMUDataSet("1_train_0.9")
    train_sets.load_data_target()
    n_train_samples = train_sets.get_n_types_target()
    print("n_train_samples = " + str(n_train_samples))

    if not os.path.isfile('save/current/model.ckpt.index'):
        print('Create new model')
        x, y_ = model.input()
        y_conv = model.inference(x)
        loss = model.loss(y_conv=y_conv, y_=y_)
        train_step = model.train_op(loss, global_step)
        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        # NOTE: the loop below also needs cross_entropy_loss, learning_rate and
        # the keep_prob placeholders; it is assumed the model registers them in
        # the same collections used by the restore branch (see the registration
        # sketch after this function).
        cross_entropy_loss = loss
        learning_rate = tf.get_collection('learning_rate')[0]
        keep_prob_fc1 = tf.get_collection('keep_prob_fc1')[0]
        keep_prob_fc2 = tf.get_collection('keep_prob_fc2')[0]
    else:
        print('Load existing model')
        saver = tf.train.import_meta_graph('save/current/model.ckpt.meta')
        saver.restore(sess, 'save/current/model.ckpt')
        learning_rate = tf.get_collection('learning_rate')[0]
        cross_entropy_loss = tf.get_collection('cross_entropy_loss')[0]
        train_step = tf.get_collection('train_step')[0]
        keep_prob_fc1 = tf.get_collection('keep_prob_fc1')[0]
        keep_prob_fc2 = tf.get_collection('keep_prob_fc2')[0]
        x = tf.get_collection('x')[0]
        y_ = tf.get_collection('y_')[0]
        y_conv = tf.get_collection('y_conv')[0]

    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(y_, 1))
    true_pred = tf.reduce_sum(tf.cast(correct_prediction, dtype=tf.float32))

    for epoch in range(nb_epochs):
        print("Epoch: %d" % epoch)
        print("Learning rate: " + str(learning_rate.eval()))

        avg_ttl = []
        nb_true_pred = 0

        # Shuffle data.
        perm = np.random.permutation(n_train_samples)
        print('x_train = ' + str(perm))

        if epoch % 10 == 0:
            saver.save(sess, "save/current/model.ckpt")

        for i in range(0, n_train_samples, batch_size):
            x_batch = train_sets.data[perm[i:(i + batch_size)]]
            #print('x_batch['+str(i)+'] = '+str(perm[i:(i+batch_size)]))
            batch_target = np.asarray(train_sets.target[perm[i:i + batch_size]])

            # One-hot encode the 46-class targets.
            y_batch = np.zeros((len(x_batch), 46), dtype=np.float32)
            y_batch[np.arange(len(x_batch)), batch_target] = 1.0
            #print('batch_target = '+str(batch_target))

            ttl, _ = sess.run(
                [cross_entropy_loss, train_step],
                feed_dict={
                    x: x_batch,
                    y_: y_batch,
                    keep_prob_fc1: (1 - drop_out_prob),
                    keep_prob_fc2: (1 - drop_out_prob)
                })

            avg_ttl.append(ttl * len(x_batch))

            nb_true_pred += true_pred.eval(feed_dict={
                x: x_batch,
                y_: y_batch,
                keep_prob_fc1: 1,
                keep_prob_fc2: 1
            })
            print('Batch ' + str(i) + ' : Number of true prediction: ' + str(nb_true_pred))

        print("Average total loss: " + str(np.sum(avg_ttl) / n_train_samples))
        print("Train accuracy: " + str(nb_true_pred * 1.0 / n_train_samples))
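# --- Sketch (assumption, not in the original file): collection registration ---
# --- expected by the restore branch of train() above.                       ---
# train() looks tensors up with tf.get_collection('x')[0] and friends after
# import_meta_graph(), so the graph-building code (model.input/inference/
# loss/train_op, not shown here) must register them when the model is first
# created.  The exact placement inside the model module is an assumption.
def register_for_restore(x, y_, y_conv, cross_entropy_loss, train_step,
                         learning_rate, keep_prob_fc1, keep_prob_fc2):
    tf.add_to_collection('x', x)
    tf.add_to_collection('y_', y_)
    tf.add_to_collection('y_conv', y_conv)
    tf.add_to_collection('cross_entropy_loss', cross_entropy_loss)
    tf.add_to_collection('train_step', train_step)
    tf.add_to_collection('learning_rate', learning_rate)
    tf.add_to_collection('keep_prob_fc1', keep_prob_fc1)
    tf.add_to_collection('keep_prob_fc2', keep_prob_fc2)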