def main():
    """Train the CNN for 300 steps, printing loss and test accuracy per step.

    Builds a training graph (inference, loss, training op) and a second
    inference pass over the test batch, then runs both in one session.
    """
    # Fetch the image/label batches used for training.
    train, test = inputs.get_data()
    train_images, train_labels = inputs.train_batch(train)

    # Define the operations for inference, loss and training.
    train_logits = cnn.inference(train_images)
    losses = loss(train_labels, train_logits)
    train_op = training(losses)

    # Second inference pass on the test batch. `reuse=True` is forwarded to
    # cnn.inference (presumably to share the training variables -- confirm).
    test_images, test_labels = inputs.test_batch(test)
    test_logits = cnn.inference(test_images, reuse=True)
    correct_prediction = tf.equal(tf.argmax(test_logits, 1),
                                  tf.to_int64(test_labels))
    accuracy = tf.reduce_mean(tf.to_float(correct_prediction))

    with tf.Session() as sess:
        # Initialise variables before any input thread starts running.
        sess.run(tf.global_variables_initializer())

        # Start the threads that pull data out of the input batches.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            # Repeat the training step.
            for i in range(300):
                _, loss_value, accuracy_value = sess.run(
                    [train_op, losses, accuracy])
                print("step {:3d} : {:5f} ({:3f})".format(
                    i + 1, loss_value, accuracy_value * 100.0))
        finally:
            # Always stop the input threads, even when a step raises.
            coord.request_stop()
            coord.join(threads)
def tower_loss(scope, images1, labels1, hots1, images2, labels2, hots2):
    """Build one tower of the multi-task CNN and return its total loss.

    Args:
      scope: name prefix identifying this tower (e.g. 'tower_0'); used to
        filter the 'losses' collection.
      images1: first image batch, passed to `cnn.inference`.
      labels1: labels for `images1`, forwarded to `cnn.loss`.
      hots1: forwarded to `cnn.loss` alongside `labels1`.
      images2: second image batch, passed to `cnn.inference` after variable
        reuse has been switched on.
      labels2: labels for `images2`, forwarded to `cnn.loss`.
      hots2: forwarded to `cnn.loss` alongside `labels2`.

    Returns:
      The `tf.add_n` sum (named 'total_loss') of every tensor in the 'losses'
      collection under `scope`.
    """
    # Both inference passes use the same variables: reuse is enabled between
    # the two calls.
    first_logits = cnn.inference(images1, n_cnn=5)
    tf.get_variable_scope().reuse_variables()
    second_logits = cnn.inference(images2, n_cnn=5)

    # The return value is not needed; the individual loss terms are read back
    # from the 'losses' collection below (presumably cnn.loss registers them
    # there -- confirm).
    cnn.loss(first_logits, labels1, hots1, second_logits, labels2, hots2,
             loss_type=1)

    # Only the losses that belong to the current tower.
    tower_losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(tower_losses, name='total_loss')

    # Strip the 'tower_[0-9]/' prefix so multi-GPU runs show one clean name
    # per loss on TensorBoard.
    tower_prefix = '%s_[0-9]*/' % cnn.TOWER_NAME
    for loss_tensor in tower_losses + [total_loss]:
        summary_name = re.sub(tower_prefix, '', loss_tensor.op.name)
        tf.summary.scalar(summary_name, loss_tensor)

    return total_loss
def train():
    """Train CGCNN until FLAGS.max_steps, logging loss and throughput."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Keep the input pipeline on CPU:0 so its ops do not land on the GPU
        # and slow training down.
        with tf.device('/cpu:0'):
            energies, sites_matrices, adj_matrices = cnn.inputs(
                eval_data=False)

        # Energy predictions, loss, and the op that applies one training step.
        predicted_energies = cnn.inference(sites_matrices, adj_matrices)
        loss_op = cnn.loss(predicted_energies, energies)
        train_op = cnn.train(loss_op, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime every FLAGS.log_frequency steps."""

            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                # Ask the session to also fetch the loss value.
                return tf.train.SessionRunArgs(loss_op)

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency != 0:
                    return

                now = time.time()
                elapsed = now - self._start_time
                self._start_time = now

                loss_value = run_values.results
                examples_per_sec = (FLAGS.log_frequency * FLAGS.batch_size /
                                    elapsed)
                sec_per_batch = float(elapsed / FLAGS.log_frequency)

                template = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                            '%.3f sec/batch)')
                print(template % (datetime.now(), self._step, loss_value,
                                  examples_per_sec, sec_per_batch))

        session_hooks = [
            tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
            tf.train.NanTensorHook(loss_op),
            _LoggerHook(),
        ]
        session_config = tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement)

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=session_hooks,
                config=session_config) as session:
            while not session.should_stop():
                session.run(train_op)
def evaluate(X_predict):
    """Predict the class index of `X_predict` with the latest checkpoint.

    Args:
      X_predict: image data fed to the network after division by 255.0; it
        must match the placeholder shape
        (1, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS).

    Returns:
      The argmax over axis 1 of the network output, or None when no
      checkpoint is found under `train.MODEL_SAVE_PATH`.
    """
    with tf.Graph().as_default():
        NUM = 1  # number of images predicted per call

        # Network input and output tensors.
        input_shape = (NUM, IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS)
        x = tf.placeholder(tf.float32, shape=input_shape, name='x_input')
        y = cnn.inference(x, False, False, regularizer=None)

        saver = tf.train.Saver()
        with tf.Session() as sess:
            # Locate the saved model through the checkpoint state file.
            ckpt = tf.train.get_checkpoint_state(train.MODEL_SAVE_PATH)
            if not (ckpt and ckpt.model_checkpoint_path):
                print('No checkpoint file found')
                return

            # Restore the weights, run the network and keep the best class.
            saver.restore(sess, ckpt.model_checkpoint_path)
            scores = sess.run(y, feed_dict={x: X_predict / 255.0})
            return np.argmax(scores, axis=1)
def evaluate():
    """Eval CGCNN repeatedly (or once when FLAGS.run_once is set)."""
    with tf.Graph().as_default() as graph:
        # Energies plus site/adjacency matrices for the selected split.
        use_test_split = FLAGS.eval_data == 'test'
        energies, sites_matrices, adj_matrices = cnn.inputs(
            eval_data=use_test_split)

        # Energy predictions from the inference model and their absolute
        # error against the reference energies.
        predicted_energies = cnn.inference(sites_matrices, adj_matrices)
        abs_error_tensor = tf.abs(predicted_energies - energies)

        # Restore the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(cnn.MOVING_AVERAGE_DECAY)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        # Evaluate at least once, then keep going at a fixed interval unless
        # a single run was requested.
        eval_once(saver, summary_writer, abs_error_tensor, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, abs_error_tensor, summary_op)
def evaluate():
    """Eval CNN repeatedly (or once when FLAGS.run_once is set)."""
    graph = tf.Graph()
    with tf.Session(graph=graph) as sess:
        # The input batch is fetched as concrete values here, before the
        # model is built on top of it.
        # NOTE(review): every eval_once call therefore appears to score this
        # same batch -- confirm that is intended.
        features, labels = sess.run(dataset_input_fn())

        # Logits from the inference model and the top-1 correctness op.
        logits = cnn.inference(features["image"])
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, graph)

        eval_once(saver, summary_writer, top_k_op, summary_op)
        while not FLAGS.run_once:
            time.sleep(FLAGS.eval_interval_secs)
            eval_once(saver, summary_writer, top_k_op, summary_op)
def evaluate():
    """Test one image against the saved model and print the predicted class.

    The image comes from `get_image` applied to the files found under
    './data/train/'; the model is restored from the latest checkpoint under
    './logs/train/'. Nothing is predicted when no checkpoint exists.
    """
    # You need to change these directories to yours.
    train_dir = './data/train/'
    logs_train_dir = './logs/train/'

    train, _ = train_test_split.get_files(train_dir)
    image_array = get_image(train)

    with tf.Graph().as_default():
        batch_size = 1
        n_classes = 2

        # The image enters the graph through this placeholder, so the feed in
        # sess.run below is what the network actually sees.
        X = tf.placeholder(tf.float32, shape=[208, 208, 3])
        image = tf.image.per_image_standardization(X)
        image = tf.reshape(image, [1, 208, 208, 3])
        logits = cnn.inference(image, batch_size, n_classes)
        logits = tf.nn.softmax(logits)

        saver = tf.train.Saver()

        with tf.Session() as sess:
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(logs_train_dir)
            if not (ckpt and ckpt.model_checkpoint_path):
                # Without restored weights the variables are uninitialised,
                # so there is nothing meaningful to run.
                print("No checkpoint file found")
                return

            # The checkpoint path ends in '-<global_step>'.
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
            saver.restore(sess, ckpt.model_checkpoint_path)
            print("Loading success, global_step is {}".format(global_step))

            prediction = sess.run(logits, feed_dict={X: image_array})
            max_index = np.argmax(prediction)
            # prediction has one row; index scalars so '{:.6f}' can format
            # them (a 1-D array slice cannot be formatted that way).
            if max_index == 0:
                print("This is a cat with possibility {:.6f}".format(
                    prediction[0, 0]))
            else:
                print("This is a dog with possibility {:.6f}".format(
                    prediction[0, 1]))
def run_training():
    """Train the classifier for MAX_STEP steps with periodic logs/checkpoints.

    Every 50 steps the loss and accuracy are printed and a summary is
    written; every 2000 steps, and on the last step, a checkpoint is saved
    under './logs/train/'.
    """
    # Set these directories to yours.
    train_dir = './data/train/'
    logs_train_dir = './logs/train/'

    train, train_label = train_test_split.get_files(train_dir)
    train_batch, train_label_batch = train_test_split.get_batch(
        train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

    train_logits = cnn.inference(train_batch, BATCH_SIZE, N_CLASSES)
    train_loss = cnn.losses(train_logits, train_label_batch)
    train_op = cnn.training(train_loss, learning_rate)
    train__acc = cnn.evaluation(train_logits, train_label_batch)

    summary_op = tf.summary.merge_all()
    sess = tf.Session()
    train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
    saver = tf.train.Saver()

    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, tra_loss, tra_acc = sess.run([train_op, train_loss, train__acc])

            if step % 50 == 0:
                print("Step {}, ".format(step),
                      "train loss = {:.2f}, ".format(tra_loss),
                      "train accuracy = {:.2f}%".format(tra_acc * 100.0))
                # NOTE(review): this is a separate run, so the summary is
                # presumably computed on a different batch than the values
                # printed above -- confirm this is acceptable.
                summary_str = sess.run(summary_op)
                train_writer.add_summary(summary_str, step)

            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(logs_train_dir, "model.ckpt")
                saver.save(sess, checkpoint_path, global_step=step)
    except tf.errors.OutOfRangeError:
        print("Done training -- epoch limit reached")
    finally:
        coord.request_stop()
        try:
            coord.join(threads)
        finally:
            # Flush pending summaries and release the session even when
            # joining the input threads re-raises one of their errors.
            train_writer.close()
            sess.close()
def main(args=None):
    """Train the network for 300 steps, saving a summary and checkpoint each.

    FLAGS.train_dir is wiped and recreated before training starts.

    Args:
      args: unused; accepted so the function can serve as an app entry point.
    """
    # Start from an empty training directory.
    if tf.gfile.Exists(FLAGS.train_dir):
        tf.gfile.DeleteRecursively(FLAGS.train_dir)
    tf.gfile.MakeDirs(FLAGS.train_dir)

    with tf.Graph().as_default():
        images, labels = network.train_set()
        logits = network.inference(images)
        loss = network.loss(logits, labels)
        train = network.train(loss, 0.01)

        summary = tf.merge_all_summaries()
        init = tf.initialize_all_variables()
        saver = tf.train.Saver()

        with tf.Session() as sess:
            summary_writer = tf.train.SummaryWriter(FLAGS.train_dir,
                                                    sess.graph)
            sess.run(init)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)
            try:
                for step in range(300):
                    if not coord.should_stop():
                        _, loss_value = sess.run([train, loss])
                        # Single-argument print() behaves identically on
                        # Python 2 and Python 3.
                        print('Step %d: loss = %.2f' % (step, loss_value))

                        summary_str = sess.run(summary)
                        summary_writer.add_summary(summary_str, step)
                        summary_writer.flush()

                        checkpoint_file = os.path.join(FLAGS.train_dir,
                                                       'checkpoint')
                        saver.save(sess, checkpoint_file, global_step=step)
            except tf.errors.OutOfRangeError:
                print('Done training -- epoch limit reached')
            finally:
                coord.request_stop()
                try:
                    coord.join(threads)
                finally:
                    # Release the event file handle.
                    summary_writer.close()
def predict():
    """Print the predicted and true identity for the image at FLAGS.img_path.

    The model weights are restored (moving-average versions) from the latest
    checkpoint in FLAGS.checkpoint_dir. Returns None in every case; when no
    checkpoint exists a message is printed and nothing is predicted.
    """
    stars = [
        "nicolas_cage", "brad_pitt", "angelina_jolie", "leonardo_dicaprio",
        "robert_downey_jr"
    ]

    # Only the image is used; the label returned alongside it is ignored.
    img, lbl = get_image(FLAGS.img_path, FLAGS.img_label)
    logit = cnn.inference(tf.convert_to_tensor(img, dtype=tf.float32))

    with tf.Session() as sess:
        # Restore the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
        saver = tf.train.Saver(ema.variables_to_restore())

        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)

        # The first row of the logits holds the scores for the single image.
        scores = sess.run(logit)
        max_idx = scores[0].argmax()
        print("prediction: {}, truth: {}".format(stars[max_idx],
                                                 stars[FLAGS.img_label]))
def run_training():
    """Train the network for FLAGS.max_step steps with periodic evaluation.

    The loss is printed every 100 steps. Every 1000 steps, and after the last
    step, the train, validation and test sets are each scored with `do_eval`.
    """
    # For MNIST use instead:
    #   input_data.read_data_sets("../data/MNIST_data/")
    # CIFAR-10:
    train_data, test_data, validation_data = input_data.load_data()

    with tf.Graph().as_default():
        image_pl, label_pl, keep_prob_pl = place_holder(FLAGS.batch_size)
        logits = nn_structure.inference(
            image_pl, conv_1_params, max_pool_1_params, conv_2_params,
            max_pool_2_params, full_connected_units, keep_prob_pl)
        loss = nn_structure.loss(logits, label_pl)
        train_op = nn_structure.train(loss, FLAGS.learning_rate)
        eval_correct = nn_structure.evaluation(logits, label_pl, k=1)
        init = tf.initialize_all_variables()

        # Evaluation passes, in the order they are reported.
        eval_sets = (
            ("Train Eval:", train_data),
            ("Validation Eval:", validation_data),
            ("Test Eval:", test_data),
        )

        with tf.Session() as sess:
            sess.run(init)
            start_time = time.time()

            for step in range(FLAGS.max_step):
                # 0.5 is passed through to fill_feed_dict together with the
                # keep-probability placeholder.
                feed_dict = fill_feed_dict(train_data, 0.5, image_pl,
                                           label_pl, keep_prob_pl)
                _, loss_value = sess.run([train_op, loss], feed_dict)

                if step % 100 == 0:
                    duration = time.time() - start_time
                    # Seconds per 100 steps * 10 == milliseconds per step.
                    print("Step: {:d}, Training Loss: {:.4f}, {:.1f}ms/step".
                          format(step, loss_value, duration * 10))
                    start_time = time.time()

                is_last_step = (step + 1) == FLAGS.max_step
                if (step + 1) % 1000 == 0 or is_last_step:
                    for title, dataset in eval_sets:
                        print(title)
                        do_eval(sess, eval_correct, dataset, image_pl,
                                label_pl, keep_prob_pl)
def evaluate(run_once=False):
    """Evaluate the model on the test data, once or at a fixed interval.

    Args:
      run_once: when equal to True, evaluate a single time and return;
        otherwise evaluate forever, sleeping EVAL_INTERVAL_SECS between runs.
    """
    with tf.Graph().as_default() as graph:
        examples, labels = cnn.inputs(data_type='test')
        logits = cnn.inference(examples)

        # Top-1 correctness of the predictions.
        top_k_op = tf.nn.in_top_k(logits, labels, 1)

        # Restore the moving-average version of the learned variables.
        ema = tf.train.ExponentialMovingAverage(config.moving_average_decay)
        saver = tf.train.Saver(ema.variables_to_restore())

        # Summary op built from the TF collection of summaries.
        summary_op = tf.merge_all_summaries()
        summary_writer = tf.train.SummaryWriter(config.eval_dir, graph)

        while True:
            eval_once(saver, summary_writer, top_k_op, summary_op)
            # Kept as an equality test so that only values equal to True end
            # the loop, exactly as before.
            if run_once == True:
                break
            time.sleep(EVAL_INTERVAL_SECS)
def train():
    """Train the CNN up to MAX_STEPS, resuming from a checkpoint if present.

    When TRAIN_DIR exists and CHECKPOINT_DIR holds a checkpoint, the model is
    restored and training continues from the step encoded in the checkpoint
    file name. Otherwise variables are freshly initialised. In both cases
    TRAIN_DIR is cleared or created before training starts.

    Raises:
      AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = cnn.distorted_inputs()
        logits = cnn.inference(images)
        loss = cnn.loss(logits, labels)
        train_op = cnn.train(loss, global_step)

        summary_op = tf.merge_all_summaries()
        init = tf.initialize_all_variables()

        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=LOG_DEVICE_PLACEMENT))
        saver = tf.train.Saver(tf.all_variables())

        if tf.gfile.Exists(TRAIN_DIR):
            ckpt = tf.train.get_checkpoint_state(CHECKPOINT_DIR)
            # Check the checkpoint state BEFORE reading from it:
            # get_checkpoint_state returns None when nothing was saved.
            if ckpt and ckpt.model_checkpoint_path:
                # The checkpoint file name ends in '-<step>'.
                last_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
                ckpt_dir = os.path.join(CHECKPOINT_DIR,
                                        "model.ckpt-" + last_step)
                # NOTE(review): if CHECKPOINT_DIR lives inside TRAIN_DIR this
                # removes the checkpoint before it is restored -- confirm the
                # two directories are distinct.
                tf.gfile.DeleteRecursively(TRAIN_DIR)
                saver.restore(sess, ckpt_dir)
                assign_op = global_step.assign(int(last_step))
                sess.run(assign_op)
                print("Read old model from: ", ckpt_dir)
                print("Starting training at: ", sess.run(global_step))
            else:
                tf.gfile.DeleteRecursively(TRAIN_DIR)
                sess.run(init)
                print("No model found. Starting training at: ",
                      sess.run(global_step))
        else:
            tf.gfile.MakeDirs(TRAIN_DIR)
            sess.run(init)
            print("No folder found. Starting training at: ",
                  sess.run(global_step))

        print("Writing train results to: ", TRAIN_DIR)
        print("Train file: ", TRAIN_FILE)

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        summary_writer = tf.train.SummaryWriter(TRAIN_DIR,
                                                graph_def=sess.graph_def)

        for step in xrange(sess.run(global_step), MAX_STEPS):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % 10 == 0:
                num_examples_per_step = BATCH_SIZE
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)
                format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; '
                              '%.3f sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))

                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)

            # Save the model checkpoint periodically.
            if step % 1000 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_path = os.path.join(TRAIN_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):
    """Run this process's role (parameter server or worker) in a cluster.

    A 'ps' process blocks in `server.join()`. A worker builds the model,
    waits for the session prepared by the Supervisor and runs TRAINING_STEPS
    training steps, printing validation accuracy every 100 steps.

    Args:
      networkmodel: truthy selects the `network` model on flattened input;
        falsy selects the convolutional `cnn` model.
      MODEL_SAVE_PATH: not used by this function.
      MODEL_NAME: not used by this function.

    Raises:
      ValueError: if FLAGS.job_name or FLAGS.task_index is None or ''.
    """
    if FLAGS.job_name is None or FLAGS.job_name == '':
        raise ValueError('Must specify an explicit job_name !')
    print('job_name : %s' % FLAGS.job_name)

    if FLAGS.task_index is None or FLAGS.task_index == '':
        raise ValueError('Must specify an explicit task_index!')
    print('task_index : %d' % FLAGS.task_index)

    ps_spec = FLAGS.ps_hosts.split(',')
    worker_spec = FLAGS.worker_hosts.split(',')

    # Create the cluster and this process's server.
    cluster = tf.train.ClusterSpec({'ps': ps_spec, 'worker': worker_spec})
    server = tf.train.Server(cluster,
                             job_name=FLAGS.job_name,
                             task_index=FLAGS.task_index)
    if FLAGS.job_name == 'ps':
        server.join()

    is_chief = (FLAGS.task_index == 0)

    with tf.device(tf.train.replica_device_setter(cluster=cluster)):
        # Training data with labels.
        x, y_ = readdata.get_batch(train=True,
                                   batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Test data with labels.
        text_x, text_y = readdata.get_batch(train=False,
                                            batch_size=BATCH_SIZE,
                                            num_epochs=50)

        if networkmodel:
            # Flatten the input to one dimension per example; -1 leaves the
            # number of examples unspecified.
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output.
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model: training output.
            y = cnn.inference(x, False, False, regularizer=None)

        # Global training-step counter: starts at 0 and is incremented once
        # per batch by the optimizer.
        global_step = tf.Variable(0, name='global_step', trainable=False)

        if networkmodel:
            # Flatten the test data the same way as the training data.
            text_x = tf.reshape(
                text_x,
                [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output.
            average_y = network.inference(text_x,
                                          avg_class=None,
                                          reuse=True,
                                          lamada=None)
        else:
            # Convolutional model: test output.
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Per-example cross entropy between the logits and the true class.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        # The loss is the batch mean.
        loss = tf.reduce_mean(cross_entropy)

        # One gradient-descent step with learning rate LEARNING_RATE that
        # minimises the loss and increments global_step.
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)

        # Whether the predicted class equals the true class.
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Local variable-initialisation op.
        init_op = tf.global_variables_initializer()

    train_dir = tempfile.mkdtemp()
    sv = tf.train.Supervisor(is_chief=is_chief,
                             logdir=train_dir,
                             init_op=init_op,
                             recovery_wait_secs=1,
                             global_step=global_step)

    if is_chief:
        print('Worker %d: Initailizing session...' % FLAGS.task_index)
    else:
        print('Worker %d: Waiting for session to be initaialized...' %
              FLAGS.task_index)
    sess = sv.prepare_or_wait_for_session(server.target)
    print('Worker %d: Session initialization complete.' % FLAGS.task_index)

    time_begin = time.time()
    print('Traing begins @ %f' % time_begin)

    # Start the input threads ONCE, before the loop; creating a coordinator
    # and a new set of queue-runner threads on every step piles up threads.
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)

    local_step = 0
    try:
        for i in range(TRAINING_STEPS):
            _, step, loss_value = sess.run([train_step, global_step, loss])
            local_step += 1

            now = time.time()
            print('%f: Worker %d: traing step %d dome (global step:%d)' %
                  (now, FLAGS.task_index, local_step, step))

            # Report the validation accuracy.
            if (i + 1) % 100 == 0:
                validate_acc = sess.run(accuracy)
                print(
                    "After %d training step(s),validation accuracy using average model is %g."
                    % (step, validate_acc))
    finally:
        # Ask all input threads to stop and wait for them.
        coord.request_stop()
        coord.join(threads)

    time_end = time.time()
    print('Training ends @ %f' % time_end)
    train_time = time_end - time_begin
    print('Training elapsed time:%f s' % train_time)
    sess.close()
def predict():
    """Score one hard-coded test image against label 1 and print precision@1.

    The image is read through a file-name queue, resized, converted to
    grayscale and rescaled before being passed to `cnn.inference`. Weights
    are restored (moving-average versions) from FLAGS.checkpoint_dir; when no
    checkpoint exists a message is printed and the function returns.
    """
    # List of files to read.
    filename_queue = tf.train.string_input_producer([
        "../../datasets/stars_from_google_images/brad_pitt_test.jpeg"
    ])
    labels = [1]

    _, encoded = tf.WholeFileReader().read(filename_queue)
    # Use the png or jpg decoder depending on your files.
    img = tf.image.decode_jpeg(encoded)

    # Preprocessing.
    # NOTE(review): convert_image_dtype runs after resize_images; if the
    # resized tensor is already float32 this conversion may not rescale the
    # values to [0, 1] -- confirm it matches the training pipeline.
    target_size = [FLAGS.image_size, FLAGS.image_size]
    img = tf.image.resize_images(img,
                                 target_size,
                                 method=tf.image.ResizeMethod.BICUBIC)
    img = tf.image.rgb_to_grayscale(img)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.scalar_mul(2.0, img)
    img = img - tf.constant([1.0])

    logits = cnn.inference(img)

    # Top-1 correctness of the prediction.
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    # Restore the moving-average version of the learned variables.
    ema = tf.train.ExponentialMovingAverage(FLAGS.moving_average_decay)
    saver = tf.train.Saver(ema.variables_to_restore())

    with tf.Session() as sess:
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)

        # Start the queue runners.
        coord = tf.train.Coordinator()
        try:
            threads = []
            for runner in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                started = runner.create_threads(sess,
                                                coord=coord,
                                                daemon=True,
                                                start=True)
                threads.extend(started)

            predictions = sess.run([top_k_op])

            # With a single image, precision @ 1 is the number of hits.
            precision = 0 + np.sum(predictions)
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))
        except Exception as e:  # pylint: disable=broad-except
            coord.request_stop(e)

        coord.request_stop()
        coord.join(threads, stop_grace_period_secs=10)
def train():
    """Train the CNN for config.max_steps steps with logging and checkpoints.

    Every config.summary_every_n_steps steps a summary is written and the
    loss, throughput, accuracy and per-term losses are logged. Every
    config.ckpt_every_n_steps steps (except step 0) a checkpoint is saved.

    Raises:
      AssertionError: if the loss becomes NaN.
    """
    with tf.Graph().as_default():
        log('===== START TRAIN RUN: ' + str(datetime.now()) + '=====')

        global_step = tf.Variable(0, trainable=False)

        # Examples and labels, logits, loss, accuracy and the training op.
        examples, labels = cnn.inputs(data_type='train')
        logits = cnn.inference(examples)
        loss, losses_collection = cnn.loss(logits, labels)
        accuracy = cnn.accuracy(logits, labels)
        train_op = cnn.train(loss, global_step)

        saver = tf.train.Saver(tf.all_variables())

        # Summary and init ops.
        summary_op = tf.merge_all_summaries()
        init_op = tf.initialize_all_variables()

        sess = tf.Session()
        sess.run(init_op)

        tf.train.start_queue_runners(sess=sess)
        train_writer = tf.train.SummaryWriter(config.train_dir, sess.graph)

        fetches = [summary_op, loss, accuracy, train_op]
        for step in xrange(config.max_steps):
            tick = time.time()
            summary, loss_value, accuracy_value, _ = sess.run(fetches)

            # Each loss term is evaluated with its own run, on every step.
            loss_breakdown = []
            for term in losses_collection:
                loss_breakdown.append((str(term.op.name), sess.run(term)))
            duration = time.time() - tick

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            if step % config.summary_every_n_steps == 0:
                examples_per_sec = config.batch_size / duration
                sec_per_batch = float(duration)
                train_writer.add_summary(summary, step)

                message = ('%s: step %d, loss = %.3f (%.2f examples/sec; %.3f sec/batch), accuracy %.3f ') % (
                    datetime.now(), step, loss_value, examples_per_sec,
                    sec_per_batch, accuracy_value)
                message += str(loss_breakdown)
                log(message)

                peak_rss = float(
                    resource.getrusage(resource.RUSAGE_SELF).ru_maxrss)
                log("memory usage: {} Mb".format(peak_rss / 1000000.0))

            if (step % config.ckpt_every_n_steps == 0) and (step > 0):
                # Save the weights to file.
                checkpoint_path = os.path.join(config.checkpoint_dir,
                                               'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                log("Checkpoint saved at step %d" % step)
# Collect one label name per line of the already-open label file `f`.
labels.extend(line.rstrip() for line in f)

# Load every image named on the command line, resize it to the network's
# input size and scale pixel values by 1/255.
test_image = np.asarray([
    np.asarray(
        Image.open(path).resize((nn.IMAGE_SIZE, nn.IMAGE_SIZE))) / 255.0
    for path in sys.argv[1:]
])

# Graph inputs: image batch, label placeholder and dropout keep probability.
images_placeholder = tf.placeholder("float",
                                    shape=(None, nn.IMAGE_SIZE, nn.IMAGE_SIZE,
                                           3))
labels_placeholder = tf.placeholder('float', shape=(None, nn.NUM_CLASSES))
keep_prob = tf.placeholder("float")

logits = nn.inference(images_placeholder, keep_prob)

# Restore the trained weights on top of freshly initialised variables.
sess = tf.InteractiveSession()
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
saver.restore(sess, backup_dir + "/model.ckpt")

# Classify the images one at a time and print the winning label name.
for sample in test_image:
    scores = logits.eval(feed_dict={
        images_placeholder: [sample],
        keep_prob: 1.0
    })
    print(labels[np.argmax(scores[0])])
# Path of the image to classify, taken from the command line.
TARGET = sys.argv[1]
# Class names indexed by predicted label.
LNAME = ['non-patio', 'patio']
IMAGE_SIZE = 64
NUM_CLASS = 2
CH_SIZE = 3
BATCH_SIZE = 600
TRAIN_FILE = ['train.csv']
MAX_STEPS = 100000

flags = tf.app.flags
FLAGS = flags.FLAGS

# Dropout keep probability, fed as 1.0 at prediction time.
keep_prob = tf.placeholder("float")

image = input_data.load_image(TARGET, image_size=IMAGE_SIZE, ch_size=CH_SIZE)
output = mynn.inference(image, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS)

with tf.Session() as sess:
    saver = tf.train.Saver(max_to_keep=0)

    # Restore the most recent checkpoint found under the model directory.
    model_path = '/output'
    saver.restore(sess, tf.train.latest_checkpoint(model_path))
    print("Model restore finished")

    # Start the input queue threads before evaluating the network.
    tf.train.start_queue_runners(sess)

    actual_res = sess.run([output], feed_dict={keep_prob: 1.0})
    predicted_label = np.argmax(actual_res)

    print('result', actual_res)
    print('label', predicted_label)
    print('label-name', LNAME[predicted_label])
    print('patio value', actual_res[0][0][1])
def evaluate(test_num, test_tfrecord_file, test_pred_file):
    """Run the multi-task CNN over the test set and record its predictions.

    Predictions are written batch by batch through `record_sku_pred`: the
    first batch opens `test_pred_file` in 'wb' mode, later batches append.
    Exactly ceil(test_num / FLAGS.test_batch_size) batches are evaluated.

    Args:
      test_num: number of test examples; determines how many batches run.
      test_tfrecord_file: input file handed to `cnn.inputs`.
      test_pred_file: output file handed to `record_sku_pred`.

    Returns:
      None. When no checkpoint is found a message is printed and nothing is
      evaluated.
    """
    with tf.Graph().as_default() as g:
        # Input tensors for the multi-task CNN.
        images, skuid, labels, hots = cnn.inputs(
            test_tfrecord_file,
            eval_data=True,
            batch_size=FLAGS.test_batch_size)

        # Logits from the inference model, multiplied element-wise by `hots`.
        logits = cnn.inference(images, n_cnn=5)
        hots = tf.cast(hots, tf.float32)
        logits = tf.multiply(logits, hots, name='assign_label')

        num_splits = tf.constant(cnn.obtain_splits(num_splits_path))

        # Predictions from the model and from the reference labels.
        cnn_pred = cnn.predict(logits, num_splits)
        origin_pred = cnn.predict(labels, num_splits)

        # Restore the moving-average version of the learned variables.
        variable_averages = tf.train.ExponentialMovingAverage(
            cnn.MOVING_AVERAGE_DECAY)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        # Summary op built from the TF collection of summaries.
        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(FLAGS.eval_dir, g)

        with tf.Session() as sess:
            ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
            if ckpt and ckpt.model_checkpoint_path:
                # Restore from the checkpoint; its name ends in '-<step>'.
                saver.restore(sess, ckpt.model_checkpoint_path)
                print("restore from file")
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                    '-')[-1]
            else:
                print('No checkpoint file found')
                return

            # Start the queue runners.
            coord = tf.train.Coordinator()
            threads = []
            try:
                for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                    threads.extend(
                        qr.create_threads(sess,
                                          coord=coord,
                                          daemon=True,
                                          start=True))

                # True division so ceil() rounds up on Python 2 as well.
                num_iter = int(
                    math.ceil(float(test_num) / FLAGS.test_batch_size))

                step = 0
                while step < num_iter and not coord.should_stop():
                    sku, cnn_predi, origin_predi = sess.run(
                        [skuid, cnn_pred, origin_pred])
                    # The first batch creates/truncates the output file; the
                    # remaining batches append to it. Counting the first
                    # batch as step 0 keeps the total at num_iter batches.
                    mode = 'wb' if step == 0 else 'ab'
                    record_sku_pred(sku, cnn_predi, origin_predi, num_splits,
                                    test_pred_file, mode)
                    step += 1

                summary = tf.Summary()
                summary.ParseFromString(sess.run(summary_op))
                summary_writer.add_summary(summary, global_step)
            except Exception as e:  # pylint: disable=broad-except
                coord.request_stop(e)

            coord.request_stop()
            coord.join(threads, stop_grace_period_secs=10)
BATCH_SIZE, image_size=IMAGE_SIZE, ch_size=CH_SIZE, shuffle = True, distored = True) v_images, v_labels, _ = input_data.load_cifar10( TRAIN_FILE, BATCH_SIZE, image_size=IMAGE_SIZE, ch_size=CH_SIZE, shuffle = True, distored = True) """ #output=mynn.inference2(images,keep_prob,IMAGE_SIZE,CH_SIZE,NUM_CLASS) output = mynn.inference(images, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS) validate = mynn.inference(v_images, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS, validate=True) loss = mynn.loss(output, labels) train_op = mynn.training(loss) acc = mynn.accuracy(validate, v_labels) with tf.Session() as sess: saver = tf.train.Saver(max_to_keep=0) sess.run(tf.initialize_all_variables()) ckpt = tf.train.get_checkpoint_state(sess, '/output/') print(ckpt)
def train(networkmodel, MODEL_SAVE_PATH, MODEL_NAME):
    """Train the selected model for TRAINING_STEPS steps on the CPU.

    The training loss is printed every 10 steps. Every 100 steps the accuracy
    on a test batch is printed and a checkpoint is saved.

    Args:
      networkmodel: truthy selects the `network` model on flattened input;
        falsy selects the convolutional `cnn` model.
      MODEL_SAVE_PATH: directory the checkpoints are written to.
      MODEL_NAME: checkpoint file name inside MODEL_SAVE_PATH.
    """
    # with tf.device('/gpu:0'):
    with tf.device('/cpu:0'):
        train_start = time.time()

        # Training data with labels.
        x, y_ = readdata.get_batch(train=True,
                                   batch_size=BATCH_SIZE,
                                   num_epochs=None)
        # Test data with labels.
        text_x, text_y = readdata.get_batch(train=False,
                                            batch_size=BATCH_SIZE,
                                            num_epochs=None)

        if networkmodel:
            # Flatten the input to one dimension per example; -1 leaves the
            # number of examples unspecified.
            x = tf.reshape(x, [-1, x.shape[1] * x.shape[2] * x.shape[3]])
            # Training output.
            y = network.inference(x, avg_class=None, reuse=False, lamada=None)
        else:
            # Convolutional model: training output.
            y = cnn.inference(x, False, False, regularizer=None)

        # Step counter: starts at 0, incremented once per batch.
        global_step = tf.Variable(0, trainable=False)

        if networkmodel:
            # Flatten the test data the same way as the training data.
            text_x = tf.reshape(
                text_x,
                [-1, text_x.shape[1] * text_x.shape[2] * text_x.shape[3]])
            # Test output.
            average_y = network.inference(text_x,
                                          avg_class=None,
                                          reuse=True,
                                          lamada=None)
        else:
            # Convolutional model: test output.
            average_y = cnn.inference(text_x, True, False, regularizer=None)

        # Per-example cross entropy between the logits and the true class;
        # the loss is its batch mean.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=y, labels=tf.argmax(y_, 1))
        cross_entropy_mean = tf.reduce_mean(cross_entropy)
        loss = cross_entropy_mean

        # One gradient-descent step with learning rate LEARNING_RATE that
        # minimises the loss and increments global_step.
        train_step = tf.train.GradientDescentOptimizer(LEARNING_RATE).minimize(
            loss, global_step=global_step)

        # Group the training step under a single named op.
        with tf.control_dependencies([train_step]):
            train_op = tf.no_op(name='train')

        # Whether the predicted class equals the true class.
        correct_prediction = tf.equal(tf.argmax(average_y, 1),
                                      tf.argmax(text_y, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        saver = tf.train.Saver()
        checkpoint_prefix = os.path.join(MODEL_SAVE_PATH, MODEL_NAME)

        with tf.Session() as sess:
            # Initialise the model parameters.
            sess.run(tf.local_variables_initializer())
            sess.run(tf.global_variables_initializer())

            # Coordinator plus the threads that feed the input queues.
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)

            try:
                for i in range(TRAINING_STEPS):
                    start_time = time.time()
                    _, loss_value, step = sess.run(
                        [train_op, loss, global_step])
                    end_time = time.time()
                    print('Training elapsed each step time:%f s' %
                          (end_time - start_time))

                    # Report the training loss.
                    if (i + 1) % 10 == 0:
                        print(
                            "After %d training step(s), loss on training batch is %g."
                            % (step, loss_value))

                    # Report the validation accuracy and save the model.
                    if (i + 1) % 100 == 0:
                        validate_acc = sess.run(accuracy)
                        print(
                            "After %d training step(s),validation accuracy using average model is %g."
                            % (step, validate_acc))
                        saver.save(sess,
                                   checkpoint_prefix,
                                   global_step=global_step)

                train_end = time.time()
                print('Training elapsed total time:%f s' %
                      (train_end - train_start))
            finally:
                # Ask all input threads to stop, even when a step raised.
                coord.request_stop()
                coord.join(threads)
import time

import tensorflow as tf

import cnn as cnn
import cnn_input as cnn_input

image, label = cnn_input.generate_image_and_label()
images, labels = cnn_input.generate_images_and_labels_batch(image=image,
                                                            label=label,
                                                            shuffle=True)

# Values computed by the network.
logits = cnn.inference(images)
# Loss returned by cnn.loss (the original note describes it as the mean
# cross entropy).
loss = cnn.loss(logits, labels)
# One Adam optimisation step on the loss.
train_step = tf.train.AdamOptimizer(1e-4).minimize(loss)

# Accuracy on the training batch.
correct_predict = tf.equal(tf.argmax(logits, 1), tf.argmax(labels, 1))
accuracy = tf.reduce_mean(tf.cast(correct_predict, tf.float32))

# Let GPU memory grow on demand instead of reserving it all up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True

t1 = time.time()
# The config only takes effect when it is handed to the session.
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    try:
        for i in range(10000):
            if i % 100 == 0:
                acc = sess.run(accuracy)
                print('epoch:%d, acc: %f' % (i, acc))
            sess.run(train_step)
    finally:
        # Always stop the input threads, even when a step raises.
        coord.request_stop()
        coord.join(threads)
IMAGE_SIZE = 64 NUM_CLASS = 2 CH_SIZE = 3 BATCH_SIZE = 200 TRAIN_FILE = ['train.csv'] TEST_FILE = ['test.csv'] MAX_STEPS = 100000 flags = tf.app.flags FLAGS = flags.FLAGS keep_prob = tf.placeholder("float") v_images, v_labels, filename = input_data.load_data_for_test( TEST_FILE, BATCH_SIZE, image_size=IMAGE_SIZE, ch_size=CH_SIZE) #output=mynn.inference2(images,keep_prob,IMAGE_SIZE,CH_SIZE,NUM_CLASS) validate = mynn.inference(v_images, keep_prob, IMAGE_SIZE, CH_SIZE, NUM_CLASS) acc = mynn.accuracy(validate, v_labels) with tf.Session() as sess: saver = tf.train.Saver(max_to_keep=0) #sess.run(tf.initialize_all_variables()) model_path = '/output' saver.restore(sess, tf.train.latest_checkpoint(model_path)) print("Model restore finished") # SummaryWriterでグラフを書く tf.train.start_queue_runners(sess) acc_res, filename_res, actual_res, expect_res = sess.run( [acc, filename, validate, v_labels], feed_dict={keep_prob: 1.0}) print('accuracy', acc_res) goods = [] bads = []