Example #1
    def get_tfrecords(self):

        # xtrain : the full training set (all records)
        # *_l    : the labeled subset of the training records
        from mnist import inputs
        xtrain, _ = inputs(self.batch_size, 'train')
        xtrain_l, ytrain_l = inputs(self.batch_size, 'train_labeled')
        xtest, ytest = inputs(self.batch_size, 'test')

        return (xtrain_l, ytrain_l), xtrain, (xtest, ytest)
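
The inputs helper imported from mnist above is not shown. Here is a minimal sketch of what such a helper typically looks like, assuming TFRecord files named <split>.tfrecords with image_raw and label features; the file and feature names are assumptions, not the original module:

import tensorflow as tf

def inputs(batch_size, split):
    # Queue of TFRecord files for the requested split (the file naming
    # scheme is an assumption; the original module may differ)
    filename_queue = tf.train.string_input_producer(['%s.tfrecords' % split])
    reader = tf.TFRecordReader()
    _, serialized = reader.read(filename_queue)
    features = tf.parse_single_example(serialized, features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'label': tf.FixedLenFeature([], tf.int64),
    })
    image = tf.decode_raw(features['image_raw'], tf.uint8)
    image = tf.reshape(tf.cast(image, tf.float32) / 255.0, [28, 28, 1])
    label = tf.cast(features['label'], tf.int32)
    # Batch with shuffling; queue runners must be started in the session
    return tf.train.shuffle_batch([image, label],
                                  batch_size=batch_size,
                                  capacity=1000 + 3 * batch_size,
                                  min_after_dequeue=1000)
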
Example #2
def main(train_dir, batch_size, num_batches, log_dir, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = log_dir

    images, labels = inputs(train_dir, False, batch_size, num_batches)
    predictions, total_loss = network(images, labels)

    tf.summary.scalar('loss', total_loss)
    predictions = tf.to_int32(tf.argmax(predictions, 1))

    tf.summary.scalar('accuracy', slim.metrics.accuracy(predictions, labels))

    # These are streaming metrics, which compute a "running" value of the
    # metric, e.g. running accuracy
    metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({
        'accuracy':
        slim.metrics.streaming_accuracy(predictions, labels),
    })

    # Define the streaming summaries to write:
    for metric_name, metric_value in metrics_to_values.items():
        tf.summary.scalar(metric_name, metric_value)

    # Evaluate every 30 seconds
    slim.evaluation.evaluation_loop('',
                                    checkpoint_dir,
                                    log_dir,
                                    num_evals=1,
                                    eval_op=list(metrics_to_updates.values()),
                                    summary_op=tf.summary.merge_all(),
                                    eval_interval_secs=30,
                                    max_number_of_evaluations=100000000)
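
For context, this is one way such a main could be wired to the command line; the flag names and defaults below are assumptions for illustration, not part of the original:

import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train_dir', default='data/')
    parser.add_argument('--batch_size', type=int, default=32)
    parser.add_argument('--num_batches', type=int, default=None)
    parser.add_argument('--log_dir', default='log/eval')
    parser.add_argument('--checkpoint_dir', default='log/train')
    args = parser.parse_args()
    main(args.train_dir, args.batch_size, args.num_batches,
         args.log_dir, args.checkpoint_dir)
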
Example #3
def main(train_dir, batch_size, num_batches, log_dir, checkpoint_dir=None):
    if checkpoint_dir is None:
        checkpoint_dir = log_dir

    images, labels = inputs(train_dir, False, batch_size, num_batches)
    predictions = lenet(images)
    predictions = tf.to_int32(tf.argmax(predictions, 1))

    tf.summary.scalar('accuracy', slim.metrics.accuracy(predictions, labels))

    # These are streaming metrics, which compute a "running" value of the
    # metric, e.g. running accuracy
    metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map({
        "streaming_mse":
        slim.metrics.streaming_mean_squared_error(predictions, labels),
    })

    # Define the streaming summaries to write:
    for metric_name, metric_value in metrics_to_values.items():
        tf.summary.scalar(metric_name, metric_value)

    # Evaluate every 30 seconds
    slim.evaluation.evaluation_loop('',
                                    checkpoint_dir,
                                    log_dir,
                                    num_evals=num_batches,
                                    eval_op=list(metrics_to_updates.values()),
                                    summary_op=tf.summary.merge_all(),
                                    eval_interval_secs=30)
Example #4
def train():
    images, labels = mnist.inputs(['train_img.tfrecords'], mnist.TRAIN_EXAMPLES_NUM,
                                  FLAGS.batch_size, shuffle=True)
    global_step = tf.train.get_or_create_global_step()

    logits, pred = mnist.inference(images, training=True)
    loss = mnist.loss(logits, labels)
    train_op = mnist.train(loss, global_step)
    saver = tf.train.Saver()
    with tf.Session() as sess:
        init_op = tf.group(
            tf.local_variables_initializer(),
            tf.global_variables_initializer())
        sess.run(init_op)
        ckpt = os.path.join(FLAGS.train_dir, 'model.ckpt')

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)

        for i in range(1, FLAGS.max_step + 1):
            _, train_loss, predict, label = sess.run([train_op, loss, pred, labels])
            # print(predict, '\n', label)
            if i % 100 == 0:
                print('step: {}, loss: {}'.format(i, train_loss))
                # print(predict, '\n', label)
                saver.save(sess, ckpt, global_step=i)

        coord.request_stop()
        coord.join(threads)
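
The mnist.loss and mnist.train helpers called above are not shown. A minimal sketch of what they might contain, assuming sparse integer labels; the optimizer and learning rate are arbitrary assumptions:

import tensorflow as tf

def loss(logits, labels):
    # Mean cross-entropy over the batch; labels are integer class ids
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        labels=labels, logits=logits)
    return tf.reduce_mean(cross_entropy)

def train(total_loss, global_step):
    # The choice of Adam and the learning rate are assumptions
    optimizer = tf.train.AdamOptimizer(1e-4)
    return optimizer.minimize(total_loss, global_step=global_step)
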
Example #5
def main(train_dir, batch_size, num_batches, log_dir):
    images, labels = inputs(train_dir,
                            True,
                            batch_size,
                            num_batches)
    predictions, total_loss = network(images, labels)

    tf.summary.scalar('loss', total_loss)

    optimizer = tf.train.GradientDescentOptimizer(0.001)
    train_op = slim.learning.create_train_op(total_loss, optimizer, summarize_gradients=True)

    slim.learning.train(train_op, log_dir, save_summaries_secs=10, save_interval_secs=10)
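
The network(images, labels) helper used here and in Example #2 returns both predictions and a total loss. A hedged sketch of one possible definition built from slim primitives (the original is not shown, and the layer sizes are assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def network(images, labels):
    net = slim.conv2d(images, 32, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    net = slim.flatten(net)
    logits = slim.fully_connected(net, 10, activation_fn=None, scope='logits')
    # Register the loss with slim so get_total_loss() also picks up
    # any regularization terms
    slim.losses.sparse_softmax_cross_entropy(logits, labels)
    return logits, slim.losses.get_total_loss()
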
Example #6
def evaluation():
    images, labels = mnist.inputs(['./validation_img.tfrecords'], mnist.VALIDATION_EXAMPLES_NUM,
                                  batch_size=FLAGS.batch_size, shuffle=False)
    logits, pred = mnist.inference(images, training=False)
    top_k_op = tf.nn.in_top_k(logits, labels, 1)

    saver = tf.train.Saver()

    while True:
        eval_once(saver, top_k_op)
        if FLAGS.run_once:
            break
        time.sleep(FLAGS.eval_interval_secs)
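
eval_once is referenced above but not defined. A minimal sketch mirroring the checkpoint-restore loop from Example #10 below; the flag names follow that example and are otherwise assumptions:

import math
import numpy as np
import tensorflow as tf

def eval_once(saver, top_k_op):
    with tf.Session() as sess:
        # Restore the latest checkpoint, if one exists
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if not (ckpt and ckpt.model_checkpoint_path):
            print('No checkpoint file found')
            return
        saver.restore(sess, ckpt.model_checkpoint_path)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess, coord=coord)
        num_iter = int(math.ceil(mnist.VALIDATION_EXAMPLES_NUM / FLAGS.batch_size))
        true_count, step = 0, 0
        while step < num_iter and not coord.should_stop():
            true_count += np.sum(sess.run(top_k_op))
            step += 1
        precision = true_count / (num_iter * FLAGS.batch_size)
        print('precision @ 1 = %.3f' % precision)
        coord.request_stop()
        coord.join(threads)
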
Example #7
def main(train_dir, batch_size, num_batches, log_dir):
    images, labels = inputs(train_dir, True, batch_size, num_batches)
    predictions, total_loss = network(images, labels)

    tf.summary.scalar('loss', total_loss)

    optimizer = tf.train.MomentumOptimizer(learning_rate=0.025,
                                           momentum=0.9,
                                           use_nesterov=True)
    train_op = slim.learning.create_train_op(total_loss,
                                             optimizer,
                                             summarize_gradients=True)

    slim.learning.train(train_op,
                        log_dir,
                        save_summaries_secs=20,
                        save_interval_secs=20)
Example #8
def main(train_dir, batch_size, num_batches, log_dir):
    images, labels = inputs(train_dir,
                            True,
                            batch_size,
                            num_batches,
                            one_hot_labels=True)
    predictions = lenet(images)

    slim.losses.softmax_cross_entropy(predictions, labels)
    total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('loss', total_loss)

    optimizer = tf.train.RMSPropOptimizer(0.001, 0.9)
    train_op = slim.learning.create_train_op(total_loss,
                                             optimizer,
                                             summarize_gradients=True)

    slim.learning.train(train_op, log_dir, save_summaries_secs=20)
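
The lenet function above is assumed to return class logits. One plausible slim-based definition, given here as an illustration rather than the original:

import tensorflow.contrib.slim as slim

def lenet(images, num_classes=10):
    # Classic LeNet-style stack; the filter counts are assumptions
    net = slim.conv2d(images, 20, [5, 5], scope='conv1')
    net = slim.max_pool2d(net, [2, 2], scope='pool1')
    net = slim.conv2d(net, 50, [5, 5], scope='conv2')
    net = slim.max_pool2d(net, [2, 2], scope='pool2')
    net = slim.flatten(net)
    net = slim.fully_connected(net, 500, scope='fc1')
    return slim.fully_connected(net, num_classes, activation_fn=None,
                                scope='logits')
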
Example #9
def train():
    """
	训练mnist网络
	"""
    # with tf.Graph().as_default():
    # 	global_step = tf.contrib.framework.get_or_create_global_step()
    global_step = tf.contrib.framework.get_or_create_global_step()

    #初始化所有参数
    init = tf.global_variables_initializer()

    #获取(image, label)batch pair
    image_batch, label_batch = mnist.inputs('train')

    #损失函数sparse_softmax_cross_entropy_with_logits要求rank_of_labels = rank_of_images - 1
    #对label_batch作扁平化处理
    label_batch = tf.reshape(label_batch, [50])

    #扩展image维度,从[batch, row, col]转换为[batch, row, col, depth=1]
    expand_image_batch = tf.expand_dims(image_batch, -1)

    # The loss uses sparse_softmax_cross_entropy_with_logits(), which handles
    # integer labels directly, so no explicit one-hot conversion is needed

    # Build the MNIST model and compute logits for each batch example
    logits = mnist.inference(expand_image_batch, dropout=0.5)

    loss = mnist.loss(logits=logits, labels=label_batch)

    accuracy = mnist.train_accuracy(logits, label_batch)

    train_op = mnist.train(loss, global_step)

    class _LoggerHook(tf.train.SessionRunHook):
        """Log loss, accuracy, and runtime statistics."""

        def begin(self):
            self._step = -1
            self._start_time = time.time()

        def before_run(self, run_context):
            self._step += 1
            # Request the loss and accuracy tensors; their values are
            # retrieved in after_run
            return tf.train.SessionRunArgs([loss, accuracy])

        def after_run(self, run_context, run_values):
            if self._step % FLAGS.log_frequency == 0:
                _current_time = time.time()
                duration = _current_time - self._start_time

                self._start_time = _current_time

                # Extract the loss and accuracy values requested in before_run
                loss_value, accuracy_value = run_values.results
                # examples per second, and seconds per batch
                examples_per_sec = FLAGS.batch_size * FLAGS.log_frequency / duration
                sec_per_batch = float(duration / FLAGS.log_frequency)

                # Print training status to the console:
                # time: step, loss, accuracy (examples/sec, sec/batch)
                format_str = (
                    '%s: step %d, loss=%.2f, accuracy=%.2f '
                    '(%.1f examples/sec, %.3f sec/batch)'
                )
                print(format_str %
                      (datetime.now(), self._step, loss_value, accuracy_value,
                       examples_per_sec, sec_per_batch))

    # Train for at most 20000 steps, printing output every 10 steps
    # By default, MonitoredTrainingSession saves a checkpoint every 600 s
    # and a summary every 100 steps
    with tf.train.MonitoredTrainingSession(
            checkpoint_dir=FLAGS.train_dir,
            hooks=[
                tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                tf.train.NanTensorHook(loss),
                _LoggerHook()
            ],
            save_checkpoint_secs=60,
            config=tf.ConfigProto(log_device_placement=False,
                                  allow_soft_placement=True)) as mon_sess:
        # MonitoredTrainingSession initializes variables through its
        # Scaffold, so no explicit init op is needed here
        while not mon_sess.should_stop():
            mon_sess.run(train_op)
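
The mnist.train_accuracy helper used above is not shown. A minimal sketch, under the assumption that it returns the fraction of the batch whose argmax prediction matches the integer label:

import tensorflow as tf

def train_accuracy(logits, labels):
    # Compare the argmax prediction against the integer class ids
    correct = tf.equal(tf.cast(tf.argmax(logits, 1), tf.int32), labels)
    return tf.reduce_mean(tf.cast(correct, tf.float32))
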
Example #10
def evaluate():
    image_batch, label_batch = mnist.inputs(data_type='test')

    image_batch = tf.expand_dims(image_batch, -1)
    label_batch = tf.reshape(label_batch, [50])

    # The default dropout keep probability is 1.0
    logits = mnist.inference(image_batch)

    # Check whether the true label is in the top-1 prediction
    top_k_op = tf.nn.in_top_k(logits, label_batch, 1)

    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Restore the model from the latest checkpoint
        ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)
            print(ckpt.model_checkpoint_path)
            # Extract the global_step value from the checkpoint path
            global_step = ckpt.model_checkpoint_path.split('/')[-1].split(
                '-')[-1]
        else:
            print('No Checkpoint File Found!')
            return

        graph = tf.get_default_graph()
        keep_prob = graph.get_tensor_by_name('keep_prob:0')

        # Start the queue runners
        coord = tf.train.Coordinator()
        try:
            threads = []
            # Create threads for the data-reading queues
            for qr in tf.get_collection(tf.GraphKeys.QUEUE_RUNNERS):
                threads.extend(
                    qr.create_threads(sess, coord, daemon=True, start=True))

            # The total example count may not be divisible by batch_size;
            # by default, batch() does not emit batches smaller than the
            # configured size
            num_iter = int(math.ceil(FLAGS.num_examples / FLAGS.batch_size))
            total_example_cnt = num_iter * FLAGS.batch_size
            true_cnt = 0  # count of correct predictions
            step = 0

            while step < num_iter and not coord.should_stop():
                predictions = sess.run(top_k_op, feed_dict={keep_prob: 1.0})
                true_cnt += np.sum(predictions)
                step += 1

            # Compute test accuracy: precision @ 1
            precision = true_cnt / total_example_cnt
            print('%s: precision @ 1 = %.3f' % (datetime.now(), precision))

            # The reference code also writes TensorBoard summaries here
        except Exception as e:
            coord.request_stop(e)

        coord.request_stop()
        # Once request_stop() is called, the remaining threads get a grace
        # period to stop (default 2 min); after the timeout,
        # coordinator.join() raises a runtime exception
        coord.join(threads, stop_grace_period_secs=10)