def evaluation():
    with tf.Graph().as_default():
        n_test = cifar10_input.NUM_EXAMPLES_PER_EPOCH_FOR_EVAL
        eval_images, eval_labels = cifar10_input.inputs(DATA_DIR, BATCH_SIZE)
        eval_logits = cifar10_model.inference(eval_images)  # logits for the test set
        # tf.nn.in_top_k(predictions, targets, k, name=None)
        # For each sample, checks whether the target label is among the k largest
        # predictions. k is usually 1, i.e. compare the index of the highest
        # predicted probability with the label.
        top_k_op = tf.nn.in_top_k(eval_logits, eval_labels, 1)
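        # A small illustration (not from the original snippet): with
        # eval_logits = [[0.1, 0.7, 0.2]] and eval_labels = [1], in_top_k(..., 1)
        # returns [True]; with eval_labels = [2] it would return [False].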
        saver = tf.train.Saver()
        with tf.Session() as session:
            ckpt = tf.train.get_checkpoint_state('./signal_GPU/saver')
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(session, ckpt.model_checkpoint_path)
            coord = tf.train.Coordinator()

            threads = tf.train.start_queue_runners(sess=session, coord=coord)
            num_iter = int(n_test / BATCH_SIZE)
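            # Integer division drops any leftover examples, so up to BATCH_SIZE - 1
            # test images beyond num_iter * BATCH_SIZE are never evaluated.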
            true_count = 0
            for step in range(num_iter):
                predictions = session.run(top_k_op)
                true_count = true_count + np.sum(predictions)
            precision = true_count / (num_iter * BATCH_SIZE)
            print('precision=', precision)
            coord.request_stop()
            coord.join(threads)
def train():
    with tf.Graph().as_default():
        global_step = tf.Variable(0, trainable=False)

        images, labels = cifar10_model.distorted_inputs()
        logits = cifar10_model.inference(images)
        loss = cifar10_model.loss(logits, labels)
        train_op = cifar10_model.train(loss, global_step)
        init = tf.global_variables_initializer()  # initialize_all_variables() is deprecated
        sess = tf.Session(config=tf.ConfigProto(
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)
        tf.train.start_queue_runners(sess=sess)

        for step in range(FLAGS.max_steps):
            start_time = time.time()
            _, loss_value = sess.run([train_op, loss])
            duration = time.time() - start_time

            if step % 100 == 0:
                num_examples_per_step = FLAGS.batch_size
                examples_per_sec = num_examples_per_step / duration
                sec_per_batch = float(duration)

                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                    'sec/batch)')
                print(format_str % (datetime.now(), step, loss_value,
                                    examples_per_sec, sec_per_batch))
# Example 3
def train():
    
    my_global_step = tf.Variable(0, name='global_step', trainable=False)
    
    
    data_dir = './data/cifar-10-batches-bin/'
    log_dir = './logs/train/'
    
    images, labels = cifar10_input.read_cifar10(data_dir=data_dir,
                                                is_train=True,
                                                batch_size= BATCH_SIZE,
                                                shuffle=True)
    logits = cifar10_model.inference(images, BATCH_SIZE, n_classes=N_CLASSES)
    
    loss = cifar10_model.losses(logits, labels)
    
    
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)  # define the optimizer
    train_op = optimizer.minimize(loss, global_step=my_global_step)  # run the optimization step
    
    saver = tf.train.Saver(tf.global_variables())
    summary_op = tf.summary.merge_all()
    
    
    
    init = tf.global_variables_initializer()
    sess = tf.Session()
    sess.run(init)
    
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    
    summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
    
    try:
        for step in np.arange(MAX_STEP):
            if coord.should_stop():
                break
            _, loss_value = sess.run([train_op, loss])
               
            if step % 50 == 0:                 
                print ('Step: %d, loss: %.4f' % (step, loss_value))
                
            if step % 100 == 0:
                summary_str = sess.run(summary_op)
                summary_writer.add_summary(summary_str, step)                
    
            if step % 2000 == 0 or (step + 1) == MAX_STEP:
                checkpoint_path = os.path.join(log_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
                
    except tf.errors.OutOfRangeError:
        print('Done training -- epoch limit reached')
    finally:
        coord.request_stop()
        
    coord.join(threads)
    sess.close()
#%%
def train():
    # Read images and feed them through the network
    images, labels = cifar10_input.distorted_inputs(DATA_DIR, BATCH_SIZE)
    t_logits = cifar10_model.inference(images)
    # loss value
    t_loss = cifar10_model.loss(t_logits, labels)
    tf.summary.scalar('loss_value', t_loss)
    # optimizer
    global_step = tf.Variable(0, trainable=False)
    t_optimizer = cifar10_model.train_step(t_loss, global_step)
    # accuracy
    t_accuracy = cifar10_model.accuracy(t_logits, labels)  # accuracy on the training batch
    tf.summary.scalar('accuracy_value', t_accuracy)

    merged = tf.summary.merge_all()
    saver = tf.train.Saver()
    Accuracy_value = []
    Loss_value = []
    # Cap the fraction of GPU memory this process may use
    config = tf.ConfigProto()
    config.gpu_options.per_process_gpu_memory_fraction = 0.5
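    # As an alternative (not in the original snippet), the fixed cap could be replaced
    # by on-demand allocation: config.gpu_options.allow_growth = True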
    with tf.Session(config=config) as session:
        session.run(tf.global_variables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=session, coord=coord)
        train_writer = tf.summary.FileWriter('./signal_GPU/logs',
                                             session.graph)
        for index in range(EPOCHES):
            _, loss_value, accuracy_value, summary = session.run(
                [t_optimizer, t_loss, t_accuracy, merged])
            Accuracy_value.append(accuracy_value)
            Loss_value.append(loss_value)
            if index % 1000 == 0:
                print('index:', index, ' loss_value:', loss_value,
                      ' accuracy_value:', accuracy_value)
            train_writer.add_summary(summary, index)
        saver.save(session, os.path.join('./signal_GPU/saver/', 'model.ckpt'))
        # accuracy value
        plt.figure(figsize=(20, 10))
        plt.plot(range(EPOCHES), Accuracy_value)
        plt.xlabel('training step')
        plt.ylabel('accuracy value')
        plt.title('the accuracy value of training data')
        plt.savefig('./signal_GPU/accuracy.png')
        # loss value
        plt.figure()
        plt.plot(range(EPOCHES), Loss_value)
        plt.xlabel('training step')
        plt.ylabel('loss value')
        plt.title('the value of the loss function of the training data')
        plt.savefig('./signal_GPU/loss.png')
        #
        train_writer.close()
        coord.request_stop()
        coord.join(threads)
def evaluate():
    with tf.Graph().as_default():
        
        log_dir = './logs/train/'
        test_dir = './data/cifar-10-batches-bin/'
        n_test = 10000
        
        
        # reading test data
        images, labels = cifar10_input.read_cifar10(data_dir=test_dir,
                                                    is_train=False,
                                                    batch_size= BATCH_SIZE,
                                                    shuffle=False)

        logits = cifar10_model.inference(images, BATCH_SIZE, N_CLASSES)
        # Compare the true labels with the predictions
        top_k_op = tf.nn.in_top_k(logits, labels, 1)
        saver = tf.train.Saver(tf.global_variables())
        
        with tf.Session() as sess:
            # Load the model from the checkpoint file
            print("Reading checkpoints...")
            ckpt = tf.train.get_checkpoint_state(log_dir)
            if ckpt and ckpt.model_checkpoint_path:
                global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
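                # e.g. a checkpoint saved as './logs/train/model.ckpt-2000' yields
                # global_step == '2000'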
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('Loading success, global_step is %s' % global_step)
            else:
                print('No checkpoint file found')
                return
        
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess = sess, coord = coord)
            
            try:
                num_iter = int(math.ceil(n_test / BATCH_SIZE))  # ceil(10000 / 64), rounded up
                true_count = 0
                total_sample_count = num_iter * BATCH_SIZE  # number of examples we evaluate
                step = 0

                while step < num_iter and not coord.should_stop():
                    predictions = sess.run([top_k_op])
                    true_count += np.sum(predictions)
                    step += 1
                precision = true_count / total_sample_count
                print('precision = %.3f' % precision)
            except Exception as e:
                coord.request_stop(e)
            finally:
                coord.request_stop()
                coord.join(threads)
    
#%%
def model_fn(features, labels, mode, params):
    logits = cifar10_model.inference(image_batch=features,
                                     batch_size=params.get('batch_size'))
    loss = cifar10_model.loss(logits, labels)
    train_op = cifar10_model.train(loss, batch_size=params.get('batch_size'))

    if mode == tf.estimator.ModeKeys.TRAIN:
        logging_hook = tf.train.LoggingTensorHook({'loss': loss},
                                                  every_n_iter=1000)
        return tf.estimator.EstimatorSpec(mode,
                                          loss=loss,
                                          train_op=train_op,
                                          training_hooks=[logging_hook])
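# A minimal usage sketch (an assumption, not part of the original snippet): wiring this
# model_fn into an Estimator. The model_dir, batch size, and `train_input_fn` (an input
# function returning (features, labels)) are hypothetical.
# estimator = tf.estimator.Estimator(model_fn=model_fn,
#                                    model_dir='./estimator_logs',
#                                    params={'batch_size': 128})
# estimator.train(input_fn=train_input_fn, steps=10000)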
# Example 7
def run_predicting(class_names, images, real_labels=None):
    """
    Run image predicting. Use matplotlib to draw the predicting results.
    :param class_names: names of labels.
    :param images: A numpy array of shape [NUM, HEIGHT, WIDTH, DEPTH], float32, represents the images to predict.
    :param real_labels: A numpy array of shape [NUM], int32, each element represents the class id of
    the image to predict.
    :return: if `pred_labels` is not none, returns the accuracy, else return none.
    """

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        # images = sess.run(central_crop_images(images))

        # logits, _ = cifar10_model.inference(tf.convert_to_tensor(images), iteration=None, is_test=tf.convert_to_tensor(True), keep_prob=1.0)
        logits, _ = cifar10_model.inference(central_crop_images(images),
                                            iteration=None,
                                            is_test=tf.convert_to_tensor(True),
                                            keep_prob=1.0)
        pred_op = prediction(logits)

        ckpt = tf.train.get_checkpoint_state(cifar10_train.LOG_DIR)
        saver = tf.train.Saver()

        if ckpt and ckpt.model_checkpoint_path:
            saver.restore(sess, ckpt.model_checkpoint_path)

            pred_labels = sess.run(pred_op)
            pred_labels = [class_names[i] for i in pred_labels]
            if real_labels is not None:
                eval_op = cifar10_eval.evaluation(logits, real_labels)
                accuracy = sess.run(eval_op)
                print("accuracy: %f" % accuracy)
            rlbl = [class_names[i] for i in real_labels] if real_labels is not None else None
            datavis.data_vis(images, pred_labels, rlbl)
        else:
            print('You must train before use!')
# Example 8
def run_training():
    cifar10_data = Cifar10Data('./input_data')

    images_pl = tf.placeholder(tf.float32, [
        None, cifar10_model.IMAGE_BATCH_HEIGHT,
        cifar10_model.IMAGE_BATCH_WIDTH, cifar10_model.IMAGE_BATCH_DEPTH
    ])
    labels_pl = tf.placeholder(tf.int32)
    keep_prob_pl = tf.placeholder(tf.float32)
    learning_rate_pl = tf.placeholder(tf.float32)
    is_test_pl = tf.placeholder(tf.bool)
    iter_pl = tf.placeholder(tf.int32)

    with tf.Session() as sess:
        logits, update_ema = cifar10_model.inference(images_pl, iter_pl,
                                                     is_test_pl, keep_prob_pl)
        total_loss = cifar10_model.loss(logits, labels_pl)
        train_op = cifar10_model.train(total_loss, learning_rate_pl)
        eval_op = cifar10_eval.evaluation(logits, labels_pl)

        saver = tf.train.Saver()
        summary = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(LOG_DIR, sess.graph)
        sess.run(tf.global_variables_initializer())

        # learning rate decay
        max_learning_rate = 0.02  # 0.003
        min_learning_rate = 0.0001
        decay_speed = 1600.0  # 2000.0
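        # Sanity check of the schedule below: at step 0 the rate equals
        # max_learning_rate (0.02); as step grows, exp(-step / decay_speed) -> 0,
        # so the rate decays toward min_learning_rate (0.0001).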
        for step in range(MAX_STEPS):
            print('step %d/%d' % (step, MAX_STEPS))
            start_time = time.time()
            images_feed, labels_feed = cifar10_data.random_training_batch(
                cifar10_model.BATCH_SIZE)
            learning_rate = min_learning_rate + (max_learning_rate -
                                                 min_learning_rate) * math.exp(
                                                     -step / decay_speed)

            images_feed = sess.run(
                cifar10_model.random_distort_images(images_feed))

            feed_dict = {
                images_pl: images_feed,
                labels_pl: labels_feed,
                keep_prob_pl: 0.75,
                learning_rate_pl: learning_rate,
                is_test_pl: False,
                iter_pl: step
            }
            sess.run(train_op, feed_dict=feed_dict)

            feed_dict = {
                images_pl: images_feed,
                labels_pl: labels_feed,
                keep_prob_pl: 1.0,
                learning_rate_pl: learning_rate,
                is_test_pl: False,
                iter_pl: step
            }
            sess.run(update_ema, feed_dict=feed_dict)

            duration = time.time() - start_time

            # Write the summaries and print an overview fairly often.
            if (step + 1) % 100 == 0 or (step + 1) == MAX_STEPS:
                train_eval_val, loss_value = sess.run([eval_op, total_loss],
                                                      feed_dict=feed_dict)
                print('Step %d: loss = %.2f, lr = %f (%.3f sec)' %
                      (step + 1, loss_value, learning_rate, duration))
                print('Training Data Eval: %.4f' % train_eval_val)

                summary_str = sess.run(summary, feed_dict=feed_dict)
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()

                # Evaluate the model periodically.
                # feed_dict = {images_pl: data_sets.testing_image,
                #             labels_pl: data_sets.testing_label}
                # test_eval_val = sess.run(eval_op, feed_dict=feed_dict)
                test_eval_val, test_loss_val = cifar10_eval.mass_evaluation(
                    cifar10_data, sess, eval_op, total_loss, images_pl,
                    labels_pl, keep_prob_pl, is_test_pl)
                print('Testing Data Eval: ' + str(test_eval_val) + '  loss: ' +
                      str(test_loss_val))

            # Save a checkpoint periodically.
            if (step + 1) % 1000 == 0 or (step + 1) == MAX_STEPS:
                checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                saver.save(sess, checkpoint_file, global_step=step)

        summary_writer.close()
# Example 9
def main(argv=None):
    global_step = tf.Variable(0, trainable=False)

    train_placeholder = tf.placeholder(tf.float32,
                                       shape=[32, 32, 3],
                                       name='input_image')
    label_placeholder = tf.placeholder(tf.int32, shape=[1], name='label')

    # (width, height, depth) -> (batch, width, height, depth)
    image_node = tf.expand_dims(train_placeholder, 0)

    logits = model.inference(image_node)
    total_loss = _loss(logits, label_placeholder)

    train_op = _train(total_loss, global_step)

    summary = tf.summary.merge_all()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        total_duration = 0

        writer = tf.summary.FileWriter('./tensorflow_log', sess.graph)
        summary_i = 0

        for epoch in range(1, FLAGS.epoch + 1):
            start_time = time.time()

            for file_index in range(5):
                print('Epoch %d: %s' % (epoch, filenames[file_index]))
                reader = Cifar10Reader(filenames[file_index])

                for index in range(10000):
                    if index % 100 == 0:
                        accuracy = 0.0
                        accurate_count = 0
                        accurate_tried_count = 0

                    image = reader.read(index)

                    _, loss_value, logits_value = sess.run(
                        [train_op, total_loss, logits],
                        feed_dict={
                            train_placeholder: image.byte_array,
                            label_placeholder: image.label
                        })

                    accurate_tried_count += 1
                    result = np.argmax(logits_value, 1)
                    if ("%d" % image.label) == ("%d" % result):
                        accurate_count += 1

                    assert not np.isnan(loss_value), \
                      'Model diverged with loss = NaN'

                    if index % 100 == 99:
                        print('[%d]: %r' % (image.label, logits_value))
                        print('Inference: %r' % result)
                        accuracy = accurate_count / accurate_tried_count
                        print('Accuracy: %f' % accuracy)
                        summary_i += 1
                        summary_str = sess.run(summary,
                                               feed_dict={
                                                   train_placeholder:
                                                   image.byte_array,
                                                   label_placeholder:
                                                   image.label
                                               })

                        writer.add_summary(summary_str, summary_i)
                        writer.flush()

                reader.close()

            duration = time.time() - start_time
            total_duration += duration

        print('Total duration = %d sec' % total_duration)
# Example 10
                                        num_parallel_calls=4,
                                        num_epoch=1)
    train_iterator = train_dataset.make_initializable_iterator()
    test_iterator = test_dataset.make_initializable_iterator()

    train_handle = train_iterator.string_handle()
    test_handle = test_iterator.string_handle()
    # build public data entrance
    handle = tf.placeholder(tf.string, shape=[])
    iterator = tf.data.Iterator.from_string_handle(handle, train_iterator.output_types)
    labels, images = iterator.get_next()
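    # The string handles are tensors; they are evaluated once (e.g.
    # h = sess.run(train_handle)) and then fed via the `handle` placeholder to choose
    # which pipeline drives `iterator`.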

# set global step counter
global_step = tf.Variable(initial_value=0, trainable=False, name='global_step')
# inference
logits_before_softmax = inference(images)

with tf.name_scope('train_loss'):
    # compute loss function
    batch_loss, total_loss = loss_func(labels, logits_before_softmax)
    # summary the train loss
    tf.summary.scalar(name='train_loss', tensor=batch_loss)

with tf.name_scope('optimization'):
    # define a placeholder to control the learning rate
    lr = tf.placeholder(dtype=tf.float32, shape=[], name='learning_rate')
    # optimize the model
    train_op = tf.train.AdamOptimizer(learning_rate=1e-3,
                                       beta1=0.9,
                                       beta2=0.999,
                                       epsilon=1e-08).minimize(batch_loss, global_step=global_step)
# Example 11
def train():
    """
    train cifar10 for a number of steps
    """

    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # get images and labels for cifar-10
        # force input pipeline to CPU:0 to avoid operations sometimes ending up
        # on GPU and resulting in a slowdown.
        with tf.device('/cpu:0'):
            images, labels = cifar10_model.distorted_inputs()

        # build a graph that computes the logits predictions from
        # the inference model.
        logits = cifar10_model.inference(images)

        # calculate loss.
        loss = cifar10_model.loss(logits, labels)

        # build a graph that trains the model with one batch of examples
        # and updates the model parameters
        train_op = cifar10_model.train(loss, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """
            Logs loss and runtime. 
            """
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)