Example #1
def train(train_data_set, val_data_set, load_model_path, save_model_path,
          phases_names):

    x_ROI = tf.placeholder(tf.float32,
                           shape=[
                               None, net_config.ROI_SIZE_W,
                               net_config.ROI_SIZE_H,
                               net_config.IMAGE_CHANNEL * len(phases_names)
                           ],
                           name='input_x')

    x_EXPAND = tf.placeholder(tf.float32,
                              shape=[
                                  None, net_config.EXPAND_SIZE_W,
                                  net_config.EXPAND_SIZE_H,
                                  net_config.IMAGE_CHANNEL * len(phases_names)
                              ])
    y_ = tf.placeholder(tf.float32, shape=[
        None,
    ])
    tf.summary.histogram('label', y_)
    global_step = tf.Variable(0, trainable=False)
    # variable_average = tf.train.ExponentialMovingAverage(
    #     sub_Config.MOVING_AVERAGE_DECAY,
    #     global_step
    # )
    # vaeriable_average_op = variable_average.apply(tf.trainable_variables())
    # regularizer = tf.contrib.layers.l2_regularizer(sub_Config.REGULARIZTION_RATE)
    is_training = tf.placeholder('bool', [], name='is_training')
    FLAGS = tf.app.flags.FLAGS
    tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data',
                               'where to store the dataset')
    tf.app.flags.DEFINE_boolean(
        'use_bn', True, 'use batch normalization. otherwise use biases')
    y = inference_small([x_ROI, x_EXPAND],
                        is_training=is_training,
                        num_classes=net_config.OUTPUT_NODE,
                        use_bias=FLAGS.use_bn,
                        phase_names=phases_names,
                        num_blocks=3)
    tf.summary.histogram('logits', tf.argmax(y, 1))
    loss_ = loss(logits=y, labels=tf.cast(y_, np.int32))
    tf.summary.scalar('loss', loss_)
    # opt = tf.train.MomentumOptimizer(sub_Config.LEARNING_RATE, sub_Config.MOMENTUM)
    # grads = opt.compute_gradients(loss_)
    # apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
    # train_op = apply_gradient_op
    train_op = tf.train.GradientDescentOptimizer(
        learning_rate=net_config.LEARNING_RATE).minimize(
            loss=loss_, global_step=global_step)
    # with tf.control_dependencies([train_step, vaeriable_average_op]):
    #     train_op = tf.no_op(name='train')

    with tf.variable_scope('accuracy'):
        accuracy_tensor = tf.reduce_mean(
            tf.cast(tf.equal(x=tf.argmax(y, 1), y=tf.cast(y_, tf.int64)),
                    tf.float32))
        tf.summary.scalar('accuracy', accuracy_tensor)
    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if load_model_path:
            saver.restore(sess, load_model_path)
        writer = tf.summary.FileWriter('./log/fine_tuning/train',
                                       tf.get_default_graph())
        val_writer = tf.summary.FileWriter('./log/fine_tuning/val',
                                           tf.get_default_graph())
        for i in range(net_config.ITERATOE_NUMBER):
            images, images_expand, labels = train_data_set.get_next_batch(
                net_config.BATCH_SIZE, net_config.DISTRIBUTION)
            _, loss_value, accuracy_value, summary, global_step_value = sess.run(
                [train_op, loss_, accuracy_tensor, merge_op, global_step],
                feed_dict={
                    x_ROI: images,
                    x_EXPAND: images_expand,
                    y_: labels
                })
            writer.add_summary(summary=summary, global_step=global_step_value)
            if net_config.OUTPUT_NODE == 2 and (
                    global_step_value -
                    1) % 100 == 0 and i != 0 and save_model_path is not None:
                # Save the model; for binary classification, checkpoint every 100 steps
                import os
                save_path = os.path.join(save_model_path,
                                         str(global_step_value))
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                save_path += '/'
                print('model saved path is %s' % save_path)
                saver.save(sess, save_path)
            if net_config.OUTPUT_NODE == 5 and (
                    global_step_value -
                    1) % 100 == 0 and i != 0 and save_model_path is not None:
                # Save the model; for five-class classification, checkpoint every 100 steps
                import os
                save_path = os.path.join(save_model_path,
                                         str(global_step_value))
                if not os.path.exists(save_path):
                    os.mkdir(save_path)
                save_path += '/'
                print('model saved path is %s' % save_path)
                saver.save(sess, save_path)
            if i % 100 == 0:
                validation_images, validation_images_expand, validation_labels = val_data_set.get_next_batch(
                )

                validation_accuracy, validation_loss, summary, logits = sess.run(
                    [accuracy_tensor, loss_, merge_op, y],
                    feed_dict={
                        x_ROI: validation_images,
                        x_EXPAND: validation_images_expand,
                        y_: validation_labels
                    })
                calculate_acc_error(logits=np.argmax(logits, 1),
                                    label=validation_labels,
                                    show=True)
                binary_acc = acc_binary_acc(
                    logits=np.argmax(logits, 1),
                    label=validation_labels,
                )
                val_writer.add_summary(summary, global_step_value)
                print('step is %d, training loss value is %g, accuracy is %g, '
                      'validation loss value is %g, accuracy is %g, binary_acc is %g' %
                      (global_step_value, loss_value, accuracy_value,
                       validation_loss, validation_accuracy, binary_acc))
        writer.close()
        val_writer.close()
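For reference, a minimal self-contained sketch of the placeholder/feed_dict training-loop pattern used in the example above (assumes TensorFlow 1.x; the toy input shape, the dense layer, and the random batch are illustrative stand-ins, not values from net_config):

import numpy as np
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 4], name='input_x')
y_ = tf.placeholder(tf.int64, shape=[None])
logits = tf.layers.dense(x, 2)  # stand-in for inference_small
loss_ = tf.reduce_mean(
    tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y_, logits=logits))
global_step = tf.Variable(0, trainable=False)
train_op = tf.train.GradientDescentOptimizer(0.01).minimize(
    loss_, global_step=global_step)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    for _ in range(10):
        # In the real code these batches come from train_data_set.get_next_batch().
        images = np.random.rand(8, 4).astype(np.float32)
        labels = np.random.randint(0, 2, size=8)
        _, loss_value = sess.run([train_op, loss_],
                                 feed_dict={x: images, y_: labels})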
Example #2
def train(training_set, training_labels):
    """Train on dataset for a number of steps."""
    with tf.Graph().as_default(), tf.device('/gpu:0'):
        # Create a variable to count the number of train() calls. This equals the
        # number of batches processed * FLAGS.num_gpus.
        global_step = tf.Variable(0, name="global_step", trainable=False)

        # get num of examples in training set
        #    dataset_num_examples = training_set.shape[0]

        # Calculate the learning rate schedule.
        #    num_batches_per_epoch = (dataset_num_examples / FLAGS.batch_size)

        #    decay_steps = int(num_batches_per_epoch * FLAGS.num_epochs_per_decay)

        # Decay the learning rate exponentially based on the number of steps.
        '''
    lr = tf.train.exponential_decay(FLAGS.initial_learning_rate,
                                    global_step,
                                    decay_steps,
                                    FLAGS.learning_rate_decay_factor,
                                    staircase=True)
    '''
        lr_placeholder = tf.placeholder(dtype=tf.float32, shape=[])

        # Create an optimizer that performs gradient descent.
        #opt = tf.train.AdamOptimizer(lr)
        opt = tf.train.MomentumOptimizer(lr_placeholder, MOMENTUM)

        # fetch the data batch from the training set
        images, labels = cifar10.placeholder_inputs(FLAGS.batch_size)
        logits = resnet.inference(images,
                                  FLAGS.num_residual_blocks,
                                  reuse=False)

        # calculate the loss and gradients
        loss = resnet.loss(logits, labels)
        regu_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([loss] + regu_losses)

        grads = opt.compute_gradients(total_loss)

        # Apply the gradients to adjust the shared variables.
        apply_gradients_op = opt.apply_gradients(grads,
                                                 global_step=global_step)

        with tf.control_dependencies([apply_gradients_op]):
            train_op = tf.identity(total_loss, name='train_op')

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build the summary operation from the last tower summaries.
        summary_op = tf.summary.merge_all()

        # For testing trained model
        #    test_size = testset.num_examples
        #    test_images_placeholder, test_labels_placeholder = mnist.placeholder_inputs(FLAGS.batch_size)
        #    logits_test = mnist.inference(test_images_placeholder, train=False)
        #pred = mnist.predictions(logits_test)
        validation_accuracy = tf.reduce_sum(resnet.evaluation(
            logits, labels)) / tf.constant(FLAGS.batch_size)
        # Build an initialization operation to run below.
        init = tf.global_variables_initializer()

        # Start running operations on the Graph. allow_soft_placement must be set to
        # True to build towers on GPU, as some of the ops do not have GPU
        # implementations.
        sess = tf.Session(config=tf.ConfigProto(
            allow_soft_placement=True,
            log_device_placement=FLAGS.log_device_placement))
        sess.run(init)

        # these two counters are used to track when to enter the next epoch
        local_data_batch_idx = 0
        epoch_counter = 0

        # Start the queue runners.
        tf.train.start_queue_runners(sess=sess)
        for step in range(FLAGS.max_steps):
            # change the API for new aug method
            epoch_counter, local_data_batch_idx, feed_dict = cifar10.fill_feed_dict(
                training_set, training_labels, images, labels,
                FLAGS.batch_size, local_data_batch_idx, epoch_counter,
                FLAGS.init_lr, lr_placeholder)

            start_time = time.time()
            _, loss_value, acc = sess.run(
                [train_op, total_loss, validation_accuracy],
                feed_dict=feed_dict)

            duration = time.time() - start_time

            assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

            examples_per_sec = FLAGS.batch_size / float(duration)
            format_str = ('%s: step %d, loss = %.8f (%.1f examples/sec; %.3f '
                          'sec/batch); acc=%.4f')
            tf.logging.info(format_str % (datetime.now(), step, loss_value,
                                          examples_per_sec, duration, acc))
            tf.logging.info("Data batch index: %s, Current epoch idex: %s" %
                            (str(epoch_counter), str(local_data_batch_idx)))

            if step == FLAGS.decay_step0 or step == FLAGS.decay_step1:
                FLAGS.init_lr = 0.1 * FLAGS.init_lr

            if step % 2000 == 0 or (step + 1) == FLAGS.max_steps:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)
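The exponential-decay schedule that is commented out above could be wired in roughly as follows (a minimal sketch assuming TensorFlow 1.x; the initial rate, decay steps, and decay factor are illustrative, not the FLAGS values):

import tensorflow as tf

global_step = tf.Variable(0, name='global_step', trainable=False)
# Start at 0.1 and multiply by 0.1 every 1000 steps (staircase decay).
lr = tf.train.exponential_decay(0.1, global_step, 1000, 0.1, staircase=True)
opt = tf.train.MomentumOptimizer(lr, 0.9)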
Example #3
def train(train_data_set, val_data_set, load_model_path, save_model_path):
    x = tf.placeholder(
        tf.float32,
        shape=[
            None,
            sub_Config.IMAGE_W,
            sub_Config.IMAGE_H,
            sub_Config.IMAGE_CHANNEL
        ],
        name='input_x'
    )
    y_ = tf.placeholder(
        tf.float32,
        shape=[
            None,
        ]
    )
    tf.summary.histogram(
        'label',
        y_
    )
    # global_step = tf.Variable(0, trainable=False)
    # variable_average = tf.train.ExponentialMovingAverage(
    #     sub_Config.MOVING_AVERAGE_DECAY,
    #     global_step
    # )
    # vaeriable_average_op = variable_average.apply(tf.trainable_variables())
    # regularizer = tf.contrib.layers.l2_regularizer(sub_Config.REGULARIZTION_RATE)
    is_training = tf.placeholder('bool', [], name='is_training')
    FLAGS = tf.app.flags.FLAGS
    tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data',
                               'where to store the dataset')
    tf.app.flags.DEFINE_boolean('use_bn', True, 'use batch normalization. otherwise use biases')
    y = inference_small(x, is_training=is_training,
                        num_classes=sub_Config.OUTPUT_NODE,
                        use_bias=FLAGS.use_bn,
                        num_blocks=3)
    tf.summary.histogram(
        'logits',
        tf.argmax(y, 1)
    )
    loss_ = loss(
        logits=y,
        labels=tf.cast(y_, np.int32)

    )
    tf.summary.scalar(
        'loss',
        loss_
    )
    train_op = tf.train.GradientDescentOptimizer(
        learning_rate=sub_Config.LEARNING_RATE
    ).minimize(
        loss=loss_,
        # global_step=global_step
    )
    # with tf.control_dependencies([train_step, vaeriable_average_op]):
    #     train_op = tf.no_op(name='train')

    with tf.variable_scope('accuracy'):
        accuracy_tensor = tf.reduce_mean(
            tf.cast(
                tf.equal(x=tf.argmax(y, 1), y=tf.cast(y_, tf.int64)),
                tf.float32
            )
        )
        tf.summary.scalar(
            'accuracy',
            accuracy_tensor
        )
    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())

        if load_model_path:
            saver.restore(sess, load_model_path)
        writer = tf.summary.FileWriter('./log/fine_tuning/train', tf.get_default_graph())
        val_writer = tf.summary.FileWriter('./log/fine_tuning/val', tf.get_default_graph())
        for i in range(sub_Config.ITERATOE_NUMBER):
            images, labels = train_data_set.images, train_data_set.labels
            images = changed_shape(images, [
                    len(images),
                    sub_Config.IMAGE_W,
                    sub_Config.IMAGE_W,
                    sub_Config.IMAGE_CHANNEL
                ])
            if i == 0:
                from PIL import Image
                image = Image.fromarray(np.asarray(images[0, :, :, 0], np.uint8))
                image.show()
            _, loss_value, accuracy_value, summary = sess.run(
                [train_op, loss_, accuracy_tensor, merge_op],
                feed_dict={
                    x: images,
                    y_: labels
                }
            )
            writer.add_summary(
                summary=summary,
                global_step=i
            )
            if i % 1000 == 0 and i != 0 and save_model_path is not None:
                # Save the model
                saver.save(sess, save_model_path)
            if i % 100 == 0:
                validation_images, validation_labels = val_data_set.images, val_data_set.labels
                validation_images = changed_shape(
                    validation_images,
                    [
                        len(validation_images),
                        sub_Config.IMAGE_W,
                        sub_Config.IMAGE_W,
                        1
                    ]
                )
                validation_accuracy, validation_loss, summary, logits = sess.run(
                    [accuracy_tensor, loss_, merge_op, y],
                    feed_dict={
                        x: validation_images,
                        y_: validation_labels
                    }
                )
                calculate_acc_error(
                    logits=np.argmax(logits, 1),
                    label=validation_labels,
                    show=True
                )
                binary_acc = acc_binary_acc(
                    logits=np.argmax(logits, 1),
                    label=validation_labels,
                )
                val_writer.add_summary(summary, i)
                print('step is %d, training loss value is %g, accuracy is %g, '
                      'validation loss value is %g, accuracy is %g, binary_acc is %g' %
                      (i, loss_value, accuracy_value,
                       validation_loss, validation_accuracy, binary_acc))
        writer.close()
        val_writer.close()
def train():
    global parameters
    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=FLAGS.log_device_placement)
    if device_str.find('cpu') >= 0:  # cpu version
        num_threads = os.getenv('OMP_NUM_THREADS', 1)
        config = tf.ConfigProto(allow_soft_placement=True,
                                intra_op_parallelism_threads=int(num_threads))
    with tf.Graph().as_default(), tf.device(get_device_str(
            FLAGS.device_id)), tf.Session(config=config) as sess:
        images, labels = cifar10_input.inputs(False, FLAGS.data_dir,
                                              FLAGS.batch_size)
        print('Images: ', images)

        #logits = inference(images, is_training=True, num_blocks=9)
        logits = inference_small(images, is_training=True, num_blocks=9)
        # Add a simple objective so we can calculate the backward pass.
        loss_value = loss(logits, labels)
        # Compute the gradient with respect to all the parameters.
        lr = 0.01
        #grad = tf.train.GradientDescentOptimizer(lr).minimize(loss_value)
        grad = tf.train.MomentumOptimizer(lr, 0.9).minimize(loss_value)

        # Create a saver.
        saver = tf.train.Saver(tf.global_variables())

        # Build an initialization operation.
        init = tf.global_variables_initializer()
        # Start running operations on the Graph.
        sess.run(init)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        real_batch_size = FLAGS.batch_size
        num_batches_per_epoch = int(
            (EPOCH_SIZE + real_batch_size - 1) / real_batch_size)
        iterations = FLAGS.epochs * num_batches_per_epoch
        average_batch_time = 0.0

        epochs_info = []
        average_loss = 0.0
        for step in range(iterations):
            start_time = time.time()
            _, loss_v = sess.run([grad, loss_value])
            duration = time.time() - start_time
            average_batch_time += float(duration)
            average_loss += loss_v
            assert not np.isnan(loss_v), 'Model diverged with loss = NaN'
            if step % FLAGS.log_step == 0:
                examples_per_sec = FLAGS.batch_size / duration
                sec_per_batch = float(duration)
                format_str = (
                    '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f sec/batch)'
                )
                print(format_str % (datetime.now(), step, loss_v,
                                    examples_per_sec, sec_per_batch))
            if step > 0 and step % (FLAGS.eval_step *
                                    num_batches_per_epoch) == 0:
                average_loss /= num_batches_per_epoch * FLAGS.eval_step
                epochs_info.append(
                    '%d:_:%s' %
                    (step /
                     (FLAGS.eval_step * num_batches_per_epoch), average_loss))
                average_loss = 0.0
            if step == iterations - 1:
                checkpoint_path = os.path.join(FLAGS.train_dir, 'model.ckpt')
                saver.save(sess, checkpoint_path, global_step=step)

        coord.request_stop()
        coord.join(threads)
        average_batch_time /= iterations
        print('average_batch_time: %f' % average_batch_time)
        print('epoch_info: %s' % ','.join(epochs_info))
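The Coordinator/queue-runner pattern used above can be exercised on its own with a tiny in-memory dataset (a minimal sketch assuming TensorFlow 1.x; the constant data and batch size are illustrative, not the CIFAR-10 input pipeline):

import numpy as np
import tensorflow as tf

data = np.arange(32, dtype=np.float32).reshape(16, 2)
# slice_input_producer builds a shuffling input queue over the rows of `data`.
row = tf.train.slice_input_producer([tf.constant(data)], shuffle=True)
batch = tf.train.batch(row, batch_size=4)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for _ in range(3):
        print(sess.run(batch))
    coord.request_stop()
    coord.join(threads)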
def val(val_data_set, load_model_path, phases_names):

    x_ROI = tf.placeholder(tf.float32,
                           shape=[
                               None, net_config.ROI_SIZE_W,
                               net_config.ROI_SIZE_H,
                               net_config.IMAGE_CHANNEL * len(phases_names)
                           ],
                           name='input_x')

    x_EXPAND = tf.placeholder(tf.float32,
                              shape=[
                                  None, net_config.EXPAND_SIZE_W,
                                  net_config.EXPAND_SIZE_H,
                                  net_config.IMAGE_CHANNEL * len(phases_names)
                              ])
    y_ = tf.placeholder(tf.float32, shape=[
        None,
    ])
    tf.summary.histogram('label', y_)
    global_step = tf.Variable(0, trainable=False)
    # variable_average = tf.train.ExponentialMovingAverage(
    #     sub_Config.MOVING_AVERAGE_DECAY,
    #     global_step
    # )
    # vaeriable_average_op = variable_average.apply(tf.trainable_variables())
    # regularizer = tf.contrib.layers.l2_regularizer(sub_Config.REGULARIZTION_RATE)
    is_training = tf.placeholder('bool', [], name='is_training')
    FLAGS = tf.app.flags.FLAGS
    tf.app.flags.DEFINE_string('data_dir', '/tmp/cifar-data',
                               'where to store the dataset')
    tf.app.flags.DEFINE_boolean(
        'use_bn', True, 'use batch normalization. otherwise use biases')
    y = inference_small([x_ROI, x_EXPAND],
                        is_training=is_training,
                        num_classes=net_config.OUTPUT_NODE,
                        use_bias=FLAGS.use_bn,
                        phase_names=phases_names,
                        num_blocks=3)
    tf.summary.histogram('logits', tf.argmax(y, 1))
    loss_ = loss(logits=y, labels=tf.cast(y_, np.int32))
    tf.summary.scalar('loss', loss_)

    with tf.variable_scope('accuracy'):
        accuracy_tensor = tf.reduce_mean(
            tf.cast(tf.equal(x=tf.argmax(y, 1), y=tf.cast(y_, tf.int64)),
                    tf.float32))
        tf.summary.scalar('accuracy', accuracy_tensor)
    saver = tf.train.Saver()
    merge_op = tf.summary.merge_all()
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        if load_model_path:
            saver.restore(sess, load_model_path)

        validation_images, validation_images_expand, validation_labels = val_data_set.get_next_batch(
        )

        validation_accuracy, validation_loss, summary, logits = sess.run(
            [accuracy_tensor, loss_, merge_op, y],
            feed_dict={
                x_ROI: validation_images,
                x_EXPAND: validation_images_expand,
                y_: validation_labels
            })
        calculate_acc_error(logits=np.argmax(logits, 1),
                            label=validation_labels,
                            show=True)
        binary_acc = acc_binary_acc(
            logits=np.argmax(logits, 1),
            label=validation_labels,
        )

        print('validation loss value is %g, accuracy is %g, binary_acc is %g' %
              (validation_loss, validation_accuracy, binary_acc))
def train():
    """Train CIFAR-10 for a number of steps."""
    with tf.Graph().as_default():
        global_step = tf.train.get_or_create_global_step()

        # Get images and labels for CIFAR-10.
        # Force input pipeline to CPU:0 to avoid operations sometimes ending up on
        # GPU and resulting in a slow down.
        with tf.device('/cpu:0'):
            images, labels = resnet.distorted_inputs()

        # Build a Graph that computes the logits predictions from the
        # inference model.
        logits, tensor_list = resnet.inference(images)

        # Calculate loss.
        loss = resnet.loss(logits, labels)

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters.
        train_op, _ = resnet.train(loss, tensor_list, global_step)

        class _LoggerHook(tf.train.SessionRunHook):
            """Logs loss and runtime."""
            def begin(self):
                self._step = -1
                self._start_time = time.time()

            def before_run(self, run_context):
                self._step += 1
                return tf.train.SessionRunArgs(loss)  # Asks for loss value.

            def after_run(self, run_context, run_values):
                if self._step % FLAGS.log_frequency == 0:
                    current_time = time.time()
                    duration = current_time - self._start_time
                    self._start_time = current_time

                    loss_value = run_values.results
                    examples_per_sec = FLAGS.log_frequency * FLAGS.batch_size / duration
                    sec_per_batch = float(duration / FLAGS.log_frequency)

                    format_str = (
                        '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                        'sec/batch)')
                    print(format_str % (datetime.now(), self._step, loss_value,
                                        examples_per_sec, sec_per_batch))

        start = time.time()
        with tf.train.MonitoredTrainingSession(
                checkpoint_dir=FLAGS.train_dir,
                hooks=[
                    tf.train.StopAtStepHook(last_step=FLAGS.max_steps),
                    tf.train.NanTensorHook(loss),
                    _LoggerHook()
                ],
                config=tf.ConfigProto(log_device_placement=FLAGS.
                                      log_device_placement)) as mon_sess:
            while not mon_sess.should_stop():
                mon_sess.run(train_op)
        end = time.time()
        print(end - start)
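The MonitoredTrainingSession/hook pattern in the last example reduces to a few lines (a minimal sketch assuming TensorFlow 1.x; the dummy loss variable and step count are illustrative stand-ins for resnet's loss and FLAGS.max_steps):

import tensorflow as tf

global_step = tf.train.get_or_create_global_step()
loss = tf.Variable(1.0)
# A stand-in "training step" that also advances the global step.
with tf.control_dependencies([tf.assign_sub(loss, 0.1)]):
    train_op = tf.assign_add(global_step, 1)

with tf.train.MonitoredTrainingSession(
        hooks=[tf.train.StopAtStepHook(last_step=5),
               tf.train.NanTensorHook(loss)]) as mon_sess:
    while not mon_sess.should_stop():
        mon_sess.run(train_op)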