コード例 #1
0
def load(graph, sess):
    """Build the inference graph inside ``graph`` and restore weights into ``sess``.

    The batch size, tag count, model input size and checkpoint path are read
    from the enclosing module (``batch_size``, ``num_tags``,
    ``model_img_size``, ``model_checkpoint_path``).

    Args:
        graph: Graph in which the placeholders and model ops are created.
        sess: Session handed to ``saver.restore``.

    Returns:
        A ``(prob_op, raw_images_op)`` pair: the sigmoid of the model logits
        and the ``[batch_size, 256, 256]`` float32 placeholder feeding it.
    """
    with graph.as_default():
        raw_images_op = tf.placeholder(tf.float32, [batch_size, 256, 256])
        # Add a trailing axis of size 1 so the batch is 4-D for the image ops.
        batch = tf.expand_dims(raw_images_op, 3)
        # This label placeholder is created but nothing below consumes it.
        label_input = tf.placeholder(tf.float32, [batch_size, num_tags])

        # Step 1: resize every image to the square size the model expects.
        target_size = np.array([model_img_size, model_img_size])
        batch = tf.image.resize_images(batch, target_size)

        # Step 2: standardize each image on its own, then glue the batch
        # back together along axis 0.
        standardized = [
            tf.expand_dims(
                tf.image.per_image_standardization(batch[idx, :, :, :]), 0)
            for idx in range(batch_size)
        ]
        batch = tf.concat(standardized, 0)

        # Forward pass in inference mode, squashed to per-tag probabilities.
        logits = model.inference(batch, is_training=False, num_classes=num_tags)
        prob_op = tf.sigmoid(logits)

        # The saver covers every global variable of the graph.
        saver = tf.train.Saver(tf.global_variables())

        print('load from pretrained model from')
        print(model_checkpoint_path)
        saver.restore(sess, model_checkpoint_path)

    return prob_op, raw_images_op
コード例 #2
0
def evaluate():
    """Build the evaluation graph and run ``eval_once`` once or periodically.

    The GPU is selected through ``CUDA_VISIBLE_DEVICES`` from ``FLAGS.gpuid``.
    When ``FLAGS.run_once`` is set a single evaluation is performed;
    otherwise evaluations repeat with ``FLAGS.eval_interval_secs`` seconds of
    sleep between them.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpuid)

    with tf.Graph().as_default() as eval_graph:
        # Input pipeline for evaluation data.
        images, labels = data_input.inputs(
            data_dir=FLAGS.data_dir,
            batch_size=FLAGS.eval_batch_size,
            num_tags=num_tags)

        # Forward pass in inference mode, then per-tag probabilities.
        logits = model.inference(
            images, is_training=False, num_classes=num_tags)
        prob_op = tf.sigmoid(logits)

        # Saver over every global variable; eval_once receives it.
        saver = tf.train.Saver(tf.global_variables())

        summary_op = tf.summary.merge_all()
        summary_writer = tf.summary.FileWriter(eval_dir, eval_graph)

        keep_evaluating = True
        while keep_evaluating:
            eval_once(saver, summary_writer, prob_op, labels, summary_op)
            # The flag is re-read after every pass, before any sleeping.
            keep_evaluating = not FLAGS.run_once
            if keep_evaluating:
                time.sleep(FLAGS.eval_interval_secs)
コード例 #3
0
ファイル: run.py プロジェクト: brillianti/Resnet
# Combine the test-record images into a single tensor along a new leading axis
# (presumably a list of per-image tensors built earlier in the script -- confirm).
testrecord_images = tf.stack(testrecord_images)

# Move axis 3 to position 1 so the channel axis comes first
# (assumes the stacked tensor is batch/height/width/channel -- TODO confirm).
testrecord_images = tf.transpose(testrecord_images, perm=[0, 3, 1, 2])

# Step counter; non-trainable so it is excluded from tf.trainable_variables().
global_step = tf.Variable(0, trainable=False)
# Piecewise-constant learning-rate schedule: `values` has one more entry than
# `boundaries`; values[i] applies until global_step passes boundaries[i].
boundaries = [10000, 15000, 20000, 25000]
values = [0.1, 0.05, 0.01, 0.005, 0.001]
learning_rate = tf.train.piecewise_constant(global_step, boundaries, values)
weight_decay = 2e-4
filters = 16  # filter count of the first resnet block
n = 5  # number of basic resnet blocks; total network layers are 6n+2
ver = 2  # resnet block version

# Get the inference logits from the model (second argument True is presumably
# the training flag -- verify against resnet_model.inference).
result = resnet_model.inference(distorted_images, True, filters, n, ver)

# Calculate the cross entropy loss.
# NOTE(review): with its default reduction tf.losses.sparse_softmax_cross_entropy
# already returns a scalar, so the reduce_mean below mainly attaches the
# 'cross_entropy' name -- confirm against the TF version in use.
cross_entropy = tf.losses.sparse_softmax_cross_entropy(labels=record_labels,
                                                       logits=result)
cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')

# Add weight decay to the loss: weight_decay times the summed L2 loss of
# every trainable variable.
l2_loss = weight_decay * tf.add_n(
    # loss is computed using fp32 for numerical stability.
    [tf.nn.l2_loss(tf.cast(v, tf.float32)) for v in tf.trainable_variables()])
tf.summary.scalar('l2_loss', l2_loss)
loss = cross_entropy_mean + l2_loss

# Define the optimizer
コード例 #4
0
def train():
  """Train the model, logging, summarising and checkpointing periodically.

  Builds the training graph in a fresh ``tf.Graph``, optionally restores the
  latest checkpoint from ``train_dir`` (when ``FLAGS.startep > 0``), and runs
  ``train_op`` until ``FLAGS.max_steps``.

  Progress is printed every half epoch; summaries and checkpoints are written
  every epoch, and a final checkpoint is written on the last step.
  Checkpoint files are suffixed with the *epoch* number.

  Returns:
    None. Returns early, without training, when resuming was requested but
    no checkpoint could be found.
  """
  os.environ['CUDA_VISIBLE_DEVICES'] = str(FLAGS.gpuid)
  with tf.Graph().as_default():
    global_step = tf.Variable(0, trainable=False)

    # Get images and labels
    images, labels = data_input.distorted_inputs(data_dir=FLAGS.data_dir,
            batch_size=FLAGS.batch_size, num_tags=num_tags)

    # Build a Graph that computes the logits predictions from the
    # inference model.
    logits = model.inference(images, is_training=True, num_classes=num_tags)

    # Calculate loss.
    loss = model.loss(logits, labels)

    # Build a Graph that trains the model with one batch of examples and
    # updates the model parameters.
    train_op = model.train(loss, global_step)

    # Create a saver.
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=100)

    # Build the summary operation based on the TF collection of Summaries.
    summary_op = tf.summary.merge_all()

    # Build an initialization operation to run below.
    init = tf.global_variables_initializer()

    config = tf.ConfigProto()
    config.log_device_placement = FLAGS.log_device_placement
    config.gpu_options.allow_growth = True
    # Start running operations on the Graph.
    sess = tf.Session(config=config)
    sess.run(init)

    # Integer steps per epoch. Floor division keeps the `step % ...` checks
    # below exact on Python 3 (true division would yield a float that the
    # step counter almost never divides evenly), and the max(1, ...) guards
    # avoid a modulo-by-zero when the dataset is smaller than two batches.
    step_per_epoch = max(1, num_training_images // FLAGS.batch_size)
    half_epoch = max(1, step_per_epoch // 2)

    step_init = 0

    if FLAGS.startep > 0:
      ckpt = tf.train.get_checkpoint_state(train_dir)
      if ckpt and ckpt.model_checkpoint_path:
        # Restores from checkpoint
        print('load from pretrained model')
        saver.restore(sess, ckpt.model_checkpoint_path)
        # Checkpoints are saved with the epoch number as suffix (see the
        # saver.save call below), so convert it back into a step index
        # instead of treating the epoch number as a step.
        saved_epoch = int(
            ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        step_init = saved_epoch * step_per_epoch
      else:
        print('No checkpoint file found')
        # Nothing has been started on this session yet; release it.
        sess.close()
        return
    else:
      print('random initialize the model')

    # Start the queue runners.
    tf.train.start_queue_runners(sess=sess)

    summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
    print('step per epoch: %d' % step_per_epoch)
    for step in range(step_init, FLAGS.max_steps):
      start_time = time.time()
      _, loss_value = sess.run([train_op, loss])
      duration = time.time() - start_time

      assert not np.isnan(loss_value), 'Model diverged with loss = NaN'

      # Progress line twice per epoch.
      if step % half_epoch == 0:
        num_examples_per_step = FLAGS.batch_size
        examples_per_sec = num_examples_per_step / duration
        sec_per_batch = float(duration)

        format_str = ('%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
                      'sec/batch)')
        print (format_str % (datetime.now(), step, loss_value,
                             examples_per_sec, sec_per_batch))

      at_epoch_boundary = step % step_per_epoch == 0

      if at_epoch_boundary:
        summary_str = sess.run(summary_op)
        summary_writer.add_summary(summary_str, step)

      # Save the model checkpoint once per epoch and on the final step.
      if at_epoch_boundary or (step + 1) == FLAGS.max_steps:
        checkpoint_path = os.path.join(train_dir, 'model.ckpt')
        epoch_num = step // step_per_epoch
        saver.save(sess, checkpoint_path, global_step=epoch_num)
コード例 #5
0
def run_training(mode, num_classes, train_file, test_file):
    """Build the graph for the selected loss and run the training loop.

    Args:
        mode: Loss selector: 1 = LGM loss, 2 = softmax loss, 3 = center loss.
        num_classes: Number of classes passed to the resnet inference builders.
        train_file: File name appended to ``FLAGS.data_dir``; its 'data' and
            'label' entries are read with ``read_h5``.
        test_file: File name appended to ``FLAGS.data_dir`` and handed to
            ``do_eval`` for validation.

    NOTE(review): any ``mode`` other than 1, 2 or 3 leaves ``logits``
    unassigned, so the loss construction below would fail with a NameError.
    """
    print('mode=%d' % mode)
    print('data_dir=%s' % FLAGS.data_dir)
    print('train file=%s' % train_file)
    with tf.Graph().as_default() as g:
        # Generate placeholders for the images and labels.
        # `lr` is fed with the current learning rate on every training step.
        images_placeholder, labels_placeholder, lr = placeholder_inputs(
            FLAGS.batch_size, _HEIGHT, _WIDTH, 3)

        # Training data and labels read from the same file.
        img_train_h5 = read_h5(FLAGS.data_dir + train_file, 'data')
        label_train_h5 = read_h5(FLAGS.data_dir + train_file, 'label')
        ## Build a Graph that computes predictions from the inference model.
        # Training-mode network; the builder depends on the selected loss.
        is_training = True
        if mode == 1:
            logits, likelihood_reg, means = resnet_model.inference_lgm(
                images_placeholder,
                FLAGS.resnet_size,
                is_training,
                labels=labels_placeholder,
                num_classes=num_classes)  # lgm loss
        elif mode == 2:
            logits = resnet_model.inference(images_placeholder,
                                            FLAGS.resnet_size,
                                            is_training,
                                            num_classes=num_classes)  # softmax
        elif mode == 3:
            logits, likelihood_reg, centers, centers_op = resnet_model.inference_center(
                images_placeholder,
                FLAGS.resnet_size,
                is_training,
                labels=labels_placeholder,
                loss_weight=0.0005,
                num_classes=num_classes)  # center loss
        ## Add to the Graph the Ops for loss calculation.
        # Mean sparse softmax cross entropy over the batch.
        cross_entropy = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(
                labels=tf.to_int64(labels_placeholder),
                logits=logits,
                name='xentropy'))
        # Total loss = cross entropy + weight decay over all trainable variables.
        loss = cross_entropy + _WEIGHT_DECAY * tf.add_n(
            [tf.nn.l2_loss(v) for v in tf.trainable_variables()])
        tf.summary.scalar('cross-entropy', cross_entropy)
        # The LGM and center modes add their regulariser term to the loss.
        if mode != 2:
            tf.summary.scalar('likelihood_reg', likelihood_reg)
            loss += likelihood_reg
        optimizer = tf.train.MomentumOptimizer(lr,
                                               _MOMENTUM,
                                               use_nesterov=True)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        # Batch norm requires update ops to be added as a dependency to the train_op
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        # depend_ops is an alias of update_ops; `+=` extends that same list.
        depend_ops = update_ops
        if mode == 3:
            depend_ops += [centers_op]
        with tf.control_dependencies(depend_ops):
            train_op = optimizer.minimize(loss, global_step)

        # Evaluation network: same builders called with reuse=True and no labels.
        is_training = True  # The tf.layers.batch_normalization works properly only when training=True. Reasons unknown.
        if mode == 1:
            logits_eval, _, _ = resnet_model.inference_lgm(
                images_placeholder,
                FLAGS.resnet_size,
                is_training,
                reuse=True,
                num_classes=num_classes)  # lgm
        elif mode == 2:
            logits_eval = resnet_model.inference(
                images_placeholder,
                FLAGS.resnet_size,
                is_training,
                reuse=True,
                num_classes=num_classes)  # softmax
        elif mode == 3:
            # NOTE(review): the training call above unpacks four return values
            # from inference_center but this call unpacks three -- confirm the
            # return arity when labels are omitted.
            logits_eval, _, _ = resnet_model.inference_center(
                images_placeholder,
                FLAGS.resnet_size,
                is_training,
                reuse=True,
                num_classes=num_classes)  # center loss
        # Count of top-1 correct predictions in the fed batch.
        correct = tf.nn.in_top_k(logits_eval, labels_placeholder, 1)
        eval_correct = tf.reduce_sum(tf.cast(correct, tf.int32))

        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=None)

        # Create a session for running Ops on the Graph.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)

        # Instantiate a SummaryWriter to output summaries and the Graph.
        summary_writer = tf.summary.FileWriter(FLAGS.log_dir, sess.graph)
        sum_op = tf.summary.merge_all()
        sess.run(init)

        # Loading a pre-trained checkpoint is disabled by the constant
        # condition; the branch below never runs as written.
        if False:
            save_path = 'model.ckpt-10000'
            saver.restore(sess, save_path)
            print('\n[*]model loaded from %s\n' % save_path)

        # Start the training loop.
        print('use %d images to train' % _NUM_IMAGES['train'])
        print('trian epochs: %d' % FLAGS.train_epochs)
        steps_per_epoch = _NUM_IMAGES['train'] // FLAGS.batch_size
        # Initial learning rate; multiplied by 0.1 at epochs 150 and 225 below.
        lr_value = 0.1
        # Graph construction is complete at this point; finalize it.
        g.finalize()
        for epc in range(FLAGS.train_epochs):
            # Reshuffle the training set at the start of every epoch.
            idxArr = np.random.permutation(_NUM_IMAGES['train'])  # shuffle
            img_train = img_train_h5[idxArr]
            label_train = label_train_h5[idxArr]
            if epc in [150, 225]:
                lr_value *= 0.1
                print('lr changed to %f' % lr_value)
            for step in range(steps_per_epoch):
                start_time = time.time()

                # Fill a feed dictionary with the actual set of images and labels
                # for this particular training step.
                feed_dict = fill_feed_dict(img_train,
                                           label_train,
                                           step,
                                           FLAGS.batch_size,
                                           True,
                                           images_placeholder,
                                           labels_placeholder,
                                           MAX=_NUM_IMAGES['train'])
                feed_dict[lr] = lr_value
                # One optimisation step; also fetch the loss, the merged
                # summaries and the updated global step.
                _, crosse_entropy_, sum_str, gs = sess.run(
                    [train_op, cross_entropy, sum_op, global_step],
                    feed_dict=feed_dict)

                # Summaries are written and flushed on every step.
                summary_writer.add_summary(sum_str, gs)
                summary_writer.flush()
                duration = time.time() - start_time
                # Print an overview every 100 global steps.
                if gs % 100 == 0:
                    print(
                        '(Epoch %d) GlobalStep %d: loss = %.3f (%.3f sec/step)'
                        % (epc + 1, gs, crosse_entropy_, duration))
                # Save a checkpoint and evaluate the model every 1000 global
                # steps, and also right after the first step.
                if gs % 1000 == 0 or gs == 1:
                    checkpoint_file = os.path.join(FLAGS.model_dir,
                                                   'model.ckpt')
                    saver.save(sess, checkpoint_file, global_step=gs)
                    print('model saved to %s' % checkpoint_file)
                    # Evaluate against the validation set.
                    print('Validation Data Eval:')
                    do_eval(sess, eval_correct, images_placeholder,
                            labels_placeholder, FLAGS.data_dir + test_file)