Example #1
def tower_loss(scope):
    images, labels = read_and_decode()
    if net == 'vgg_16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(images, num_classes=FLAGS.num_classes)
    elif net == 'vgg_19':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_19(images, num_classes=FLAGS.num_classes)
    elif net == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_101(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            logits, end_points = resnet_v1.resnet_v1_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    elif net == 'resnet_v2_50':
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            logits, end_points = resnet_v2.resnet_v2_50(images, num_classes=FLAGS.num_classes)
        logits = tf.reshape(logits, [FLAGS.batch_size, FLAGS.num_classes])
    else:
        raise Exception('No network matched with net %s.' % net)
    assert logits.shape == (FLAGS.batch_size, FLAGS.num_classes)
    _ = cal_loss(logits, labels)
    losses = tf.get_collection('losses', scope)
    total_loss = tf.add_n(losses, name='total_loss')
    for l in losses + [total_loss]:
        loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
        tf.summary.scalar(loss_name, l)
    return total_loss
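tower_loss only works if cal_loss registers its loss tensors in the 'losses' collection that tf.get_collection reads back. A minimal sketch of such a helper, assuming TF 1.x and integer labels (hypothetical; the real cal_loss may also add regularization terms):

def cal_loss(logits, labels):
    # Average cross-entropy over the batch.
    cross_entropy = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits),
        name='cross_entropy')
    # Register the loss so tower_loss can collect it per tower scope.
    tf.add_to_collection('losses', cross_entropy)
    return cross_entropy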
Example #2
def main(args):
    start = time()
    print('Start')
    splits = get_splits(args.image_dir)
    img_size = 224

    # TF graph creation
    images_placeholder = tf.placeholder(tf.float32, [None, None, None, 3],
                                        name='image')
    proc_image_op = tf.image.resize_image_with_crop_or_pad(images_placeholder,
                                                           target_height=224,
                                                           target_width=224)
    _, end_points = vgg.vgg_16(proc_image_op,
                               is_training=False,
                               dropout_keep_prob=1.0)
    ft_name = os.path.join("vgg_16", "fc8")
    ft_output = end_points[ft_name]
    ####

    for split in splits:
        extract_features(images_placeholder=images_placeholder,
                         image_dir=os.path.join(args.image_dir, split),
                         ft_output=ft_output,
                         out_dir=args.output_dir,
                         split=split,
                         network_ckpt=args.model_ckpt)

    print('Image Features extracted.')
    print('Time taken: ', time() - start)
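extract_features is defined elsewhere; a minimal sketch of what it presumably does with these arguments, assuming TF 1.x (load_image_batches is a hypothetical stand-in for the real batching code):

def extract_features(images_placeholder, image_dir, ft_output,
                     out_dir, split, network_ckpt):
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore the pretrained VGG-16 weights.
        saver.restore(sess, network_ckpt)
        for batch in load_image_batches(image_dir):  # hypothetical loader
            feats = sess.run(ft_output,
                             feed_dict={images_placeholder: batch})
            # ... persist feats for this split under out_dir ...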
Example #3
 def testEndPoints(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = 1000
     for is_training in [True, False]:
         with ops.Graph().as_default():
             inputs = random_ops.random_uniform(
                 (batch_size, height, width, 3))
             _, end_points = vgg.vgg_16(inputs,
                                        num_classes,
                                        is_training=is_training)
             expected_names = [
                 'vgg_16/conv1/conv1_1', 'vgg_16/conv1/conv1_2',
                 'vgg_16/pool1', 'vgg_16/conv2/conv2_1',
                 'vgg_16/conv2/conv2_2', 'vgg_16/pool2',
                 'vgg_16/conv3/conv3_1', 'vgg_16/conv3/conv3_2',
                 'vgg_16/conv3/conv3_3', 'vgg_16/pool3',
                 'vgg_16/conv4/conv4_1', 'vgg_16/conv4/conv4_2',
                 'vgg_16/conv4/conv4_3', 'vgg_16/pool4',
                 'vgg_16/conv5/conv5_1', 'vgg_16/conv5/conv5_2',
                 'vgg_16/conv5/conv5_3', 'vgg_16/pool5', 'vgg_16/fc6',
                 'vgg_16/fc7', 'vgg_16/fc8'
             ]
             self.assertSetEqual(set(end_points.keys()),
                                 set(expected_names))
Example #4
 def testEndPoints(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     inputs = tf.random_uniform((batch_size, height, width, 3))
     _, end_points = vgg.vgg_16(inputs, num_classes)
     expected_names = ['vgg_16/conv1/conv1_1',
                       'vgg_16/conv1/conv1_2',
                       'vgg_16/pool1',
                       'vgg_16/conv2/conv2_1',
                       'vgg_16/conv2/conv2_2',
                       'vgg_16/pool2',
                       'vgg_16/conv3/conv3_1',
                       'vgg_16/conv3/conv3_2',
                       'vgg_16/conv3/conv3_3',
                       'vgg_16/pool3',
                       'vgg_16/conv4/conv4_1',
                       'vgg_16/conv4/conv4_2',
                       'vgg_16/conv4/conv4_3',
                       'vgg_16/pool4',
                       'vgg_16/conv5/conv5_1',
                       'vgg_16/conv5/conv5_2',
                       'vgg_16/conv5/conv5_3',
                       'vgg_16/pool5',
                       'vgg_16/fc6',
                       'vgg_16/fc7',
                       'vgg_16/fc8'
                      ]
     print(end_points.keys())
     self.assertSetEqual(set(end_points.keys()), set(expected_names))
Example #5
def top_feature_net(input, anchors, inds_inside, num_bases):
  stride = 8
  # arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
  # with slim.arg_scope(arg_scope) :
  with slim.arg_scope(vgg.vgg_arg_scope()):
    # net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
    block, end_points = vgg.vgg_16(input)
    block3 = end_points['vgg_16/conv3/conv3_3']  # slim prefixes end-point keys with the scope name
    # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
    tf.summary.histogram('rpn_top_block', block)
    # tf.summary.histogram('rpn_top_block_weights', tf.get_collection('2/conv_weight')[0])
  with tf.variable_scope('top') as scope:
    #up     = upsample2d(block, factor = 2, has_bias=True, trainable=True, name='1')
    #up     = block
    up      = conv2d_bn_relu(block, num_kernels=128, kernel_size=(3,3), stride=[1,1,1,1], padding='SAME', name='2')
    scores  = conv2d(up, num_kernels=2*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='score')
    probs   = tf.nn.softmax( tf.reshape(scores,[-1,2]), name='prob')
    deltas  = conv2d(up, num_kernels=4*num_bases, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='delta')

  #<todo> flip to train and test mode nms (e.g. different nms_pre_topn values): use tf.cond
  with tf.variable_scope('top-nms') as scope:    #non-max
    batch_size, img_height, img_width, img_channel = input.get_shape().as_list()
    img_scale = 1
    # pdb.set_trace()
    rois, roi_scores = tf_rpn_nms( probs, deltas, anchors, inds_inside,
                                     stride, img_width, img_height, img_scale,
                                     nms_thresh=0.7, min_size=stride, nms_pre_topn=nms_pre_topn_, nms_post_topn=nms_post_topn_,
                                     name ='nms')

  #<todo> feature = upsample2d(block, factor = 4,  ...)
  feature = block
  return feature, scores, probs, deltas, rois, roi_scores
Example #6
def VGG_16(input_image):
    arg_scope = vgg.vgg_arg_scope()
    with slim.arg_scope(arg_scope):
        features, _ = vgg.vgg_16(input_image)
        # feature flatten
        features = tf.reshape(features, shape=[1, -1])
        features = tf.squeeze(features)
    return features
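A usage sketch for this helper, assuming TF 1.x, a vgg_16.ckpt checkpoint on disk, and image_batch already loaded as a (1, 224, 224, 3) float array (paths and names illustrative):

image_ph = tf.placeholder(tf.float32, [1, 224, 224, 3], name='input_image')
features_op = VGG_16(image_ph)

saver = tf.train.Saver(slim.get_model_variables('vgg_16'))
with tf.Session() as sess:
    saver.restore(sess, './weights/vgg_16.ckpt')
    feats = sess.run(features_op, feed_dict={image_ph: image_batch})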
Example #7
 def testModelVariables(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = 1000
     with self.test_session():
         inputs = random_ops.random_uniform((batch_size, height, width, 3))
         vgg.vgg_16(inputs, num_classes)
         expected_names = [
             'vgg_16/conv1/conv1_1/weights',
             'vgg_16/conv1/conv1_1/biases',
             'vgg_16/conv1/conv1_2/weights',
             'vgg_16/conv1/conv1_2/biases',
             'vgg_16/conv2/conv2_1/weights',
             'vgg_16/conv2/conv2_1/biases',
             'vgg_16/conv2/conv2_2/weights',
             'vgg_16/conv2/conv2_2/biases',
             'vgg_16/conv3/conv3_1/weights',
             'vgg_16/conv3/conv3_1/biases',
             'vgg_16/conv3/conv3_2/weights',
             'vgg_16/conv3/conv3_2/biases',
             'vgg_16/conv3/conv3_3/weights',
             'vgg_16/conv3/conv3_3/biases',
             'vgg_16/conv4/conv4_1/weights',
             'vgg_16/conv4/conv4_1/biases',
             'vgg_16/conv4/conv4_2/weights',
             'vgg_16/conv4/conv4_2/biases',
             'vgg_16/conv4/conv4_3/weights',
             'vgg_16/conv4/conv4_3/biases',
             'vgg_16/conv5/conv5_1/weights',
             'vgg_16/conv5/conv5_1/biases',
             'vgg_16/conv5/conv5_2/weights',
             'vgg_16/conv5/conv5_2/biases',
             'vgg_16/conv5/conv5_3/weights',
             'vgg_16/conv5/conv5_3/biases',
             'vgg_16/fc6/weights',
             'vgg_16/fc6/biases',
             'vgg_16/fc7/weights',
             'vgg_16/fc7/biases',
             'vgg_16/fc8/weights',
             'vgg_16/fc8/biases',
         ]
         model_variables = [
             v.op.name for v in variables_lib.get_model_variables()
         ]
         self.assertSetEqual(set(model_variables), set(expected_names))
Example #8
 def testForward(self):
     batch_size = 1
     height, width = 224, 224
     with self.test_session() as sess:
         inputs = random_ops.random_uniform((batch_size, height, width, 3))
         logits, _ = vgg.vgg_16(inputs)
         sess.run(variables.global_variables_initializer())
         output = sess.run(logits)
         self.assertTrue(output.any())
Example #9
 def testForward(self):
   batch_size = 1
   height, width = 224, 224
   with self.test_session() as sess:
     inputs = random_ops.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(inputs)
     sess.run(variables.global_variables_initializer())
     output = sess.run(logits)
     self.assertTrue(output.any())
Example #10
def encoder_vgg(x,
                enc_final_size,
                reuse=False,
                scope_prefix='',
                hparams=None,
                is_training=True):
    """VGG network to use as encoder without the top few layers.

  Can be pretrained.

  Args:
    x: The image to encode. In the range 0 to 1.
    enc_final_size: The desired size of the encoding.
    reuse: To reuse in variable scope or not.
    scope_prefix: The prefix before the scope name.
    hparams: The python hparams.
    is_training: boolean value indicating if training is happening.

  Returns:
    The flattened encoding of the image.
  """
    with tf.variable_scope(scope_prefix + 'encoder', reuse=reuse):

        # Preprocess input
        x *= 256
        x = x - COLOR_NORMALIZATION_VECTOR

        with arg_scope(vgg.vgg_arg_scope()):
            # Padding because vgg_16 accepts images of size at least VGG_IMAGE_SIZE.
            x = tf.pad(x, [[0, 0], [0, VGG_IMAGE_SIZE - IMG_WIDTH],
                           [0, VGG_IMAGE_SIZE - IMG_HEIGHT], [0, 0]])
            _, end_points = vgg.vgg_16(x,
                                       num_classes=enc_final_size,
                                       is_training=is_training)
            pool5_key = [key for key in end_points.keys() if 'pool5' in key]
            assert len(pool5_key) == 1
            enc = end_points[pool5_key[0]]
            # Undoing padding.
            enc = tf.slice(enc, [0, 0, 0, 0], [-1, 2, 2, -1])

        enc_shape = enc.get_shape().as_list()
        enc_shape[0] = -1
        enc_size = enc_shape[1] * enc_shape[2] * enc_shape[3]

        enc_flat = tf.reshape(enc, (-1, enc_size))
        enc_flat = tf.nn.dropout(enc_flat, hparams.enc_keep_prob)

        enc_flat = tf.layers.dense(
            enc_flat,
            enc_final_size,
            kernel_initializer=tf.truncated_normal_initializer(stddev=1e-4, ))

        if hparams.enc_pred_use_l2norm:
            enc_flat = tf.nn.l2_normalize(enc_flat, 1)

    return enc_flat
Example #11
 def testBuild(self):
     batch_size = 5
     height, width = 224, 224
     num_classes = 1000
     with self.test_session():
         inputs = random_ops.random_uniform((batch_size, height, width, 3))
         logits, _ = vgg.vgg_16(inputs, num_classes)
         self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
         self.assertListEqual(logits.get_shape().as_list(),
                              [batch_size, num_classes])
Example #12
 def testFullyConvolutional(self):
     batch_size = 1
     height, width = 256, 256
     num_classes = 1000
     with self.test_session():
         inputs = random_ops.random_uniform((batch_size, height, width, 3))
         logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
         self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
         self.assertListEqual(logits.get_shape().as_list(),
                              [batch_size, 2, 2, num_classes])
Example #13
 def testModelVariables(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     inputs = random_ops.random_uniform((batch_size, height, width, 3))
     vgg.vgg_16(inputs, num_classes)
     expected_names = [
         'vgg_16/conv1/conv1_1/weights',
         'vgg_16/conv1/conv1_1/biases',
         'vgg_16/conv1/conv1_2/weights',
         'vgg_16/conv1/conv1_2/biases',
         'vgg_16/conv2/conv2_1/weights',
         'vgg_16/conv2/conv2_1/biases',
         'vgg_16/conv2/conv2_2/weights',
         'vgg_16/conv2/conv2_2/biases',
         'vgg_16/conv3/conv3_1/weights',
         'vgg_16/conv3/conv3_1/biases',
         'vgg_16/conv3/conv3_2/weights',
         'vgg_16/conv3/conv3_2/biases',
         'vgg_16/conv3/conv3_3/weights',
         'vgg_16/conv3/conv3_3/biases',
         'vgg_16/conv4/conv4_1/weights',
         'vgg_16/conv4/conv4_1/biases',
         'vgg_16/conv4/conv4_2/weights',
         'vgg_16/conv4/conv4_2/biases',
         'vgg_16/conv4/conv4_3/weights',
         'vgg_16/conv4/conv4_3/biases',
         'vgg_16/conv5/conv5_1/weights',
         'vgg_16/conv5/conv5_1/biases',
         'vgg_16/conv5/conv5_2/weights',
         'vgg_16/conv5/conv5_2/biases',
         'vgg_16/conv5/conv5_3/weights',
         'vgg_16/conv5/conv5_3/biases',
         'vgg_16/fc6/weights',
         'vgg_16/fc6/biases',
         'vgg_16/fc7/weights',
         'vgg_16/fc7/biases',
         'vgg_16/fc8/weights',
         'vgg_16/fc8/biases',
     ]
     model_variables = [v.op.name for v in variables_lib.get_model_variables()]
     self.assertSetEqual(set(model_variables), set(expected_names))
Example #14
 def testFullyConvolutional(self):
   batch_size = 1
   height, width = 256, 256
   num_classes = 1000
   with self.test_session():
     inputs = random_ops.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(inputs, num_classes, spatial_squeeze=False)
     self.assertEquals(logits.op.name, 'vgg_16/fc8/BiasAdd')
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, 2, 2, num_classes])
Example #15
 def testBuild(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     inputs = random_ops.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(inputs, num_classes)
     self.assertEquals(logits.op.name, 'vgg_16/fc8/squeezed')
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, num_classes])
Example #16
 def testEvaluation(self):
   batch_size = 2
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     eval_inputs = random_ops.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, num_classes])
     predictions = math_ops.argmax(logits, 1)
     self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
Example #17
 def testEvaluation(self):
   batch_size = 2
   height, width = 224, 224
   num_classes = 1000
   with self.test_session():
     eval_inputs = tf.random_uniform((batch_size, height, width, 3))
     logits, _ = vgg.vgg_16(eval_inputs, is_training=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [batch_size, num_classes])
     predictions = tf.argmax(logits, 1)
     self.assertListEqual(predictions.get_shape().as_list(), [batch_size])
Example #18
def encoder_vgg(x, enc_final_size, reuse=False, scope_prefix='', hparams=None,
                is_training=True):
  """VGG network to use as encoder without the top few layers.

  Can be pretrained.

  Args:
    x: The image to encode. In the range 0 to 1.
    enc_final_size: The desired size of the encoding.
    reuse: To reuse in variable scope or not.
    scope_prefix: The prefix before the scope name.
    hparams: The python hparams.
    is_training: boolean value indicating if training is happening.

  Returns:
    The flattened encoding of the image.
  """
  with tf.variable_scope(scope_prefix + 'encoder', reuse=reuse):

    # Preprocess input
    x *= 256
    x = x - COLOR_NORMALIZATION_VECTOR

    with arg_scope(vgg.vgg_arg_scope()):
      # Padding because vgg_16 accepts images of size at least VGG_IMAGE_SIZE.
      x = tf.pad(x, [[0, 0], [0, VGG_IMAGE_SIZE - IMG_WIDTH],
                     [0, VGG_IMAGE_SIZE - IMG_HEIGHT], [0, 0]])
      _, end_points = vgg.vgg_16(
          x,
          num_classes=enc_final_size,
          is_training=is_training)
      pool5_key = [key for key in end_points.keys() if 'pool5' in key]
      assert len(pool5_key) == 1
      enc = end_points[pool5_key[0]]
      # Undoing padding.
      enc = tf.slice(enc, [0, 0, 0, 0], [-1, 2, 2, -1])

    enc_shape = enc.get_shape().as_list()
    enc_shape[0] = -1
    enc_size = enc_shape[1] * enc_shape[2] * enc_shape[3]

    enc_flat = tf.reshape(enc, (-1, enc_size))
    enc_flat = tf.nn.dropout(enc_flat, hparams.enc_keep_prob)

    enc_flat = tf.layers.dense(
        enc_flat,
        enc_final_size,
        kernel_initializer=tf.truncated_normal_initializer(stddev=1e-4,))

    if hparams.enc_pred_use_l2norm:
      enc_flat = tf.nn.l2_normalize(enc_flat, 1)

  return enc_flat
Example #19
 def testTrainEvalWithReuse(self):
   train_batch_size = 2
   eval_batch_size = 1
   train_height, train_width = 224, 224
   eval_height, eval_width = 256, 256
   num_classes = 1000
   with self.test_session():
     train_inputs = tf.random_uniform(
         (train_batch_size, train_height, train_width, 3))
     logits, _ = vgg.vgg_16(train_inputs)
     self.assertListEqual(logits.get_shape().as_list(),
                          [train_batch_size, num_classes])
     tf.get_variable_scope().reuse_variables()
     eval_inputs = tf.random_uniform(
         (eval_batch_size, eval_height, eval_width, 3))
     logits, _ = vgg.vgg_16(eval_inputs, is_training=False,
                            spatial_squeeze=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [eval_batch_size, 2, 2, num_classes])
     logits = tf.reduce_mean(logits, [1, 2])
     predictions = tf.argmax(logits, 1)
     self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
Example #20
    def get_logits_prob(self, batch_input):
        """
        Prediction from the model on a single batch.
        :param batch_input: the input batch. Must be of shape [?, 224, 224, 3]
        :return: the logits and probabilities for the batch
        """

        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, _ = vgg.vgg_16(batch_input,
                                   num_classes=1000,
                                   is_training=False)
            # note: tf.squeeze drops the batch dimension only when the batch
            # holds a single image; the [1:] slice then skips the first class
            probs = tf.squeeze(tf.nn.softmax(logits))[1:]
        return logits, probs
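A usage sketch, assuming TF 1.x, an instance of the enclosing class (called model here), a vgg_16.ckpt checkpoint, and a preprocessed image_batch array (all names illustrative):

batch_ph = tf.placeholder(tf.float32, [None, 224, 224, 3])
logits, probs = model.get_logits_prob(batch_ph)

saver = tf.train.Saver(slim.get_model_variables('vgg_16'))
with tf.Session() as sess:
    saver.restore(sess, 'vgg_16.ckpt')
    p = sess.run(probs, feed_dict={batch_ph: image_batch})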
Example #21
 def testTrainEvalWithReuse(self):
   train_batch_size = 2
   eval_batch_size = 1
   train_height, train_width = 224, 224
   eval_height, eval_width = 256, 256
   num_classes = 1000
   with self.test_session():
     train_inputs = random_ops.random_uniform(
         (train_batch_size, train_height, train_width, 3))
     logits, _ = vgg.vgg_16(train_inputs)
     self.assertListEqual(logits.get_shape().as_list(),
                          [train_batch_size, num_classes])
     variable_scope.get_variable_scope().reuse_variables()
     eval_inputs = random_ops.random_uniform(
         (eval_batch_size, eval_height, eval_width, 3))
     logits, _ = vgg.vgg_16(
         eval_inputs, is_training=False, spatial_squeeze=False)
     self.assertListEqual(logits.get_shape().as_list(),
                          [eval_batch_size, 2, 2, num_classes])
     logits = math_ops.reduce_mean(logits, [1, 2])
     predictions = math_ops.argmax(logits, 1)
     self.assertEquals(predictions.get_shape().as_list(), [eval_batch_size])
Example #22
def rgb_feature_net(input):
    # with tf.variable_scope("rgb_base"):
    # arg_scope = resnet_v1.resnet_arg_scope(weight_decay=0.0)
    # with slim.arg_scope(arg_scope):
    with slim.arg_scope(vgg.vgg_arg_scope()):
        # net, end_points = resnet_v1.resnet_v1_50(input, None, global_pool=False, output_stride=8)
        # block=end_points['resnet_v1_50/block4']
        # block   = conv2d_bn_relu(block, num_kernels=512, kernel_size=(1,1), stride=[1,1,1,1], padding='SAME', name='2')
        block, _ = vgg.vgg_16(input)
        #<todo> feature = upsample2d(block, factor = 4,  ...)
        tf.summary.histogram('rgb_top_block', block)
    feature = block
    return feature
Example #23
    def build_model(self, is_training=True, dropout_keep_prob=0.5):
        self.inputs = tf.placeholder(real_type(self.FLAGS),
                                     [self.FLAGS.batch_size, 224, 224, 3])
        self.targets = tf.placeholder(tf.int32, [self.FLAGS.batch_size])

        with slim.arg_scope(vgg_arg_scope()):
            logits, endpoints = vgg.vgg_16(self.inputs,
                                           num_classes=self.FLAGS.num_classes,
                                           is_training=is_training,
                                           dropout_keep_prob=dropout_keep_prob)

        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=logits, labels=self.targets)
        self.cost = tf.reduce_sum(loss)
        self.global_step = tf.contrib.framework.get_or_create_global_step()
        self.train_op = tf.train.AdagradOptimizer(0.01).minimize(
            self.cost, global_step=self.global_step)
Example #24
    def __init__(self):
        self.holder = tf.placeholder(
            tf.float32, [None, IMAGE_SIZE, IMAGE_SIZE,
                         len(CHANNEL_MEAN)],
            name='image')
        _, self.end_points = vgg.vgg_16(self.holder,
                                        is_training=False,
                                        dropout_keep_prob=1.0)

        tf_config = tf.ConfigProto(log_device_placement=False)
        tf_config.gpu_options.allow_growth = True

        self.sess = tf.Session(config=tf_config)
        rospy.on_shutdown(self.sess.close)

        saver = tf.train.Saver()
        saver.restore(self.sess, VGG16_NTW_PATH)
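A plausible companion method for this class (hypothetical), showing how the stored session and end points would be used to pull fc7 features for a batch of images:

    def extract(self, images):
        # 'vgg_16/fc7' is one of the end points exposed by vgg.vgg_16.
        fc7 = self.end_points['vgg_16/fc7']
        return self.sess.run(fc7, feed_dict={self.holder: images})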
Example #25
    def __init__(self,
                 tensor,
                 keep_prob=1.0,
                 num_classes=1000,
                 retrain_layer=[],
                 weights_path='./weights/vgg_16.ckpt'):
        # Call the parent class
        Model.__init__(self, tensor, keep_prob, num_classes, retrain_layer,
                       weights_path)

        # TODO This implementation has a problem while validation (is still set to training)
        is_training = bool(retrain_layer)
        with slim.arg_scope(vgg_arg_scope()):
            self.final, self.endpoints = vgg_16(self.tensor,
                                                num_classes=self.num_classes,
                                                is_training=is_training,
                                                dropout_keep_prob=keep_prob)
Example #26
    def build(self):
        # Input
        self.input = tf.placeholder(
            dtype=tf.float32,
            shape=[None, self.img_size[0], self.img_size[1], self.img_size[2]])
        self.input_mean = tfutils.mean_value(self.input, self.img_mean)
        if self.base_net == 'vgg16':
            with slim.arg_scope(vgg.vgg_arg_scope()):
                outputs, end_points = vgg.vgg_16(self.input_mean,
                                                 self.num_classes)
                self.prob = tf.nn.softmax(outputs, -1)
                self.logits = outputs

        elif self.base_net == 'res50':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_50(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res101':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_101(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        elif self.base_net == 'res152':
            with slim.arg_scope(resnet_v1.resnet_arg_scope()):
                net, end_points = resnet_v1.resnet_v1_152(
                    self.input_mean,
                    self.num_classes,
                    is_training=self.is_train)
                self.prob = tf.nn.softmax(net[:, 0, 0, :], -1)
                self.logits = net[:, 0, 0, :]
        else:
            raise ValueError(
                'base network should be vgg16, res50, -101, -152...')
        self.gt = tf.placeholder(dtype=tf.int32, shape=[None])
        # self.var_list = tf.trainable_variables()

        if self.is_train:
            self.loss()
Example #27
def vgg_encode(inputs,
               trainable=False,
               is_training=False,
               dropout_keep_prob=0.8,
               add_summaries=True):

    fine_tune = is_training and trainable
    net, end_points = vgg_16(inputs,
                             is_training=fine_tune,
                             dropout_keep_prob=dropout_keep_prob,
                             spatial_squeeze=True,
                             scope='vgg_16')
    # Add summaries
    if add_summaries:
        for v in end_points.values():
            tf.contrib.layers.summaries.summarize_activation(v)

    return net, end_points
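A usage sketch (TF 1.x; names illustrative): wire a placeholder through vgg_encode and pick an intermediate end point as the image embedding:

images = tf.placeholder(tf.float32, [None, 224, 224, 3])
net, end_points = vgg_encode(images, trainable=False, is_training=False)
fc7 = end_points['vgg_16/fc7']  # e.g. use fc7 as the image embedding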
Example #28
 def testEndPoints(self):
   batch_size = 5
   height, width = 224, 224
   num_classes = 1000
   for is_training in [True, False]:
     with ops.Graph().as_default():
       inputs = random_ops.random_uniform((batch_size, height, width, 3))
       _, end_points = vgg.vgg_16(inputs, num_classes, is_training=is_training)
       expected_names = [
           'vgg_16/conv1/conv1_1', 'vgg_16/conv1/conv1_2', 'vgg_16/pool1',
           'vgg_16/conv2/conv2_1', 'vgg_16/conv2/conv2_2', 'vgg_16/pool2',
           'vgg_16/conv3/conv3_1', 'vgg_16/conv3/conv3_2',
           'vgg_16/conv3/conv3_3', 'vgg_16/pool3', 'vgg_16/conv4/conv4_1',
           'vgg_16/conv4/conv4_2', 'vgg_16/conv4/conv4_3', 'vgg_16/pool4',
           'vgg_16/conv5/conv5_1', 'vgg_16/conv5/conv5_2',
           'vgg_16/conv5/conv5_3', 'vgg_16/pool5', 'vgg_16/fc6', 'vgg_16/fc7',
           'vgg_16/fc8'
       ]
       self.assertSetEqual(set(end_points.keys()), set(expected_names))
Example #29
    def build_model(self):
        is_train = self.FLAGS.is_train
        dropout_keep_prob = 1.0
        if is_train:
            dropout_keep_prob = 0.5

        images_placeholder = tf.image.resize_images(self.input_placeholder, (224, 224))

        with slim.arg_scope(vgg.vgg_arg_scope()):
            logits, end_points = vgg.vgg_16(images_placeholder, is_training=is_train, dropout_keep_prob=dropout_keep_prob)

        image_features = end_points['vgg_16/fc8']

        scene_logits = slim.fully_connected(image_features, 100, activation_fn=None, scope='scene_pred', trainable=True)
        multi_hot_logits = slim.fully_connected(image_features, 175, activation_fn=None, scope='multi_hot_logits', trainable=True)
        word_embedding_logits = slim.fully_connected(image_features, 300, activation_fn=None, scope='word_embedding_pred', trainable=True)

        obj_embedding_size = 40
        object_embedding_logits = slim.fully_connected(image_features, obj_embedding_size, activation_fn=None, scope='object_embedding_pred', trainable=True)

        outputs = [scene_logits, multi_hot_logits, word_embedding_logits, object_embedding_logits]

        return outputs
Example #30
def get_featuremap(net_name, input, num_classes=None):
    '''
    #tensorlayer
    input = tl.layers.InputLayer(input)
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_50,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_50'
                                                 )
            sv = tf.train.Supervisor()
            with sv.managed_session() as sess:
                a = sess.run(featuremap.all_layers)
                print(a)
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_101':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_101,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_101'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'resnet_v1_152':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=resnet_v1.resnet_v1_152,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'global_pool': False
                                                 },
                                                 name='resnet_v1_152'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    if net_name == 'vgg16':
        with slim.arg_scope(vgg.vgg_arg_scope()):
            featuremap = tl.layers.SlimNetsLayer(prev_layer=input,
                                                 slim_layer=vgg.vgg_16,
                                                 slim_args={
                                                     'num_classes': num_classes,
                                                     'is_training': True,
                                                     'spatial_squeeze': False
                                                 },
                                                 name='vgg_16'
                                                 )
            feature_w_loss = tf.reduce_sum(slim.losses.get_regularization_losses())
            return featuremap.outputs, feature_w_loss, featuremap.all_params
    '''

    #slim
    if net_name == 'resnet_v1_50':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_50(
                inputs=input,
                num_classes=num_classes,
                is_training=False,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_50/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_50/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_50/block3/unit_5/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_50/block4']  # [7, 7]
            }
            return feature_maps_dict
        return layer_dic['resnet_v1_50/block3/unit_5/bottleneck_v1']
        #return featuremap

    if net_name == 'resnet_v1_101':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = resnet_v1.resnet_v1_101(
                inputs=input,
                num_classes=num_classes,
                is_training=True,
                global_pool=False)
        if cfg.USE_FPN:
            feature_maps_dict = {
                'C2': layer_dic[
                    'resnet_v1_101/block1/unit_2/bottleneck_v1'],  # [56, 56]
                'C3': layer_dic[
                    'resnet_v1_101/block2/unit_3/bottleneck_v1'],  # [28, 28]
                'C4': layer_dic[
                    'resnet_v1_101/block3/unit_22/bottleneck_v1'],  # [14, 14]
                'C5': layer_dic['resnet_v1_101/block4']
            }
            return feature_maps_dict
        return featuremap

    if net_name == 'vgg_16':
        with slim.arg_scope(
                resnet_v1.resnet_arg_scope(
                    weight_decay=cfg.FEATURE_WEIGHT_DECAY)):
            featuremap, layer_dic = vgg.vgg_16(
                inputs=input,
                num_classes=7,
                is_training=False,
                spatial_squeeze=False,
            )

        return layer_dic['vgg_16/conv5/conv5_3']
Example #31
def network_vgg_16():
    input_shape = [1, 224, 224, 3]
    input_ = tf.placeholder(dtype=tf.float32, name='input', shape=input_shape)
    net, _end_points = vgg_16(input_, num_classes=1000, is_training=False)
    return net
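A usage sketch for this graph builder, assuming TF 1.x, NumPy, and a vgg_16.ckpt checkpoint (paths and data illustrative):

import numpy as np

net = network_vgg_16()
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, 'vgg_16.ckpt')
    logits = sess.run(net, feed_dict={
        'input:0': np.zeros((1, 224, 224, 3), dtype=np.float32)})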
Example #32
if args.network == "resnet":

    # create network
    with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)):
        _, end_points = resnet_v1.resnet_v1_152(images, 1000)  # 1000 is the number of softmax class

    # define the feature name according slim standard
    feature_name = os.path.join("resnet_v1_152", args.feature_name)

    # create the output directory
    out_dir = os.path.join(args.data_dir, out_file)
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

elif args.network == "vgg":
    _, end_points = vgg.vgg_16(images)
    out_dir = os.path.join(args.data_dir, out_file + ".pkl")
    feature_name = os.path.join("vgg_16", args.feature_name)
else:
    assert False, "Incorrect Network"

# check that the feature name is correct
assert feature_name in end_points, \
    "Invalid feature name ({}), must be one of the following: {}"\
        .format(feature_name, list(end_points.keys()))




# CPU/GPU option
cpu_pool = Pool(args.no_thread, maxtasksperchild=1000)
Example #33
def run_training():
    config = tf.ConfigProto(allow_soft_placement=True)
    sess = tf.Session(config=config)
    #     sess = tf.Session() # config=tf.ConfigProto(log_device_placement=True))

    # create input path and labels np.array from csv annotations
    df_annos = pd.read_csv(ANNOS_CSV, index_col=0)
    df_annos = df_annos.sample(frac=1).reset_index(
        drop=True)  # shuffle the whole dataset
    if DATA == 'l8':
        path_col = ['l8_vis_jpg']
    elif DATA == 's1':
        path_col = ['s1_vis_jpg']
    elif DATA == 'l8s1':
        path_col = ['l8_vis_jpg', 's1_vis_jpg']
    else:
        raise ValueError('DATA must be l8, s1, or l8s1, got %s' % DATA)

    input_files_train = JPG_DIR + df_annos.loc[df_annos.partition == 'train',
                                               path_col].values
    input_labels_train = df_annos.loc[df_annos.partition == 'train',
                                      'pop_density_log2'].values
    input_files_val = JPG_DIR + df_annos.loc[df_annos.partition == 'val',
                                             path_col].values
    input_labels_val = df_annos.loc[df_annos.partition == 'val',
                                    'pop_density_log2'].values
    input_id_train = df_annos.loc[df_annos.partition == 'train',
                                  'village_id'].values
    input_id_val = df_annos.loc[df_annos.partition == 'val',
                                'village_id'].values

    print('input_files_train shape:', input_files_train.shape)
    train_set_size = len(input_labels_train)

    # data input
    with tf.device('/cpu:0'):
        train_images_batch, train_labels_batch, _ = \
        dataset.input_batches(FLAGS.batch_size, FLAGS.output_size, input_files_train, input_labels_train, input_id_train,
                              IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL, regression=True, augmentation=True, normalization=True)
        val_images_batch, val_labels_batch, _ = \
        dataset.input_batches(FLAGS.batch_size, FLAGS.output_size, input_files_val, input_labels_val, input_id_val,
                              IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL, regression=True, augmentation=False, normalization=True)

    images_placeholder = tf.placeholder(
        tf.float32, shape=[None, IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNEL])
    labels_placeholder = tf.placeholder(tf.float32, shape=[
        None,
    ])
    print('finish data input')

    TRAIN_BATCHES_PER_EPOCH = int(
        train_set_size /
        FLAGS.batch_size)  # number of training batches/steps in each epoch
    MAX_STEPS = TRAIN_BATCHES_PER_EPOCH * FLAGS.max_epoch  # total number of training batches/steps

    # CNN forward reference
    if MODEL == 'vgg':
        with slim.arg_scope(
                vgg.vgg_arg_scope(weight_decay=FLAGS.weight_decay)):
            outputs, _ = vgg.vgg_16(images_placeholder,
                                    num_classes=FLAGS.output_size,
                                    dropout_keep_prob=FLAGS.dropout_keep,
                                    is_training=True)
            outputs = tf.squeeze(
                outputs
            )  # change shape from (B,1) to (B,), same as label input
    if MODEL == 'resnet':
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            outputs, _ = resnet_v1.resnet_v1_152(images_placeholder,
                                                 num_classes=FLAGS.output_size,
                                                 is_training=True)
            outputs = tf.squeeze(
                outputs
            )  # change shape from (B,1) to (B,), same as label input

    # loss
    labels_real = tf.pow(2.0, labels_placeholder)
    outputs_real = tf.pow(2.0, outputs)

    # only loss_log2_mse is used to compute gradients; the model minimizes this value
    loss_log2_mse = tf.reduce_mean(tf.squared_difference(
        labels_placeholder, outputs),
                                   name='loss_log2_mse')
    loss_real_rmse = tf.sqrt(tf.reduce_mean(
        tf.squared_difference(labels_real, outputs_real)),
                             name='loss_real_rmse')
    loss_real_mae = tf.losses.absolute_difference(labels_real, outputs_real)

    tf.summary.scalar('loss_log2_mse', loss_log2_mse)
    tf.summary.scalar('loss_real_rmse', loss_real_rmse)
    tf.summary.scalar('loss_real_mae', loss_real_mae)

    # accuracy (R2)
    def r_squared(labels, outputs):
        sst = tf.reduce_sum(
            tf.squared_difference(labels, tf.reduce_mean(labels)))
        sse = tf.reduce_sum(tf.squared_difference(labels, outputs))
        return (1.0 - tf.div(sse, sst))

    r2_log2 = r_squared(labels_placeholder, outputs)
    r2_real = r_squared(labels_real, outputs_real)

    tf.summary.scalar('r2_log2', r2_log2)
    tf.summary.scalar('r2_real', r2_real)

    # determine the model variables to restore from the pre-trained checkpoint
    if MODEL == 'vgg':
        if DATA == 'l8s1':
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8', 'vgg_16/conv1'])
        else:
            model_variables = slim.get_variables_to_restore(
                exclude=['vgg_16/fc8'])
    if MODEL == 'resnet':
        model_variables = slim.get_variables_to_restore(
            exclude=['resnet_v1_152/logits', 'resnet_v1_152/conv1'])

    # training step and learning rate
    global_step = tf.Variable(0, name='global_step',
                              trainable=False)  #, dtype=tf.int64)
    learning_rate = tf.train.exponential_decay(
        FLAGS.learning_rate,  # initial learning rate
        global_step=global_step,  # current step
        decay_steps=MAX_STEPS,  # total number of steps over which to decay
        decay_rate=FLAGS.lr_decay_rate
    )  # final learning rate = FLAGS.learning_rate * decay_rate
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    #     optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
    #     optimizer = tf.train.GradientDescentOptimizer(learning_rate=learning_rate)

    # to only update gradient in first and last layer
    #     vars_update = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, 'vgg_16/(conv1|fc8)')
    #     print('variables to update in traing: ', vars_update)

    train_op = optimizer.minimize(
        loss_log2_mse, global_step=global_step)  #, var_list = vars_update)

    # summary output in tensorboard
    summary = tf.summary.merge_all()
    summary_writer_train = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_train'), sess.graph)
    summary_writer_val = tf.summary.FileWriter(
        os.path.join(LOG_DIR, 'log_val'), sess.graph)

    # variable initialize
    init = tf.global_variables_initializer()
    sess.run(init)

    # restore the model from pre-trained checkpoint
    restorer = tf.train.Saver(model_variables)
    restorer.restore(sess, PRETRAIN_WEIGHTS)
    print('loaded pre-trained weights: ', PRETRAIN_WEIGHTS)

    # saver object to save checkpoint during training
    saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

    print('start training...')
    epoch = 0
    best_r2 = -float('inf')
    for step in range(MAX_STEPS):
        if step % TRAIN_BATCHES_PER_EPOCH == 0:
            epoch += 1

        start_time = time.time()  # record the time used for each batch

        images_out, labels_out = sess.run(
            [train_images_batch,
             train_labels_batch])  # inputs of this batch, numpy array format

        duration_batch = time.time() - start_time

        if step == 0:
            print("finished reading batch data")
            print("images_out shape:", images_out.shape)
        feed_dict = {
            images_placeholder: images_out,
            labels_placeholder: labels_out
        }
        _, train_loss, train_accuracy, train_outputs, lr = \
            sess.run([train_op, loss_log2_mse, r2_log2, outputs, learning_rate], feed_dict=feed_dict)

        duration = time.time() - start_time

        if step % 10 == 0 or (
                step + 1) == MAX_STEPS:  # print training loss every 10 batches
            print('Step %d epoch %d lr %.3e: log2 MSE loss = %.4f log2 R2 = %.4f (%.3f sec, %.3f sec(each batch))' \
                  % (step, epoch, lr, train_loss, train_accuracy, duration*10, duration_batch))
            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_train.add_summary(summary_str, step)
            summary_writer_train.flush()

        if step % 50 == 0 or (
                step + 1
        ) == MAX_STEPS:  # calculate and print validation loss every 50 batches
            images_out, labels_out = sess.run(
                [val_images_batch, val_labels_batch])
            feed_dict = {
                images_placeholder: images_out,
                labels_placeholder: labels_out
            }

            val_loss, val_accuracy = sess.run([loss_log2_mse, r2_log2],
                                              feed_dict=feed_dict)
            print('Step %d epoch %d: val log2 MSE = %.4f val log2 R2 = %.4f ' %
                  (step, epoch, val_loss, val_accuracy))

            summary_str = sess.run(summary, feed_dict=feed_dict)
            summary_writer_val.add_summary(summary_str, step)
            summary_writer_val.flush()

            # in each epoch, if the validation R2 is higher than best R2, save the checkpoint
            if step % (TRAIN_BATCHES_PER_EPOCH -
                       TRAIN_BATCHES_PER_EPOCH % 50) == 0:
                if val_accuracy > best_r2:
                    best_r2 = val_accuracy
                    checkpoint_file = os.path.join(LOG_DIR, 'model.ckpt')
                    saver.save(sess,
                               checkpoint_file,
                               global_step=step,
                               write_state=True)
Example #34
def main(argv=None):
    # Load the preprocessed data
    processed_data = np.load(INPUT_DATA)
    training_images = processed_data[0]
    n_training_examples = len(training_images)
    training_labels = processed_data[1]
    validation_images = processed_data[2]
    validation_labels = processed_data[3]
    testing_images = processed_data[4]
    testing_labels = processed_data[5]
    print('%d training, %d validation, %d testing' %
          (n_training_examples, len(validation_labels), len(testing_labels)))

    # Define the VGG-16 inputs
    images = tf.placeholder(tf.float32, [None, 224, 224, 3],
                            name='input_image')
    labels = tf.placeholder(tf.int64, [None], name='labels')

    # Define the VGG-16 model
    with slim.arg_scope(vgg.vgg_arg_scope()):
        logits, _ = vgg.vgg_16(images, num_classes=N_CLASSES)

    # Loss function
    loss_fun = tf.losses.softmax_cross_entropy(tf.one_hot(labels, N_CLASSES),
                                               logits)
    # Training
    # train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(tf.losses.get_total_loss())
    # Only train the final layer
    train_step = tf.train.RMSPropOptimizer(LEARNING_RATE).minimize(
        tf.losses.get_total_loss(), var_list=get_trainable_variables())

    # Accuracy
    with tf.variable_scope('evaluation'):
        correct_prediction = tf.equal(tf.argmax(logits, 1), labels)
        evaluation_step = tf.reduce_mean(
            tf.cast(correct_prediction, tf.float32))

    ckpt = tf.train.get_checkpoint_state(SAVE_PATH)
    if ckpt and ckpt.model_checkpoint_path:
        # Resume training from the previously saved parameters
        variables_to_restore = slim.get_model_variables()
        print('continue training from %s' % ckpt)
        step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]
        step = int(step)
        ckpt = ckpt.model_checkpoint_path
    else:
        # No saved checkpoint yet; transfer some pre-trained weights first
        ckpt = TRAINED_CKPT_FILE
        variables_to_restore = get_tuned_variable()
        print('loading tuned variables from %s' % TRAINED_CKPT_FILE)
        step = 0

    load_fn = slim.assign_from_checkpoint_fn(ckpt,
                                             variables_to_restore,
                                             ignore_missing_vars=True)

    # Start a session and train
    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Initialize all parameters
        init = tf.global_variables_initializer()
        sess.run(init)
        load_fn(sess)

        start = 0
        end = BATCH
        for i in range(step + 1, step + 1 + STEPS):
            start_time = time.time()
            # Run a training step (not all parameters are updated)
            _, loss_val = sess.run(
                [train_step, loss_fun],
                feed_dict={
                    images: training_images[start:end],
                    labels: training_labels[start:end]
                })
            duration = time.time() - start_time

            #print('current train step duration %.3f' % duration)

            if i % 10 == 0:
                print('after %d train step, loss value is: %.4f' %
                      (i, loss_val))

            # Write logs and checkpoints
            if i % 100 == 0:
                saver.save(sess, TRAIN_FILE, global_step=i)
                validation_accuracy = sess.run(evaluation_step,
                                               feed_dict={
                                                   images: validation_images,
                                                   labels: validation_labels
                                               })
                print('Step %d Validation accuracy = %.1f%%' %
                      (i, validation_accuracy * 100.0))

            start = end
            if start == n_training_examples:
                start = 0

            end = start + BATCH
            if end > n_training_examples:
                end = n_training_examples

        # Evaluate accuracy on the test set
        test_accuracy = sess.run(evaluation_step,
                                 feed_dict={
                                     images: testing_images,
                                     labels: testing_labels
                                 })
        print('Final test accuracy = %.1f%%' % (test_accuracy * 100.0))
Example #35
def network():
    input = tf.placeholder(dtype=tf.float32, name='input', shape=input_shape)
    net, end_points = vgg_16(input, spatial_squeeze = False, num_classes=1000, is_training=False)
    return net
Example #36
    mode=args.mode, network=args.network, feature_name=args.feature_name, size=args.img_size)


print("Create networks...")
if args.network == "resnet":
    ft_output = resnet.create_resnet(images,
                                     resnet_out=args.feature_name,
                                     resnet_version=args.resnet_version,
                                     is_training=False)
    # create network
    with slim.arg_scope(slim_utils.resnet_arg_scope(is_training=False)):
        _, end_points = resnet_v1.resnet_v1_152(images, 1000)  # 1000 is the number of softmax class


elif args.network == "vgg":
    _, end_points = vgg.vgg_16(images, is_training=False, dropout_keep_prob=1.0)
    ft_name = os.path.join("vgg_16", args.feature_name)
    ft_output = end_points[ft_name]
else:
    assert False, "Incorrect Network"


extract_features(
    img_input = images,
    ft_output = ft_output,
    dataset_cstor = dataset_cstor,
    dataset_args = {"folder": args.data_dir, "image_builder":image_builder, "crop_builder":crop_builder, "dataset_name": args.dataset_name},
    batchifier_cstor = OracleBatchifier,
    out_dir = args.out_dir,
    set_type = args.set_type,
    network_ckpt=args.ckpt,
)
Example #37
def get_model(ref_dict, num_point, is_training, bn=False, bn_decay=None, img_size = (137,137), wd=1e-5, FLAGS=None):

    ref_img = ref_dict['imgs']
    ref_pc = ref_dict['pc']
    ref_sample_pc = ref_dict['sample_pc']
    ref_sample_pc_rot = ref_dict['sample_pc_rot']
    trans_mat = ref_dict['trans_mat']
    K = ref_dict['K']
    RT = ref_dict['RT']
    gt_xyshift = ref_dict['shifts']

    batch_size = ref_img.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['ref_pc'] = ref_pc
    end_points['RT'] = RT
    end_points['K'] = K
    end_points['gt_xyshift'] = gt_xyshift
    end_points['trans_mat'] = trans_mat
    end_points['sample_pc'] = ref_sample_pc #* 10

    # Image extract features
    if ref_img.shape[1] != 224 or ref_img.shape[2] != 224:
        ref_img = tf.image.resize_bilinear(ref_img, [224, 224])
    end_points['ref_img'] = ref_img

    # vgg.vgg_16.default_image_size = (224, 224)
    with slim.arg_scope([slim.conv2d],
                         weights_regularizer=slim.l2_regularizer(wd)):
        ref_feats_embedding, vgg_end_points = vgg.vgg_16(ref_img, num_classes=1024, is_training=False, scope='vgg_16', spatial_squeeze=False)
        ref_feats_embedding_cnn = tf.squeeze(ref_feats_embedding, axis = [1,2]) 
    end_points['embedding'] = ref_feats_embedding_cnn
    print(vgg_end_points.keys())

    with tf.variable_scope("cameraprediction") as scope:
        if FLAGS.shift:
            pred_rotation, pred_translation, pred_RT, pred_xyshift = posenet.get_cam_mat_shft(ref_feats_embedding_cnn, is_training, batch_size, bn, bn_decay, wd)
            end_points['pred_rotation'] = pred_rotation
            end_points['pred_translation'] = pred_translation
            end_points['pred_RT'] = pred_RT
            end_points['pred_xyshift'] = pred_xyshift
        else:
            pred_rotation, pred_translation, pred_RT = posenet.get_cam_mat(ref_feats_embedding_cnn, is_training, batch_size, bn, bn_decay, wd)
            end_points['pred_rotation'] = pred_rotation
            end_points['pred_translation'] = pred_translation
            end_points['pred_RT'] = pred_RT
            end_points['pred_xyshift'] = None
            pred_xyshift = None

    print('trans_mat', trans_mat.shape)
    sample_img_points, gt_xy = get_img_points(ref_sample_pc, trans_mat, gt_xyshift, FLAGS)
    end_points['sample_img_points'] = sample_img_points
    end_points['gt_xy'] = gt_xy

    K_transpose = tf.transpose(K, perm=[0, 2, 1])
    pred_trans_mat = tf.matmul(pred_RT, K_transpose)
    pred_sample_img_points, pred_xy = get_img_points(ref_sample_pc, pred_trans_mat, pred_xyshift, FLAGS)
    end_points['pred_sample_img_points'] = pred_sample_img_points
    end_points['pred_trans_mat'] = pred_trans_mat
    end_points['pred_xy'] = pred_xy
    print("gt_xy, pred_xy", gt_xy.get_shape(), pred_xy.get_shape())
    return end_points
Example #38
def get_model(input_pls,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=224,
              FLAGS=None):

    if FLAGS.act == "relu":
        activation_fn = tf.nn.relu
    elif FLAGS.act == "elu":
        activation_fn = tf.nn.elu

    input_imgs = input_pls['imgs']
    input_pnts = input_pls['pnts']
    input_gvfs = input_pls['gvfs']
    input_onedge = input_pls['onedge']
    input_trans_mat = input_pls['trans_mats']
    input_obj_rot_mats = input_pls['obj_rot_mats']

    batch_size = input_imgs.get_shape()[0].value

    # endpoints
    end_points = {}
    end_points['pnts'] = input_pnts
    if FLAGS.rot:
        end_points['gt_gvfs_xyz'] = tf.matmul(input_gvfs, input_obj_rot_mats)
        end_points['pnts_rot'] = tf.matmul(input_pnts, input_obj_rot_mats)
    else:
        end_points['gt_gvfs_xyz'] = input_gvfs  #* 10
        end_points['pnts_rot'] = input_pnts
    if FLAGS.edgeweight != 1.0:
        end_points['onedge'] = input_onedge
    input_pnts_rot = end_points['pnts_rot']
    end_points['imgs'] = input_imgs  # B*H*W*3|4

    # Image extract features
    if input_imgs.shape[1] != img_size or input_imgs.shape[2] != img_size:
        if FLAGS.alpha:
            ref_img_rgb = tf.compat.v1.image.resize_bilinear(
                input_imgs[:, :, :, :3], [img_size, img_size])
            ref_img_alpha = tf.image.resize_nearest_neighbor(
                tf.expand_dims(input_imgs[:, :, :, 3], axis=-1),
                [img_size, img_size])
            ref_img = tf.concat([ref_img_rgb, ref_img_alpha], axis=-1)
        else:
            ref_img = tf.compat.v1.image.resize_bilinear(
                input_imgs, [img_size, img_size])
    else:
        ref_img = input_imgs
    end_points['resized_ref_img'] = ref_img
    if FLAGS.encoder[:6] == "vgg_16":
        vgg.vgg_16.default_image_size = img_size
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(FLAGS.wd)):
            ref_feats_embedding, encdr_end_points = vgg.vgg_16(
                ref_img,
                num_classes=FLAGS.num_classes,
                is_training=False,
                scope='vgg_16',
                spatial_squeeze=False)
    elif FLAGS.encoder == "sim_res":
        ref_feats_embedding, encdr_end_points = res_sim_encoder.res_sim_encoder(
            ref_img,
            FLAGS.batch_size,
            is_training=is_training,
            activation_fn=activation_fn,
            bn=bn,
            bn_decay=bn_decay,
            wd=FLAGS.wd)
    elif FLAGS.encoder == "resnet_v1_50":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_50')
        scopelst = [
            "resnet_v1_50/block1", "resnet_v1_50/block2",
            "resnet_v1_50/block3", 'resnet_v1_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v1_101":
        resnet_v1.default_image_size = img_size
        with slim.arg_scope(resnet_v1.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v1.resnet_v1_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v1_101')
        scopelst = [
            "resnet_v1_101/block1", "resnet_v1_101/block2",
            "resnet_v1_101/block3", 'resnet_v1_101/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_50":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_50(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_50')
        scopelst = [
            "resnet_v2_50/block1", "resnet_v2_50/block2",
            "resnet_v2_50/block3", 'resnet_v2_50/block4'
        ]
    elif FLAGS.encoder == "resnet_v2_101":
        resnet_v2.default_image_size = img_size
        with slim.arg_scope(resnet_v2.resnet_arg_scope()):
            ref_feats_embedding, encdr_end_points = resnet_v2.resnet_v2_101(
                ref_img,
                FLAGS.num_classes,
                is_training=is_training,
                scope='resnet_v2_101')
        scopelst = [
            "resnet_v2_101/block1", "resnet_v2_101/block2",
            "resnet_v2_101/block3", 'resnet_v2_101/block4'
        ]
    else:
        raise Exception('No network matched with encoder %s.' % FLAGS.encoder)
    end_points['img_embedding'] = ref_feats_embedding
    point_img_feat = None
    gvfs_feat = None
    sample_img_points = get_img_points(input_pnts,
                                       input_trans_mat)  # B * N * 2

    if FLAGS.img_feat_onestream:
        with tf.compat.v1.variable_scope("sdfimgfeat") as scope:
            if FLAGS.encoder[:3] == "vgg":
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv1/conv1_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv2/conv2_2'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points['vgg_16/conv3/conv3_3'],
                    (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                if FLAGS.encoder[-7:] != "smaller":
                    conv4 = tf.compat.v1.image.resize_bilinear(
                        encdr_end_points['vgg_16/conv4/conv4_3'],
                        (FLAGS.img_h, FLAGS.img_w))
                    point_conv4 = tf.contrib.resampler.resampler(
                        conv4, sample_img_points)
                    point_img_feat = tf.concat(axis=2,
                                               values=[
                                                   point_conv1, point_conv2,
                                                   point_conv3, point_conv4
                                               ])  # small
                else:
                    print("smaller vgg")
                    point_img_feat = tf.concat(
                        axis=2, values=[point_conv1, point_conv2,
                                        point_conv3])  # small
            elif FLAGS.encoder[:3] == "res":
                # print(encdr_end_points.keys())
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[0]], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[1]], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[scopelst[2]], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            else:
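                # Assumption: the fallthrough encoder (e.g. sim_res) appears to
                # return a list of feature maps indexed by depth rather than a
                # dict keyed by scope name, hence the integer indices below.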
                conv1 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[0], (FLAGS.img_h, FLAGS.img_w))
                point_conv1 = tf.contrib.resampler.resampler(
                    conv1, sample_img_points)
                conv2 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[1], (FLAGS.img_h, FLAGS.img_w))
                point_conv2 = tf.contrib.resampler.resampler(
                    conv2, sample_img_points)
                conv3 = tf.compat.v1.image.resize_bilinear(
                    encdr_end_points[2], (FLAGS.img_h, FLAGS.img_w))
                point_conv3 = tf.contrib.resampler.resampler(
                    conv3, sample_img_points)
                # conv4 = tf.compat.v1.image.resize_bilinear(encdr_end_points[scopelst[3]], (FLAGS.img_h, FLAGS.img_w))
                # point_conv4 = tf.contrib.resampler.resampler(conv4, sample_img_points)
                point_img_feat = tf.concat(
                    axis=2, values=[point_conv1, point_conv2, point_conv3])
            print("point_img_feat.shape", point_img_feat.get_shape())
            point_img_feat = tf.expand_dims(point_img_feat, axis=2)
            if FLAGS.decoder == "att":
                gvfs_feat = gvfnet.get_gvf_att_imgfeat(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            elif FLAGS.decoder == "skip":
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream_skip(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
            else:
                gvfs_feat = gvfnet.get_gvf_basic_imgfeat_onestream(
                    input_pnts_rot,
                    ref_feats_embedding,
                    point_img_feat,
                    is_training,
                    batch_size,
                    bn,
                    bn_decay,
                    wd=FLAGS.wd,
                    activation_fn=activation_fn)
    else:
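        # NOTE: when FLAGS.multi_view is set, gvfs_feat stays None here, so the
        # prediction heads below would fail; single-view input is assumed.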
        if not FLAGS.multi_view:
            with tf.compat.v1.variable_scope("sdfprediction") as scope:
                gvfs_feat = gvfnet.get_gvf_basic(input_pnts_rot,
                                                 ref_feats_embedding,
                                                 is_training,
                                                 batch_size,
                                                 bn,
                                                 bn_decay,
                                                 wd=FLAGS.wd,
                                                 activation_fn=activation_fn)
    end_points['pred_gvfs_xyz'] = None
    end_points['pred_gvfs_dist'] = None
    end_points['pred_gvfs_direction'] = None
    if FLAGS.XYZ:
        end_points['pred_gvfs_xyz'] = gvfnet.xyz_gvfhead(
            gvfs_feat, batch_size, wd=FLAGS.wd, activation_fn=activation_fn)
        end_points['pred_gvfs_dist'] = tf.sqrt(
            tf.reduce_sum(tf.square(end_points['pred_gvfs_xyz']),
                          axis=2,
                          keepdims=True))
        end_points[
            'pred_gvfs_direction'] = end_points['pred_gvfs_xyz'] / tf.maximum(
                end_points['pred_gvfs_dist'], 1e-6)
    else:
        end_points['pred_gvfs_dist'], end_points[
            'pred_gvfs_direction'] = gvfnet.dist_direct_gvfhead(
                gvfs_feat,
                batch_size,
                wd=FLAGS.wd,
                activation_fn=activation_fn)
        end_points['pred_gvfs_xyz'] = end_points[
            'pred_gvfs_direction'] * end_points['pred_gvfs_dist']

    end_points["sample_img_points"] = sample_img_points
    # end_points["ref_feats_embedding"] = ref_feats_embedding
    end_points["point_img_feat"] = point_img_feat

    return end_points
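
A minimal sketch (not from the original source) of how this get_model might be
wired up. Every FLAGS field and tensor shape below is an assumption inferred
from how the function reads its inputs:

import tensorflow as tf

class Flags:
    # hypothetical stand-in for the real FLAGS object; all values are assumed
    act = "relu"; alpha = False; rot = False; edgeweight = 1.0
    encoder = "vgg_16"; decoder = "skip"; img_feat_onestream = True
    multi_view = False; XYZ = True; num_classes = 1024
    batch_size = 4; img_h = 137; img_w = 137; wd = 1e-5

FLAGS = Flags()
B, N = FLAGS.batch_size, 2048
input_pls = {
    'imgs': tf.compat.v1.placeholder(tf.float32, [B, 137, 137, 3]),
    'pnts': tf.compat.v1.placeholder(tf.float32, [B, N, 3]),
    'gvfs': tf.compat.v1.placeholder(tf.float32, [B, N, 3]),
    'onedge': tf.compat.v1.placeholder(tf.float32, [B, N, 1]),
    'trans_mats': tf.compat.v1.placeholder(tf.float32, [B, 4, 3]),
    'obj_rot_mats': tf.compat.v1.placeholder(tf.float32, [B, 3, 3]),
}
end_points = get_model(input_pls, is_training=False, FLAGS=FLAGS)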
Example #39
0
def get_model(ref_dict,
              num_point,
              is_training,
              bn=False,
              bn_decay=None,
              img_size=(137, 137),
              wd=1e-5,
              FLAGS=None):

    ref_img = ref_dict['imgs']
    ref_pc = ref_dict['pc']
    ref_sample_pc = ref_dict['sample_pc']
    ref_sample_pc_rot = ref_dict['sample_pc_rot']
    trans_mat = ref_dict['trans_mat']
    K = ref_dict['K']
    norm_params = ref_dict['norm_params']
    rot_mat_inv = ref_dict['rot_mat_inv']
    regress_mat = ref_dict['regress_mat']
    gt_xyshift = ref_dict['shifts']

    batch_size = ref_img.get_shape()[0].value
    norm_mat_inv = get_inverse_norm_matrix(norm_params, batch_size)
    # endpoints
    end_points = {}
    end_points['ref_pc'] = ref_pc
    end_points['regress_mat'] = regress_mat
    end_points['K'] = K
    end_points['gt_xyshift'] = gt_xyshift
    end_points['trans_mat'] = trans_mat
    end_points['sample_pc'] = ref_sample_pc  #* 10

    # Image extract features
    if ref_img.shape[1] != 224 or ref_img.shape[2] != 224:
        ref_img = tf.image.resize_bilinear(ref_img, [224, 224])
    else:
        print("image size:", img_size)
    end_points['ref_img'] = ref_img

    # vgg.vgg_16.default_image_size = (224, 224)
    with slim.arg_scope([slim.conv2d],
                        weights_regularizer=slim.l2_regularizer(wd)):
        ref_feats_embedding, vgg_end_points = vgg.vgg_16(ref_img,
                                                         num_classes=1024,
                                                         is_training=False,
                                                         scope='vgg_16',
                                                         spatial_squeeze=False)
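        # With spatial_squeeze=False the fc8 output stays B*1*1*1024; squeeze
        # the two spatial dims to get a B*1024 embedding.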
        ref_feats_embedding_cnn = tf.squeeze(ref_feats_embedding, axis=[1, 2])
    end_points['embedding'] = ref_feats_embedding_cnn
    print(vgg_end_points.keys())

    with tf.variable_scope("cameraprediction") as scope:
        if FLAGS.shift:
            pred_rotation, pred_translation, pred_RT, pred_xyshift = posenet.get_cam_mat_shft(
                ref_feats_embedding_cnn, is_training, batch_size, bn, bn_decay,
                wd)
            end_points['pred_rotation'] = pred_rotation
            end_points['pred_translation'] = pred_translation
            end_points['pred_RT'] = pred_RT
            end_points['pred_xyshift'] = pred_xyshift
        elif FLAGS.space_shift:
            pred_rotation, pred_translation, pred_RT, predxyzshift = posenet.get_cam_mat_spaceshft(
                ref_feats_embedding_cnn, is_training, batch_size, bn, bn_decay,
                wd)
            end_points['pred_rotation'] = pred_rotation
            end_points['pred_translation'] = pred_translation
            end_points['pred_RT'] = pred_RT
            end_points['pred_xyshift'] = None
            pred_xyshift = None
        else:
            pred_rotation, pred_translation, pred_RT = posenet.get_cam_mat(
                ref_feats_embedding_cnn, is_training, batch_size, bn, bn_decay,
                wd)
            end_points['pred_rotation'] = pred_rotation
            end_points['pred_translation'] = pred_translation
            end_points['pred_RT'] = pred_RT
            end_points['pred_xyshift'] = None
            pred_xyshift = None

    print('trans_mat', trans_mat.shape)
    sample_img_points, gt_xy = get_img_points(ref_sample_pc,
                                              trans_mat,
                                              gt_xyshift,
                                              FLAGS,
                                              img_size=img_size)
    end_points['sample_img_points'] = sample_img_points
    end_points['gt_xy'] = gt_xy

    # Compose the predicted transform back into image space:
    #   pred_regress_mat = norm_mat^-1 [@ xyzshift] @ rot_mat^-1 @ pred_RT
    #   pred_trans_mat   = pred_regress_mat @ K
    if FLAGS.space_shift:
        pred_regress_mat = norm_mat_inv @ predxyzshift @ rot_mat_inv @ pred_RT
    else:
        pred_regress_mat = norm_mat_inv @ rot_mat_inv @ pred_RT
    pred_trans_mat = pred_regress_mat @ K
    pred_sample_img_points, pred_xy = get_img_points(ref_sample_pc,
                                                     pred_trans_mat,
                                                     pred_xyshift,
                                                     FLAGS,
                                                     img_size=img_size)
    end_points['pred_sample_img_points'] = pred_sample_img_points
    end_points['pred_trans_mat'] = pred_trans_mat
    end_points['pred_regress_mat'] = pred_regress_mat
    end_points['pred_xy'] = pred_xy
    print("gt_xy, pred_xy", gt_xy.get_shape(), pred_xy.get_shape())
    return end_points
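
For reference, a minimal numpy sketch of the projection that pred_regress_mat
followed by K performs. get_img_points itself is not shown in this example, so
this is an assumed equivalent for the no-shift path, not the original
implementation:

import numpy as np

def project(sample_pc, regress_mat, K):
    # Assumed shapes: sample_pc N x 3, regress_mat 4 x 4, K 4 x 3.
    pc_h = np.concatenate([sample_pc, np.ones((len(sample_pc), 1))], axis=1)
    xy_h = pc_h @ regress_mat @ K       # N x 3 homogeneous image coordinates
    return xy_h[:, :2] / xy_h[:, 2:3]   # N x 2 pixel coordinates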