コード例 #1
0
 def test_slim_unit_norm(self):
   """Build a fully-connected + ``slim.unit_norm`` graph and check it.

   The graph is handed to ``self._test_tf_model`` together with a concrete
   input shape of ``[1, 8]`` and a tolerance of ``delta=1e-2``.
   """
   tf_graph = tf.Graph()
   with tf_graph.as_default():
     features = tf.placeholder(
         tf.float32, shape=[None, 8], name='test_slim_unit_norm/input')
     # Shared initializer / regularizer defaults for the FC layer below.
     with slim.arg_scope(
         [slim.fully_connected],
         weights_initializer=tf.truncated_normal_initializer(0.0, 0.2),
         weights_regularizer=slim.l2_regularizer(0.0005)):
       hidden = slim.fully_connected(features, 10, scope='fc')
       normalized = slim.unit_norm(hidden, 1)
   input_shapes = {"test_slim_unit_norm/input:0": [1, 8]}
   output_names = [normalized.op.name]
   self._test_tf_model(tf_graph, input_shapes, output_names, delta=1e-2)
コード例 #2
0
  def test_custom_tile(self):
    """Convert an FC + ``slim.unit_norm`` graph with custom layers enabled.

    Numerical comparison is switched off; the test only inspects the
    converted spec and expects the layer at index 9 to be a custom layer
    whose class name is ``'Tile'``.
    """
    tf_graph = tf.Graph()
    with tf_graph.as_default():
      features = tf.placeholder(tf.float32, shape=[None, 8], name='input')
      with slim.arg_scope(
          [slim.fully_connected],
          weights_initializer=tf.truncated_normal_initializer(0.0, 0.2),
          weights_regularizer=slim.l2_regularizer(0.0005)):
        fc_out = slim.fully_connected(features, 10, scope='fc')
        normalized = slim.unit_norm(fc_out, dim=1)

    converted = self._test_tf_model(
        tf_graph,
        {"input:0": [1, 8]},
        [normalized.op.name],
        check_numerical_accuracy=False,
        add_custom_layers=True)

    # Index 9 is where the original test expects the Tile layer to land.
    tile_layer = converted.get_spec().neuralNetwork.layers[9]
    # NOTE(review): if the spec is a protobuf message, reading a sub-message
    # field presumably never yields None, so this check may be vacuous; the
    # className assertion below is the meaningful one -- confirm.
    self.assertIsNotNone(tile_layer.custom)
    self.assertEqual('Tile', tile_layer.custom.className)
コード例 #3
0
    def _build_interpretation(self):
        """Interpret the logits as per-class scores, predictions and accuracy.

        Reads ``self.net['logits']``, ``self.num_classes`` and ``self.labels``.
        The softmax output is cut into ``self.num_classes`` equal groups along
        axis 1, each group is reduced to its maximum, and the resulting
        per-class vector is passed through ``slim.unit_norm``.

        Sets:
            self.predictions_prob: the unit-normalized per-class scores.
            self.predictions: ``argmax`` of those scores along axis 1 (int32).
            self.score: mean of the element-wise equality between
                ``self.predictions`` and ``self.labels``.
        """
        # Per the original author's note the softmax output has
        # 5 * num_classes columns -- TODO confirm against the logits layer.
        softmax_out = tf.nn.softmax(self.net['logits'])

        # With an integer `num_or_size_splits`, tf.split returns a list of
        # exactly that many tensors, so it can be iterated directly. This
        # avoids indexing through `xrange`, which is unavailable on Python 3
        # unless it is imported from a compatibility module.
        class_groups = tf.split(softmax_out,
                                num_or_size_splits=self.num_classes,
                                axis=1)
        max_splits = [
            tf.reduce_max(group, axis=1, keep_dims=True)
            for group in class_groups
        ]
        softmax_out_pooled = tf.concat(max_splits, axis=1)
        softmax_out_norm = slim.unit_norm(softmax_out_pooled,
                                          dim=1,
                                          scope='normalize_softmax')

        self.predictions_prob = softmax_out_norm
        self.predictions = tf.argmax(self.predictions_prob,
                                     axis=1,
                                     output_type=tf.int32)
        self.score = tf.reduce_mean(
            tf.to_float(tf.equal(self.predictions, self.labels)))
    def forward(self, inputs, grid, is_training=True, reuse=False):
        """Build the depth / surface-normal graph and return its outputs.

        The graph is assembled in four variable scopes:

        * ``fcn``: two VGG-style branches, one predicting a 1-channel depth
          map and one predicting a 3-channel unit normal map.
        * ``noise``: normals computed from the predicted depth by solving the
          per-pixel normal equations ``(A^T A)^-1 A^T 1`` over a
          ``self.k x self.k`` neighbourhood, followed by convolutional
          refinement.
        * ``norm_depth``: depth computed from the predicted normals over the
          same neighbourhood, followed by convolutional refinement.
        * ``edge_refinemet``: edge maps from ``myfunc_canny`` plus learned
          weights drive four ``propagate`` passes over depth and normals.

        Args:
            inputs: Image tensor of rank 3 or 4; a rank-3 tensor gets a
                leading batch axis. NOTE(review): several later ops
                (``tf.squeeze`` followed by ``expand_dims(axis=0)``, and the
                edge reshape to a leading dimension of 1) look like they
                assume a batch of one -- confirm against callers.
            grid: Tensor multiplied element-wise with the depth tiled to three
                channels to obtain per-pixel points; presumably per-pixel ray
                directions -- TODO confirm.
            is_training: Forwarded to every ``slim.dropout`` call.
            reuse: Forwarded to every ``tf.variable_scope`` opened here.

        Returns:
            Tuple ``(final_depth, fc8_upsample_norm, norm_pred_final,
            fc8_upsample)``:

            * ``final_depth``: refined depth after four propagation passes.
            * ``fc8_upsample_norm``: the normal-branch prediction as last
              reassigned in the ``noise`` scope, i.e. reshaped to
              ``[batch_size, crop_size_h, crop_size_w, 3]`` and then given an
              extra trailing axis.
            * ``norm_pred_final``: refined unit normals after four
              propagation passes.
            * ``fc8_upsample``: the resized raw output of the depth branch.
        """
        def preprocessing(inputs):
            """Add a batch axis to rank-3 input, reverse channels, add mean."""
            dims = inputs.get_shape()
            if len(dims) == 3:
                # `axis` replaces the deprecated `dim` keyword and matches the
                # other tf.expand_dims calls in this method.
                inputs = tf.expand_dims(inputs, axis=0)
            mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3])
            # NOTE(review): the mean is added, not subtracted -- confirm the
            # sign convention of self.mean_BGR.
            inputs = inputs[:, :, :, ::-1] + mean_BGR
            return inputs

        ## -----------------------depth and normal FCN--------------------------
        inputs = preprocessing(inputs)
        with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
                activation_fn=tf.nn.relu,
                stride=1,
                padding='SAME',
                weights_initializer=weight_from_caffe(self.pretrain_weight),
                biases_initializer=bias_from_caffe(self.pretrain_weight)):
            with tf.variable_scope('fcn', reuse=reuse):
                ##---------------------vgg depth------------------------------------
                conv1 = slim.repeat(inputs,
                                    2,
                                    slim.conv2d,
                                    64, [3, 3],
                                    scope='conv1')
                pool1 = slim.max_pool2d(conv1, [3, 3],
                                        stride=2,
                                        padding='SAME',
                                        scope='pool1')

                conv2 = slim.repeat(pool1,
                                    2,
                                    slim.conv2d,
                                    128, [3, 3],
                                    scope='conv2')
                pool2 = slim.max_pool2d(conv2, [3, 3],
                                        stride=2,
                                        padding='SAME',
                                        scope='pool2')

                conv3 = slim.repeat(pool2,
                                    3,
                                    slim.conv2d,
                                    256, [3, 3],
                                    scope='conv3')
                pool3 = slim.max_pool2d(conv3, [3, 3],
                                        stride=2,
                                        padding='SAME',
                                        scope='pool3')

                conv4 = slim.repeat(pool3,
                                    3,
                                    slim.conv2d,
                                    512, [3, 3],
                                    scope='conv4')
                # From here on pooling uses stride 1, so the spatial size is
                # kept; conv5 / fc6 use dilation (rate) instead.
                pool4 = slim.max_pool2d(conv4, [3, 3],
                                        stride=1,
                                        padding='SAME',
                                        scope='pool4')

                conv5 = slim.repeat(pool4,
                                    3,
                                    slim.conv2d,
                                    512, [3, 3],
                                    rate=2,
                                    scope='conv5')
                pool5 = slim.max_pool2d(conv5, [3, 3],
                                        stride=1,
                                        padding='SAME',
                                        scope='pool5')
                pool5a = slim.avg_pool2d(pool5, [3, 3],
                                         stride=1,
                                         padding='SAME',
                                         scope='pool5a')

                fc6 = slim.conv2d(pool5a,
                                  1024, [3, 3],
                                  stride=1,
                                  rate=12,
                                  scope='fc6')
                fc6 = slim.dropout(fc6,
                                   0.5,
                                   is_training=is_training,
                                   scope='drop6')
                fc7 = slim.conv2d(fc6, 1024, [1, 1], scope='fc7')
                fc7 = slim.dropout(fc7,
                                   0.5,
                                   is_training=is_training,
                                   scope='drop7')

                # Pool with a 61x81 window, normalize, scale by 10 and tile
                # back by 61x81 before concatenating with fc7.
                # NOTE(review): this presumably relies on fc7 being exactly
                # 61x81 spatially so the pooled map is 1x1 -- confirm.
                pool6_1x1 = slim.avg_pool2d(fc7, [61, 81],
                                            stride=[61, 81],
                                            padding='SAME',
                                            scope='pool6_1x1')
                # NOTE(review): scope 'pool6_1x1_norm_new' is used again in
                # the normal branch below; these ops appear to create no
                # variables, so likely only op names are affected -- confirm.
                pool6_1x1_norm = slim.unit_norm(pool6_1x1,
                                                dim=3,
                                                scope='pool6_1x1_norm_new')
                pool6_1x1_norm_scale = pool6_1x1_norm * 10
                pool6_1x1_norm_upsample = tf.tile(
                    pool6_1x1_norm_scale, [1, 61, 81, 1],
                    name='pool6_1x1_norm_upsample')

                out = tf.concat([fc7, pool6_1x1_norm_upsample],
                                axis=-1,
                                name='out')

                out_reduce = slim.conv2d(out,
                                         256, [1, 1],
                                         activation_fn=tf.nn.relu,
                                         stride=1,
                                         scope='out_reduce',
                                         padding='SAME',
                                         weights_initializer=weight_from_caffe(
                                             self.pretrain_weight),
                                         biases_initializer=bias_from_caffe(
                                             self.pretrain_weight))
                out_conv = slim.conv2d(out_reduce,
                                       256, [3, 3],
                                       activation_fn=tf.nn.relu,
                                       stride=1,
                                       scope='out_conv',
                                       padding='SAME',
                                       weights_initializer=weight_from_caffe(
                                           self.pretrain_weight),
                                       biases_initializer=bias_from_caffe(
                                           self.pretrain_weight))
                out_conv_increase = slim.conv2d(
                    out_conv,
                    1024, [1, 1],
                    activation_fn=tf.nn.relu,
                    stride=1,
                    scope='out_conv_increase',
                    padding='SAME',
                    weights_initializer=weight_from_caffe(
                        self.pretrain_weight),
                    biases_initializer=bias_from_caffe(self.pretrain_weight))

                # Single-channel depth head, resized to the crop size.
                fc8_nyu_depth = slim.conv2d(out_conv_increase,
                                            1, [1, 1],
                                            activation_fn=None,
                                            scope='fc8_nyu_depth')
                fc8_upsample = tf.image.resize_images(
                    fc8_nyu_depth, [self.crop_size_h, self.crop_size_w],
                    method=0,
                    align_corners=True)
                # ---------------------------------------vgg depth end ---------------------------------------
                ## ----------------- vgg norm---------------------------------------------------------------
                conv1_norm = slim.repeat(inputs,
                                         2,
                                         slim.conv2d,
                                         64, [3, 3],
                                         scope='conv1_norm')
                pool1_norm = slim.max_pool2d(conv1_norm, [3, 3],
                                             stride=2,
                                             padding='SAME',
                                             scope='pool1_norm')

                conv2_norm = slim.repeat(pool1_norm,
                                         2,
                                         slim.conv2d,
                                         128, [3, 3],
                                         scope='conv2_norm')
                pool2_norm = slim.max_pool2d(conv2_norm, [3, 3],
                                             stride=2,
                                             padding='SAME',
                                             scope='pool2_norm')

                conv3_norm = slim.repeat(pool2_norm,
                                         3,
                                         slim.conv2d,
                                         256, [3, 3],
                                         scope='conv3_norm')
                pool3_norm = slim.max_pool2d(conv3_norm, [3, 3],
                                             stride=2,
                                             padding='SAME',
                                             scope='pool3_norm')

                conv4_norm = slim.repeat(pool3_norm,
                                         3,
                                         slim.conv2d,
                                         512, [3, 3],
                                         scope='conv4_norm')
                pool4_norm = slim.max_pool2d(conv4_norm, [3, 3],
                                             stride=1,
                                             padding='SAME',
                                             scope='pool4_norm')

                conv5_norm = slim.repeat(pool4_norm,
                                         3,
                                         slim.conv2d,
                                         512, [3, 3],
                                         rate=2,
                                         scope='conv5_norm')
                pool5_norm = slim.max_pool2d(conv5_norm, [3, 3],
                                             stride=1,
                                             padding='SAME',
                                             scope='pool5_norm')
                pool5a_norm = slim.avg_pool2d(pool5_norm, [3, 3],
                                              stride=1,
                                              padding='SAME',
                                              scope='pool5a_norm')

                fc6_norm = slim.conv2d(pool5a_norm,
                                       1024, [3, 3],
                                       stride=1,
                                       rate=12,
                                       scope='fc6_norm')
                fc6_norm = slim.dropout(fc6_norm,
                                        0.5,
                                        is_training=is_training,
                                        scope='drop6_norm')
                fc7_norm = slim.conv2d(fc6_norm,
                                       1024, [1, 1],
                                       scope='fc7_norm')
                fc7_norm = slim.dropout(fc7_norm,
                                        0.5,
                                        is_training=is_training,
                                        scope='drop7_norm')

                # Same pool / normalize / scale / tile step as in the depth
                # branch, applied to the normal branch features.
                pool6_1x1_norm_new = slim.avg_pool2d(
                    fc7_norm, [61, 81],
                    stride=[61, 81],
                    padding='SAME',
                    scope='pool6_1x1_norm_new')

                pool6_1x1_norm_norm = slim.unit_norm(
                    pool6_1x1_norm_new, dim=3, scope='pool6_1x1_norm_new')
                pool6_1x1_norm_scale_norm = pool6_1x1_norm_norm * 10
                pool6_1x1_norm_upsample_norm = tf.tile(
                    pool6_1x1_norm_scale_norm, [1, 61, 81, 1],
                    name='pool6_1x1_norm_upsample')
                out_norm = tf.concat([fc7_norm, pool6_1x1_norm_upsample_norm],
                                     axis=-1,
                                     name='out_norm')
                # Three-channel normal head, resized and unit-normalized over
                # the channel axis.
                fc8_nyu_norm_norm = slim.conv2d(out_norm,
                                                3, [1, 1],
                                                activation_fn=None,
                                                scope='fc8_nyu_norm_norm')
                fc8_upsample_norm = tf.image.resize_images(
                    fc8_nyu_norm_norm, [self.crop_size_h, self.crop_size_w],
                    method=0,
                    align_corners=True)

                fc8_upsample_norm = slim.unit_norm(fc8_upsample_norm, dim=3)
                # -------------------------------------vgg norm end---------------------------------------------

            # ------------- depth to normal + norm refinement---------------------------------------------------
            with tf.variable_scope('noise', reuse=reuse):
                # Squeeze + reshape pins the static shape to
                # [batch_size, crop_size_h, crop_size_w, 3].
                fc8_upsample_norm = tf.squeeze(fc8_upsample_norm)
                fc8_upsample_norm = tf.reshape(
                    fc8_upsample_norm,
                    [self.batch_size, self.crop_size_h, self.crop_size_w, 3])

                # k x k (dilated by self.rate) neighbourhood of normals around
                # every pixel, reshaped to [..., k*k, 3].
                norm_matrix = tf.extract_image_patches(
                    images=fc8_upsample_norm,
                    ksizes=[1, self.k, self.k, 1],
                    strides=[1, 1, 1, 1],
                    rates=[1, self.rate, self.rate, 1],
                    padding='SAME')

                matrix_c = tf.reshape(norm_matrix, [
                    self.batch_size, self.crop_size_h, self.crop_size_w,
                    self.k * self.k, 3
                ])

                # From here on fc8_upsample_norm carries an extra trailing
                # axis ([..., 3, 1]); this is also the tensor that is returned.
                fc8_upsample_norm = tf.expand_dims(fc8_upsample_norm, axis=4)

                # Dot product between each neighbour normal and the centre
                # normal: shape [..., k*k, 1].
                angle = tf.matmul(matrix_c, fc8_upsample_norm)

                # Keep only neighbours whose dot product exceeds self.thresh.
                valid_condition = tf.greater(angle, self.thresh)
                valid_condition_all = tf.tile(valid_condition, [1, 1, 1, 1, 3])

                # 0.69314718056 is ln(2), so this is 2 ** fc8_upsample.
                exp_depth = tf.exp(fc8_upsample * 0.69314718056)
                depth_repeat = tf.tile(exp_depth, [1, 1, 1, 3])
                points = tf.multiply(grid, depth_repeat)
                point_matrix = tf.extract_image_patches(
                    images=points,
                    ksizes=[1, self.k, self.k, 1],
                    strides=[1, 1, 1, 1],
                    rates=[1, self.rate, self.rate, 1],
                    padding='SAME')

                matrix_a = tf.reshape(point_matrix, [
                    self.batch_size, self.crop_size_h, self.crop_size_w,
                    self.k * self.k, 3
                ])

                # Zero out the points of rejected neighbours.
                matrix_a_zero = tf.zeros_like(matrix_a, dtype=tf.float32)
                matrix_a_valid = tf.where(valid_condition_all, matrix_a,
                                          matrix_a_zero)

                matrix_a_trans = tf.matrix_transpose(matrix_a_valid,
                                                     name='matrix_transpose')
                matrix_b = tf.ones(shape=[
                    self.batch_size, self.crop_size_h, self.crop_size_w,
                    self.k * self.k, 1
                ])
                # A^T A, a 3x3 matrix per pixel.
                point_multi = tf.matmul(matrix_a_trans,
                                        matrix_a_valid,
                                        name='matrix_multiplication')
                with tf.device('cpu:0'):
                    matrix_deter = tf.matrix_determinant(point_multi)
                # Where the determinant is not above 1e-5 the matrix is
                # replaced by the identity so that the inverse below is
                # defined.
                inverse_condition = tf.greater(matrix_deter, 1e-5)
                inverse_condition = tf.expand_dims(inverse_condition, axis=3)
                inverse_condition = tf.expand_dims(inverse_condition, axis=4)
                inverse_condition_all = tf.tile(inverse_condition,
                                                [1, 1, 1, 3, 3])

                diag_constant = tf.ones([3], dtype=tf.float32)
                diag_element = tf.diag(diag_constant)
                diag_element = tf.expand_dims(diag_element, axis=0)
                diag_element = tf.expand_dims(diag_element, axis=0)
                diag_element = tf.expand_dims(diag_element, axis=0)

                diag_matrix = tf.tile(diag_element, [
                    self.batch_size, self.crop_size_h, self.crop_size_w, 1, 1
                ])

                inversible_matrix = tf.where(inverse_condition_all,
                                             point_multi, diag_matrix)
                with tf.device('cpu:0'):
                    inv_matrix = tf.matrix_inverse(inversible_matrix)

                # (A^T A)^-1 A^T 1: shape [..., 3, 1].
                generated_norm = tf.matmul(
                    tf.matmul(inv_matrix, matrix_a_trans), matrix_b)

                # Axis 3 holds the three normal components here.
                norm_normalize = slim.unit_norm((generated_norm), dim=3)
                norm_normalize = tf.reshape(
                    norm_normalize,
                    [self.batch_size, self.crop_size_h, self.crop_size_w, 3])
                norm_scale = norm_normalize * 10.0

                # Small encoder over the depth-derived normals producing a
                # 3-channel residual that is added back below.
                conv1_noise = slim.repeat(norm_scale,
                                          2,
                                          slim.conv2d,
                                          64, [3, 3],
                                          scope='conv1_noise')
                pool1_noise = slim.max_pool2d(conv1_noise, [3, 3],
                                              stride=2,
                                              padding='SAME',
                                              scope='pool1_noise')  #

                conv2_noise = slim.repeat(pool1_noise,
                                          2,
                                          slim.conv2d,
                                          128, [3, 3],
                                          scope='conv2_noise')
                conv3_noise = slim.repeat(conv2_noise,
                                          3,
                                          slim.conv2d,
                                          256, [3, 3],
                                          scope='conv3_noise')

                fc1_noise = slim.conv2d(conv3_noise,
                                        512, [1, 1],
                                        activation_fn=tf.nn.relu,
                                        stride=1,
                                        scope='fc1_noise',
                                        padding='SAME')
                encode_norm_noise = slim.conv2d(fc1_noise,
                                                3, [3, 3],
                                                activation_fn=None,
                                                stride=1,
                                                scope='encode_norm_noise',
                                                padding='SAME')
                encode_norm_upsample_noise = tf.image.resize_images(
                    encode_norm_noise, [self.crop_size_h, self.crop_size_w],
                    method=0,
                    align_corners=True)

                sum_norm_noise = tf.add(norm_normalize,
                                        encode_norm_upsample_noise)

                norm_pred_noise = slim.unit_norm(sum_norm_noise, dim=3)

                # Fuse the network normals, the depth-derived normals and the
                # image scaled by 0.00392156862 (approximately 1/255).
                # NOTE(review): squeeze + expand_dims(axis=0) looks like it
                # assumes a batch of one -- confirm.
                norm_pred_all = tf.concat([
                    tf.expand_dims(tf.squeeze(fc8_upsample_norm), axis=0),
                    norm_pred_noise, inputs * 0.00392156862
                ],
                                          axis=3)

                # These layers override the caffe initializers from the
                # enclosing arg_scope with Xavier / zero initializers.
                norm_pred_all = slim.repeat(
                    norm_pred_all,
                    3,
                    slim.conv2d,
                    128, [3, 3],
                    rate=2,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv1_norm_noise_new')
                norm_pred_all = slim.repeat(
                    norm_pred_all,
                    3,
                    slim.conv2d,
                    128, [3, 3],
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv2_norm_noise_new')
                norm_pred_final = slim.conv2d(
                    norm_pred_all,
                    3, [3, 3],
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='norm_conv3_noise_new')
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)

            # ------------- normal to depth  + depth refinement---------------------------------------------------
            with tf.variable_scope('norm_depth', reuse=reuse):
                grid_patch = tf.extract_image_patches(
                    images=grid,
                    ksizes=[1, self.k, self.k, 1],
                    strides=[1, 1, 1, 1],
                    rates=[1, self.rate, self.rate, 1],
                    padding='SAME')
                grid_patch = tf.reshape(grid_patch, [
                    self.batch_size, self.crop_size_h, self.crop_size_w,
                    self.k * self.k, 3
                ])
                # Third component of every (unmasked) neighbour point.
                _, _, depth_data = tf.split(value=matrix_a,
                                            num_or_size_splits=3,
                                            axis=4)
                tmp_matrix_zero = tf.zeros_like(angle, dtype=tf.float32)
                valid_angle = tf.where(valid_condition, angle, tmp_matrix_zero)

                # Denominator: neighbour normals dotted with the centre grid
                # vector. Entries not above 1e-5 are replaced by 1 before the
                # reciprocal and their weights are zeroed.
                lower_matrix = tf.matmul(matrix_c, tf.expand_dims(grid,
                                                                  axis=4))
                condition = tf.greater(lower_matrix, 1e-5)
                tmp_matrix = tf.ones_like(lower_matrix)
                lower_matrix = tf.where(condition, lower_matrix, tmp_matrix)
                lower = tf.reciprocal(lower_matrix)
                valid_angle = tf.where(condition, valid_angle, tmp_matrix_zero)
                # Numerator: neighbour normals dotted with their own grid
                # vectors.
                upper = tf.reduce_sum(tf.multiply(matrix_c, grid_patch), [4])
                ratio = tf.multiply(lower, tf.expand_dims(upper, axis=4))
                estimate_depth = tf.multiply(ratio, depth_data)

                # Normalize the weights over the neighbourhood. The tile
                # multiple must equal the neighbourhood size k*k (the size of
                # axis 3 of valid_angle); it was previously hard-coded to 81,
                # which is only correct for k == 9.
                valid_angle = tf.multiply(
                    valid_angle,
                    tf.reciprocal(
                        tf.tile(
                            tf.reduce_sum(valid_angle, [3, 4], keep_dims=True)
                            + 1e-5, [1, 1, 1, self.k * self.k, 1])))

                # Weighted average of the per-neighbour depth estimates,
                # clipped to [0, 10].
                depth_stage1 = tf.reduce_sum(
                    tf.multiply(estimate_depth, valid_angle), [3, 4])
                depth_stage1 = tf.expand_dims(tf.squeeze(depth_stage1), axis=2)
                depth_stage1 = tf.clip_by_value(depth_stage1, 0, 10.0)
                exp_depth = tf.expand_dims(tf.squeeze(exp_depth), axis=2)

                # NOTE(review): the squeezes followed by expand_dims(axis=0)
                # look like they assume a batch of one -- confirm.
                depth_all = tf.expand_dims(tf.concat([
                    depth_stage1, exp_depth,
                    tf.squeeze(inputs) * 0.00392156862
                ],
                                                     axis=2),
                                           axis=0)

                depth_pred_all = slim.repeat(
                    depth_all,
                    3,
                    slim.conv2d,
                    128, [3, 3],
                    rate=2,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv1_depth_noise_new')
                depth_pred_all = slim.repeat(
                    depth_pred_all,
                    3,
                    slim.conv2d,
                    128, [3, 3],
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv2_depth_noise_new')
                final_depth = slim.conv2d(
                    depth_pred_all,
                    1, [3, 3],
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='depth_conv3_noise_new')
            with tf.variable_scope('edge_refinemet', reuse=reuse):
                print(inputs.shape)
                # Edge map computed outside the graph by myfunc_canny; the
                # reshape restores a static shape with a leading dimension
                # of 1.
                edges = tf.py_func(myfunc_canny, [inputs], tf.float32)
                edges = tf.reshape(edges,
                                   [1, self.crop_size_h, self.crop_size_w, 1])
                edge_input_depth = final_depth
                edge_input_norm = norm_pred_final

                # edge prediction for depth
                edge_inputs = tf.concat([edges, inputs * 0.00784], axis=3)
                edges_encoder = slim.repeat(
                    edge_inputs,
                    3,
                    slim.conv2d,
                    32, [3, 3],
                    rate=2,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv1_edge_refinement')
                edges_encoder = slim.repeat(
                    edges_encoder,
                    3,
                    slim.conv2d,
                    32, [3, 3],
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='conv2_edge_refinement')

                edges_predictor = slim.conv2d(
                    edges_encoder,
                    8, [3, 3],
                    activation_fn=None,
                    weights_initializer=tf.contrib.layers.xavier_initializer(
                        uniform=False),
                    biases_initializer=tf.constant_initializer(0.0),
                    scope='edge_weight')
                # Learned 8-channel offsets are added to the edge map and
                # clipped to [0, 1]; the first four channels are passed to the
                # depth propagation, the last four to the normal propagation.
                edges_all = edges_predictor + tf.tile(edges, [1, 1, 1, 8])
                edges_all = tf.clip_by_value(edges_all, 0.0, 1.0)

                dlr, drl, dud, ddu, nlr, nrl, nud, ndu = tf.split(
                    edges_all, num_or_size_splits=8, axis=3)

                # 4 iteration depth
                final_depth = propagate(edge_input_depth, dlr, drl, dud, ddu,
                                        1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
                final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)

                # 4 iteration norm
                # Normals are re-normalized to unit length after every pass.
                norm_pred_final = propagate(edge_input_norm, nlr, nrl, nud,
                                            ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud,
                                            ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud,
                                            ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)
                norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud,
                                            ndu, 3)
                norm_pred_final = slim.unit_norm((norm_pred_final), dim=3)

        return final_depth, fc8_upsample_norm, norm_pred_final, fc8_upsample