def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group residual network and return its 1000-way output.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` produced by ``group_func``, global average pooling
    ``gap`` and a 1000-unit fully-connected layer ``linear``.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        block_func: forwarded untouched to ``group_func``.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    # (size, stride) handed to group_func for group0..group3, in order.
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')

    with argscope([Conv2D], use_bias=False, kernel_initializer=conv_init):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))
        for idx, (size, stride) in enumerate(group_specs):
            net = net.apply(group_func, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride)
        logits = net.GlobalAvgPooling('gap').FullyConnected('linear', 1000)()
    return logits
Ejemplo n.º 2
0
def resnet_backbone(image, num_blocks, block_func):
    """Build a residual network ending in ``bnlast`` and a 1000-way layer.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` built by ``resnet_group``, a ``BNReLU`` named
    ``bnlast``, global average pooling ``gap`` and a 1000-unit
    fully-connected layer ``linear`` with identity non-linearity.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are passed to ``resnet_group``
            for ``group0``..``group3`` respectively.
        block_func: forwarded untouched to ``resnet_group``.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    # (size, stride) for group1..group3; group0 is built separately because
    # it is the only one that receives ``first=True``.
    later_groups = [(128, 2), (256, 2), (512, 2)]

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME')
               .apply(resnet_group, 'group0', block_func, 64, num_blocks[0],
                      1, first=True))
        for idx, (size, stride) in enumerate(later_groups, start=1):
            net = net.apply(resnet_group, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride)
        logits = (net
                  .BNReLU('bnlast')
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000, nl=tf.identity)())
    return logits
Ejemplo n.º 3
0
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a residual network with explicit padding and a 1000-way output.

    The stem pads the last two axes of ``image`` by 3 before a 7x7 stride-2
    ``VALID`` convolution (``conv0`` with ``NormAct``), then pads them by 1
    before a 3x3 stride-2 ``VALID`` max pooling (``pool0``). Four groups
    ``group0``..``group3`` follow, then global average pooling ``gap`` and a
    1000-unit fully-connected layer ``linear`` initialised from a normal
    distribution with stddev 0.01.

    NOTE(review): padding the last two axes implies a channels-first input
    layout -- confirm against the caller.

    Args:
        image: input tensor (4-D, given the four-entry padding spec).
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        block_func: forwarded untouched to ``group_func``.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    conv_init = tf.variance_scaling_initializer(
        scale=2.0, mode='fan_out', distribution='untruncated_normal')
    fc_init = tf.random_normal_initializer(stddev=0.01)
    stem_pad = [[0, 0], [0, 0], [3, 3], [3, 3]]
    pool_pad = [[0, 0], [0, 0], [1, 1], [1, 1]]
    # (size, stride) handed to group_func for group0..group3, in order.
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        net = (LinearWrap(image)
               .tf.pad(stem_pad)
               .Conv2D('conv0', 64, 7, strides=2, activation=NormAct,
                       padding='VALID')
               .tf.pad(pool_pad)
               .MaxPooling('pool0', shape=3, stride=2, padding='VALID'))
        for idx, (size, stride) in enumerate(group_specs):
            net = net.apply(group_func, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride)
        logits = (net
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', 1000,
                                  kernel_initializer=fc_init)())
    return logits
Ejemplo n.º 4
0
def resnet_backbone(image, num_blocks, group_func, group_func_dilation,
                    block_func, block_func_dilation):
    """Build a residual backbone whose last two groups use the dilation variants.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``/``group1`` built by ``group_func`` and ``group2``/``group3``
    built by ``group_func_dilation``, followed by a 1x1 convolution
    ``conv102`` with 21 output channels and identity activation.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to the group builders
            for ``group0``..``group3`` respectively.
        group_func: builder for ``group0`` and ``group1``; called as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        group_func_dilation: builder for ``group2`` and ``group3``; receives
            one extra trailing positional argument (2, then 4) -- presumably
            the dilation rate, confirm against its definition.
        block_func: forwarded to ``group_func``.
        block_func_dilation: forwarded to ``group_func_dilation``.

    Returns:
        The tensor produced by ``conv102``.
    """
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')

    with argscope(Conv2D, use_bias=False, kernel_initializer=conv_init):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))

        # Plain groups.
        net = net.apply(group_func, 'group0', block_func,
                        64, num_blocks[0], 1)
        net = net.apply(group_func, 'group1', block_func,
                        128, num_blocks[1], 2)

        # Dilated groups: stride argument stays 1, extra argument is 2 then 4.
        net = net.apply(group_func_dilation, 'group2', block_func_dilation,
                        256, num_blocks[2], 1, 2)
        net = net.apply(group_func_dilation, 'group3', block_func_dilation,
                        512, num_blocks[3], 1, 4)

        logits = net.Conv2D('conv102', 21, 1, stride=1,
                            activation=tf.identity)()

    return logits
Ejemplo n.º 5
0
    def _get_logits(self, image, pose):
        """Build the image/pose two-stream network and return its 2-unit output.

        Image stream: four convolutions (``conv1_1``, ``conv1_2``,
        ``conv2_1``, ``conv2_2``), each followed by a max pooling, then a
        1024-unit fully-connected layer ``fc3`` (optionally followed by
        dropout when ``cfg.drop_fc3`` is set). Pose stream: a 16-unit
        fully-connected layer ``pc1``. Both streams are projected to 1024
        units with identity activation (``fc4_im``, ``fc4_pose``), summed,
        passed through ReLU and fed to the 2-unit layer ``fc5``.

        Args:
            image: input tensor of the image stream.
            pose: input tensor of the pose stream.

        Returns:
            The tensor produced by ``fc5``.
        """
        with argscope([Conv2D, MaxPooling],
                      data_format=self.data_format,
                      padding='same'), \
                argscope([Conv2D, FullyConnected], activation=tf.nn.relu), \
                argscope([Conv2D], strides=1):
            im_fc3 = (
                LinearWrap(image)
                # 1_1
                .Conv2D('conv1_1', 64, 7).MaxPooling('pool1_1', 1)
                # 1_2 (LocalNorm replaces the default ReLU activation here)
                .Conv2D('conv1_2', 64, 5, activation=LocalNorm)
                .MaxPooling('pool1_2', 2)
                # 2_1
                .Conv2D('conv2_1', 64, 3).MaxPooling('pool2_1', 1)
                # 2_2 (LocalNorm replaces the default ReLU activation here)
                .Conv2D('conv2_2', 64, 3, activation=LocalNorm)
                .MaxPooling('pool2_2', 1).FullyConnected('fc3', 1024)())
            if cfg.drop_fc3:
                # Fixed: the original passed the undefined name ``fc3`` here,
                # raising NameError whenever dropout was enabled.
                # NOTE(review): the second positional argument of TF1's
                # tf.nn.dropout is ``keep_prob``; the config name suggests a
                # drop rate -- confirm which one is intended.
                im_fc3 = tf.nn.dropout(im_fc3, cfg.fc3_drop_rate)
            pc1 = FullyConnected('pc1', pose, 16)

        # Built outside the argscope above, so the ReLU default does not apply:
        # both projections are explicitly linear and ReLU follows the sum.
        fc4_im = FullyConnected('fc4_im', im_fc3, 1024, activation=tf.identity)
        fc4_pose = FullyConnected('fc4_pose',
                                  pc1,
                                  1024,
                                  activation=tf.identity)
        fc4 = tf.nn.relu(fc4_im + fc4_pose)
        fc5 = FullyConnected('fc5', fc4, 2)

        return fc5
Ejemplo n.º 6
0
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group residual network and return its 1000-way output.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` produced by ``group_func``, global average pooling
    ``gap`` and a 1000-unit fully-connected layer ``linear``.

    Author's note carried over from the original source (Sec 5.1): the
    1000-way fully-connected layer is initialized by drawing weights from a
    zero-mean Gaussian with standard deviation of 0.01.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        block_func: forwarded untouched to ``group_func``.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    with argscope(Conv2D,
                  use_bias=False,
                  kernel_initializer=tf.variance_scaling_initializer(
                      scale=2.0, mode='fan_out')):
        logits = (LinearWrap(image)
                  # ``activation=`` replaces the deprecated ``nl=`` alias so
                  # the call uses one keyword style throughout.
                  .Conv2D('conv0', 64, 7, strides=2, activation=BNReLU)
                  .MaxPooling('pool0', 3, strides=2, padding='SAME')
                  .apply(group_func, 'group0', block_func, 64, num_blocks[0], 1)
                  .apply(group_func, 'group1', block_func, 128, num_blocks[1], 2)
                  .apply(group_func, 'group2', block_func, 256, num_blocks[2], 2)
                  .apply(group_func, 'group3', block_func, 512, num_blocks[3], 2)
                  .GlobalAvgPooling('gap')
                  # Zero-mean Gaussian, stddev 0.01 (see the docstring note).
                  .FullyConnected(
                      'linear',
                      1000,
                      kernel_initializer=tf.random_normal_initializer(
                          stddev=0.01))())
    return logits
Ejemplo n.º 7
0
def initial_convolution(image, init_channel, s_type='basic', name='init_conv0'):
    """Build the stem convolution selected by ``s_type`` followed by BatchNorm.

    Supported ``s_type`` values:
        * ``'basic'``: 3x3 convolution ``conv0``.
        * ``'imagenet'``: 7x7 stride-2 convolution ``conv0`` with identity
          activation, then 3x3 stride-2 max pooling ``pool0``.
        * ``'conv7'``: 7x7 stride-2 convolution ``conv0_7x7``.
        * ``'conv3'``: 3x3 stride-2 convolution ``conv0_3x3``.

    Args:
        image: input tensor.
        init_channel: number of filters of the convolution.
        s_type: stem variant, one of the values listed above.
        name: variable scope under which all layers are created.

    Returns:
        Output of the ``init_bn`` batch normalization applied to the stem.

    Raises:
        ValueError: if ``s_type`` is not a supported value. (``ValueError``
            subclasses ``Exception``, so callers that caught the generic
            exception raised previously keep working.)
    """
    # Validate up front so a bad argument fails before any scope is opened.
    known_types = ('basic', 'imagenet', 'conv7', 'conv3')
    if s_type not in known_types:
        raise ValueError("Unknown starting type (s_type): {}".format(s_type))

    with tf.variable_scope(name):
        if s_type == 'basic':
            net = Conv2D('conv0', image, init_channel, 3)
        elif s_type == 'imagenet':
            net = (LinearWrap(image)
                   .Conv2D('conv0', init_channel, 7, strides=2,
                           activation=tf.identity)
                   .MaxPooling('pool0', 3, strides=2, padding='same')())
        elif s_type == 'conv7':
            net = Conv2D('conv0_7x7', image, init_channel, 7, strides=2)
        else:  # 'conv3' -- the only remaining validated value
            net = Conv2D('conv0_3x3', image, init_channel, 3, strides=2)
        net = BatchNorm('init_bn', net)
    return net
Ejemplo n.º 8
0
def resnet_backbone(image, num_blocks, group_func, block_func, output_dims):
    """Build a four-group residual network with a configurable output size.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` produced by ``group_func``, global average pooling
    ``gap`` and a fully-connected layer with ``output_dims`` units and
    identity non-linearity. The last layer's name embeds the output size
    (``linear_C<output_dims>``).

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        block_func: forwarded untouched to ``group_func``.
        output_dims: number of units of the final fully-connected layer.

    Returns:
        The tensor produced by the final fully-connected layer.
    """
    # (size, stride) handed to group_func for group0..group3, in order.
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))
        for idx, (size, stride) in enumerate(group_specs):
            net = net.apply(group_func, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride)
        head_name = 'linear_C{}'.format(output_dims)
        logits = (net
                  .GlobalAvgPooling('gap')
                  .FullyConnected(head_name, output_dims, nl=tf.identity)())

    return logits
Ejemplo n.º 9
0
def resnet_backbone(image, num_blocks, grp_fun, blck_fun, nfeatures):
    """Build a residual feature extractor ending in three dense layers.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` produced by ``grp_fun``, global average pooling
    ``gap``, then fully-connected layers ``fc0`` (1000 units), ``fc1``
    (500 units) and ``linear`` (``nfeatures`` units, identity non-linearity).

    Unlike the other groups' progression, ``group3`` is built with size 256
    and stride 1; the original source kept the 512 / stride-2 variant only
    as a commented-out alternative.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to ``grp_fun`` for
            ``group0``..``group3`` respectively.
        grp_fun: invoked through ``LinearWrap.apply`` as
            ``grp_fun(tensor, name, blck_fun, size, num_blocks[i], stride)``.
        blck_fun: forwarded untouched to ``grp_fun``.
        nfeatures: number of units of the final ``linear`` layer.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    # (size, stride) handed to grp_fun for group0..group3, in order.
    group_specs = [(64, 1), (128, 2), (256, 2), (256, 1)]
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')

    with argscope(Conv2D, nl=tf.identity, use_bias=False, W_init=conv_init):
        # TODO evaluate conv depth
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))
        for idx, (size, stride) in enumerate(group_specs):
            net = net.apply(grp_fun, 'group{}'.format(idx), blck_fun,
                            size, num_blocks[idx], stride)
        logits = (net
                  .GlobalAvgPooling('gap')
                  .FullyConnected('fc0', 1000)
                  .FullyConnected('fc1', 500)
                  .FullyConnected('linear', nfeatures, nl=tf.identity)())
    return logits
Ejemplo n.º 10
0
def resnet_backbone(image, num_blocks, group_func, block_func):
    """Build a four-group residual network with a ``NUM_CLASSES``-way output.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``,
    ``group0``..``group3`` produced by ``group_func``, global average pooling
    ``gap`` and a fully-connected layer ``linear`` with ``NUM_CLASSES`` units
    and identity non-linearity. ``NUM_CLASSES`` is read from module scope.

    Args:
        image: input tensor for the first convolution.
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i], stride)``.
        block_func: forwarded untouched to ``group_func``.

    Returns:
        The tensor produced by the ``linear`` layer.
    """
    # (size, stride) handed to group_func for group0..group3, in order.
    group_specs = [(64, 1), (128, 2), (256, 2), (512, 2)]
    conv_init = tf.variance_scaling_initializer(scale=2.0, mode='fan_out')

    with argscope(Conv2D, nl=tf.identity, use_bias=False, W_init=conv_init):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))
        for idx, (size, stride) in enumerate(group_specs):
            net = net.apply(group_func, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride)
        logits = (net
                  .GlobalAvgPooling('gap')
                  .FullyConnected('linear', NUM_CLASSES, nl=tf.identity)())
    return logits
Ejemplo n.º 11
0
def residual_bottleneck_layer(name, l, out_filters, strides, data_format):
    """Build one bottleneck residual unit under the scope ``<name>.0``.

    Residual path: BatchNorm ``bn0`` + ReLU, 1x1 conv (``out_filters``
    filters, BNReLU), 3x3 conv (``out_filters`` filters, ``strides``,
    BNReLU), 1x1 conv (``4 * out_filters`` filters), BatchNorm ``bn_3``.
    Shortcut path: the input itself, or -- when its channel count differs
    from ``4 * out_filters`` -- a strided 1x1 conv ``conv_short`` followed by
    BatchNorm ``bn_short``. The two paths are added.

    NOTE(review): the shortcut is projected only on a channel mismatch; if
    ``strides`` is not 1 while the channel counts already match, the two
    paths would presumably differ spatially -- confirm callers never do this.

    Args:
        name: prefix of the variable scope.
        l: input tensor.
        out_filters: filter count of the first two convolutions; the last
            one uses four times as many.
        strides: stride of the 3x3 convolution and of the shortcut projection.
        data_format: layout spec, normalised via ``get_data_format``; the
            channel axis is 3 for ``'NHWC'`` and 1 otherwise.

    Returns:
        Sum of the residual and shortcut tensors.
    """
    data_format = get_data_format(data_format, keras_mode=False)
    channel_axis = 3 if data_format == 'NHWC' else 1
    in_channels = _get_dim(l, channel_axis)

    mid_channels = out_filters
    out_channels = mid_channels * 4
    shortcut = l

    with tf.variable_scope('{}.0'.format(name)):
        pre_activated = tf.nn.relu(BatchNorm('bn0', l))
        residual = (
            LinearWrap(pre_activated)
            .Conv2D('conv1x1_0', mid_channels, 1, activation=BNReLU)
            .Conv2D('conv3x3_1', mid_channels, 3, strides=strides,
                    activation=BNReLU)
            .Conv2D('conv1x1_2', out_channels, 1)())
        residual = BatchNorm('bn_3', residual)

        if in_channels != out_channels:
            projected = Conv2D('conv_short', shortcut, out_channels, 1,
                               strides=strides)
            shortcut = BatchNorm('bn_short', projected)
        merged = residual + shortcut
    return merged
Ejemplo n.º 12
0
    def get_logits(self, image):
        """Build the residual backbone plus optional fully-connected heads.

        The stem pads the last two axes of ``image`` by 3, applies a 7x7
        stride-2 ``VALID`` convolution ``conv0``, ``self.norm_func`` and
        ReLU, pads by 1 and applies a 3x3 stride-2 ``VALID`` max pooling.
        Four groups built by ``self.resnet_group`` and global average pooling
        ``gap`` follow. When ``self.num_output`` is not ``None``, one
        fully-connected layer per entry is stacked on top (named
        ``linear<idx>_<units>``), with ReLU between layers but not after the
        last one.

        Args:
            image: input tensor (4-D, given the four-entry padding spec).

        Returns:
            The pooled features, or the output of the last fully-connected
            layer when ``self.num_output`` is set.
        """
        num_blocks = [3, 4, 6, 3]
        group_sizes = [64, 128, 256, 512]
        group_strides = [1, 2, 2, 2]
        stem_pad = [[0, 0], [0, 0], [3, 3], [3, 3]]
        pool_pad = [[0, 0], [0, 0], [1, 1], [1, 1]]

        with argscope([Conv2D, MaxPooling, GlobalAvgPooling, BatchNorm],
                      data_format=self.data_format), \
                argscope(Conv2D, use_bias=False,
                         kernel_initializer=tf.variance_scaling_initializer(
                             scale=2.0, mode='fan_out',
                             distribution='untruncated_normal')), \
                argscope(BatchNorm, epsilon=1.001e-5):
            net = (LinearWrap(image)
                   .tf.pad(stem_pad)
                   .Conv2D('conv0', 64, 7, strides=2, padding='VALID')
                   .apply(self.norm_func, 'conv0')
                   .tf.nn.relu()
                   .tf.pad(pool_pad)
                   .MaxPooling('pool0', shape=3, stride=2, padding='VALID'))
            for idx, (size, stride) in enumerate(zip(group_sizes,
                                                     group_strides)):
                net = net.apply(self.resnet_group, 'group{}'.format(idx),
                                size, num_blocks[idx], stride)
            logits = net.GlobalAvgPooling('gap')()

            if self.num_output is None:
                return logits

            for idx, no in enumerate(self.num_output):
                logits = FullyConnected(
                    'linear{}_{}'.format(idx, no), logits, no,
                    kernel_initializer=tf.random_normal_initializer(
                        stddev=0.01))
                is_last = idx == len(self.num_output) - 1
                if not is_last:
                    # ReLU only between stacked heads, never after the last.
                    logits = tf.nn.relu(logits)
            return logits
Ejemplo n.º 13
0
    def _build_graph(self, inputs):
        """Build the segmentation graph: backbone, ASPP, decoder, loss, metrics.

        Steps, in graph-construction order:
            1. Image summaries of the input image and label.
            2. Preprocessing via ``DeeplabModel.image_preprocess`` and an
               optional transpose to NCHW.
            3. Backbone (``self._get_logits``) and
               ``atrous_spatial_pyramid_pooling`` encoder.
            4. Decoder: 1x1 conv on the low-level features, bilinear
               upsampling of the encoder output, concatenation, two 3x3
               convs and a final 1x1 conv with ``cfg.num_classes`` filters.
            5. Masked softmax cross-entropy loss plus optional L2 weight
               decay, summed into ``self.cost``.
            6. Prediction op ``predicts``, its image summary, and the mean
               IoU ops ``miou``, ``miou_update_op`` and ``miou_reset_op``.

        Args:
            inputs: pair ``(image, label)``. Both are used as 4-D tensors
                (they are fed to ``tf.summary.image``); pixels equal to
                ``cfg.ignore_label`` in ``label`` are excluded from the loss
                and the metric.

        Side effects:
            Sets ``self.batch_size``, ``self.image_size`` and ``self.cost``;
            registers summaries and the named ops listed above.
        """
        image, label = inputs
        self.batch_size = tf.shape(image)[0]
        self.image_size = tf.shape(image)[1:3]
        # when show image summary, first convert to RGB format
        image_rgb = tf.reverse(image, axis=[-1])
        # Ignored pixels are shown as 0; labels are scaled by 10 for display.
        label_shown = tf.where(tf.equal(label, cfg.ignore_label),
                               tf.zeros_like(label), label)
        label_shown = tf.cast(label_shown * 10, tf.uint8)
        tf.summary.image('input-image', image_rgb, max_outputs=3)
        tf.summary.image('input-label', label_shown, max_outputs=3)

        image = DeeplabModel.image_preprocess(image, bgr=True)

        if self.data_format == "NCHW":
            image = tf.transpose(image, [0, 3, 1, 2])

        # the backbone part
        logits, low_level_features = self._get_logits(image)
        encoder_output = atrous_spatial_pyramid_pooling(logits)

        # NOTE(review): the decoder below slices ``[1:3]`` for the spatial
        # size and concatenates on ``axis=3``, i.e. it addresses tensors as
        # NHWC -- confirm this is still right when data_format is "NCHW".
        with tf.variable_scope('decoder'):
            with tf.variable_scope('low_level_features'):
                low_level_features = Conv2D('conv_1x1',
                                            low_level_features,
                                            48,
                                            1,
                                            strides=1,
                                            activation=tf.nn.relu)
                low_level_features_size = tf.shape(low_level_features)[1:3]
            with tf.variable_scope('upsampling_logits'):
                net = tf.image.resize_bilinear(encoder_output,
                                               low_level_features_size,
                                               name='upsample_1')
                net = tf.concat([net, low_level_features],
                                axis=3,
                                name='concat')
                # Defaults for the two 3x3 convs; the last conv overrides
                # every one of them explicitly.
                with argscope(Conv2D,
                              filters=256,
                              kernel_size=3,
                              strides=1,
                              activation=tf.nn.relu):
                    net = (LinearWrap(net).Conv2D('conv_3x3_1').Conv2D(
                        'conv_3x3_2').Conv2D('conv_1x1',
                                             filters=cfg.num_classes,
                                             kernel_size=1,
                                             strides=1,
                                             activation=None)())

        # Compute softmax cross entropy loss for logits
        logits = tf.image.resize_bilinear(net,
                                          self.image_size,
                                          align_corners=True)
        label_flatten = tf.reshape(label, shape=[-1])
        # 1.0 for pixels that count, 0.0 for ignored pixels.
        mask = tf.to_float(tf.not_equal(label_flatten, cfg.ignore_label)) * 1.0
        one_hot_label = tf.one_hot(label_flatten,
                                   cfg.num_classes,
                                   on_value=1.0,
                                   off_value=0.0)

        loss = tf.losses.softmax_cross_entropy(
            one_hot_label,
            tf.reshape(logits, shape=[-1, cfg.num_classes]),
            weights=mask)
        if cfg.weight_decay > 0:
            wd_cost = regularize_cost('.*/W',
                                      l2_regularizer(cfg.weight_decay),
                                      name='l2_regularize_loss')
        else:
            wd_cost = tf.constant(0.0)

        self.cost = tf.add_n([loss, wd_cost], name='cost')

        pred = tf.argmax(tf.nn.softmax(logits), 3, name='predicts')
        pred_shown = pred * 10
        pred_shown = tf.cast(tf.expand_dims(pred_shown, -1), tf.uint8)
        # The zero branch is built from ``pred_shown`` (already uint8) so
        # both tf.where branches share a dtype whatever dtype ``label`` has.
        pred_shown = tf.where(tf.equal(label, cfg.ignore_label),
                              tf.zeros_like(pred_shown), pred_shown)
        tf.summary.image('input-preds',
                         tf.cast(pred_shown, tf.uint8),
                         max_outputs=3)

        # compute the mean_iou
        pred_flatten = tf.reshape(pred, shape=[-1])
        # Ignored pixels are remapped to class 0 here; they carry zero
        # weight through ``mask`` below.
        label_flatten = tf.where(tf.equal(label_flatten, cfg.ignore_label),
                                 tf.zeros_like(label_flatten), label_flatten)
        label_flatten = tf.cast(label_flatten, tf.int64)
        miou, miou_update_op = tf.metrics.mean_iou(label_flatten,
                                                   pred_flatten,
                                                   cfg.num_classes,
                                                   weights=mask,
                                                   name="metric_scope")
        # Only local variables under this hard-coded scope are reset.
        running_vars = tf.get_collection(tf.GraphKeys.LOCAL_VARIABLES,
                                         scope="InferenceTower/metric_scope")
        miou_reset_op = tf.variables_initializer(var_list=running_vars,
                                                 name='miou_reset_op')
        # Re-export under stable names so the ops can be fetched by name.
        miou = tf.identity(miou, name='miou')
        miou_update_op = tf.identity(miou_update_op, name='miou_update_op')

        add_moving_summary(loss, wd_cost, self.cost)
Ejemplo n.º 14
0
def resnet_backbone(image, num_blocks, group_func, block_func, class_num,
                    ASPP=False):
    """Build a dilated residual backbone with an atrous-convolution head.

    Layout: 7x7 stride-2 ``conv0`` with ``BNReLU``, 3x3 stride-2 ``pool0``
    and ``group0``..``group3`` produced by ``group_func``. On top of the
    backbone output, one 3x3 ``AtrousConv2D`` branch per rate is created
    (rates 6, 12, 18 and 24 when ``ASPP`` is truthy, rate 6 only otherwise)
    and the branches are summed. The result is bilinearly resized to
    ``image.shape[1:3]``.

    Args:
        image: input tensor; its static shape entries 1 and 2 give the
            output size.
        num_blocks: indexable; entries 0..3 are handed to ``group_func`` for
            ``group0``..``group3`` respectively.
        group_func: invoked through ``LinearWrap.apply`` as
            ``group_func(tensor, name, block_func, size, num_blocks[i],
            stride, dilation=..., stride_first=...)``.
        block_func: forwarded untouched to ``group_func``.
        class_num: output channel count of every atrous branch.
        ASPP: whether to sum four atrous branches instead of using one.

    Returns:
        The resized sum of the atrous branches.
    """
    # (size, stride, dilation, stride_first) for group0..group3, in order.
    group_specs = [
        (64, 1, 1, False),
        (128, 2, 1, True),
        (256, 2, 2, True),
        (512, 1, 4, False),
    ]

    with argscope(Conv2D, nl=tf.identity, use_bias=False,
                  W_init=variance_scaling_initializer(mode='FAN_OUT')):
        net = (LinearWrap(image)
               .Conv2D('conv0', 64, 7, stride=2, nl=BNReLU)
               .MaxPooling('pool0', shape=3, stride=2, padding='SAME'))
        for idx, (size, stride, dilation, stride_first) in enumerate(
                group_specs):
            net = net.apply(group_func, 'group{}'.format(idx), block_func,
                            size, num_blocks[idx], stride,
                            dilation=dilation, stride_first=stride_first)
        resnet_head = net()

    def aspp_branch(features, rate):
        """Return a 3x3 atrous convolution of ``features`` at ``rate``."""
        return AtrousConv2D('aspp{}_conv'.format(rate),
                            features,
                            class_num,
                            kernel_shape=3,
                            rate=rate)

    rates = (6, 12, 18, 24) if ASPP else (6,)
    # Branches are created and accumulated left to right.
    output = aspp_branch(resnet_head, rates[0])
    for rate in rates[1:]:
        output = output + aspp_branch(resnet_head, rate)

    output = tf.image.resize_bilinear(output, image.shape[1:3])
    return output