Example #1
    def get_pixel_fb_classification(self, x, anchor_stride,
                                    anchor_per_location):
        '''
        Classify every anchor at every feature-map location as
        foreground or background.
        :return:
        '''
        sh_in = x.get_shape().as_list()[-1]

        # With anchor_per_location = 3 the conv outputs 2 * 3 = 6 channels:
        # a (background, foreground) score pair for each of the 3 anchors.
        x = ops.conv_layer(x,
                           k_shape=[1, 1, sh_in, 2 * anchor_per_location],
                           stride=anchor_stride,
                           padding='VALID',
                           scope_name='rpn_class_raw',
                           trainable=True)
        logging.info('RPN - Conv Class: %s', str(x.get_shape().as_list()))

        # Reshape [batch_size, h, w, 2*anchor_per_location] to
        # [batch_size, h*w*anchor_per_location, 2]: for each image, every anchor
        # at every feature-map location gets a (background, foreground) score pair.
        self.rpn_class_logits = tf.reshape(x, [tf.shape(x)[0], -1, 2])
        # self.rpn_class_logits = tf.reshape(x, [x.get_shape().as_list()[0], -1, 2])
        logging.info('rpn_class_logits: %s',
                     self.rpn_class_logits.get_shape().as_list())

        # Apply a softmax classification to get output probabilities
        self.rpn_class_probs = tf.nn.softmax(self.rpn_class_logits,
                                             name='rpn_class_xxx')
        logging.info('rpn_class_probs: %s',
                     self.rpn_class_probs.get_shape().as_list())

        print('(RPN) Class Logits (shape) ', self.rpn_class_logits.shape)
        print('(RPN) Class Probs (shape) ', self.rpn_class_probs.shape)
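
The single tf.reshape above relies on the conv's channel ordering: a row-major reshape walks the channel axis fastest, so each consecutive pair of channels (one background score, one foreground score per anchor) lands in the trailing dimension of size 2. A minimal NumPy sketch of that shape arithmetic (the toy sizes here are assumptions, not values from the model):

    import numpy as np

    B, H, W, A = 1, 2, 2, 3    # batch, height, width, anchors per location
    x = np.arange(B * H * W * 2 * A).reshape(B, H, W, 2 * A)

    # Row-major reshape keeps each consecutive channel pair together,
    # turning [B, H, W, 2*A] into [B, H*W*A, 2].
    logits = x.reshape(B, -1, 2)
    assert logits.shape == (B, H * W * A, 2)
    # Anchor 0 at location (0, 0) maps to channels 0 and 1 of that pixel:
    assert (logits[0, 0] == x[0, 0, 0, 0:2]).all()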
Example #2
    def build(self):
        shared = ops.conv_layer(
            self.xrpn,
            k_shape=[3, 3, self.xrpn.get_shape().as_list()[-1], 512],
            stride=self.rpn_anchor_stride,
            padding='SAME',
            scope_name='rpn_conv_shared',
            trainable=True)
        shared = ops.activation(shared, 'relu', scope_name='rpn_relu_shared')
        logging.info('RPN - Shared_conv: %s',
                     str(shared.get_shape().as_list()))

        ## Classification output: binary foreground/background score for each
        ## anchor at each feature-map location
        self.get_pixel_fb_classification(shared, self.rpn_anchor_stride,
                                         len(self.rpn_anchor_ratios))

        ## Bounding box output: per-anchor refinement of the box center,
        ## height and width
        self.get_bounding_box(shared, self.rpn_anchor_stride,
                              len(self.rpn_anchor_ratios))
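
ops.conv_layer and ops.activation are helpers from the surrounding repository and are not shown in these examples. A plausible TF1-style reconstruction of conv_layer, offered purely as an assumption about its behavior (explicit kernel shape [kh, kw, in_ch, out_ch] and a symmetric stride):

    import tensorflow as tf

    def conv_layer(x, k_shape, stride=1, padding='SAME',
                   scope_name='conv', trainable=True):
        # Hypothetical reconstruction: a plain conv2d with bias, where
        # k_shape = [kernel_h, kernel_w, in_channels, out_channels].
        with tf.variable_scope(scope_name):
            w = tf.get_variable('w', shape=k_shape, trainable=trainable,
                                initializer=tf.glorot_uniform_initializer())
            b = tf.get_variable('b', shape=[k_shape[-1]], trainable=trainable,
                                initializer=tf.zeros_initializer())
            return tf.nn.conv2d(x, w, strides=[1, stride, stride, 1],
                                padding=padding) + b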
Example #3
    def get_bounding_box(self, x, anchor_stride, anchor_per_location):
        '''
        Predict a bounding-box refinement for every anchor.

        Input:
        anchor_stride: controls the number of anchor locations,
            for instance: if stride = 1, feature_map = 32x32, anchors per location = 9,
                          then number of anchors = 32 x 32 x 9
                          if stride = 2, the conv skips every other pixel in
                          both dimensions, so number of anchors = (32 x 32 x 9) / 4
        anchor_per_location: how many anchors to build per location

        Outputs:
        This module generates 4 values per anchor
        self.rpn_bbox = [batch_size, num_anchors, (dy, dx, log(dh), log(dw))]
            1. dy = refinement of the anchor's center y
            2. dx = refinement of the anchor's center x
            3. log(dh) = log-scale refinement of the anchor's height
            4. log(dw) = log-scale refinement of the anchor's width

        This is a linear regression head (no activation on the output).
        :param x: shared RPN feature map
        :return:
        '''
        sh_in = x.get_shape().as_list()[-1]

        # The conv outputs 4 * anchor_per_location channels: one
        # (dy, dx, log(dh), log(dw)) refinement per anchor.
        x = ops.conv_layer(x,
                           k_shape=[1, 1, sh_in, 4 * anchor_per_location],
                           stride=anchor_stride,
                           padding='VALID',
                           scope_name='rpn_bbox_pred',
                           trainable=True)
        logging.info('RPN - Conv Bbox: %s', str(x.get_shape().as_list()))

        # Reshape to [batch_size, num_anchors, 4]: for each anchor at each feature-map
        # position the head predicts 4 refinement values (dy, dx, log(dh), log(dw)).
        # Because anchors are dense, many predicted boxes overlap heavily; this is
        # resolved later with non-max suppression.
        self.rpn_bbox = tf.reshape(x, [tf.shape(x)[0], -1, 4])
        # self.rpn_bbox = tf.reshape(x, [x.get_shape().as_list()[0], -1, 4])
        logging.info('rpn_bbox: %s', self.rpn_bbox.get_shape().as_list())
        print('(RPN) Bbox (shape) ', self.rpn_bbox.shape)
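
To see what (dy, dx, log(dh), log(dw)) mean in practice, it helps to look at how such refinements are usually applied to an anchor. The decoding step below is not part of this example; it is a sketch of the standard Faster R-CNN convention of shifting the anchor center and rescaling its size:

    import tensorflow as tf

    def apply_box_deltas(anchors, deltas):
        # anchors: [N, (y1, x1, y2, x2)]; deltas: [N, (dy, dx, log(dh), log(dw))]
        h = anchors[:, 2] - anchors[:, 0]
        w = anchors[:, 3] - anchors[:, 1]
        cy = anchors[:, 0] + 0.5 * h
        cx = anchors[:, 1] + 0.5 * w
        # Shift the center by a fraction of the anchor size, then scale
        # height and width by exp of the log-space deltas.
        cy += deltas[:, 0] * h
        cx += deltas[:, 1] * w
        h *= tf.exp(deltas[:, 2])
        w *= tf.exp(deltas[:, 3])
        return tf.stack([cy - 0.5 * h, cx - 0.5 * w,
                         cy + 0.5 * h, cx + 0.5 * w], axis=1)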
Example #4
    def classifier_with_fpn_tf(self):

        rois_shape = self.pooled_rois.get_shape().as_list()

        # Note: we don't perform batch normalization because, as per the Matterport
        # GitHub implementation of Mask RCNN, batch norm performs poorly with
        # very small batch sizes.
        # FC Layer 1
        x = tf.concat([
            tf.stack([
                ops.activation(
                    ops.conv_layer(
                        self.pooled_rois[i],
                        k_shape=self.pool_shape + [rois_shape[-1], 1024],
                        stride=1,
                        padding='VALID',
                        scope_name='mrcnn_class_conv1'), 'relu', 'FC1_relu')
                for i in range(0, rois_shape[0])
            ])
        ],
                      axis=0)
        self.FC1 = x if self.DEBUG else []

        # FC Layer 2
        x = tf.concat([
            tf.stack([
                ops.activation(
                    ops.conv_layer(x[i],
                                   k_shape=[1, 1, 1024, 1024],
                                   stride=1,
                                   padding='VALID',
                                   scope_name='mrcnn_class_conv2'), 'relu',
                    'FC2_relu') for i in range(0, rois_shape[0])
            ])
        ],
                      axis=0)
        self.FC2 = x if self.DEBUG else []

        # Squeeze axes 2 and 3: [num_batch, num_proposals, 1, 1, 1024] -> [num_batch, num_proposals, 1024]
        shared = tf.squeeze(x, [2, 3])
        self.shared = shared if self.DEBUG else []

        with tf.variable_scope('mrcnn_class_scores'):
            mrcnn_class_logits = tf.concat([
                tf.stack([
                    ops.fc_layers(shared[i],
                                  k_shape=[1024, self.num_classes],
                                  scope_name='mrcnn_class_logits')
                    for i in range(0, rois_shape[0])
                ])
            ],
                                           axis=0)

            self.mrcnn_class_probs = tf.concat([
                tf.stack([
                    ops.activation(mrcnn_class_logits[i],
                                   'softmax',
                                   scope_name='mrcnn_class')
                    for i in range(0, rois_shape[0])
                ])
            ],
                                               axis=0)

        with tf.variable_scope('mrcnn_class_bbox'):
            x = tf.concat([
                tf.stack([
                    ops.fc_layers(shared[i],
                                  k_shape=[1024, self.num_classes * 4],
                                  scope_name='mrcnn_bbox')
                    for i in range(0, rois_shape[0])
                ])
            ],
                          axis=0)

            s = tf.shape(x)
            self.mrcnn_bbox = tf.reshape(x, [s[0], s[1], self.num_classes, 4],
                                         name="mrcnn_bbox")
Example #5
    def identity_block(self, x_in, filters, stage, block):
        '''
        Identity block: no convolution is applied to the shortcut (the layer
        used for the skip connection), so the block preserves the input shape.
        '''
        f1, f2, f3 = filters

        conv_name = 'res' + str(stage) + block + '_branch'
        bn_name = 'bn' + str(stage) + block + '_branch'
        relu_name = 'relu' + str(stage) + block + '_branch'

        x_shape = x_in.get_shape().as_list()

        ## BRANCH 2a
        x = ops.conv_layer(x_in, [1, 1, x_shape[-1], f1],
                           stride=1,
                           padding='SAME',
                           scope_name=conv_name + '2a')
        x = tf.layers.batch_normalization(x,
                                          axis=-1,
                                          name=bn_name + '2a',
                                          trainable=False)
        # x = ops.batch_norm(x, axis=[0, 1, 2], scope_name=bn_name + '2a')
        # x = BatchNorm(name=bn_name + '2a')(x, training=False)
        x = ops.activation(x, 'relu', relu_name + '2a')
        logging.info('%s: %s', str(conv_name + '2a'),
                     str(x.get_shape().as_list()))

        ## BRANCH 2b
        x = ops.conv_layer(x, [3, 3, f1, f2],
                           stride=1,
                           padding='SAME',
                           scope_name=conv_name + '2b')
        x = tf.layers.batch_normalization(x,
                                          axis=-1,
                                          name=bn_name + '2b',
                                          trainable=False)
        # x = ops.batch_norm(x, axis=[0, 1, 2], scope_name=bn_name + '2b')
        # x = BatchNorm(name=bn_name + '2b')(x, training=False)
        x = ops.activation(x, 'relu', relu_name + '2b')
        logging.info('%s: %s', str(conv_name + '2b'),
                     str(x.get_shape().as_list()))

        ## BRANCH 2c
        x = ops.conv_layer(x, [1, 1, f2, f3],
                           stride=1,
                           padding='SAME',
                           scope_name=conv_name + '2c')
        x = tf.layers.batch_normalization(x,
                                          axis=-1,
                                          name=bn_name + '2c',
                                          trainable=False)
        # x = ops.batch_norm(x, axis=[0, 1, 2], scope_name=bn_name + '2c')
        # x = BatchNorm(name=bn_name + '2c')(x, training=False)
        logging.info('%s: %s', str(conv_name + '2c'),
                     str(x.get_shape().as_list()))

        ## Add
        x = x + x_in
        x = ops.activation(x, 'relu', relu_name + '_out')
        logging.info('%s: %s', str(relu_name + '_out'),
                     str(x.get_shape().as_list()))

        return x
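
Because every branch uses stride 1 with SAME padding and the result is added element-wise to x_in, the block only works when f3 equals the input's channel depth, and it preserves the input shape exactly. A usage sketch, where model is a hypothetical instance of this class and the stage-4 map size is an assumption:

    import tensorflow as tf

    x_in = tf.zeros([1, 32, 32, 1024])              # hypothetical stage-4 map
    x_out = model.identity_block(x_in, filters=[256, 256, 1024],
                                 stage=4, block='b')  # f3 == input depth
    assert x_out.get_shape().as_list() == [1, 32, 32, 1024]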
Example #6
    def fpn_top_down_graph(self):
        '''
        Feature Pyramid Network (top-down pathway): detecting objects at
        different scales is difficult, as well as time consuming and memory
        intensive. C1..C5 are the feature maps captured at each bottom-up
        stage, each one further down-sampled; they are used to build the
        feature pyramid. P2, P3, P4, P5, P6 are the feature map layers used
        for prediction.
        '''
        logging.info(
            'Initiating FPN TOP-DOWN .................................')

        # Feature Map 1
        M5 = ops.conv_layer(
            self.C5, [1, 1, self.C5.get_shape().as_list()[-1], 256],
            stride=1,
            padding='SAME',
            scope_name='fpn_c5p5',
            trainable=True)  # to reduce the channel depth
        logging.info('FPN - M5: %s', str(M5.get_shape().as_list()))

        # Feature Map 2
        m4_c = ops.conv_layer(
            self.C4, [1, 1, self.C4.get_shape().as_list()[-1], 256],
            stride=1,
            padding='SAME',
            scope_name='fpn_c4p4',
            trainable=True)
        m4_up = KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(M5)
        M4 = KL.Add(name="fpn_p4add")([m4_up, m4_c])
        logging.info('FPN - M4: %s', str(M4.get_shape().as_list()))

        # Feature Map 3
        m3_c = ops.conv_layer(
            self.C3, [1, 1, self.C3.get_shape().as_list()[-1], 256],
            stride=1,
            padding='SAME',
            scope_name='fpn_c3p3',
            trainable=True)
        m3_up = KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(M4)
        M3 = KL.Add(name="fpn_p3add")([m3_up, m3_c])
        logging.info('FPN - M3: %s', str(M3.get_shape().as_list()))

        # Feature Map 4
        m2_c = ops.conv_layer(
            self.C2, [1, 1, self.C2.get_shape().as_list()[-1], 256],
            stride=1,
            padding='SAME',
            scope_name='fpn_c2p2',
            trainable=True)
        m2_up = KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(M3)
        M2 = KL.Add(name="fpn_p2add")([m2_up, m2_c])
        logging.info('FPN - M2: %s', str(M2.get_shape().as_list()))

        #### CREATE THE FEATURE MAP FOR PREDICTION
        self.P2 = ops.conv_layer(M2, [3, 3, 256, 256],
                                 stride=1,
                                 padding='SAME',
                                 scope_name='fpn_p2',
                                 trainable=True)
        self.P3 = ops.conv_layer(M3, [3, 3, 256, 256],
                                 stride=1,
                                 padding='SAME',
                                 scope_name='fpn_p3',
                                 trainable=True)
        self.P4 = ops.conv_layer(M4, [3, 3, 256, 256],
                                 stride=1,
                                 padding='SAME',
                                 scope_name='fpn_p4',
                                 trainable=True)
        self.P5 = ops.conv_layer(M5, [3, 3, 256, 256],
                                 stride=1,
                                 padding='SAME',
                                 scope_name='fpn_p5',
                                 trainable=True)

        self.P6 = tf.layers.max_pooling2d(self.P5,
                                          pool_size=1,
                                          strides=2,
                                          padding='SAME',
                                          name='fpn_p6')

        logging.info('FPN - P2 = %s, P3 = %s, P4 = %s, P5 = %s, P6 = %s:',
                     str(self.P2.get_shape().as_list()),
                     str(self.P3.get_shape().as_list()),
                     str(self.P4.get_shape().as_list()),
                     str(self.P5.get_shape().as_list()),
                     str(self.P6.get_shape().as_list()))

        print('(FPN) P2: (shape) ', self.P2.shape)
        print('(FPN) P3: (shape) ', self.P3.shape)
        print('(FPN) P4: (shape) ', self.P4.shape)
        print('(FPN) P5: (shape) ', self.P5.shape)
        print('(FPN) P6: (shape) ', self.P6.shape)
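
The upsample-and-add merge done above with Keras layers (KL.UpSampling2D followed by KL.Add) can also be written in plain TF1 ops. A minimal equivalent sketch, assuming nearest-neighbour upsampling (KL.UpSampling2D's default) and laterals already projected to 256 channels:

    import tensorflow as tf

    def fpn_merge(top, lateral):
        # Upsample the coarser map to the lateral map's spatial size, then
        # add the 1x1-projected lateral connection element-wise.
        h, w = tf.shape(lateral)[1], tf.shape(lateral)[2]
        up = tf.image.resize_nearest_neighbor(top, [h, w])
        return up + lateral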
Example #7
    def fpn_bottom_up_graph(self):
        '''
        Bottom-up pathway: a ResNet-50/101 backbone. At the end of every
        stage we capture the feature map (C2, C3, C4, C5) so that the
        top-down FPN pathway can build the feature pyramid from them.

        Uses:
        self.input_image: the input image tensor
        self.stage_5: whether to build the optional fifth ResNet stage
        :return:
        '''
        assert self.resnet_model in ["resnet50", "resnet101"]

        h, w = self.conf.IMAGE_SHAPE[:2]
        logging.info('Image height = %s, width = %s ................', str(h),
                     str(w))
        if h % 2**6 != 0 or w % 2**6 != 0:
            raise Exception(
                "Image size must be divisible by 2 at least 6 times "
                "to avoid fractions when downscaling and upscaling. "
                "For example, use 256, 320, 384, 448, 512, ... etc.")

        logging.info(
            'Initiating FPN BOTTOM-UP .................................')
        x = tf.pad(self.input_image, paddings=[[0, 0], [3, 3], [3, 3], [0, 0]])
        logging.info('Zero_padded: %s', str(x.get_shape().as_list()))

        # STAGE 1
        logging.info('STAGE 1 ...........................')
        x = ops.conv_layer(x, [7, 7, 3, 64],
                           stride=2,
                           padding='VALID',
                           scope_name='conv1')
        x = tf.layers.batch_normalization(x,
                                          axis=-1,
                                          name='bn_conv1',
                                          trainable=False)
        # x = BatchNorm(name='bn_conv1')(x, training=False)
        # x = ops.batch_norm(x, axis=[0, 1, 2], scope_name='bn_conv1')
        x = ops.activation(x, 'relu', 'relu_conv1')
        logging.info('Conv2D: %s', str(x.get_shape().as_list()))
        x = tf.layers.max_pooling2d(x, pool_size=3, strides=2, padding="SAME")
        logging.info('MaxPool2d: %s', str(x.get_shape().as_list()))
        # self.C1 = x

        # STAGE 2
        logging.info('STAGE 2 ...........................')
        x = self.conv_block(x,
                            filters=[64, 64, 256],
                            strides=1,
                            stage=2,
                            block='a')
        x = self.identity_block(x, filters=[64, 64, 256], stage=2, block='b')
        x = self.identity_block(x, filters=[64, 64, 256], stage=2, block='c')
        self.C2 = x

        # STAGE 3
        logging.info('STAGE 3 ...........................')
        x = self.conv_block(x,
                            filters=[128, 128, 512],
                            strides=2,
                            stage=3,
                            block='a')
        x = self.identity_block(x, filters=[128, 128, 512], stage=3, block='b')
        x = self.identity_block(x, filters=[128, 128, 512], stage=3, block='c')
        x = self.identity_block(x, filters=[128, 128, 512], stage=3, block='d')
        self.C3 = x

        # STAGE 4
        logging.info('STAGE 4 ...........................')
        x = self.conv_block(x,
                            filters=[256, 256, 1024],
                            strides=2,
                            stage=4,
                            block='a')
        block_count = {"resnet50": 5, "resnet101": 22}[self.resnet_model]
        for i in range(block_count):
            x = self.identity_block(x,
                                    filters=[256, 256, 1024],
                                    stage=4,
                                    block=chr(98 + i))  # 'b', 'c', 'd', ...
        self.C4 = x

        # STAGE 5
        logging.info('STAGE 5 ...........................')
        if self.stage_5:
            x = self.conv_block(x,
                                filters=[512, 512, 2048],
                                strides=2,
                                stage=5,
                                block='a')
            x = self.identity_block(x,
                                    filters=[512, 512, 2048],
                                    stage=5,
                                    block='b')
            x = self.identity_block(x,
                                    filters=[512, 512, 2048],
                                    stage=5,
                                    block='c')
            self.C5 = x
        else:
            self.C5 = None

        # print('(FPN) C1: (shape) ', self.C1.shape)
        print('(FPN) C2: (shape) ', self.C2.shape)
        print('(FPN) C3: (shape) ', self.C3.shape)
        print('(FPN) C4: (shape) ', self.C4.shape)
        if self.C5 is not None:
            print('(FPN) C5: (shape) ', self.C5.shape)
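
The 2**6 divisibility check at the top of this method comes from counting the stride-2 reductions on the way to P6: the 7x7 conv, the stage-1 max pool, stages 3, 4 and 5, and finally the P6 max pool each halve the spatial size, six halvings in total. A quick sanity check of the sizes the error message suggests:

    # Six stride-2 reductions mean the input must be divisible by 2**6 = 64:
    for size in (256, 320, 384, 448, 512):
        assert size % 2**6 == 0, size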