Esempio n. 1
0
    def test(self):
        """Smoke-test ``roi_encoder`` and ``roi_decoder`` on synthetic ROIs.

        ROIs are built from the first four columns of ``self.gt_boxes`` plus
        integer jitter, followed by ``self.N + 2`` random boxes (presumably
        background samples -- confirm).  The ROIs are encoded into labels and
        box targets, the targets are decoded again using random class scores,
        and every intermediate result is printed and stored on ``self``.
        Nothing is asserted; the method only checks that the pipeline runs.
        """
        print(self.gt_boxes)
        # Entering the session installs it as the default session, which the
        # ``.eval()`` calls below rely on; the session object itself is unused.
        with tf.Session():
            # Jitter each ground-truth box by an integer offset in [-3, 3).
            jittered = self.gt_boxes[:, :4]
            jittered = jittered + np.random.randint(-3, 3, (self.N, 4))

            # Random extra boxes: two coordinates in [0, 60) followed by the
            # same coordinates shifted by an offset in [20, 30).
            corners = np.random.randint(0, 60, (self.N + 2, 2))
            extents = np.random.randint(20, 30, (self.N + 2, 2))
            bgs = np.hstack((corners, corners + extents)).astype(np.float32)

            self.rois = np.vstack((jittered, bgs))
            print(self.rois)
            print(self.gt_boxes)

            labels, bbox_targets, bbox_inside_weights = roi_encoder(
                self.gt_boxes, self.rois, self.num_classes)
            self.labels = labels.eval()
            self.bbox_targets = bbox_targets.eval()
            self.bbox_inside_weights = bbox_inside_weights.eval()

            print(self.labels.shape)
            print(self.labels)
            print(self.bbox_targets.shape)
            print(self.bbox_inside_weights.shape)
            print('learning targets:')
            for i in range(self.labels.size):
                # Each row keeps four target columns per class; pick the
                # four-wide slice that belongs to this row's label.
                s = int(4 * self.labels[i])
                e = s + 4
                print(self.labels[i], self.bbox_targets[i, s:e],
                      self.bbox_inside_weights[i, s:e])

            # Random class scores stand in for a classifier's output.
            rand_scores = np.random.rand(self.rois.shape[0], self.num_classes)
            rand_scores = rand_scores.astype(np.float32)
            final_boxes, classes, scores = roi_decoder(
                self.bbox_targets, rand_scores, self.rois, 100, 100)
            self.final_boxes = final_boxes.eval()
            self.scores = scores.eval()
            self.classes = classes.eval()
            print('rois:')
            print(self.rois)
            print('final_boxes:')
            print(self.final_boxes)
Esempio n. 2
0
    def test(self):
        """Run ``roi_encoder`` then ``roi_decoder`` on synthetic ROIs and print.

        The ROIs are the first four columns of ``self.gt_boxes`` with integer
        jitter, stacked on top of ``self.N + 2`` random boxes (presumably
        background samples -- confirm).  Encoded labels/targets and the decoded
        boxes, scores and classes are evaluated, stored on ``self`` and
        printed.  There are no assertions; this is a run-through check only.
        """
        print(self.gt_boxes)
        # The ``with`` block makes this session the default one, which is what
        # the ``.eval()`` calls below use; no explicit handle is needed.
        with tf.Session():
            # Ground-truth boxes shifted by an integer offset in [-3, 3).
            rois = self.gt_boxes[:, :4]
            rois = rois + np.random.randint(-3, 3, (self.N, 4))

            # Extra random boxes: two coordinates in [0, 60), then the same
            # coordinates moved by an offset in [20, 30).
            bgs = np.random.randint(0, 60, (self.N + 2, 2))
            bgs = np.hstack(
                (bgs, bgs + np.random.randint(20, 30, (self.N + 2, 2))))
            bgs = bgs.astype(np.float32)

            rois = np.vstack((rois, bgs))
            self.rois = rois
            print(rois)
            print(self.gt_boxes)

            labels, bbox_targets, bbox_inside_weights = roi_encoder(
                self.gt_boxes, self.rois, self.num_classes)
            self.labels = labels.eval()
            self.bbox_targets = bbox_targets.eval()
            self.bbox_inside_weights = bbox_inside_weights.eval()

            print(self.labels.shape)
            print(self.labels)
            print(self.bbox_targets.shape)
            print(self.bbox_inside_weights.shape)
            print('learning targets:')
            for i in range(self.labels.size):
                # Targets are laid out four columns per class, so the slice
                # for this row starts at 4 * label.
                s = int(4 * self.labels[i])
                e = s + 4
                print(self.labels[i], self.bbox_targets[i, s:e],
                      self.bbox_inside_weights[i, s:e])

            # Random scores replace a real classifier output for decoding.
            input_scores = np.random.rand(self.rois.shape[0], self.num_classes)
            input_scores = input_scores.astype(np.float32)
            final_boxes, classes, scores = roi_decoder(
                self.bbox_targets, input_scores, self.rois, 100, 100)
            self.final_boxes = final_boxes.eval()
            self.scores = scores.eval()
            self.classes = classes.eval()
            print('rois:')
            print(self.rois)
            print('final_boxes:')
            print(self.final_boxes)
Esempio n. 3
0
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False):
    """Attach an RPN, a box-refinement and a mask head to each pyramid level.

    Levels are visited from P5 down to P2.  For every level the function
      1. runs the RPN convolutions (box regression and class logits),
      2. decodes them against the level's anchors and samples ROIs,
      3. crops 7x7 features per ROI and feeds them to the refinement head,
      4. decodes the refinement output into final boxes,
      5. crops 14x14 features per ROI and feeds them to the mask head.

    Args:
      pyramid: mapping from 'P2'..'P5' to that level's feature tensor.
      ih, iw: forwarded unchanged to `anchor_decoder` and `roi_decoder`
        (presumably the input image height and width -- confirm).
      num_classes: sizes the refinement outputs (`num_classes` class logits,
        `num_classes * 4` box values) and the mask output
        (`num_classes * 2` channels).
      base_anchors: sizes the RPN outputs (`base_anchors * 4` box channels,
        `base_anchors * 2` class channels).
      is_training: enables dropout in the refinement head; when false the
        mask head crops with the decoded refined boxes instead of the
        sampled RPN ROIs.

    Returns:
      A dict keyed by level name.  Each value is a dict with the keys
      'rpn', 'roi', 'refined' and 'mask'.
    """
    outputs = {}
    with slim.arg_scope(_extra_conv_arg_scope(activation_fn=None)):
        # Coarsest level first: P5, P4, P3, P2.
        for level in range(5, 1, -1):
            name = 'P%d' % level
            stride = 2**level
            features = pyramid[name]
            level_out = {}
            outputs[name] = level_out

            ## rpn head
            shape = tf.shape(features)
            height, width = shape[1], shape[2]
            rpn = slim.conv2d(
                features, 256, [3, 3],
                stride=1, activation_fn=tf.nn.relu, scope='%s/rpn' % name)
            rpn_box = slim.conv2d(
                rpn, base_anchors * 4, [1, 1],
                stride=1, scope='%s/rpn/box' % name)
            rpn_cls = slim.conv2d(
                rpn, base_anchors * 2, [1, 1],
                stride=1, scope='%s/rpn/cls' % name)
            level_out['rpn'] = {'box': rpn_box, 'cls': rpn_cls}

            ## decode, sample and crop
            all_anchors = gen_all_anchors(height, width, stride)
            # Softmax over the trailing pair of logits of each anchor, then
            # fold the anchor axis back into the channel axis.
            paired_logits = tf.reshape(
                rpn_cls, [1, shape[1], shape[2], base_anchors, 2])
            paired_probs = tf.nn.softmax(paired_logits)
            cls_prob = tf.reshape(
                paired_probs, [1, shape[1], shape[2], base_anchors * 2])
            rois, classes, scores = anchor_decoder(
                rpn_box, cls_prob, all_anchors, ih, iw)
            rois, scores = sample_rpn_outputs(rois, scores)
            cropped = ROIAlign(
                features, rois, False,
                stride=stride, pooled_height=7, pooled_width=7)

            # rois of an image, sampled from rpn output
            level_out['roi'] = {
                'box': rois,
                'scores': scores,
                'cropped': cropped,
            }

            ## refine head: two 1024-unit layers, each followed by dropout
            hidden = slim.flatten(cropped)
            hidden = slim.fully_connected(
                hidden, 1024, activation_fn=tf.nn.relu)
            hidden = slim.dropout(
                hidden, keep_prob=0.75, is_training=is_training)
            hidden = slim.fully_connected(
                hidden, 1024, activation_fn=tf.nn.relu)
            hidden = slim.dropout(
                hidden, keep_prob=0.75, is_training=is_training)
            cls2 = slim.fully_connected(
                hidden, num_classes, activation_fn=None)
            refined_box = slim.fully_connected(
                hidden, num_classes * 4, activation_fn=None)
            level_out['refined'] = {'box': refined_box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = roi_decoder(
                refined_box, cls2_prob, rois, ih, iw)

            # for testing, maskrcnn takes refined boxes as inputs
            mask_rois = rois if is_training else final_boxes

            ## mask head
            m = ROIAlign(
                features, mask_rois, False,
                stride=stride, pooled_height=14, pooled_width=14)
            level_out['roi']['cropped_mask'] = m
            for _ in range(4):
                m = slim.conv2d(
                    m, 256, [3, 3],
                    stride=1, padding='SAME', activation_fn=tf.nn.relu)
            m = slim.conv2d_transpose(
                m, 256, [2, 2],
                stride=2, padding='VALID', activation_fn=tf.nn.relu)
            m = slim.conv2d(
                m, num_classes * 2, [1, 1],
                stride=1, padding='VALID', activation_fn=None)

            # add a mask, given the predicted boxes and classes
            level_out['mask'] = {
                'mask': m,
                'classes': classes,
                'scores': scores,
            }

    return outputs
Esempio n. 4
0
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
    """Build the RPN, box-refinement and mask heads on top of the pyramid.

    Steps:
      1. For each level P5..P2, run the RPN convolutions and generate the
         level's anchors.
      2. Concatenate the RPN outputs of all levels, apply softmax and decode
         them into ROIs with `anchor_decoder`.
      3. Sample ROIs with `sample_rpn_outputs_with_gt`.
      4. Assign ROIs to pyramid levels, crop 14x14 features per ROI,
         max-pool them and run the refinement head.
      5. Decode the refinement output with `roi_decoder`.
      6. Run the mask head on the cropped features.

    Args:
      pyramid: mapping from 'P2'..'P5' to that level's feature tensor.
      ih, iw: forwarded unchanged to `anchor_decoder` and `roi_decoder`
        (presumably the input image height and width -- confirm).
      num_classes: sizes the refinement outputs (`num_classes` class logits,
        `num_classes * 4` box values) and the mask output channels.
      base_anchors: sizes the RPN outputs (`base_anchors * 4` box channels,
        `base_anchors * 2` class channels).
      is_training: enables dropout and is forwarded to the ROI sampler.
      gt_boxes: forwarded to `sample_rpn_outputs_with_gt`.

    Returns:
      A dict with the keys 'rpn' (per-level entries plus the concatenated
      'box', 'cls' and 'anchor'), 'roi', 'refined' and 'mask'.
    """

    def crop_per_level(rois_per_level, batch_inds_per_level):
        """ROIAlign every level's ROIs (P5 down to P2) and stack the crops."""
        crops = []
        for level in range(5, 1, -1):
            # The per-level lists are ordered P2..P5, hence `level - 2`.
            crops.append(
                ROIAlign(pyramid['P%d' % level],
                         rois_per_level[level - 2],
                         batch_inds_per_level[level - 2],
                         stride=2 ** level,
                         pooled_height=14, pooled_width=14))
        return tf.concat(values=crops, axis=0)

    outputs = {}
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    my_sigmoid = None
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu, scope='%s/rpn' % p)
                box = slim.conv2d(
                    rpn, base_anchors * 4, [1, 1], stride=1,
                    scope='%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                    activation_fn=my_sigmoid)
                cls = slim.conv2d(
                    rpn, base_anchors * 2, [1, 1], stride=1,
                    scope='%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

                anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** i]
                all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
                outputs['rpn'][p] = {'box': box, 'cls': cls, 'anchor': all_anchors}

            ## gather the RPN outputs of all levels, P5 first
            rpn_boxes = [tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                         for p in range(5, 1, -1)]
            rpn_clses = [tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                         for p in range(5, 1, -1)]
            rpn_anchors = [tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                           for p in range(5, 1, -1)]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, _, scores = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
            # The sampler also returns mask-specific ROIs, scores and batch
            # indices; this function does not use them.
            rois, scores, batch_inds, _mask_rois, _mask_scores, _mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes,
                                           is_training=is_training)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            assigned_rois, assigned_batch_inds, _ = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
            cropped_rois = crop_per_level(assigned_rois, assigned_batch_inds)

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            cls2 = slim.fully_connected(
                refine, num_classes, activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
            box = slim.fully_connected(
                refine, num_classes * 4, activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, rois, ih, iw)

            ## mask head input
            if is_training:
                mask_features = cropped_rois
            else:
                # Crop again for the mask head at test time.  The crops are
                # collected in a fresh list: `cropped_rois` is already a
                # concatenated Tensor here, so appending to it raised
                # AttributeError.
                # NOTE(review): re-assigning the refined `final_boxes` to
                # pyramid levels was disabled in the original code, so these
                # crops still use the RPN-sampled ROIs. Re-run `assign_boxes`
                # on the refined boxes here once that path is validated.
                mask_features = crop_per_level(assigned_rois, assigned_batch_inds)

            ## mask head
            m = mask_features
            for _ in range(4):
                m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
Esempio n. 5
0
def build_heads(pyramid,
                py_scope,
                slim_scope,
                image_height,
                image_width,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the RPN, RCNN (class/box) and mask heads on top of the pyramid.

    Steps:
      1. For each level P5..P2, run the RPN convolutions and generate the
         level's anchors.
      2. Concatenate the RPN outputs of all levels, apply softmax and decode
         them into proposals with `anchor_decoder`.
      3. Sample proposals (together with `gt_boxes` when training).
      4. Assign the sampled ROIs to pyramid levels, crop 14x14 features per
         ROI and run the RCNN head.
      5. Decode the RCNN output with `roi_decoder`.
      6. Choose the mask head's ROIs: the sampler's mask ROIs when training,
         sampled RCNN boxes otherwise; crop 14x14 features for them.
      7. Run the mask head.

    Args:
      pyramid: mapping from 'P2'..'P5' to that level's feature tensor.
      py_scope: name or scope passed to `tf.name_scope`.
      slim_scope: argument scope passed to `slim.arg_scope`.
      image_height, image_width: forwarded to `anchor_decoder`, `ROIAlign`
        and `roi_decoder`.
      num_classes: sizes the RCNN outputs (`num_classes` class logits,
        `num_classes * 4` box values) and the mask output channels.
      base_anchors: sizes the RPN outputs (`base_anchors * 4` box channels,
        `base_anchors * 2` class channels).
      is_training: selects the training or testing ROI paths and enables
        dropout.  Any truthy value selects the training path.
      gt_boxes: forwarded to `sample_rpn_outputs_with_gt` when training.

    Returns:
      A tuple `(outputs, py_scope, slim_scope)`.  `outputs` is a dict holding
      the per-level RPN entries under 'rpn' and flat 'rpn_*', 'rcnn_*' and
      'mask_*' entries; the two scopes are the values yielded by
      `tf.name_scope` and `slim.arg_scope`.
    """

    def crop_assigned(assigned_rois, assigned_batch_inds):
        """Crop 14x14 features per level (P5 down to P2).

        Returns the concatenated crops and the ROIs concatenated in the same
        order, since grouping by level changes the ROI order.
        """
        crops = []
        ordered_rois = []
        for level in range(5, 1, -1):
            # The per-level lists are ordered P2..P5, hence `level - 2`.
            level_rois = assigned_rois[level - 2]
            crop, _ = ROIAlign(
                pyramid['P%d' % level],
                level_rois,
                assigned_batch_inds[level - 2],
                image_height,
                image_width,
                stride=2**level,
                pooled_height=14,
                pooled_width=14)
            crops.append(crop)
            ordered_rois.append(level_rois)
        return (tf.concat(values=crops, axis=0),
                tf.concat(values=ordered_rois, axis=0))

    outputs = {}
    with tf.name_scope(py_scope) as py_scope:
        with slim.arg_scope(slim_scope) as slim_scope:
            ### RPN head, one per pyramid level.
            # Each level uses stride 2**i and the single anchor scale 8.
            # `box` and `cls` are raw convolution outputs; `cls` is taken
            # before softmax.
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='pyramid/%s/rpn' % p)
                box = slim.conv2d(
                    rpn, base_anchors * 4, [1, 1],
                    stride=1,
                    scope='pyramid/%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                    activation_fn=None,
                    normalizer_fn=None)
                cls = slim.conv2d(
                    rpn, base_anchors * 2, [1, 1],
                    stride=1,
                    scope='pyramid/%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                    activation_fn=None,
                    normalizer_fn=None)

                anchor_scales = [8]  # previously [2**(i-2), 2**(i-1), 2**i]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors,
                    'shape': shape
                }

            ### gather boxes, clses, anchors from all pyramid layers
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                for p in range(5, 1, -1)
            ]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            ### softmax to get probability
            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            ### decode anchors and box regression values into proposed bounding boxes
            rpn_final_boxes, rpn_final_clses, rpn_final_scores = anchor_decoder(
                rpn_boxes, rpn_probs, rpn_anchors, image_height, image_width)

            outputs['rpn_boxes'] = rpn_boxes
            outputs['rpn_clses'] = rpn_clses
            outputs['rpn_anchor'] = rpn_anchors
            outputs['rpn_final_boxes'] = rpn_final_boxes
            outputs['rpn_final_clses'] = rpn_final_clses
            outputs['rpn_final_scores'] = rpn_final_scores

            # Plain truthiness here and in the mask branch below, so that both
            # branches always agree on the mode.  The original tested
            # `is True` here only, which let a truthy non-True flag take the
            # testing path here and the training path later.
            if is_training:
                ### for training, rcnn and maskrcnn take rpn proposed bounding boxes as inputs
                (rpn_rois_to_rcnn, _rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn,
                 rpn_rois_to_mask, _rpn_scores_to_mask, rpn_batch_inds_to_mask) = \
                    sample_rpn_outputs_with_gt(rpn_final_boxes,
                                               rpn_final_scores,
                                               gt_boxes,
                                               is_training=is_training,
                                               only_positive=False)
            else:
                ### for testing, only rcnn takes rpn boxes as inputs. maskrcnn takes rcnn boxes as inputs
                rpn_rois_to_rcnn, _rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn = \
                    sample_rpn_outputs(rpn_final_boxes,
                                       rpn_final_scores,
                                       only_positive=False)

            ### assign pyramid layer indexes to the rcnn network's ROIs
            rcnn_assigned_rois, rcnn_assigned_batch_inds, _ = assign_boxes(
                rpn_rois_to_rcnn,
                [rpn_rois_to_rcnn, rpn_batch_inds_to_rcnn],
                [2, 3, 4, 5])

            ### crop features from the pyramid; ROIs are reordered to match
            rcnn_cropped_features, rcnn_ordered_rois = crop_assigned(
                rcnn_assigned_rois, rcnn_assigned_batch_inds)

            ### RCNN head.
            # rcnn_boxes: `num_classes * 4` box regression values per ROI.
            # rcnn_clses: `num_classes` class values per ROI, before softmax.
            rcnn = slim.max_pool2d(rcnn_cropped_features, [3, 3],
                                   stride=2,
                                   padding='SAME')
            rcnn = slim.flatten(rcnn)
            rcnn = slim.fully_connected(
                rcnn,
                1024,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)
            rcnn = slim.fully_connected(
                rcnn,
                1024,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_1")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)
            rcnn_clses = slim.fully_connected(
                rcnn,
                num_classes,
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_2")
            rcnn_boxes = slim.fully_connected(
                rcnn,
                num_classes * 4,
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_3")

            ### softmax to get probability
            rcnn_scores = tf.nn.softmax(rcnn_clses)

            ### decode ROIs and box regression values into bounding boxes
            rcnn_final_boxes, rcnn_final_classes, rcnn_final_scores = roi_decoder(
                rcnn_boxes, rcnn_scores, rcnn_ordered_rois, image_height,
                image_width)

            outputs['rcnn_ordered_rois'] = rcnn_ordered_rois
            outputs['rcnn_cropped_features'] = rcnn_cropped_features
            tf.add_to_collection('__CROPPED__', rcnn_cropped_features)
            outputs['rcnn_boxes'] = rcnn_boxes
            outputs['rcnn_clses'] = rcnn_clses
            outputs['rcnn_scores'] = rcnn_scores
            outputs['rcnn_final_boxes'] = rcnn_final_boxes
            outputs['rcnn_final_clses'] = rcnn_final_classes
            outputs['rcnn_final_scores'] = rcnn_final_scores

            if is_training:
                ### assign pyramid layer indexes to the mask network's ROIs
                mask_assigned_rois, mask_assigned_batch_inds, _ = assign_boxes(
                    rpn_rois_to_mask,
                    [rpn_rois_to_mask, rpn_batch_inds_to_mask],
                    [2, 3, 4, 5])

                ### crop features from the pyramid for the mask network
                mask_cropped_features, mask_ordered_rois = crop_assigned(
                    mask_assigned_rois, mask_assigned_batch_inds)
            else:
                ### for testing, mask network takes rcnn boxes as inputs
                (rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask,
                 rcnn_batch_inds_to_mask) = sample_rcnn_outputs(
                     rcnn_final_boxes,
                     rcnn_final_classes,
                     rcnn_scores,
                     class_agnostic=False)
                (mask_assigned_rois, mask_assigned_clses, mask_assigned_scores,
                 mask_assigned_batch_inds, _) = assign_boxes(
                     rcnn_rois_to_mask,
                     [rcnn_rois_to_mask, rcnn_clses_to_mask,
                      rcnn_scores_to_mask, rcnn_batch_inds_to_mask],
                     [2, 3, 4, 5])

                mask_cropped_features, mask_ordered_rois = crop_assigned(
                    mask_assigned_rois, mask_assigned_batch_inds)

                # Reorder classes and scores the same way as the ROIs
                # (P5 first, P2 last).
                mask_ordered_clses = tf.concat(
                    values=[mask_assigned_clses[i - 2]
                            for i in range(5, 1, -1)],
                    axis=0)
                mask_ordered_scores = tf.concat(
                    values=[mask_assigned_scores[i - 2]
                            for i in range(5, 1, -1)],
                    axis=0)

                outputs['mask_final_clses'] = mask_ordered_clses
                outputs['mask_final_scores'] = mask_ordered_scores

            ### Mask head.
            # Four 3x3 convolutions, a stride-2 transposed convolution and a
            # 1x1 convolution producing `num_classes` channels.  `m` is the
            # value before sigmoid.
            m = mask_cropped_features
            for conv_scope in ("pyramid/Conv", "pyramid/Conv_1",
                               "pyramid/Conv_2", "pyramid/Conv_3"):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu,
                                scope=conv_scope)
            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu,
                                      scope="pyramid/Conv2d_transpose")
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None,
                            normalizer_fn=None,
                            scope="pyramid/Conv_4")

            outputs['mask_ordered_rois'] = mask_ordered_rois
            outputs['mask_cropped_features'] = mask_cropped_features
            outputs['mask_mask'] = m
            outputs['mask_final_mask'] = tf.nn.sigmoid(m)

            return outputs, py_scope, slim_scope
Esempio n. 6
0
def build_heads(pyramid,
                ih,
                iw,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the RPN, box-refinement and mask heads on a feature pyramid.

    Graph construction order:
      1. For each level P5..P2: a 3x3 RPN conv, 1x1 box / cls convs and the
         anchors of that level.
      2. Concatenate the per-level RPN outputs and decode them into rois.
      3. Sample rois with `sample_rpn_outputs_with_gt`.
      4. Assign rois to pyramid levels and crop 14x14 features (`ROIAlign`).
      5. Refine head (two 1024-unit fully connected layers), decoded into
         final boxes with `roi_decoder`.
      6. Mask head on the cropped features.

    No losses are built here.

    Args:
      pyramid: mapping indexed with the keys 'P2'..'P5' (feature maps).
      ih, iw: forwarded to `anchor_decoder` and `roi_decoder`
        (presumably the input image height / width -- confirm with caller).
      num_classes: output size of the classification layer; the box layer
        has `num_classes * 4` outputs and the mask conv `num_classes`
        channels.
      base_anchors: the RPN box / cls convs have `base_anchors * 4` /
        `base_anchors * 2` output channels.
      is_training: Python truth value; enables dropout, is forwarded to the
        sampler, and selects whether the inference-only crop pass is built.
      gt_boxes: forwarded to `sample_rpn_outputs_with_gt`.

    Returns:
      dict with the keys 'rpn', 'roi', 'refined' and 'mask'.
    """
    outputs = {}
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    my_sigmoid = None  # box regressors are built without an activation
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='%s/rpn' % p)
                box = slim.conv2d(
                    rpn,
                    base_anchors * 4, [1, 1],
                    stride=1,
                    scope='%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.001),
                    activation_fn=my_sigmoid)
                cls = slim.conv2d(
                    rpn,
                    base_anchors * 2, [1, 1],
                    stride=1,
                    scope='%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.01))

                # Three scales per level, doubling with the level index.
                anchor_scales = [2**(i - 2), 2**(i - 1), 2**(i)]
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors
                }

            ## gather all rois (levels are concatenated in P5..P2 order)
            rpn_boxes = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['box'], [-1, 4])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)
            rpn_clses = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['cls'], [-1, 1])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)
            rpn_anchors = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['anchor'], [-1, 4])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs,
                                                     rpn_anchors, ih, iw)
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                    sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            # The assigned lists are indexed with `level - 2` below.
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                    assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
            level_crops = []
            for i in range(5, 1, -1):
                p = 'P%d' % i
                cropped = ROIAlign(pyramid[p],
                                   assigned_rois[i - 2],
                                   assigned_batch_inds[i - 2],
                                   stride=2**i,
                                   pooled_height=14,
                                   pooled_width=14)
                level_crops.append(cropped)
            cropped_rois = tf.concat(values=level_crops, axis=0)

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3],
                                              stride=2,
                                              padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(
                refine,
                num_classes,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            box = slim.fully_connected(
                refine,
                num_classes * 4,
                activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            # NOTE(review): `box` / `cls2` rows follow the per-level crop
            # order (P5..P2), while `rois` is passed as returned by the
            # sampler; confirm that the two orderings agree.
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2_prob, rois, ih, iw)

            ## for testing, the mask head gets its own crop pass
            if not is_training:
                # `cropped_rois` is a concatenated tensor at this point and
                # has no `append`, so the crops are gathered in a new list.
                # TODO(review): the crops still use the RPN `assigned_rois`;
                # re-assigning `final_boxes` to levels was left disabled in
                # the original code.
                mask_crops = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    cropped = ROIAlign(pyramid[p],
                                       assigned_rois[i - 2],
                                       assigned_batch_inds[i - 2],
                                       stride=2**i,
                                       pooled_height=14,
                                       pooled_width=14)
                    mask_crops.append(cropped)
                cropped_rois = tf.concat(values=mask_crops, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # mask logits together with the classes / scores decoded from
            # the refine head
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
Esempio n. 7
0
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
    """Build the RPN, RCNN and mask heads on a feature pyramid.

    Graph construction order:
      1. For each level P5..P2: a 3x3 RPN conv, 1x1 box / cls convs and the
         anchors of that level.
      2. Concatenate the per-level RPN outputs, decode them with
         `anchor_decoder` and slice the returned indexes per level.
      3. Sample rois (`sample_rpn_outputs_with_gt` when training,
         `sample_rpn_outputs` otherwise).
      4. Assign the sampled rois to pyramid levels, crop 14x14 features and
         run the RCNN head; decode its output with `roi_decoder`.
      5. Crop features for the mask head -- from the RPN samples when
         training, from the sampled RCNN outputs otherwise -- and run it.

    No losses are built here.

    Args:
      pyramid: mapping indexed with the keys 'P2'..'P5' (feature maps).
      ih, iw: forwarded to `anchor_decoder`, `ROIAlign` and `roi_decoder`
        (presumably the input image height / width -- confirm with caller).
      num_classes: output size of the RCNN classification layer; the box
        layer has `num_classes * 4` outputs and the mask conv `num_classes`
        channels.
      base_anchors: the RPN box / cls convs have `base_anchors * 4` /
        `base_anchors * 2` output channels.
      is_training: Python truth value selecting the training or inference
        graph; also toggles dropout.
      gt_boxes: forwarded to `sample_rpn_outputs_with_gt` (training only).

    Returns:
      dict of tensors. 'mask_final_clses' and 'mask_final_scores' are only
      present when `is_training` is false.
    """
    outputs = {}
    if _BN is True:
        arg_scope = _extra_conv_arg_scope_with_bn()
        # arg_scope = _extra_conv_arg_scope_with_bn(is_training=is_training)
    else:
        arg_scope = _extra_conv_arg_scope(activation_fn=tf.nn.relu)

    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            ### rpn head on every pyramid level
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i

                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu, scope='%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1,
                                  scope='%s/rpn/box' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                                  activation_fn=None, normalizer_fn=None)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1,
                                  scope='%s/rpn/cls' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                                  activation_fn=None, normalizer_fn=None)

                # The same five scales are used on every level.
                anchor_scales = [2, 4, 8, 16, 32]
                print("anchor_scales = " , anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
                outputs['rpn'][p] = {'box': box, 'cls': cls, 'anchor': all_anchors}

            ### gather all rois (levels are concatenated in P5..P2 order)
            rpn_boxes = tf.concat(
                values=[tf.reshape(outputs['rpn']['P%d' % lvl]['box'], [-1, 4])
                        for lvl in range(5, 1, -1)],
                axis=0)
            rpn_clses = tf.concat(
                values=[tf.reshape(outputs['rpn']['P%d' % lvl]['cls'], [-1, 1])
                        for lvl in range(5, 1, -1)],
                axis=0)
            rpn_anchors = tf.concat(
                values=[tf.reshape(outputs['rpn']['P%d' % lvl]['anchor'], [-1, 4])
                        for lvl in range(5, 1, -1)],
                axis=0)

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rpn_final_boxes, rpn_final_clses, rpn_final_scores, indexs = \
                anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)

            ### slice `indexs` per level, following the concatenation order
            p5_count = tf.shape(tf.reshape(outputs['rpn']['P5']['box'], [-1, 4]))[0]
            outputs['rpn']['P5']['index'] = indexs[0:p5_count]
            for i in range(4, 1, -1):
                p = 'P%d' % i
                # The slice starts one past the last *value* stored for the
                # previous level.
                # NOTE(review): this assumes `indexs` holds consecutive
                # positions starting at 0 -- confirm against anchor_decoder.
                previous_index = outputs['rpn']['P%d' % (i + 1)]['index']
                start = previous_index[-1] + 1
                count = tf.shape(tf.reshape(outputs['rpn'][p]['box'], [-1, 4]))[0]
                outputs['rpn'][p]['index'] = indexs[start:start + count]

            outputs['rpn_boxes'] = rpn_boxes
            outputs['rpn_clses'] = rpn_clses
            outputs['rpn_anchor'] = rpn_anchors
            outputs['rpn_final_boxes'] = rpn_final_boxes
            outputs['rpn_final_clses'] = rpn_final_clses
            outputs['rpn_final_scores'] = rpn_final_scores
            outputs['rpn_indexs'] = indexs

            # This gate uses plain truthiness, exactly like the mask-branch
            # gate further down, so both always pick the same mode. With an
            # identity check here a truthy non-True flag would skip the
            # training sampler and then fail on the undefined
            # `rpn_rois_to_mask`.
            if is_training:
                ### for training, rcnn and maskrcnn take rpn boxes as inputs
                (rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn,
                 rpn_rois_to_mask, rpn_scores_to_mask, rpn_batch_inds_to_mask, rpn_indexs_to_mask) = \
                    sample_rpn_outputs_with_gt(rpn_final_boxes, rpn_final_scores, gt_boxes, indexs,
                                               is_training=is_training, only_positive=False)
            else:
                ### for testing, only rcnn takes rpn boxes as inputs;
                ### maskrcnn takes rcnn boxes as inputs
                rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn = \
                    sample_rpn_outputs(rpn_final_boxes, rpn_final_scores, indexs, only_positive=True)

            ### assign pyramid layer indexs to rcnn network's ROIs
            # The assigned lists are indexed with `level - 2` below.
            [rcnn_assigned_rois, rcnn_assigned_batch_inds, rcnn_assigned_indexs, rcnn_assigned_layer_inds] = \
                assign_boxes(rpn_rois_to_rcnn,
                             [rpn_rois_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn],
                             [2, 3, 4, 5])

            ### crop features from pyramid for rcnn network
            rcnn_cropped_features = []
            rcnn_ordered_rois = []
            rcnn_ordered_index = []
            for i in range(5, 1, -1):
                p = 'P%d' % i
                rcnn_splitted_roi = rcnn_assigned_rois[i - 2]
                rcnn_batch_ind = rcnn_assigned_batch_inds[i - 2]
                rcnn_index = rcnn_assigned_indexs[i - 2]
                # Only the cropped features of the returned 4-tuple are used.
                rcnn_cropped_feature, _, _, _ = ROIAlign(
                    pyramid[p], rcnn_splitted_roi, rcnn_batch_ind, ih, iw, stride=2**i,
                    pooled_height=14, pooled_width=14)
                rcnn_cropped_features.append(rcnn_cropped_feature)
                rcnn_ordered_rois.append(rcnn_splitted_roi)
                rcnn_ordered_index.append(rcnn_index)

            rcnn_cropped_features = tf.concat(values=rcnn_cropped_features, axis=0)
            rcnn_ordered_rois = tf.concat(values=rcnn_ordered_rois, axis=0)
            rcnn_ordered_index = tf.concat(values=rcnn_ordered_index, axis=0)

            ### rcnn head
            # to 7 x 7
            rcnn = slim.max_pool2d(rcnn_cropped_features, [3, 3], stride=2, padding='SAME')
            rcnn = slim.flatten(rcnn)
            rcnn = slim.fully_connected(rcnn, 1024, activation_fn=tf.nn.relu,
                                        weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
            rcnn = slim.dropout(rcnn, keep_prob=0.75, is_training=is_training)
            rcnn = slim.fully_connected(rcnn, 1024, activation_fn=tf.nn.relu,
                                        weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
            rcnn = slim.dropout(rcnn, keep_prob=0.75, is_training=is_training)
            rcnn_clses = slim.fully_connected(rcnn, num_classes, activation_fn=None, normalizer_fn=None,
                                              weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
            rcnn_boxes = slim.fully_connected(rcnn, num_classes * 4, activation_fn=None, normalizer_fn=None,
                                              weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
            rcnn_scores = tf.nn.softmax(rcnn_clses)

            ### decode rcnn network final outputs
            rcnn_final_boxes, rcnn_final_classes, rcnn_final_scores = \
                roi_decoder(rcnn_boxes, rcnn_scores, rcnn_ordered_rois, ih, iw)

            outputs['rcnn_ordered_rois'] = rcnn_ordered_rois
            outputs['rcnn_ordered_index'] = rcnn_ordered_index
            outputs['rcnn_cropped_features'] = rcnn_cropped_features
            tf.add_to_collection('__CROPPED__', rcnn_cropped_features)
            outputs['rcnn_boxes'] = rcnn_boxes
            outputs['rcnn_clses'] = rcnn_clses
            outputs['rcnn_scores'] = rcnn_scores
            outputs['rcnn_final_boxes'] = rcnn_final_boxes
            outputs['rcnn_final_clses'] = rcnn_final_classes
            outputs['rcnn_final_scores'] = rcnn_final_scores

            ### assign pyramid layer indexs to mask network's ROIs
            if is_training:
                [mask_assigned_rois, mask_assigned_batch_inds, mask_assigned_indexs, mask_assigned_layer_inds] = \
                    assign_boxes(rpn_rois_to_mask,
                                 [rpn_rois_to_mask, rpn_batch_inds_to_mask, rpn_indexs_to_mask],
                                 [2, 3, 4, 5])

                mask_cropped_features = []
                mask_ordered_rois = []
                mask_ordered_indexs = []
                ### crop features from pyramid for mask network
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    mask_splitted_roi = mask_assigned_rois[i - 2]
                    mask_batch_ind = mask_assigned_batch_inds[i - 2]
                    mask_index = mask_assigned_indexs[i - 2]
                    mask_cropped_feature, _, _, _ = ROIAlign(
                        pyramid[p], mask_splitted_roi, mask_batch_ind, ih, iw, stride=2**i,
                        pooled_height=14, pooled_width=14)
                    mask_cropped_features.append(mask_cropped_feature)
                    mask_ordered_rois.append(mask_splitted_roi)
                    mask_ordered_indexs.append(mask_index)

                mask_cropped_features = tf.concat(values=mask_cropped_features, axis=0)
                mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)
                mask_ordered_indexs = tf.concat(values=mask_ordered_indexs, axis=0)

            else:
                ### for testing, mask network takes rcnn boxes as inputs
                (rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask,
                 rcnn_batch_inds_to_mask, rcnn_indexs_to_mask) = \
                    sample_rcnn_outputs(rcnn_final_boxes, rcnn_final_classes, rcnn_scores, rcnn_ordered_index)
                [mask_assigned_rois, mask_assigned_clses, mask_assigned_scores,
                 mask_assigned_batch_inds, mask_assign_indexs, mask_assigned_layer_inds] = \
                    assign_boxes(rcnn_rois_to_mask,
                                 [rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask,
                                  rcnn_batch_inds_to_mask, rcnn_indexs_to_mask],
                                 [2, 3, 4, 5])

                mask_cropped_features = []
                mask_ordered_rois = []
                mask_ordered_indexs = []
                mask_ordered_clses = []
                mask_ordered_scores = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    mask_splitted_roi = mask_assigned_rois[i - 2]
                    mask_splitted_cls = mask_assigned_clses[i - 2]
                    mask_splitted_score = mask_assigned_scores[i - 2]
                    mask_batch_ind = mask_assigned_batch_inds[i - 2]
                    mask_index = mask_assign_indexs[i - 2]
                    mask_cropped_feature, _, _, _ = ROIAlign(
                        pyramid[p], mask_splitted_roi, mask_batch_ind, ih, iw, stride=2**i,
                        pooled_height=14, pooled_width=14)
                    mask_cropped_features.append(mask_cropped_feature)
                    mask_ordered_rois.append(mask_splitted_roi)
                    mask_ordered_indexs.append(mask_index)
                    mask_ordered_clses.append(mask_splitted_cls)
                    mask_ordered_scores.append(mask_splitted_score)

                mask_cropped_features = tf.concat(values=mask_cropped_features, axis=0)
                mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)
                mask_ordered_indexs = tf.concat(values=mask_ordered_indexs, axis=0)
                mask_ordered_clses = tf.concat(values=mask_ordered_clses, axis=0)
                mask_ordered_scores = tf.concat(values=mask_ordered_scores, axis=0)

                outputs['mask_final_clses'] = mask_ordered_clses
                outputs['mask_final_scores'] = mask_ordered_scores

            ### mask head
            m = mask_cropped_features
            for _ in range(4):
                m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID',
                            activation_fn=None, normalizer_fn=None)

            outputs['mask_ordered_rois'] = mask_ordered_rois
            outputs['mask_ordered_indexs'] = mask_ordered_indexs
            outputs['mask_cropped_features'] = mask_cropped_features
            outputs['mask_mask'] = m  # raw mask logits
            outputs['mask_final_mask'] = tf.nn.sigmoid(m)

            return outputs
Esempio n. 8
0
def build_heads(pyramid,
                ih,
                iw,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the RPN, box-refinement and mask heads (capsule variant).

    Graph construction order:
      1. For each level P5..P2: a 3x3 RPN conv, 1x1 box / cls convs and the
         anchors of that level.
      2. Concatenate the per-level RPN outputs and decode them into rois.
      3. Sample rois with `sample_rpn_outputs_with_gt`.
      4. Assign rois to pyramid levels and crop 14x14 features (`ROIAlign_`).
      5. Refine head (two 1024-unit fully connected layers), decoded into
         final boxes with `roi_decoder`.
      6. Mask head on the cropped features, plus a capsule-layer branch
         whose result is not consumed by the returned mask.

    No losses are built here.

    Args:
      pyramid: mapping indexed with the keys 'P2'..'P5' (feature maps).
      ih, iw: forwarded to `anchor_decoder`, `ROIAlign_` and `roi_decoder`
        (presumably the input image height / width -- confirm with caller).
      num_classes: output size of the classification layer; the box layer
        has `num_classes * 4` outputs and the mask conv `num_classes`
        channels.
      base_anchors: the RPN box / cls convs have `base_anchors * 4` /
        `base_anchors * 2` output channels.
      is_training: Python truth value; enables dropout, is forwarded to the
        sampler, and selects whether the inference-only crop pass is built.
      gt_boxes: forwarded to `sample_rpn_outputs_with_gt`.

    Returns:
      dict with the keys 'rpn', 'roi', 'assigned_rois',
      'assigned_layer_inds', 'ordered_rois', 'pyramid_feature', 'refined',
      'final_boxes' and 'mask'.
    """
    outputs = {}
    #arg_scope = _extra_conv_arg_scope(activation_fn=None)
    arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
    my_sigmoid = None  # box regressors are built without an activation
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='%s/rpn' % p)
                box = slim.conv2d(
                    rpn,
                    base_anchors * 4, [1, 1],
                    stride=1,
                    scope='%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.001),
                    activation_fn=my_sigmoid)
                cls = slim.conv2d(
                    rpn,
                    base_anchors * 2, [1, 1],
                    stride=1,
                    scope='%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.01))

                # Three scales per level, doubling with the level index.
                anchor_scales = [2**(i - 2), 2**(i - 1), 2**(i)]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors
                }

            ## gather all rois (levels are concatenated in P5..P2 order)
            rpn_boxes = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['box'], [-1, 4])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)
            rpn_clses = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['cls'], [-1, 1])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)
            rpn_anchors = tf.concat(
                values=[
                    tf.reshape(outputs['rpn']['P%d' % lvl]['anchor'], [-1, 4])
                    for lvl in range(5, 1, -1)
                ],
                axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs,
                                                     rpn_anchors, ih, iw)
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                    sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            # The assigned lists are indexed with `level - 2` below.
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                    assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

            outputs['assigned_rois'] = assigned_rois
            outputs['assigned_layer_inds'] = assigned_layer_inds

            level_crops = []
            level_rois = []
            pyramid_feature = []
            for i in range(5, 1, -1):
                print(i)
                p = 'P%d' % i
                splitted_rois = assigned_rois[i - 2]

                # Only the cropped features of the returned pair are used.
                cropped, _ = ROIAlign_(pyramid[p],
                                       splitted_rois,
                                       assigned_batch_inds[i - 2],
                                       ih,
                                       iw,
                                       stride=2**i,
                                       pooled_height=14,
                                       pooled_width=14)
                level_crops.append(cropped)
                level_rois.append(splitted_rois)
                pyramid_feature.append(tf.transpose(pyramid[p], [0, 3, 1, 2]))

            cropped_rois = tf.concat(values=level_crops, axis=0)
            # rois in the same per-level order as the rows of `cropped_rois`
            ordered_rois = tf.concat(values=level_rois, axis=0)

            outputs['ordered_rois'] = ordered_rois
            outputs['pyramid_feature'] = pyramid_feature

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3],
                                              stride=2,
                                              padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(
                refine,
                num_classes,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.05))
            box = slim.fully_connected(
                refine,
                num_classes * 4,
                activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.05))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2_prob, ordered_rois, ih, iw)

            outputs['final_boxes'] = {
                'box': final_boxes,
                'cls': classes,
                'prob': cls2_prob
            }

            ## for testing, the mask head gets its own crop pass
            if not is_training:
                # `cropped_rois` is a concatenated tensor at this point and
                # has no `append`, so the crops are gathered in a new list.
                # TODO(review): the crops still use the RPN `assigned_rois`;
                # re-assigning `final_boxes` to levels was left disabled in
                # the original code.
                # NOTE(review): this pass calls `ROIAlign` (no ih / iw, a
                # single return value) while the pass above uses
                # `ROIAlign_`; confirm that this is intended.
                mask_crops = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    cropped = ROIAlign(pyramid[p],
                                       assigned_rois[i - 2],
                                       assigned_batch_inds[i - 2],
                                       stride=2**i,
                                       pooled_height=14,
                                       pooled_width=14)
                    mask_crops.append(cropped)
                cropped_rois = tf.concat(values=mask_crops, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            # NOTE(review): a batch size of 0 is used as the leading
            # dimension of the reshape below; confirm whether -1 was meant.
            batch_size = 0
            tf.add_to_collection('__TRANSPOSED__', m)

            ### capsule block
            # NOTE(review): neither `output` nor the transposed convolution
            # built from it is used afterwards; the final mask conv below
            # reads `m` directly.
            with tf.variable_scope('PrimaryCaps_layer'):
                primaryCaps, activation = capslayer.layers.primaryCaps(
                    m,
                    filters=32,
                    kernel_size=3,
                    strides=2,
                    out_caps_shape=[8, 1],
                    padding='SAME')

            with tf.variable_scope('fc_reshape_Caps_layer'):
                fc_Caps, activation = capslayer.layers.fully_connected(
                    primaryCaps,
                    activation,
                    num_outputs=7 * 7 * 1,
                    out_caps_shape=[8, 1],
                    routing_method='DynamicRouting')
                fc_Caps = tf.reshape(fc_Caps,
                                     shape=[batch_size, 7, 7, 1, 8, 1])

            with tf.variable_scope('dePrimaryCaps_layer'):
                output = capslayer.layers.dePrimaryCaps(fc_Caps,
                                                        activation,
                                                        num_outputs=128,
                                                        kernel_size=3,
                                                        strides=2)
                tf.layers.conv2d_transpose(output,
                                           256,
                                           kernel_size=9,
                                           strides=2)

            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # mask logits together with the classes / scores decoded from
            # the refine head
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
Esempio n. 9
0
def build_head(pyramid, num_classes, base_anchors, is_training=False):
    """Build the 3-way outputs (class, box and mask) for each pyramid level.

    For every level from P5 down to P2:
      1. Build the anchor (RPN) layer.
      2. Decode and sample the RPN outputs into rois.
      3. Build the roi (refine) layer on ROIAlign-cropped features.
      4. Decode the refine-layer outputs into final boxes.
      5. Build the mask layer.

    Args:
      pyramid: mapping that holds the network input tensor under 'inputs'
        and one feature map per level under 'P2' .. 'P5'. Tensors are read
        here as [batch, height, width, ...].
      num_classes: number of classes; sizes the RPN, refine and mask
        output layers.
      base_anchors: number of anchors per location; scales the RPN outputs.
      is_training: forwarded to the dropout layers. When False, the mask
        head crops from the refined boxes instead of the sampled RPN rois.

    Returns:
      A dict keyed by level name ('P5' .. 'P2'). Each value is a dict with
      'rpn' ({'box', 'classes'}), 'refined' ({'box', 'classes'}) and
      'mask' ({'mask', 'classes', 'scores'}) entries.
    """
    outputs = {}
    inshape = pyramid['inputs'].get_shape()
    ih, iw = inshape[1].value, inshape[2].value
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        for i in range(5, 1, -1):
            p = 'P%d' % i
            stride = 2**i
            outputs[p] = {}

            # rpn head
            # Height is dim 1 and width is dim 2, matching how ih/iw are
            # read from the input tensor above.
            level_shape = pyramid[p].get_shape()
            height, width = level_shape[1].value, level_shape[2].value
            rpn = slim.conv2d(pyramid[p],
                              256, [3, 3],
                              stride=1,
                              activation_fn=tf.nn.relu,
                              scope='%s/rpn' % p)
            box = slim.conv2d(rpn,
                              num_classes * base_anchors * 4, [1, 1],
                              stride=1,
                              scope='%s/rpn/box' % p)
            cls = slim.conv2d(rpn,
                              num_classes * base_anchors * 2, [1, 1],
                              stride=1,
                              scope='%s/rpn/cls' % p)
            outputs[p]['rpn'] = {'box': box, 'classes': cls}

            # decode, sample and crop
            all_anchors = gen_all_anchors(height, width, stride)
            rois, _, scores = \
                      anchor_decoder(box, cls, all_anchors, ih, iw)
            rois, _, scores = sample_rpn_outputs(rois, scores)
            cropped = ROIAlign(
                pyramid[p],
                rois,
                False,
                stride=stride,
                pooled_height=7,
                pooled_width=7,
            )

            # refine head
            refine = slim.fully_connected(cropped,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(refine,
                                        num_classes,
                                        activation_fn=None)
            box = slim.fully_connected(refine,
                                       num_classes * 4,
                                       activation_fn=None)
            outputs[p]['refined'] = {'box': box, 'classes': cls2}

            # decode refine net outputs
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2, rois, ih, iw)

            # for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes

            # mask head
            m = ROIAlign(pyramid[p],
                         rois,
                         False,
                         stride=stride,
                         pooled_height=14,
                         pooled_width=14)
            # Use a throwaway loop variable so the pyramid level `i` is not
            # clobbered inside the outer loop body.
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            m = slim.conv2d_transpose(m,
                                      256, [2, 2],
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            # One mask channel per class, sized like the refine head
            # (this was a hard-coded 81).
            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs[p]['mask'] = {
                'mask': m,
                'classes': classes,
                'scores': scores
            }

    return outputs