Code Example #1
File: layer_test.py  Project: Kairobo/FastMaskRCNN
 def test(self):
     cfg.FLAGS.fg_threshold = 0.7
     with tf.Session() as sess:
         all_anchors = gen_all_anchors(self.height // 4,  # integer division: feature-map size must stay an int
                                       self.width // 4,
                                       stride=4,
                                       scales=2**np.arange(1, 5))
         all_anchors = tf.reshape(all_anchors, [-1, 4])
         self.all_anchors = np.reshape(all_anchors.eval(), (-1, 4))
         labels, bbox_targets, bbox_inside_weights = \
                 anchor_encoder(self.gt_boxes, all_anchors, self.height // 4, self.width // 4, 4)
         self.labels = labels.eval()
         self.bbox_targets = bbox_targets.eval()
         self.bbox_inside_weights = bbox_inside_weights.eval()
         print(self.labels.shape)
         print(self.bbox_targets.shape)
         print(self.bbox_inside_weights.shape)
         print(self.gt_boxes)
         # print (self.all_anchors[0:120:15, ])
         np_labels = self.labels.reshape((-1, ))
         np_bbox_targets = self.bbox_targets.reshape((-1, 4))
         np_bbox_inside_weights = self.bbox_inside_weights.reshape((-1, 4))
         encoded_gt_boxes = []
         for i in range(np_labels.shape[0]):
             if np_labels[i] >= 1:
                 # print (self.all_anchors[i, :], np_bbox_targets[i, :], np_bbox_inside_weights[i, :])
                 encoded_gt_boxes.append(np_bbox_targets[i, :])
         encoded_gt_boxes = np.asarray(encoded_gt_boxes, dtype=np.float32)
         encoded_gt_boxes = encoded_gt_boxes.reshape((-1, 4))
         # print (np.max(np_labels))
         # print (np.sum(np_labels >= 1))
         scores = np.zeros((np_labels.shape[0], 2), dtype=np.float32)
         for i in range(np_labels.shape[0]):
             if np_labels[i] > 0:
                 scores[i, 0] = 0
                 scores[i, 1] = 1
         scores = scores.astype(np.float32)
         boxes, classes, scores = \
                 anchor_decoder(self.bbox_targets, scores, all_anchors, self.height, self.width)
         self.npboxes = boxes.eval().reshape((-1, 4))
         npscores = scores.eval().reshape((-1, 1))
         self.npboxes = np.hstack((self.npboxes, npscores))
         # print (self.npboxes.shape, npscores.shape)
         bbox_targets_np = self.bbox_targets.reshape([-1, 4])
         all_anchors_np = all_anchors.eval().reshape([-1, 4])
         for i in range(self.npboxes.shape[0]):
             if self.npboxes[i, 4] >= 1:
                 print(bbox_targets_np[i], self.npboxes[i],
                       all_anchors_np[i])
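The test above references fixtures (self.height, self.width, self.gt_boxes) that the excerpt does not show. A minimal sketch of what such a setUp could look like; the sizes and boxes below are assumptions for illustration, not values from the original layer_test.py:

import numpy as np
import tensorflow as tf

class AnchorLayerTest(tf.test.TestCase):

    def setUp(self):
        # Hypothetical fixture values; the real test may use different ones.
        self.height, self.width = 256, 256
        # gt_boxes rows are [x1, y1, x2, y2, class], the (G, 5) layout
        # documented in the build_losses examples below.
        self.gt_boxes = np.asarray([[40., 50., 200., 220., 1.]],
                                   dtype=np.float32)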
Code Example #2
def build_losses(pyramid,
                 outputs,
                 gt_boxes,
                 gt_masks,
                 num_classes,
                 base_anchors,
                 rpn_box_lw=1.0,
                 rpn_cls_lw=1.0,
                 refined_box_lw=1.0,
                 refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses
  Params:
  ------
  outputs: output of build_heads
  gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
  gt_masks: A tensor of shape (G, ih, iw),  {0, 1}
  *_lw: loss weight of rpn, refined and mask losses
  
  Returns:
  -------
  l: a loss tensor
  """
    for i in range(5, 1, -1):
        p = 'P%d' % i
        stride = 2**i
        shape = tf.shape(pyramid[p])
        height, width = shape[1], shape[2]

        ### rpn losses
        # 1. encode ground truth
        # 2. compute distances
        all_anchors = gen_all_anchors(height, width, stride)
        labels, bbox_targets, bbox_inside_weights = \
          anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder')
        boxes = outputs[p]['rpn']['box']
        classes = tf.reshape(outputs[p]['rpn']['cls'],
                             (1, height, width, base_anchors, 2))

        labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    tf.reshape(classes, [-1, 2]),
                    tf.reshape(boxes, [-1, 4]),
                    tf.reshape(bbox_targets, [-1, 4]),
                    tf.reshape(bbox_inside_weights, [-1, 4])
                    ])
        rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
        rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
        rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
        tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)

        # NOTE: examples with negative labels are ignored when computing one_hot_encoding and entropy losses,
        # BUT these examples still count when averaging in softmax_cross_entropy,
        # so the loss becomes smaller by a factor of (non_negative_labels / all_labels).
        # The BEST practice is therefore still to gather all non-negative examples.
        labels = slim.one_hot_encoding(
            labels, 2, on_value=1.0,
            off_value=0.0)  # this will set -1 label to all zeros
        rpn_cls_loss = rpn_cls_lw * tf.losses.softmax_cross_entropy(
            labels, classes)

        ### refined loss
        # 1. encode ground truth
        # 2. compute distances
        rois = outputs[p]['roi']['box']

        boxes = outputs[p]['refined']['box']
        classes = outputs[p]['refined']['cls']
        labels, bbox_targets, bbox_inside_weights = \
          roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

        labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]),[
                    tf.reshape(labels, [-1]),
                    tf.reshape(classes, [-1, num_classes]),
                    tf.reshape(boxes, [-1, num_classes * 4]),
                    tf.reshape(bbox_targets, [-1, num_classes * 4]),
                    tf.reshape(bbox_inside_weights, [-1, num_classes * 4])
                    ] )
        refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
        refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
        refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss)
        tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)

        labels = slim.one_hot_encoding(labels,
                                       num_classes,
                                       on_value=1.0,
                                       off_value=0.0)
        refined_cls_loss = refined_cls_lw * tf.losses.softmax_cross_entropy(
            labels, classes)

        ### mask loss
        # mask of shape (N, h, w, num_classes*2)
        masks = outputs[p]['mask']['mask']
        mask_shape = tf.shape(masks)
        masks = tf.reshape(masks, (mask_shape[0], mask_shape[1], mask_shape[2],
                                   tf.cast(mask_shape[3] / 2, tf.int32), 2))
        labels, mask_targets, mask_inside_weights = \
          mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28, scope='MaskEncoder')
        labels, masks, mask_targets, mask_inside_weights = \
                _filter_negative_samples(tf.reshape(labels, [-1]), [
                    tf.reshape(labels, [-1]),
                    masks,
                    mask_targets,
                    mask_inside_weights,
                    ])
        mask_targets = slim.one_hot_encoding(mask_targets,
                                             2,
                                             on_value=1.0,
                                             off_value=0.0)
        mask_binary_loss = mask_lw * tf.losses.softmax_cross_entropy(
            mask_targets, masks)

    return rpn_box_loss + rpn_cls_loss + refined_box_loss + refined_cls_loss + mask_binary_loss
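_smooth_l1_dist is called throughout these loss functions but is not part of the excerpt. A minimal sketch, assuming the standard Fast R-CNN smooth-L1 distance; the sigma2 default is an assumption:

import tensorflow as tf

def _smooth_l1_dist(deltas, targets, sigma2=9.0, name='smooth_l1_dist'):
    """Element-wise smooth-L1 distance on x = deltas - targets:
    0.5 * sigma2 * x**2   if |x| < 1 / sigma2
    |x| - 0.5 / sigma2    otherwise
    """
    with tf.name_scope(name):
        diff = deltas - targets
        abs_diff = tf.abs(diff)
        small = tf.cast(tf.less(abs_diff, 1.0 / sigma2), tf.float32)
        return (0.5 * sigma2 * tf.square(diff) * small +
                (abs_diff - 0.5 / sigma2) * (1.0 - small))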
Code Example #3
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
    outputs = {}
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        # for p in pyramid:
        for i in range(5, 1, -1):
            p = 'P%d' % i
            stride = 2**i
            outputs[p] = {}

            ## rpn head
            shape = tf.shape(pyramid[p])
            height, width = shape[1], shape[2]
            rpn = slim.conv2d(pyramid[p],
                              256, [3, 3],
                              stride=1,
                              activation_fn=tf.nn.relu,
                              scope='%s/rpn' % p)
            box = slim.conv2d(rpn,
                              base_anchors * 4, [1, 1],
                              stride=1,
                              scope='%s/rpn/box' % p)
            cls = slim.conv2d(rpn,
                              base_anchors * 2, [1, 1],
                              stride=1,
                              scope='%s/rpn/cls' % p)
            outputs[p]['rpn'] = {'box': box, 'cls': cls}

            ## decode, sample and crop
            all_anchors = gen_all_anchors(height, width, stride)
            cls_prob = tf.reshape(
                tf.nn.softmax(
                    tf.reshape(cls, [1, shape[1], shape[2], base_anchors, 2])),
                [1, shape[1], shape[2], base_anchors * 2])
            rois, classes, scores = \
                      anchor_decoder(box, cls_prob, all_anchors, ih, iw)
            rois, scores = sample_rpn_outputs(rois, scores)
            cropped = ROIAlign(
                pyramid[p],
                rois,
                False,
                stride=2**i,
                pooled_height=7,
                pooled_width=7,
            )

            # rois of an image, sampled from rpn output
            outputs[p]['roi'] = {
                'box': rois,
                'scores': scores,
                'cropped': cropped
            }

            ## refine head
            refine = slim.flatten(cropped)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(refine,
                                        num_classes,
                                        activation_fn=None)
            box = slim.fully_connected(refine,
                                       num_classes * 4,
                                       activation_fn=None)
            outputs[p]['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2_prob, rois, ih, iw)

            # for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes

            ## mask head
            m = ROIAlign(pyramid[p],
                         rois,
                         False,
                         stride=2**i,
                         pooled_height=14,
                         pooled_width=14)
            outputs[p]['roi']['cropped_mask'] = m
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            m = slim.conv2d_transpose(m,
                                      256, [2, 2],
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            m = slim.conv2d(m,
                            num_classes * 2, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs[p]['mask'] = {
                'mask': m,
                'classes': classes,
                'scores': scores
            }

    return outputs
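ROIAlign here is an external op. For orientation, a rough stand-in can be built from tf.image.crop_and_resize; the [x1, y1, x2, y2] box layout and the coordinate normalization below are assumptions:

import tensorflow as tf

def roi_align_approx(features, rois, batch_inds, stride,
                     pooled_height=7, pooled_width=7):
    """Rough ROIAlign stand-in using bilinear crop_and_resize.
    features: (N, H, W, C) feature map of one pyramid level.
    rois:     (R, 4) boxes in image coordinates, assumed [x1, y1, x2, y2].
    """
    shape = tf.cast(tf.shape(features), tf.float32)
    img_h, img_w = shape[1] * stride, shape[2] * stride
    x1, y1, x2, y2 = tf.unstack(rois, axis=1)
    # crop_and_resize wants normalized [y1, x1, y2, x2] boxes.
    norm = tf.stack([y1 / (img_h - 1.0), x1 / (img_w - 1.0),
                     y2 / (img_h - 1.0), x2 / (img_w - 1.0)], axis=1)
    return tf.image.crop_and_resize(features, norm,
                                    tf.cast(batch_inds, tf.int32),
                                    [pooled_height, pooled_width])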
Code Example #4
def build_losses(pyramid,
                 outputs,
                 gt_boxes,
                 gt_masks,
                 num_classes,
                 rpn_box_lw=1.0,
                 rpn_cls_lw=1.0,
                 refined_box_lw=1.0,
                 refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses
  Params:
  ------
  outputs: output of build_heads
  gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
  gt_masks: A tensor of shape (G, ih, iw),  {0, 1}
  *_lw: loss weight of rpn, refined and mask losses
  
  Returns:
  -------
  l: a loss tensor
  """
    for i in range(5, 1, -1):
        p = 'P%d' % i
        stride = 2**i
        shape = tf.shape(pyramid[p])
        height, width = shape[1], shape[2]

        ### rpn losses
        # 1. encode ground truth
        # 2. compute distances
        all_anchors = gen_all_anchors(height, width, stride)
        labels, bbox_targets, bbox_inside_weights = \
          anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder')
        boxes = outputs[p]['rpn']['box']
        classes = outputs[p]['rpn']['cls']
        rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
        rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
        rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)

        labels = slim.one_hot_encoding(labels, 2, on_value=1.0, off_value=0.0)
        rpn_cls_loss = rpn_cls_lw * tf.losses.softmax_cross_entropy(
            labels, classes)

        ### refined loss
        # 1. encode ground truth
        # 2. compute distances
        rois = outputs[p]['roi']['box']

        boxes = outputs[p]['refined']['box']
        classes = outputs[p]['refined']['cls']
        labels, bbox_targets, bbox_inside_weights = \
          roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')
        refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
            boxes, bbox_targets)
        refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
        refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
        refined_box_loss = refined_box_lw * tf.reduce_mean(refined_box_loss)

        labels = slim.one_hot_encoding(labels,
                                       num_classes,
                                       on_value=1.0,
                                       off_value=0.0)
        refined_cls_loss = refined_cls_lw * tf.losses.softmax_cross_entropy(
            labels, classes)

        ### mask loss
        # {'mask': m, 'classes': classes, 'scores': scores}
        masks = outputs[p]['mask']['mask']
        labels, mask_targets, mask_inside_weights = \
          mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28, scope='MaskEncoder')

    # This early variant stops short of a mask loss; return the losses that
    # were actually built so the docstring's promise of a loss tensor holds.
    return rpn_box_loss + rpn_cls_loss + refined_box_loss + refined_cls_loss
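This variant relies on the same one-hot trick as the fuller build_losses above: slim.one_hot_encoding turns a -1 (ignore) label into an all-zero row, which zeroes that example's cross-entropy term while still counting it in the mean, as the NOTE there points out. A small sketch of the effect:

import tensorflow as tf
import tensorflow.contrib.slim as slim

labels = tf.constant([1, 0, -1])
one_hot = slim.one_hot_encoding(labels, 2, on_value=1.0, off_value=0.0)
# one_hot evaluates to [[0, 1], [1, 0], [0, 0]]: the -1 row is all zeros.
logits = tf.constant([[0.2, 0.8], [0.9, 0.1], [0.5, 0.5]])
loss = tf.losses.softmax_cross_entropy(one_hot, logits)
# The mean runs over all 3 rows even though only 2 contribute a term,
# shrinking the loss by a factor of (non_negative_labels / all_labels).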
Code Example #5
def build_losses(pyramid,
                 outputs,
                 gt_boxes,
                 gt_masks,
                 num_classes,
                 base_anchors,
                 rpn_box_lw=1.0,
                 rpn_cls_lw=1.0,
                 refined_box_lw=1.0,
                 refined_cls_lw=1.0,
                 mask_lw=1.0):
    """Building 3-way output losses, totally 5 losses
  Params:
  ------
  outputs: output of build_heads
  gt_boxes: A tensor of shape (G, 5), [x1, y1, x2, y2, class]
  gt_masks: A tensor of shape (G, ih, iw),  {0, 1}
  *_lw: loss weight of rpn, refined and mask losses
  
  Returns:
  -------
  l: a loss tensor
  """

    # losses for pyramid
    losses = []
    rpn_box_losses, rpn_cls_losses = [], []
    refined_box_losses, refined_cls_losses = [], []
    mask_losses = []

    # watch some info during training
    rpn_batch = []
    refine_batch = []
    mask_batch = []
    rpn_batch_pos = []
    refine_batch_pos = []
    mask_batch_pos = []

    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):

            ## assigning gt_boxes
            assigned_gt_boxes = assign_boxes(gt_boxes, [2, 3, 4, 5])
            assigned_layer_inds = assigned_gt_boxes[-1]

            ## build losses for PFN
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]

                gt_boxes = assigned_gt_boxes[i - 2]

                ### rpn losses
                # 1. encode ground truth
                # 2. compute distances
                anchor_scales = [2**(i - 2), 2**(i - 1), 2**(i)]
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                labels, bbox_targets, bbox_inside_weights = \
                  anchor_encoder(gt_boxes, all_anchors, height, width, stride, scope='AnchorEncoder')
                boxes = outputs[p]['rpn']['box']
                classes = tf.reshape(outputs[p]['rpn']['cls'],
                                     (1, height, width, base_anchors, 2))

                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                        _filter_negative_samples(tf.reshape(labels, [-1]), [
                            tf.reshape(labels, [-1]),
                            tf.reshape(classes, [-1, 2]),
                            tf.reshape(boxes, [-1, 4]),
                            tf.reshape(bbox_targets, [-1, 4]),
                            tf.reshape(bbox_inside_weights, [-1, 4])
                            ])
                _, frac_ = _get_valid_sample_fraction(labels)
                rpn_batch.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 0), tf.float32)))
                rpn_batch_pos.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 1), tf.float32)))
                rpn_box_loss = bbox_inside_weights * _smooth_l1_dist(
                    boxes, bbox_targets)
                rpn_box_loss = tf.reshape(rpn_box_loss, [-1, 4])
                rpn_box_loss = tf.reduce_sum(rpn_box_loss, axis=1)
                rpn_box_loss = rpn_box_lw * tf.reduce_mean(rpn_box_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_box_loss)
                rpn_box_losses.append(rpn_box_loss)

                # NOTE: examples with negative labels are ignored when computing one_hot_encoding and entropy losses,
                # BUT these examples still count when averaging in softmax_cross_entropy,
                # so the loss becomes smaller by a factor of (non_negative_labels / all_labels).
                # The BEST practice is therefore still to gather all non-negative examples.
                labels = slim.one_hot_encoding(
                    labels, 2, on_value=1.0,
                    off_value=0.0)  # this will set -1 label to all zeros
                rpn_cls_loss = rpn_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                    labels=labels, logits=classes)
                rpn_cls_loss = tf.reduce_mean(rpn_cls_loss)
                tf.add_to_collection(tf.GraphKeys.LOSSES, rpn_cls_loss)
                rpn_cls_losses.append(rpn_cls_loss)

                ### refined loss
                # 1. encode ground truth
                # 2. compute distances
                rois = outputs[p]['roi']['box']

                boxes = outputs[p]['refined']['box']
                classes = outputs[p]['refined']['cls']
                labels, bbox_targets, bbox_inside_weights = \
                  roi_encoder(gt_boxes, rois, num_classes, scope='ROIEncoder')

                labels, classes, boxes, bbox_targets, bbox_inside_weights = \
                        _filter_negative_samples(tf.reshape(labels, [-1]),[
                            tf.reshape(labels, [-1]),
                            tf.reshape(classes, [-1, num_classes]),
                            tf.reshape(boxes, [-1, num_classes * 4]),
                            tf.reshape(bbox_targets, [-1, num_classes * 4]),
                            tf.reshape(bbox_inside_weights, [-1, num_classes * 4])
                            ] )
                frac, frac_ = _get_valid_sample_fraction(labels)
                refine_batch.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 0), tf.float32)))
                refine_batch_pos.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 1), tf.float32)))

                refined_box_loss = bbox_inside_weights * _smooth_l1_dist(
                    boxes, bbox_targets)
                refined_box_loss = tf.reshape(refined_box_loss, [-1, 4])
                refined_box_loss = tf.reduce_sum(refined_box_loss, axis=1)
                refined_box_loss = refined_box_lw * tf.reduce_mean(
                    refined_box_loss) * frac_
                tf.add_to_collection(tf.GraphKeys.LOSSES, refined_box_loss)
                refined_box_losses.append(refined_box_loss)

                labels = slim.one_hot_encoding(labels,
                                               num_classes,
                                               on_value=1.0,
                                               off_value=0.0)
                refined_cls_loss = refined_cls_lw * tf.nn.softmax_cross_entropy_with_logits(
                    labels=labels, logits=classes)
                refined_cls_loss = tf.reduce_mean(refined_cls_loss) * frac_
                tf.add_to_collection(tf.GraphKeys.LOSSES, refined_cls_loss)
                refined_cls_losses.append(refined_cls_loss)

                ### mask loss
                # mask of shape (N, h, w, num_classes*2)
                masks = outputs[p]['mask']['mask']
                # mask_shape = tf.shape(masks)
                # masks = tf.reshape(masks, (mask_shape[0], mask_shape[1],
                #                            mask_shape[2], tf.cast(mask_shape[3]/2, tf.int32), 2))
                labels, mask_targets, mask_inside_weights = \
                  mask_encoder(gt_masks, gt_boxes, rois, num_classes, 28, 28, scope='MaskEncoder')
                labels, masks, mask_targets, mask_inside_weights = \
                        _filter_negative_samples(tf.reshape(labels, [-1]), [
                            tf.reshape(labels, [-1]),
                            masks,
                            mask_targets,
                            mask_inside_weights,
                            ])
                _, frac_ = _get_valid_sample_fraction(labels)
                mask_batch.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 0), tf.float32)))
                mask_batch_pos.append(
                    tf.reduce_sum(
                        tf.cast(tf.greater_equal(labels, 1), tf.float32)))
                # mask_targets = slim.one_hot_encoding(mask_targets, 2, on_value=1.0, off_value=0.0)
                # mask_binary_loss = mask_lw * tf.losses.softmax_cross_entropy(mask_targets, masks)
                # NOTE: w/o competition between classes.
                mask_targets = tf.cast(mask_targets, tf.float32)
                mask_loss = mask_lw * tf.nn.sigmoid_cross_entropy_with_logits(
                    labels=mask_targets, logits=masks)
                mask_loss = tf.reduce_mean(mask_loss)
                mask_loss = tf.cond(tf.greater(tf.size(labels), 0),
                                    lambda: mask_loss,
                                    lambda: tf.constant(0.0))
                tf.add_to_collection(tf.GraphKeys.LOSSES, mask_loss)
                mask_losses.append(mask_loss)

    rpn_box_losses = tf.add_n(rpn_box_losses)
    rpn_cls_losses = tf.add_n(rpn_cls_losses)
    refined_box_losses = tf.add_n(refined_box_losses)
    refined_cls_losses = tf.add_n(refined_cls_losses)
    mask_losses = tf.add_n(mask_losses)
    losses = [
        rpn_box_losses, rpn_cls_losses, refined_box_losses, refined_cls_losses,
        mask_losses
    ]
    total_loss = tf.add_n(losses)

    rpn_batch = tf.cast(tf.add_n(rpn_batch), tf.float32)
    refine_batch = tf.cast(tf.add_n(refine_batch), tf.float32)
    mask_batch = tf.cast(tf.add_n(mask_batch), tf.float32)
    rpn_batch_pos = tf.cast(tf.add_n(rpn_batch_pos), tf.float32)
    refine_batch_pos = tf.cast(tf.add_n(refine_batch_pos), tf.float32)
    mask_batch_pos = tf.cast(tf.add_n(mask_batch_pos), tf.float32)

    return total_loss, losses, [rpn_batch_pos, rpn_batch, \
                                refine_batch_pos, refine_batch, \
                                mask_batch_pos, mask_batch]
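_filter_negative_samples, used in several of these loss builders, is also outside the excerpt. A minimal sketch consistent with how it is called (keep only the rows whose label is non-negative, i.e. drop the -1 "ignore" entries):

import tensorflow as tf

def _filter_negative_samples(labels, tensors):
    """Gather, from every tensor in `tensors`, the rows whose label >= 0.
    labels: (N,) int tensor in which -1 marks ignored examples.
    """
    keep_inds = tf.reshape(tf.where(tf.greater_equal(labels, 0)), [-1])
    return [tf.gather(t, keep_inds) for t in tensors]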
Code Example #6
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
  """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
  outputs = {}
  arg_scope = _extra_conv_arg_scope(activation_fn=None)
  my_sigmoid = None
  with slim.arg_scope(arg_scope):
    with tf.variable_scope('pyramid'):
        # for p in pyramid:
        outputs['rpn'] = {}
        for i in range(5, 1, -1):
          p = 'P%d'%i
          stride = 2 ** i
          
          ## rpn head
          shape = tf.shape(pyramid[p])
          height, width = shape[1], shape[2]
          rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1, activation_fn=tf.nn.relu, scope='%s/rpn'%p)
          box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
          cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

          anchor_scales = [2**(i-2), 2**(i-1), 2**i]
          all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
          outputs['rpn'][p]={'box':box, 'cls':cls, 'anchor':all_anchors}

        ## gather all rois
        # print (outputs['rpn'])
        rpn_boxes = [tf.reshape(outputs['rpn']['P%d'%p]['box'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_clses = [tf.reshape(outputs['rpn']['P%d'%p]['cls'], [-1, 1]) for p in range(5, 1, -1)]  
        rpn_anchors = [tf.reshape(outputs['rpn']['P%d'%p]['anchor'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
        rpn_clses = tf.concat(values=rpn_clses, axis=0)
        rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

        outputs['rpn']['box'] = rpn_boxes
        outputs['rpn']['cls'] = rpn_clses
        outputs['rpn']['anchor'] = rpn_anchors
        # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}
        
        rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
        rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
        # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
        rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

        # if is_training:
        #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
        #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)
        
        outputs['roi'] = {'box': rois, 'score': scores}

        ## cropping regions
        [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
        cropped_rois = []
        for i in range(5, 1, -1):
            p = 'P%d'%i
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
        cropped_rois = tf.concat(values=cropped_rois, axis=0)

        outputs['roi']['cropped_rois'] = cropped_rois
        tf.add_to_collection('__CROPPED__', cropped_rois)

        ## refine head
        # to 7 x 7
        cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
        refine = slim.flatten(cropped_regions)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        refine = slim.fully_connected(refine,  1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        cls2 = slim.fully_connected(refine, num_classes, activation_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
        box = slim.fully_connected(refine, num_classes*4, activation_fn=my_sigmoid, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001))

        outputs['refined'] = {'box': box, 'cls': cls2}
        
        ## decode refine net outputs
        cls2_prob = tf.nn.softmax(cls2)
        final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, rois, ih, iw)
         
        ## for testing, maskrcnn takes refined boxes as inputs
        if not is_training:
          rois = final_boxes
          cropped_rois = []  # reset: cropped_rois is already a tensor; re-crop for the refined boxes
          # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
          #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
          for i in range(5, 1, -1):
            splitted_rois = assigned_rois[i-2]
            batch_inds = assigned_batch_inds[i-2]
            p = 'P%d'%i
            cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                               pooled_height=14, pooled_width=14)
            cropped_rois.append(cropped)
          cropped_rois = tf.concat(values=cropped_rois, axis=0)
          
        ## mask head
        m = cropped_rois
        for _ in range(4):
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # to 28 x 28
        m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
        tf.add_to_collection('__TRANSPOSED__', m)
        m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID', activation_fn=None)
          
        # add a mask, given the predicted boxes and classes
        outputs['mask'] = {'mask':m, 'cls': classes, 'score': scores}
          
  return outputs
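For a sense of how many rows the concatenation above produces: each level contributes (height / stride) * (width / stride) * base_anchors anchors. A quick back-of-the-envelope check; the 512 x 512 input and 9 anchors per location are assumptions:

ih = iw = 512
base_anchors = 9  # e.g. 3 scales x 3 aspect ratios
total = 0
for i in range(5, 1, -1):      # P5 .. P2, strides 32 .. 4
    stride = 2 ** i
    n = (ih // stride) * (iw // stride) * base_anchors
    print('P%d: %d anchors' % (i, n))
    total += n
print('total:', total)         # 2304 + 9216 + 36864 + 147456 = 195840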
Code Example #7
def build_heads(pyramid,
                py_scope,
                slim_scope,
                image_height,
                image_width,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
    outputs = {}
    # if _BN is True:
    #   if is_training is True:
    #     arg_scope = _extra_conv_arg_scope_with_bn()
    #   else:
    #     arg_scope = _extra_conv_arg_scope_with_bn(batch_norm_decay=0.0)
    #   # arg_scope = _extra_conv_arg_scope_with_bn(is_training=is_training)
    # else:
    #   arg_scope = _extra_conv_arg_scope(activation_fn=tf.nn.relu)
    with tf.name_scope(py_scope) as py_scope:
        with slim.arg_scope(slim_scope) as slim_scope:
            ### for p in pyramid
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i
                """Build RPN head
          RPN takes features from each layer of pyramid network. 
          strides are respectively set to [4, 8, 16, 32] for pyramid feature layer P2,P3,P4,P5 
          anchor_scales are set to [2 **(i-2), 2 ** (i-1), 2 **(i)] in all pyramid layers (*This is probably inconsistent with original paper where the only scale is 8)
          It generates 2 outputs.
          box: an array of shape (1, pyramid_height, pyramid_width, num_anchorx4). box regression values [shift_x, shift_y, scale_width, scale_height] are stored in the last dimension of the array.
          cls: an array of shape (1, pyramid_height, pyramid_width, num_anchorx2). Note that this value is before softmax   
          """
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='pyramid/%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='pyramid/%s/rpn/box' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=None, normalizer_fn=None)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='pyramid/%s/rpn/cls' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01), activation_fn=None, normalizer_fn=None)

                anchor_scales = [8]  #[2 **(i-2), 2 ** (i-1), 2 **(i)]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors,
                    'shape': shape
                }

            ### gather boxes, clses, anchors from all pyramid layers
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                for p in range(5, 1, -1)
            ]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            ### softmax to get probability
            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            ### decode anchors and box regression values into proposed bounding boxes
            rpn_final_boxes, rpn_final_clses, rpn_final_scores = anchor_decoder(
                rpn_boxes, rpn_probs, rpn_anchors, image_height, image_width)

            outputs['rpn_boxes'] = rpn_boxes
            outputs['rpn_clses'] = rpn_clses
            outputs['rpn_anchor'] = rpn_anchors
            outputs['rpn_final_boxes'] = rpn_final_boxes
            outputs['rpn_final_clses'] = rpn_final_clses
            outputs['rpn_final_scores'] = rpn_final_scores

            if is_training is True:
                ### for training, rcnn and maskrcnn take rpn proposed bounding boxes as inputs
                rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn, rpn_rois_to_mask, rpn_scores_to_mask, rpn_batch_inds_to_mask = \
                      sample_rpn_outputs_with_gt(rpn_final_boxes, rpn_final_scores, gt_boxes, is_training=is_training, only_positive=False)#True
            else:
                ### for testing, only rcnn takes rpn boxes as inputs. maskrcnn takes rcnn boxes as inputs
                rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn = sample_rpn_outputs(
                    rpn_final_boxes, rpn_final_scores, only_positive=False)

            ### assign pyramid layer indexs to rcnn network's ROIs.
            [rcnn_assigned_rois, rcnn_assigned_batch_inds, rcnn_assigned_layer_inds] = \
                  assign_boxes(rpn_rois_to_rcnn, [rpn_rois_to_rcnn, rpn_batch_inds_to_rcnn], [2, 3, 4, 5])

            ### crop features from pyramid using ROIs. Note that this will change order of the ROIs, so ROIs are also reordered.
            rcnn_cropped_features = []
            rcnn_ordered_rois = []
            for i in range(5, 1, -1):
                p = 'P%d' % i
                rcnn_splitted_roi = rcnn_assigned_rois[i - 2]
                rcnn_batch_ind = rcnn_assigned_batch_inds[i - 2]
                rcnn_cropped_feature, rcnn_rois_to_crop_and_resize = ROIAlign(
                    pyramid[p],
                    rcnn_splitted_roi,
                    rcnn_batch_ind,
                    image_height,
                    image_width,
                    stride=2**i,
                    pooled_height=14,
                    pooled_width=14)
                rcnn_cropped_features.append(rcnn_cropped_feature)
                rcnn_ordered_rois.append(rcnn_splitted_roi)

            rcnn_cropped_features = tf.concat(values=rcnn_cropped_features,
                                              axis=0)
            rcnn_ordered_rois = tf.concat(values=rcnn_ordered_rois, axis=0)
            """Build rcnn head
        rcnn takes cropped features and generates 2 outputs. 
        rcnn_boxes: an array of shape (num_ROIs, num_classes x 4). Box regression values of each classes [shift_x, shift_y, scale_width, scale_height] are stored in the last dimension of the array.
        rcnn_clses: an array of shape (num_ROIs, num_classes). Class prediction values (before softmax) are stored
        """
            rcnn = slim.max_pool2d(rcnn_cropped_features, [3, 3],
                                   stride=2,
                                   padding='SAME')
            rcnn = slim.flatten(rcnn)
            rcnn = slim.fully_connected(
                rcnn,
                1024,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)  #is_training
            rcnn = slim.fully_connected(
                rcnn,
                1024,
                activation_fn=tf.nn.relu,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_1")
            rcnn = slim.dropout(rcnn, keep_prob=0.75,
                                is_training=is_training)  #is_training
            rcnn_clses = slim.fully_connected(
                rcnn,
                num_classes,
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_2")
            rcnn_boxes = slim.fully_connected(
                rcnn,
                num_classes * 4,
                activation_fn=None,
                normalizer_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001),
                scope="pyramid/fully_connected_3")

            ### softmax to get probability
            rcnn_scores = tf.nn.softmax(rcnn_clses)

            ### decode ROIs and box regression values into bounding boxes
            rcnn_final_boxes, rcnn_final_classes, rcnn_final_scores = roi_decoder(
                rcnn_boxes, rcnn_scores, rcnn_ordered_rois, image_height,
                image_width)

            outputs['rcnn_ordered_rois'] = rcnn_ordered_rois
            outputs['rcnn_cropped_features'] = rcnn_cropped_features
            tf.add_to_collection('__CROPPED__', rcnn_cropped_features)
            outputs['rcnn_boxes'] = rcnn_boxes
            outputs['rcnn_clses'] = rcnn_clses
            outputs['rcnn_scores'] = rcnn_scores
            outputs['rcnn_final_boxes'] = rcnn_final_boxes
            outputs['rcnn_final_clses'] = rcnn_final_classes
            outputs['rcnn_final_scores'] = rcnn_final_scores

            if is_training:
                ### assign pyramid layer indexs to mask network's ROIs
                [mask_assigned_rois, mask_assigned_batch_inds, mask_assigned_layer_inds] = \
                     assign_boxes(rpn_rois_to_mask, [rpn_rois_to_mask, rpn_batch_inds_to_mask], [2, 3, 4, 5])

                ### crop features from pyramid using ROIs. Again, this will change order of the ROIs, so ROIs are reordered.
                mask_cropped_features = []
                mask_ordered_rois = []

                ### crop features from pyramid for mask network
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    mask_splitted_roi = mask_assigned_rois[i - 2]
                    mask_batch_ind = mask_assigned_batch_inds[i - 2]
                    mask_cropped_feature, mask_rois_to_crop_and_resize = ROIAlign(
                        pyramid[p],
                        mask_splitted_roi,
                        mask_batch_ind,
                        image_height,
                        image_width,
                        stride=2**i,
                        pooled_height=14,
                        pooled_width=14)
                    mask_cropped_features.append(mask_cropped_feature)
                    mask_ordered_rois.append(mask_splitted_roi)

                mask_cropped_features = tf.concat(values=mask_cropped_features,
                                                  axis=0)
                mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)

            else:
                ### for testing, mask network takes rcnn boxes as inputs
                rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask, rcnn_batch_inds_to_mask = sample_rcnn_outputs(
                    rcnn_final_boxes,
                    rcnn_final_classes,
                    rcnn_scores,
                    class_agnostic=False)
                [mask_assigned_rois, mask_assigned_clses, mask_assigned_scores, mask_assigned_batch_inds, mask_assigned_layer_inds] =\
                     assign_boxes(rcnn_rois_to_mask, [rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask, rcnn_batch_inds_to_mask], [2, 3, 4, 5])

                mask_cropped_features = []
                mask_ordered_rois = []
                mask_ordered_clses = []
                mask_ordered_scores = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    mask_splitted_roi = mask_assigned_rois[i - 2]
                    mask_splitted_cls = mask_assigned_clses[i - 2]
                    mask_splitted_score = mask_assigned_scores[i - 2]
                    mask_batch_ind = mask_assigned_batch_inds[i - 2]
                    mask_cropped_feature, mask_rois_to_crop_and_resize = ROIAlign(
                        pyramid[p],
                        mask_splitted_roi,
                        mask_batch_ind,
                        image_height,
                        image_width,
                        stride=2**i,
                        pooled_height=14,
                        pooled_width=14)
                    mask_cropped_features.append(mask_cropped_feature)
                    mask_ordered_rois.append(mask_splitted_roi)
                    mask_ordered_clses.append(mask_splitted_cls)
                    mask_ordered_scores.append(mask_splitted_score)

                mask_cropped_features = tf.concat(values=mask_cropped_features,
                                                  axis=0)
                mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)
                mask_ordered_clses = tf.concat(values=mask_ordered_clses,
                                               axis=0)
                mask_ordered_scores = tf.concat(values=mask_ordered_scores,
                                                axis=0)

                outputs['mask_final_clses'] = mask_ordered_clses
                outputs['mask_final_scores'] = mask_ordered_scores
            """Build mask rcnn head
        mask rcnn takes cropped features and generates masks for each classes. 
        m: an array of shape (28, 28, num_classes). Note that this value is before sigmoid.
        """
            m = mask_cropped_features
            m = slim.conv2d(m,
                            256, [3, 3],
                            stride=1,
                            padding='SAME',
                            activation_fn=tf.nn.relu,
                            scope="pyramid/Conv")
            m = slim.conv2d(m,
                            256, [3, 3],
                            stride=1,
                            padding='SAME',
                            activation_fn=tf.nn.relu,
                            scope="pyramid/Conv_1")
            m = slim.conv2d(m,
                            256, [3, 3],
                            stride=1,
                            padding='SAME',
                            activation_fn=tf.nn.relu,
                            scope="pyramid/Conv_2")
            m = slim.conv2d(m,
                            256, [3, 3],
                            stride=1,
                            padding='SAME',
                            activation_fn=tf.nn.relu,
                            scope="pyramid/Conv_3")
            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu,
                                      scope="pyramid/Conv2d_transpose")
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None,
                            normalizer_fn=None,
                            scope="pyramid/Conv_4")

            outputs['mask_ordered_rois'] = mask_ordered_rois
            outputs['mask_cropped_features'] = mask_cropped_features
            outputs['mask_mask'] = m
            outputs['mask_final_mask'] = tf.nn.sigmoid(m)

            return outputs, py_scope, slim_scope
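The docstrings above describe the box outputs as [shift_x, shift_y, scale_width, scale_height] regression values that anchor_decoder / roi_decoder turn back into boxes. A numpy sketch of the standard Faster R-CNN parameterization, which is plausibly what those decoders do (clipping and other details omitted):

import numpy as np

def decode_deltas(anchors, deltas):
    """anchors: (N, 4) [x1, y1, x2, y2]; deltas: (N, 4) [dx, dy, dw, dh]."""
    w = anchors[:, 2] - anchors[:, 0] + 1.0
    h = anchors[:, 3] - anchors[:, 1] + 1.0
    cx = anchors[:, 0] + 0.5 * w
    cy = anchors[:, 1] + 0.5 * h
    pred_cx = deltas[:, 0] * w + cx      # shift_x
    pred_cy = deltas[:, 1] * h + cy      # shift_y
    pred_w = np.exp(deltas[:, 2]) * w    # scale_width
    pred_h = np.exp(deltas[:, 3]) * h    # scale_height
    return np.stack([pred_cx - 0.5 * pred_w, pred_cy - 0.5 * pred_h,
                     pred_cx + 0.5 * pred_w, pred_cy + 0.5 * pred_h], axis=1)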
Code Example #8
def build_heads(pyramid,
                ih,
                iw,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
    outputs = {}
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    my_sigmoid = None
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            # for p in pyramid:
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

                anchor_scales = [2**(i - 2), 2**(i - 1), 2**(i)]
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors
                }

            ## gather all rois
            # print (outputs['rpn'])
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                for p in range(5, 1, -1)
            ]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors
            # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs,
                                                     rpn_anchors, ih, iw)
            # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                    sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

            # if is_training:
            #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
            #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                    assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
            cropped_rois = []
            for i in range(5, 1, -1):
                p = 'P%d' % i
                splitted_rois = assigned_rois[i - 2]
                batch_inds = assigned_batch_inds[i - 2]
                cropped = ROIAlign(pyramid[p],
                                   splitted_rois,
                                   batch_inds,
                                   stride=2**i,
                                   pooled_height=14,
                                   pooled_width=14)
                cropped_rois.append(cropped)
            cropped_rois = tf.concat(values=cropped_rois, axis=0)

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3],
                                              stride=2,
                                              padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(
                refine,
                num_classes,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            box = slim.fully_connected(
                refine,
                num_classes * 4,
                activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2_prob, rois, ih, iw)

            ## for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes
                cropped_rois = []  # reset: cropped_rois is already a tensor; re-crop for the refined boxes
                # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
                cropped_rois = []  # reset: cropped_rois was concatenated into a tensor above
                for i in range(5, 1, -1):
                    splitted_rois = assigned_rois[i - 2]
                    batch_inds = assigned_batch_inds[i - 2]
                    p = 'P%d' % i
                    cropped = ROIAlign(pyramid[p],
                                       splitted_rois,
                                       batch_inds,
                                       stride=2**i,
                                       pooled_height=14,
                                       pooled_width=14)
                    cropped_rois.append(cropped)
                cropped_rois = tf.concat(values=cropped_rois, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
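A minimal sketch of how a head builder with this interface might be driven (illustrative only: the pyramid shapes, the 256-channel width, `base_anchors=9`, the input size, and the gt_boxes layout are assumptions; `build_heads` refers to the variant defined in code example #10 below):

import tensorflow as tf

# hypothetical FPN features; in the repo they come from the backbone network
ih, iw = 512, 512
pyramid = {'P%d' % i: tf.placeholder(tf.float32,
                                     [1, ih // 2**i, iw // 2**i, 256])
           for i in range(2, 6)}
gt_boxes = tf.placeholder(tf.float32, [None, 5])  # (x1, y1, x2, y2, class_id)

outputs = build_heads(pyramid, ih, iw, num_classes=81, base_anchors=9,
                      is_training=True, gt_boxes=gt_boxes)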
Code example #10
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
  """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
  outputs = {}
  if _BN:
    arg_scope = _extra_conv_arg_scope_with_bn()
    # arg_scope = _extra_conv_arg_scope_with_bn(is_training=is_training)
  else:
    arg_scope = _extra_conv_arg_scope(activation_fn=tf.nn.relu)

  with slim.arg_scope(arg_scope):
    with tf.variable_scope('pyramid'):
        ### for p in pyramid
        outputs['rpn'] = {}
        for i in range(5, 1, -1):
          p = 'P%d'%i
          stride = 2 ** i
          
          ### rpn head
          shape = tf.shape(pyramid[p])
          height, width = shape[1], shape[2]
          rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1, activation_fn=tf.nn.relu, scope='%s/rpn'%p)
          box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=None, normalizer_fn=None)
          cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01), activation_fn=None, normalizer_fn=None)

          anchor_scales = [2, 4, 8, 16, 32]  # [2**(i-2), 2**(i-1), 2**i]
          print("anchor_scales =", anchor_scales)
          all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
          outputs['rpn'][p]={'box':box, 'cls':cls, 'anchor':all_anchors}

        ### gather all rois
        rpn_boxes = [tf.reshape(outputs['rpn']['P%d'%p]['box'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_clses = [tf.reshape(outputs['rpn']['P%d'%p]['cls'], [-1, 1]) for p in range(5, 1, -1)]  
        rpn_anchors = [tf.reshape(outputs['rpn']['P%d'%p]['anchor'], [-1, 4]) for p in range(5, 1, -1)]  
        rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
        rpn_clses = tf.concat(values=rpn_clses, axis=0)
        rpn_anchors = tf.concat(values=rpn_anchors, axis=0)
        
        rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
        rpn_final_boxes, rpn_final_clses, rpn_final_scores, indexs = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)

        # carve the flat `indexs` tensor back into per-level slices (P5 first,
        # then P4..P2); each slice length is that level's flattened anchor count
        num_p5 = tf.shape(tf.reshape(outputs['rpn']['P5']['box'], [-1, 4]))[0]
        outputs['rpn']['P5']['index'] = indexs[0:num_p5]
        for i in range(4, 1, -1):
          p = 'P%d' % i
          start = outputs['rpn']['P%d' % (i + 1)]['index'][-1] + 1
          size = tf.shape(tf.reshape(outputs['rpn'][p]['box'], [-1, 4]))[0]
          outputs['rpn'][p]['index'] = indexs[start:start + size]

        outputs['rpn_boxes'] = rpn_boxes
        outputs['rpn_clses'] = rpn_clses
        outputs['rpn_anchor'] = rpn_anchors
        outputs['rpn_final_boxes'] = rpn_final_boxes
        outputs['rpn_final_clses'] = rpn_final_clses
        outputs['rpn_final_scores'] = rpn_final_scores
        outputs['rpn_indexs'] = indexs

        if is_training:
          ### for training, rcnn and maskrcnn take rpn boxes as inputs
          rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn, rpn_rois_to_mask, rpn_scores_to_mask, rpn_batch_inds_to_mask, rpn_indexs_to_mask = \
                sample_rpn_outputs_with_gt(rpn_final_boxes, rpn_final_scores, gt_boxes, indexs, is_training=is_training, only_positive=False)
          # rcnn_rois, rcnn_scores, rcnn_batch_inds, rcnn_indexs, mask_rois, mask_scores, mask_batch_inds, mask_indexs = \
          #       sample_rpn_outputs_with_gt(rpn_final_boxes, rpn_final_scores, gt_boxes, indexs, is_training=is_training, only_positive=True)
        else:
          ### for testing, only rcnn takes rpn boxes as inputs. maskrcnn takes rcnn boxes as inputs
          rpn_rois_to_rcnn, rpn_scores_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn = sample_rpn_outputs(rpn_final_boxes, rpn_final_scores, indexs, only_positive=True)

        ### assign pyramid layer indexs to rcnn network's ROIs
        [rcnn_assigned_rois, rcnn_assigned_batch_inds, rcnn_assigned_indexs, rcnn_assigned_layer_inds] = \
                assign_boxes(rpn_rois_to_rcnn, [rpn_rois_to_rcnn, rpn_batch_inds_to_rcnn, rpn_indexs_to_rcnn], [2, 3, 4, 5])

        ### crop features from pyramid for rcnn network
        rcnn_cropped_features = []
        rcnn_ordered_rois = []
        rcnn_ordered_index = []
        for i in range(5, 1, -1):
            p = 'P%d'%i
            rcnn_splitted_roi = rcnn_assigned_rois[i-2]
            rcnn_batch_ind = rcnn_assigned_batch_inds[i-2]
            rcnn_index = rcnn_assigned_indexs[i-2]
            rcnn_cropped_feature, rcnn_rois_to_crop_and_resize, rcnn_py_shape, rcnn_ihiw = ROIAlign(pyramid[p], rcnn_splitted_roi, rcnn_batch_ind, ih, iw, stride=2**i,
                               pooled_height=14, pooled_width=14)
            rcnn_cropped_features.append(rcnn_cropped_feature)
            rcnn_ordered_rois.append(rcnn_splitted_roi)
            rcnn_ordered_index.append(rcnn_index)
            
        rcnn_cropped_features = tf.concat(values=rcnn_cropped_features, axis=0)
        rcnn_ordered_rois = tf.concat(values=rcnn_ordered_rois, axis=0)
        rcnn_ordered_index = tf.concat(values=rcnn_ordered_index, axis=0)

        ### rcnn head
        # to 7 x 7
        rcnn = slim.max_pool2d(rcnn_cropped_features, [3, 3], stride=2, padding='SAME')
        rcnn = slim.flatten(rcnn)
        rcnn = slim.fully_connected(rcnn, 1024, activation_fn=tf.nn.relu, weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
        rcnn = slim.dropout(rcnn, keep_prob=0.75, is_training=is_training)
        rcnn = slim.fully_connected(rcnn,  1024, activation_fn=tf.nn.relu, weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
        rcnn = slim.dropout(rcnn, keep_prob=0.75, is_training=is_training)
        rcnn_clses = slim.fully_connected(rcnn, num_classes, activation_fn=None, normalizer_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
        rcnn_boxes = slim.fully_connected(rcnn, num_classes*4, activation_fn=None, normalizer_fn=None, 
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
        rcnn_scores = tf.nn.softmax(rcnn_clses)

        ### decode rcnn network final outputs
        rcnn_final_boxes, rcnn_final_classes, rcnn_final_scores = roi_decoder(rcnn_boxes, rcnn_scores, rcnn_ordered_rois, ih, iw)

        outputs['rcnn_ordered_rois'] = rcnn_ordered_rois
        outputs['rcnn_ordered_index'] = rcnn_ordered_index
        outputs['rcnn_cropped_features'] = rcnn_cropped_features
        tf.add_to_collection('__CROPPED__', rcnn_cropped_features)
        outputs['rcnn_boxes'] = rcnn_boxes
        outputs['rcnn_clses'] = rcnn_clses
        outputs['rcnn_scores'] = rcnn_scores
        outputs['rcnn_final_boxes'] = rcnn_final_boxes
        outputs['rcnn_final_clses'] = rcnn_final_classes
        outputs['rcnn_final_scores'] = rcnn_final_scores
        
        ### assign pyramid layer indexs to mask network's ROIs
        if is_training:
          [mask_assigned_rois, mask_assigned_batch_inds, mask_assigned_indexs, mask_assigned_layer_inds] = \
               assign_boxes(rpn_rois_to_mask, [rpn_rois_to_mask, rpn_batch_inds_to_mask, rpn_indexs_to_mask], [2, 3, 4, 5])

          mask_cropped_features = []
          mask_ordered_rois = []
          mask_ordered_indexs = []
          ### crop features from pyramid for mask network
          for i in range(5, 1, -1):
              p = 'P%d'%i
              mask_splitted_roi = mask_assigned_rois[i-2]
              mask_batch_ind = mask_assigned_batch_inds[i-2]
              mask_index = mask_assigned_indexs[i-2]
              mask_cropped_feature, mask_rois_to_crop_and_resize, mask_py_shape, mask_ihiw = ROIAlign(pyramid[p], mask_splitted_roi, mask_batch_ind, ih, iw, stride=2**i,
                                 pooled_height=14, pooled_width=14)
              mask_cropped_features.append(mask_cropped_feature)
              mask_ordered_rois.append(mask_splitted_roi)
              mask_ordered_indexs.append(mask_index)
              
          mask_cropped_features = tf.concat(values=mask_cropped_features, axis=0)
          mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)
          mask_ordered_indexs = tf.concat(values=mask_ordered_indexs, axis=0)

        else:
          ### for testing, mask network takes rcnn boxes as inputs
          rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask, rcnn_batch_inds_to_mask, rcnn_indexs_to_mask = sample_rcnn_outputs(rcnn_final_boxes, rcnn_final_classes, rcnn_scores, rcnn_ordered_index) 
          # mask_rois, mask_clses, mask_scores, mask_batch_inds, mask_indexs = sample_rcnn_outputs(rcnn_final_boxes, rcnn_final_classes, rcnn_scores, rcnn_ordered_index) 
          [mask_assigned_rois, mask_assigned_clses, mask_assigned_scores, mask_assigned_batch_inds, mask_assign_indexs, mask_assigned_layer_inds] =\
               assign_boxes(rcnn_rois_to_mask, [rcnn_rois_to_mask, rcnn_clses_to_mask, rcnn_scores_to_mask, rcnn_batch_inds_to_mask, rcnn_indexs_to_mask], [2, 3, 4, 5])

          mask_cropped_features = []
          mask_ordered_rois = []
          mask_ordered_indexs = []
          mask_ordered_clses = []
          mask_ordered_scores = []
          for i in range(5, 1, -1):
            p = 'P%d'%i
            mask_splitted_roi = mask_assigned_rois[i-2]
            mask_splitted_cls = mask_assigned_clses[i-2]
            mask_splitted_score = mask_assigned_scores[i-2]
            mask_batch_ind = mask_assigned_batch_inds[i-2]
            mask_index = mask_assign_indexs[i-2]
            mask_cropped_feature, mask_rois_to_crop_and_resize, mask_py_shape, mask_ihiw = ROIAlign(pyramid[p], mask_splitted_roi, mask_batch_ind, ih, iw, stride=2**i,
                               pooled_height=14, pooled_width=14)
            mask_cropped_features.append(mask_cropped_feature)
            mask_ordered_rois.append(mask_splitted_roi)
            mask_ordered_indexs.append(mask_index)
            mask_ordered_clses.append(mask_splitted_cls)
            mask_ordered_scores.append(mask_splitted_score)

          mask_cropped_features = tf.concat(values=mask_cropped_features, axis=0)
          mask_ordered_rois = tf.concat(values=mask_ordered_rois, axis=0)
          mask_ordered_indexs = tf.concat(values=mask_ordered_indexs, axis=0)
          mask_ordered_clses = tf.concat(values=mask_ordered_clses, axis=0)
          mask_ordered_scores = tf.concat(values=mask_ordered_scores, axis=0)

          outputs['mask_final_clses'] = mask_ordered_clses
          outputs['mask_final_scores'] = mask_ordered_scores

        ### mask head
        m = mask_cropped_features
        for _ in range(4):
            m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # to 28 x 28
        m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID', activation_fn=tf.nn.relu)
        tf.add_to_collection('__TRANSPOSED__', m)
        m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID', activation_fn=None, normalizer_fn=None)

        outputs['mask_ordered_rois'] = mask_ordered_rois
        outputs['mask_ordered_indexs'] = mask_ordered_indexs
        outputs['mask_cropped_features'] = mask_cropped_features 
        outputs['mask_mask'] = m
        outputs['mask_final_mask'] = tf.nn.sigmoid(m)
          
        return outputs
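The per-level `index` bookkeeping above carves the flat `indexs` tensor returned by `anchor_decoder` into one contiguous slice per pyramid level. A minimal numpy sketch of the same arithmetic (the per-level anchor counts are made up):

import numpy as np

level_sizes = [12, 48, 192, 768]        # anchors on P5, P4, P3, P2 (made up)
indexs = np.arange(sum(level_sizes))    # stand-in for the decoder's flat indices

offsets = np.cumsum([0] + level_sizes)  # [0, 12, 60, 252, 1020]
per_level = {'P%d' % p: indexs[offsets[j]:offsets[j + 1]]
             for j, p in enumerate(range(5, 1, -1))}
assert per_level['P4'].shape == (48,)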
Code example #11
def build_heads(pyramid,
                ih,
                iw,
                num_classes,
                base_anchors,
                is_training=False,
                gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer, decode the output into rois 
    3. Sample rois 
    4. Build roi layer
    5. Process the results of roi layer, decode the output into boxes
    6. Build the mask layer
    7. Build losses
  """
    outputs = {}
    #arg_scope = _extra_conv_arg_scope(activation_fn=None)
    arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
    my_sigmoid = None
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            # for p in pyramid:
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2**i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p],
                                  256, [3, 3],
                                  stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1, scope='%s/rpn/box' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.001), activation_fn=my_sigmoid)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1, scope='%s/rpn/cls' % p, \
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

                anchor_scales = [2**(i - 2), 2**(i - 1), 2**(i)]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride,
                                              anchor_scales)
                outputs['rpn'][p] = {
                    'box': box,
                    'cls': cls,
                    'anchor': all_anchors
                }

            ## gather all rois
            # print (outputs['rpn'])
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                for p in range(5, 1, -1)
            ]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                for p in range(5, 1, -1)
            ]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors
            # outputs['rpn'] = {'box': rpn_boxes, 'cls': rpn_clses, 'anchor': rpn_anchors}

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs,
                                                     rpn_anchors, ih, iw)
            # rois, scores, batch_inds = sample_rpn_outputs(rois, rpn_probs[:, 1])
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                    sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes, is_training=is_training)

            # if is_training:
            #     # rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes)
            #     rois, scores, batch_inds = _add_jittered_boxes(rois, scores, batch_inds, gt_boxes, jitter=0.2)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                    assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

            outputs['assigned_rois'] = assigned_rois
            outputs['assigned_layer_inds'] = assigned_layer_inds

            cropped_rois = []
            ordered_rois = []
            pyramid_feature = []
            for i in range(5, 1, -1):
                print('cropping ROIs from P%d' % i)
                p = 'P%d' % i
                splitted_rois = assigned_rois[i - 2]
                batch_inds = assigned_batch_inds[i - 2]

                cropped, boxes_in_crop = ROIAlign_(pyramid[p],
                                                   splitted_rois,
                                                   batch_inds,
                                                   ih,
                                                   iw,
                                                   stride=2**i,
                                                   pooled_height=14,
                                                   pooled_width=14)
                # cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds, stride=2**i,
                #                    pooled_height=14, pooled_width=14)
                cropped_rois.append(cropped)
                ordered_rois.append(splitted_rois)
                pyramid_feature.append(tf.transpose(pyramid[p], [0, 3, 1, 2]))
                # if i is 5:
                #     outputs['tmp_0'] = tf.transpose(pyramid[p],[0,3,1,2])
                #     outputs['tmp_1'] = splitted_rois
                #     outputs['tmp_2'] = tf.transpose(cropped,[0,3,1,2])
                #     outputs['tmp_3'] = boxes_in_crop
                #     outputs['tmp_4'] = [ih, iw]

            cropped_rois = tf.concat(values=cropped_rois, axis=0)
            ordered_rois = tf.concat(values=ordered_rois, axis=0)

            outputs['ordered_rois'] = ordered_rois
            outputs['pyramid_feature'] = pyramid_feature

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3],
                                              stride=2,
                                              padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(
                refine,
                num_classes,
                activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.05))
            box = slim.fully_connected(
                refine,
                num_classes * 4,
                activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.05))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2_prob, ordered_rois, ih, iw)

            #outputs['tmp_0'] = ordered_rois
            #outputs['tmp_1'] = assigned_rois
            #outputs['tmp_2'] = box
            #outputs['tmp_3'] = final_boxes
            #outputs['tmp_4'] = cls2_prob

            #outputs['final_boxes'] = {'box': final_boxes, 'cls': classes}
            outputs['final_boxes'] = {
                'box': final_boxes,
                'cls': classes,
                'prob': cls2_prob
            }
            ## for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes
                # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                #       assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
                cropped_rois = []  # reset: these were concatenated into tensors above
                ordered_rois = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    splitted_rois = assigned_rois[i - 2]
                    batch_inds = assigned_batch_inds[i - 2]
                    cropped = ROIAlign(pyramid[p],
                                       splitted_rois,
                                       batch_inds,
                                       stride=2**i,
                                       pooled_height=14,
                                       pooled_width=14)
                    cropped_rois.append(cropped)
                    ordered_rois.append(splitted_rois)
                cropped_rois = tf.concat(values=cropped_rois, axis=0)
                ordered_rois = tf.concat(values=ordered_rois, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28

            m = slim.conv2d_transpose(m,
                                      256,
                                      2,
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            # -1 lets tf.reshape infer the batch dimension; the original value 0
            # would build an empty tensor in the fc_Caps reshape below
            batch_size = -1
            # m = tf.reshape(m, shape=[batch_size, 28, 28, 256])
            tf.add_to_collection('__TRANSPOSED__', m)
            # # print('m ', m.shape)
            ### add the capsule block between the convolutional layers
            with tf.variable_scope('PrimaryCaps_layer'):
                primaryCaps, activation = capslayer.layers.primaryCaps(
                    m,
                    filters=32,
                    kernel_size=3,
                    strides=2,
                    out_caps_shape=[8, 1],
                    padding='SAME')  # return [batch_size, 10,10,32, 8,1]

            with tf.variable_scope('fc_reshape_Caps_layer'):
                fc_Caps, activation = capslayer.layers.fully_connected(
                    primaryCaps,
                    activation,
                    num_outputs=7 * 7 * 1,
                    out_caps_shape=[8, 1],
                    routing_method='DynamicRouting')
                fc_Caps = tf.reshape(fc_Caps,
                                     shape=[batch_size, 7, 7, 1, 8, 1])

            with tf.variable_scope('dePrimaryCaps_layer'):
                output = capslayer.layers.dePrimaryCaps(
                    fc_Caps,
                    activation,
                    num_outputs=128,
                    kernel_size=3,
                    strides=2
                )  # [batch, 16, 16, ngf * 8 ] => [batch, 4, 4, ngf * 8]
                output = tf.layers.conv2d_transpose(output,
                                                    256,
                                                    kernel_size=9,
                                                    strides=2)
                # NOTE: `output` is never merged back into `m`, so the capsule
                # branch is effectively disconnected from the mask head below.

            m = slim.conv2d(m,
                            num_classes, [1, 1],
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
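`assign_boxes` itself is not shown in these examples; the standard FPN heuristic it would implement sends an ROI of size w x h to level k = floor(k0 + log2(sqrt(w*h)/224)) with k0 = 4, clipped to [2, 5]. A minimal numpy sketch of that rule (the repo's implementation may differ):

import numpy as np

def assign_level(rois, k_min=2, k_max=5, k0=4, canonical=224.0):
    # rois: [N, 4] as (x1, y1, x2, y2); returns one pyramid level per ROI
    w = rois[:, 2] - rois[:, 0]
    h = rois[:, 3] - rois[:, 1]
    k = np.floor(k0 + np.log2(np.sqrt(np.maximum(w * h, 1e-6)) / canonical))
    return np.clip(k, k_min, k_max).astype(np.int32)

rois = np.array([[0, 0, 50, 50], [0, 0, 448, 448]], dtype=np.float32)
print(assign_level(rois))  # -> [2 5]: small box to P2, large box to P5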
Code example #12
def build_head(pyramid, num_classes, base_anchors, is_training=False):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid
  Algo
  ----
  For each layer:
    1. Build anchor layer
    2. Process the results of anchor layer
    3. Build roi layer
    4. Process the results of roi layer
    5. Build the mask layer
    6. Build losses
  """
    outputs = {}
    inshape = pyramid['inputs'].get_shape()
    ih, iw = inshape[1].value, inshape[2].value
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    with slim.arg_scope(arg_scope):
        # for p in pyramid:
        for i in range(5, 1, -1):
            p = 'P%d' % i
            stride = 2**i
            outputs[p] = {}

            # rpn head
            # width must come from dimension 2; the original read dimension 1 twice
            height = pyramid[p].get_shape()[1].value
            width = pyramid[p].get_shape()[2].value
            rpn = slim.conv2d(pyramid[p],
                              256, [3, 3],
                              stride=1,
                              activation_fn=tf.nn.relu,
                              scope='%s/rpn' % p)
            box = slim.conv2d(rpn,
                              num_classes * base_anchors * 4, [1, 1],
                              stride=1,
                              scope='%s/rpn/box' % p)
            cls = slim.conv2d(rpn,
                              num_classes * base_anchors * 2, [1, 1],
                              stride=1,
                              scope='%s/rpn/cls' % p)
            outputs[p]['rpn'] = {'box': box, 'classes': cls}

            # decode, sample and crop
            all_anchors = gen_all_anchors(height, width, stride)
            rois, classes, scores = \
                      anchor_decoder(box, cls, all_anchors, ih, iw)
            rois, class_ids, scores = sample_rpn_outputs(rois, scores)
            cropped = ROIAlign(
                pyramid[p],
                rois,
                False,
                stride=2**i,
                pooled_height=7,
                pooled_width=7,
            )

            # refine head
            refine = slim.fully_connected(cropped,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            refine = slim.fully_connected(refine,
                                          1024,
                                          activation_fn=tf.nn.relu)
            refine = slim.dropout(refine,
                                  keep_prob=0.75,
                                  is_training=is_training)
            cls2 = slim.fully_connected(refine,
                                        num_classes,
                                        activation_fn=None)
            box = slim.fully_connected(refine,
                                       num_classes * 4,
                                       activation_fn=None)
            outputs[p]['refined'] = {'box': box, 'classes': cls2}

            # decode refine net outputs
            final_boxes, classes, scores = \
                    roi_decoder(box, cls2, rois, ih, iw)

            # for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes

            # mask head
            # rois, class_ids, scores = sample_rpn_outputs(rois, scores)
            m = ROIAlign(pyramid[p],
                         rois,
                         False,
                         stride=2**i,
                         pooled_height=14,
                         pooled_width=14)
            for _ in range(4):  # `_` avoids shadowing the pyramid-level variable `i`
                m = slim.conv2d(m,
                                256, [3, 3],
                                stride=1,
                                padding='SAME',
                                activation_fn=tf.nn.relu)
            m = slim.conv2d_transpose(m,
                                      256, [2, 2],
                                      stride=2,
                                      padding='VALID',
                                      activation_fn=tf.nn.relu)
            m = slim.conv2d(m,
                            num_classes, [1, 1],  # was hardcoded to 81 (COCO: 80 classes + background)
                            stride=1,
                            padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs[p]['mask'] = {
                'mask': m,
                'classes': classes,
                'scores': scores
            }

    return outputs
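These heads emit raw per-class mask logits at 28 x 28 (code example #10 stores `tf.nn.sigmoid(m)` directly). At inference one typically keeps only the channel of each ROI's predicted class; a minimal numpy sketch of that post-processing (the function name and layouts are illustrative):

import numpy as np

def select_masks(mask_logits, class_ids):
    # mask_logits: [R, 28, 28, num_classes] head output; class_ids: [R]
    # pick each ROI's mask channel by its predicted class, squash to [0, 1]
    r = np.arange(mask_logits.shape[0])
    logits = mask_logits[r, :, :, class_ids]  # -> [R, 28, 28]
    return 1.0 / (1.0 + np.exp(-logits))

masks = select_masks(np.zeros((3, 28, 28, 81), np.float32),
                     np.array([1, 5, 80]))
print(masks.shape)  # (3, 28, 28)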