Ejemplo n.º 1
0
 def _squeeze_excitation(self, input, num_channels, name=None):
     """Gate ``input`` with a squeeze-and-excitation branch.

     The input is globally average-pooled, passed through a ReLU
     fully-connected layer of ``int(num_channels / self.reduction_ratio)``
     units and a sigmoid fully-connected layer of ``num_channels`` units,
     and the result multiplies ``input`` (``elementwise_mul`` with
     ``axis=0``).

     Args:
         input: feature variable to re-weight.
         num_channels: output size of the excitation layer.
         name: prefix for the parameter names. Although it defaults to
             ``None``, it is concatenated with string suffixes, so a
             string must be supplied.

     Returns:
         The re-weighted feature variable.
     """
     # cuDNN pooling is requested only when mixed precision is active.
     cudnn_pool = mixed_precision_global_state() is not None
     pooled = fluid.layers.pool2d(
         input=input,
         pool_size=0,
         pool_type='avg',
         global_pooling=True,
         use_cudnn=cudnn_pool)

     # Squeeze: uniform init bounded by 1/sqrt(fan-in of the pooled input).
     bound = 1.0 / math.sqrt(float(pooled.shape[1]))
     hidden_size = int(num_channels / self.reduction_ratio)
     sqz_weights = fluid.param_attr.ParamAttr(
         initializer=fluid.initializer.Uniform(-bound, bound),
         name=name + '_sqz_weights')
     sqz_offset = ParamAttr(name=name + '_sqz_offset')
     squeezed = fluid.layers.fc(
         input=pooled,
         size=hidden_size,
         act='relu',
         param_attr=sqz_weights,
         bias_attr=sqz_offset)

     # Excitation: same init scheme, fan-in taken from the squeezed output.
     bound = 1.0 / math.sqrt(float(squeezed.shape[1]))
     exc_weights = fluid.param_attr.ParamAttr(
         initializer=fluid.initializer.Uniform(-bound, bound),
         name=name + '_exc_weights')
     exc_offset = ParamAttr(name=name + '_exc_offset')
     gate = fluid.layers.fc(
         input=squeezed,
         size=num_channels,
         act='sigmoid',
         param_attr=exc_weights,
         bias_attr=exc_offset)

     return fluid.layers.elementwise_mul(x=input, y=gate, axis=0)
Ejemplo n.º 2
0
    def build(self, feed_vars, mode='train'):
        """Wire the backbone to the YOLO head and return loss or prediction.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read; ``'gt_box'``, ``'gt_label'`` and ``'gt_score'``
                are read when ``mode == 'train'``; ``'im_size'`` otherwise.
            mode: ``'train'`` selects the loss branch, any other value the
                prediction branch.

        Returns:
            ``{'loss': ...}`` in train mode, otherwise whatever
            ``self.yolo_head.get_prediction`` returns.
        """
        image = feed_vars['image']
        use_fp16 = mixed_precision_global_state() is not None

        # Feed the backbone FP16 input when mixed precision is active.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')
        feats = self.backbone(image)

        # An ordered mapping of features is flattened to a list, in order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Bring the features back to FP32 before they reach the head.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        if mode != 'train':
            im_size = feed_vars['im_size']
            return self.yolo_head.get_prediction(feats, im_size)

        gt_box = feed_vars['gt_box']
        gt_label = feed_vars['gt_label']
        gt_score = feed_vars['gt_score']
        loss = self.yolo_head.get_loss(feats, gt_box, gt_label, gt_score)
        return {'loss': loss}
Ejemplo n.º 3
0
    def __call__(self, roi_feat):
        """Run RoI features through two ReLU fully-connected layers.

        Both layers (``fc6`` and ``fc7``) have ``self.mlp_dim`` units. When
        mixed precision is active the input is cast to FP16 first and the
        output is cast back to FP32.

        Args:
            roi_feat: feature variable; its ``shape[1:4]`` product is used
                as ``fan_out`` for the ``fc6`` Xavier initializer.

        Returns:
            The output variable of ``fc7``.
        """
        # Taken from the shape before any cast happens.
        fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
        use_fp16 = mixed_precision_global_state() is not None

        if use_fp16:
            roi_feat = fluid.layers.cast(roi_feat, 'float16')

        fc6_weights = ParamAttr(name='fc6_w', initializer=Xavier(fan_out=fan))
        # Biases use a doubled learning rate and L2Decay(0.).
        fc6_bias = ParamAttr(
            name='fc6_b', learning_rate=2., regularizer=L2Decay(0.))
        hidden = fluid.layers.fc(
            input=roi_feat,
            size=self.mlp_dim,
            act='relu',
            name='fc6',
            param_attr=fc6_weights,
            bias_attr=fc6_bias)

        fc7_weights = ParamAttr(name='fc7_w', initializer=Xavier())
        fc7_bias = ParamAttr(
            name='fc7_b', learning_rate=2., regularizer=L2Decay(0.))
        head_feat = fluid.layers.fc(
            input=hidden,
            size=self.mlp_dim,
            act='relu',
            name='fc7',
            param_attr=fc7_weights,
            bias_attr=fc7_bias)

        if not use_fp16:
            return head_feat
        return fluid.layers.cast(head_feat, 'float32')
Ejemplo n.º 4
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> FPN -> ``efficient_head`` for training or inference.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read; ``'gt_label'``, ``'gt_target'`` and ``'fg_num'``
                are read in train mode, ``'im_info'`` otherwise.
            mode: ``'train'`` for the loss branch, anything else for
                prediction.

        Returns:
            In train mode, the head's loss dict with an added ``'loss'``
            entry equal to ``loss_cls + self.box_loss_weight * loss_bbox``;
            otherwise the head's prediction.
        """
        training = mode == 'train'

        image = feed_vars['image']
        if training:
            gt_labels = feed_vars['gt_label']
            gt_targets = feed_vars['gt_target']
            fg_num = feed_vars['fg_num']
        else:
            im_info = feed_vars['im_info']

        # Backbone runs in FP16 under mixed precision; the FPN gets FP32.
        use_fp16 = mixed_precision_global_state() is not None
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')
        feats = self.backbone(image)
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]
        feats = self.fpn(feats)

        # XXX not used for training, but the parameters are needed when
        # exporting inference model
        anchors = self.anchor_grid()

        if not training:
            return self.efficient_head.get_prediction(feats, anchors, im_info)

        loss = self.efficient_head.get_loss(feats, gt_labels, gt_targets,
                                            fg_num)
        cls_term = loss['loss_cls']
        bbox_term = loss['loss_bbox']
        loss['loss'] = cls_term + self.box_loss_weight * bbox_term
        return loss
Ejemplo n.º 5
0
    def build(self, feed_vars, mode='train', exclude_nms=False):
        """Build backbone -> ``ttf_head`` and return losses or boxes.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read; ``'ttf_heatmap'``, ``'ttf_box_target'`` and
                ``'ttf_reg_weight'`` in train mode; ``'scale_factor'``
                otherwise.
            mode: ``'train'`` for losses; the head is built with
                ``is_test=True`` only when ``mode == 'test'``.
            exclude_nms: accepted but not used by this method.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the result of
            ``self.ttf_head.get_bboxes``.
        """
        image = feed_vars['image']
        use_fp16 = mixed_precision_global_state() is not None

        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')
        feats = self.backbone(image)

        # Flatten an ordered mapping of features to a list, keeping order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Features return to FP32 before the head.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        is_test = mode == 'test'
        predict_hm, predict_wh = self.ttf_head.get_output(
            feats, 'ttf_head', is_test=is_test)

        if mode != 'train':
            return self.ttf_head.get_bboxes(predict_hm, predict_wh,
                                            feed_vars['scale_factor'])

        heatmap = feed_vars['ttf_heatmap']
        box_target = feed_vars['ttf_box_target']
        reg_weight = feed_vars['ttf_reg_weight']
        loss = self.ttf_head.get_loss(predict_hm, predict_wh, heatmap,
                                      box_target, reg_weight)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 6
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> ``multi_box_head`` and return loss or boxes.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read; ``'gt_bbox'`` and ``'gt_class'`` are read when
                ``mode`` is ``'train'`` or ``'eval'``.
            mode: ``'train'`` returns the summed SSD loss; any other value
                returns decoded predictions.

        Returns:
            ``{'loss': ...}`` in train mode, else ``{'bbox': ...}``.
        """
        image = feed_vars['image']
        if mode in ('train', 'eval'):
            gt_bbox = feed_vars['gt_bbox']
            gt_class = feed_vars['gt_class']

        use_fp16 = mixed_precision_global_state() is not None
        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Flatten an ordered mapping of features to a list, keeping order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Features return to FP32 before the head.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        # Note: the head receives the (possibly FP16-cast) image variable.
        locs, confs, box, box_var = self.multi_box_head(
            inputs=feats, image=image, num_classes=self.num_classes)

        if mode != 'train':
            return {'bbox': self.output_decoder(locs, confs, box, box_var)}

        per_prior_loss = fluid.layers.ssd_loss(locs, confs, gt_bbox, gt_class,
                                               box, box_var)
        return {'loss': fluid.layers.reduce_sum(per_prior_loss)}
    def build(self, feed_vars, mode='train'):
        """Build backbone -> FPN -> ``retina_head`` and return losses or predictions.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` and
                ``'im_info'`` are always read; ``'gt_bbox'``, ``'gt_class'``
                and ``'is_crowd'`` are read in train mode.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the head's prediction.
        """
        training = mode == 'train'

        image = feed_vars['image']
        im_info = feed_vars['im_info']
        if training:
            gt_bbox = feed_vars['gt_bbox']
            gt_class = feed_vars['gt_class']
            is_crowd = feed_vars['is_crowd']

        use_fp16 = mixed_precision_global_state() is not None
        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Features return to FP32, keeping the mapping's keys and order.
        if use_fp16:
            fp32_feats = OrderedDict()
            for key, feat in feats.items():
                fp32_feats[key] = fluid.layers.cast(feat, 'float32')
            feats = fp32_feats

        feats, spatial_scale = self.fpn.get_output(feats)

        if not training:
            return self.retina_head.get_prediction(feats, spatial_scale,
                                                   im_info)

        loss = self.retina_head.get_loss(feats, spatial_scale, im_info,
                                         gt_bbox, gt_class, is_crowd)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 8
0
    def depthwise_separable(self,
                            input,
                            num_filters1,
                            num_filters2,
                            num_groups,
                            stride,
                            scale,
                            name=None):
        """Stack a grouped 3x3 conv-norm and a 1x1 conv-norm.

        Args:
            input: input feature variable.
            num_filters1: filter count of the 3x3 conv before scaling.
            num_filters2: filter count of the 1x1 conv before scaling.
            num_groups: group count of the 3x3 conv before scaling.
            stride: stride of the 3x3 conv (the 1x1 conv uses stride 1).
            scale: multiplier applied to both filter counts and the group
                count; each product is truncated with ``int``.
            name: prefix for layer names. Although it defaults to ``None``,
                it is concatenated with string suffixes, so a string must
                be supplied.

        Returns:
            Output of the 1x1 conv-norm layer.
        """
        # cuDNN is requested for the grouped conv only under mixed precision.
        cudnn_for_dw = mixed_precision_global_state() is not None

        dw_filters = int(num_filters1 * scale)
        dw_groups = int(num_groups * scale)
        dw_out = self._conv_norm(
            input=input,
            filter_size=3,
            num_filters=dw_filters,
            stride=stride,
            padding=1,
            num_groups=dw_groups,
            use_cudnn=cudnn_for_dw,
            name=name + "_dw")

        pw_filters = int(num_filters2 * scale)
        return self._conv_norm(
            input=dw_out,
            filter_size=1,
            num_filters=pw_filters,
            stride=1,
            padding=0,
            name=name + "_sep")
Ejemplo n.º 9
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> optional FPN -> mask head and bbox head.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read. In train mode ``'fg_num'`` is read, plus any of
                ``'ins_label<i>'``, ``'cate_label<i>'`` and
                ``'grid_order<i>'`` that are present for
                ``i in range(self.num_level)``. Otherwise ``'im_info'`` is
                read.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            In train mode, the bbox head's loss dict plus a ``'loss'`` entry
            that sums all of its values; otherwise the result of
            ``self.bbox_head.get_prediction``.
        """
        image = feed_vars['image']
        use_fp16 = mixed_precision_global_state() is not None

        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # The FPN's spatial scale is returned but not needed here.
        if self.fpn is not None:
            feats, spatial_scale = self.fpn.get_output(feats)

        # Flatten an ordered mapping of features to a list, keeping order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Features return to FP32 (after the FPN) before the heads.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        mask_feat_pred = self.mask_head.get_output(feats)

        if mode != 'train':
            im_info = feed_vars['im_info']
            outs = self.bbox_head.get_outputs(feats, is_eval=True)
            seg_inputs = outs + (mask_feat_pred, im_info)
            return self.bbox_head.get_prediction(*seg_inputs)

        ins_labels, cate_labels, grid_orders = [], [], []
        fg_num = feed_vars['fg_num']

        # Per-level targets are optional: only keys that exist are collected.
        buckets = (('ins_label', ins_labels), ('cate_label', cate_labels),
                   ('grid_order', grid_orders))
        for level in range(self.num_level):
            for prefix, bucket in buckets:
                key = '{}{}'.format(prefix, level)
                if key in feed_vars:
                    bucket.append(feed_vars[key])

        cate_preds, kernel_preds = self.bbox_head.get_outputs(feats)

        losses = self.bbox_head.get_loss(cate_preds, kernel_preds,
                                         mask_feat_pred, ins_labels,
                                         cate_labels, grid_orders, fg_num)
        losses['loss'] = fluid.layers.sum(list(losses.values()))
        return losses
Ejemplo n.º 10
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> FPN -> mask-feature head and ``solo_head``.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read. In train mode, for each index ``i`` of
                ``self.solo_head.strides`` the keys ``'layer<i>_gt_objs'``,
                ``'layer<i>_gt_clss'``, ``'layer<i>_gt_masks'`` and
                ``'layer<i>_gt_pos_idx'`` are read. Otherwise
                ``'ori_shape'`` and ``'resize_shape'`` are read.
            mode: ``'train'`` for losses, anything else for segmentation
                output.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the result of
            ``self.solo_head.get_seg``.
        """
        image = feed_vars['image']

        use_fp16 = mixed_precision_global_state() is not None
        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Features return to FP32, keeping the mapping's keys and order.
        if use_fp16:
            fp32_feats = OrderedDict()
            for key, feat in feats.items():
                fp32_feats[key] = fluid.layers.cast(feat, 'float32')
            feats = fp32_feats

        feats, spatial_scale = self.fpn.get_output(feats)

        # Mask prototypes; original author notes the shape as
        # [bs, 256, s4, s4] (not verifiable from this method).
        mask_feats = self.mask_feat_head.get_mask_feats(feats)

        if mode != 'train':
            ori_shape = feed_vars['ori_shape']
            resize_shape = feed_vars['resize_shape']

            # Per the original author's notes: each kernel_preds element is
            # [N, 256, seg_num_grid, seg_num_grid] (predicted conv kernel per
            # grid cell) and each cls_preds element is
            # [N, seg_num_grid, seg_num_grid, 80] (per-cell probabilities,
            # sigmoid already applied); levels are ordered from small to
            # large receptive field.
            kernel_preds, cls_preds = self.solo_head.get_prediction(
                feats, eval=True)
            return self.solo_head.get_seg(kernel_preds, cls_preds, mask_feats,
                                          ori_shape, resize_shape)

        # Per the original author's notes: each kernel_preds element is
        # [N, 256, seg_num_grid, seg_num_grid] (predicted conv kernel per
        # grid cell) and each cls_preds element is
        # [N, 80, seg_num_grid, seg_num_grid] (per-cell scores, sigmoid not
        # yet applied); levels are ordered from small to large receptive
        # field.
        kernel_preds, cls_preds = self.solo_head.get_prediction(
            feats, eval=False)

        gt_objs, gt_clss, gt_masks, gt_pos_idx = [], [], [], []
        for level in range(len(self.solo_head.strides)):
            prefix = 'layer%d_' % level
            gt_objs.append(feed_vars[prefix + 'gt_objs'])
            gt_clss.append(feed_vars[prefix + 'gt_clss'])
            gt_masks.append(feed_vars[prefix + 'gt_masks'])
            gt_pos_idx.append(feed_vars[prefix + 'gt_pos_idx'])

        loss = self.solo_head.get_loss(kernel_preds, cls_preds, mask_feats,
                                       gt_objs, gt_clss, gt_masks,
                                       gt_pos_idx)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 11
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> FPN -> ``yolact_head`` and return losses or predictions.

        Args:
            feed_vars: mapping of input names to variables. The fields in
                ``required_fields`` are validated through
                ``self._input_check``. ``'image'`` and ``'im_info'`` are
                always read; ``'gt_bbox'``, ``'gt_class'``, ``'gt_segm'``,
                ``'is_crowd'`` and ``'gt_num'`` are read in train mode.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the head's prediction.
        """
        if mode == 'train':
            required_fields = [
                'image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd',
                'gt_segm'
            ]
        else:
            required_fields = ['im_shape', 'im_info']
        # NOTE(review): 'gt_num' (train) and 'image' (inference) are read
        # below but are not part of the validated lists -- confirm whether
        # they should be added.
        self._input_check(required_fields, feed_vars)
        im = feed_vars['image']
        im_info = feed_vars['im_info']
        if mode == 'train':
            gt_box = feed_vars['gt_bbox']
            gt_class = feed_vars['gt_class']
            gt_segm = feed_vars['gt_segm']
            is_crowd = feed_vars['is_crowd']
            gt_num = feed_vars['gt_num']

        mixed_precision_enabled = mixed_precision_global_state() is not None
        # cast inputs to FP16
        if mixed_precision_enabled:
            im = fluid.layers.cast(im, 'float16')

        # backbone
        body_feats = self.backbone(im)

        # cast features back to FP32
        if mixed_precision_enabled:
            body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
                                     for k, v in body_feats.items())

        # FPN
        body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # YOLACT head
        if mode == 'train':
            loss = self.yolact_head.get_loss(body_feats, spatial_scale,
                                             im_info, gt_box, gt_class,
                                             gt_segm, is_crowd, gt_num)
            total_loss = fluid.layers.sum(list(loss.values()))
            loss.update({'loss': total_loss})
            return loss
        else:
            pred = self.yolact_head.get_prediction(body_feats, spatial_scale,
                                                   im_info)
            return pred
Ejemplo n.º 12
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> FPN -> ``fcos_head`` and return losses or predictions.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` and
                ``'im_info'`` are always read. In train mode, any of
                ``'labels<i>'``, ``'reg_target<i>'`` and ``'centerness<i>'``
                that are present are collected for each index ``i`` of
                ``self.fcos_head.fpn_stride``.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the head's prediction.
        """
        image = feed_vars['image']
        im_info = feed_vars['im_info']

        use_fp16 = mixed_precision_global_state() is not None
        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Features return to FP32, keeping the mapping's keys and order.
        if use_fp16:
            fp32_feats = OrderedDict()
            for key, feat in feats.items():
                fp32_feats[key] = fluid.layers.cast(feat, 'float32')
            feats = fp32_feats

        feats, spatial_scale = self.fpn.get_output(feats)

        if mode != 'train':
            return self.fcos_head.get_prediction(feats, im_info)

        tag_labels, tag_bboxes, tag_centerness = [], [], []
        # Per-level targets are optional: only keys that exist are collected.
        buckets = (('labels', tag_labels), ('reg_target', tag_bboxes),
                   ('centerness', tag_centerness))
        for level in range(len(self.fcos_head.fpn_stride)):
            for prefix, bucket in buckets:
                key = '{}{}'.format(prefix, level)
                if key in feed_vars:
                    bucket.append(feed_vars[key])

        loss = self.fcos_head.get_loss(feats, tag_labels, tag_bboxes,
                                       tag_centerness)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 13
0
 def get_add_weights(self, inputs):
     """Use fully connected layers to generate per-input fusion weights.

     Each input is globally average-pooled, the pooled tensors are
     concatenated on axis 1 and passed through ``self.squeeze_excitation``,
     and the result is arranged input-major and softmax-normalised over
     the input axis.

     Args:
         inputs: sequence of feature variables to fuse.
             # assumes each is batch-major (NCHW) with
             # ``self.num_channels`` channels -- TODO confirm

     Returns:
         Variable reshaped to ``(len(inputs), n, self.num_channels)`` with a
         softmax applied over axis 0.
     """
     # 1. avg_pool
     mixed_precision_enabled = mixed_precision_global_state() is not None
     xs = [
         fluid.layers.pool2d(input=x,
                             pool_size=0,
                             pool_type='avg',
                             global_pooling=True,
                             use_cudnn=mixed_precision_enabled)
         for x in inputs
     ]
     xs = fluid.layers.concat(xs, axis=1)
     add_weights = self.squeeze_excitation(xs,
                                           self.num_channels * len(inputs))
     # 2. (n, k*c) --> (n, k, c). Reshaping straight to (k, -1, c) would
     # reinterpret the batch-major buffer and mix weights between samples
     # whenever the batch size is greater than 1.
     add_weights = fluid.layers.reshape(
         add_weights, (-1, len(inputs), self.num_channels))
     # 3. (n, k, c) --> (k, n, c) so the softmax runs across the inputs.
     add_weights = fluid.layers.transpose(add_weights, [1, 0, 2])
     add_weights = fluid.layers.softmax(add_weights, axis=0)
     return add_weights
Ejemplo n.º 14
0
    def build(self, feed_vars, mode='train', exclude_nms=False):
        """Build backbone -> ``yolo_head`` and return losses or predictions.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read. In train mode ``'gt_bbox'``, ``'gt_class'`` and
                ``'gt_score'`` are read, plus any ``'target<i>'`` present
                for each index ``i`` of ``self.yolo_head.anchor_masks``.
                Otherwise ``'im_size'`` is read.
            mode: ``'train'`` for losses, anything else for prediction.
            exclude_nms: forwarded to ``self.yolo_head.get_prediction``.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the head's prediction.
        """
        image = feed_vars['image']
        use_fp16 = mixed_precision_global_state() is not None

        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Flatten an ordered mapping of features to a list, keeping order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Features return to FP32 before the head.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        if mode != 'train':
            im_size = feed_vars['im_size']
            # exclude_nms only for benchmark, postprocess(NMS) is not needed
            return self.yolo_head.get_prediction(
                feats, im_size, exclude_nms=exclude_nms)

        gt_bbox = feed_vars['gt_bbox']
        gt_class = feed_vars['gt_class']
        gt_score = feed_vars['gt_score']

        # Targets for the split YOLO loss; absent keys are simply skipped.
        num_output_layer = len(self.yolo_head.anchor_masks)
        target_keys = ('target{}'.format(idx)
                       for idx in range(num_output_layer))
        targets = [feed_vars[key] for key in target_keys if key in feed_vars]

        loss = self.yolo_head.get_loss(feats, gt_bbox, gt_class, gt_score,
                                       targets)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 15
0
    def build(self, feed_vars, mode='train'):
        """Build backbone -> ``yolo_head`` and return losses or predictions.

        Args:
            feed_vars: mapping of input names to variables. ``'image'`` is
                always read. In train mode ``'gt_bbox'``, ``'gt_class'`` and
                ``'gt_score'`` are read, plus any of ``'target0'``,
                ``'target1'``, ``'target2'`` that are present. Otherwise
                ``'im_size'`` is read.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            In train mode, the head's loss dict plus a ``'loss'`` entry that
            sums all of its values; otherwise the head's prediction.
        """
        image = feed_vars['image']
        use_fp16 = mixed_precision_global_state() is not None

        # Backbone input goes to FP16 under mixed precision.
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        feats = self.backbone(image)

        # Flatten an ordered mapping of features to a list, keeping order.
        if isinstance(feats, OrderedDict):
            feats = list(feats.values())

        # Features return to FP32 before the head.
        if use_fp16:
            feats = [fluid.layers.cast(feat, 'float32') for feat in feats]

        if mode != 'train':
            im_size = feed_vars['im_size']
            return self.yolo_head.get_prediction(feats, im_size)

        gt_bbox = feed_vars['gt_bbox']
        gt_class = feed_vars['gt_class']
        gt_score = feed_vars['gt_score']

        # Targets for the split YOLO loss.
        # YOLOv3 supports up to 3 output layers currently
        target_keys = ('target{}'.format(idx) for idx in range(3))
        targets = [feed_vars[key] for key in target_keys if key in feed_vars]

        loss = self.yolo_head.get_loss(feats, gt_bbox, gt_class, gt_score,
                                       targets)
        loss['loss'] = fluid.layers.sum(list(loss.values()))
        return loss
Ejemplo n.º 16
0
    def get_add_weights(self, inputs):
        """Compute spatial and channel fusion weights for ``inputs``.

        Channel weights: each input is globally average-pooled, the pooled
        tensors are concatenated on axis 1, passed through
        ``self.squeeze_excitation``, arranged input-major and
        softmax-normalised over the input axis.

        Spatial weights: the inputs are concatenated on axis 1, reduced by
        a 1x1 convolution to ``len(inputs)`` channels, arranged input-major
        and softmax-normalised over the input axis.

        Args:
            inputs: sequence of feature variables to fuse.

        Returns:
            Tuple ``(spa_weights, cha_weights)``.
        """
        # ---- channel weights ----
        cudnn_pool = mixed_precision_global_state() is not None
        pooled = []
        for feat in inputs:
            pooled.append(
                fluid.layers.pool2d(
                    input=feat,
                    pool_size=0,
                    pool_type='avg',
                    global_pooling=True,
                    use_cudnn=cudnn_pool))
        channel_w = fluid.layers.concat(pooled, axis=1)
        channel_w = self.squeeze_excitation(channel_w,
                                            self.num_channels * len(inputs))
        # (n, k*c) --> (n, k, c), with k = len(inputs)
        channel_w = fluid.layers.reshape(
            channel_w, (-1, len(inputs), self.num_channels))
        # (n, k, c) --> (k, n, c), then normalise across the k inputs
        channel_w = fluid.layers.transpose(channel_w, [1, 0, 2])
        channel_w = fluid.layers.softmax(channel_w, axis=0)

        # ---- spatial weights ----
        stacked = fluid.layers.concat(inputs, axis=1)
        weight_attr = ParamAttr(name='spafuse_weights')
        bias_attr = ParamAttr(name='spafuse_bias')
        spatial_w = fluid.layers.conv2d(
            stacked,
            len(inputs),
            filter_size=1,
            param_attr=weight_attr,
            bias_attr=bias_attr)
        # (n, k, h, w) --> (n, k, 1, h, w)
        spatial_w = fluid.layers.unsqueeze(spatial_w, axes=[2])
        # (n, k, 1, h, w) --> (k, n, 1, h, w), then normalise across inputs
        spatial_w = fluid.layers.transpose(spatial_w, [1, 0, 2, 3, 4])
        spatial_w = fluid.layers.softmax(spatial_w, axis=0)

        return spatial_w, channel_w
    def BlazeBlock(self,
                   input,
                   in_channels,
                   out_channels,
                   double_channels=None,
                   stride=1,
                   use_5x5kernel=True,
                   name=None):
        """Build a residual block of depthwise + pointwise conv-norm stages.

        Stage 1 is a depthwise conv (one 5x5, or two stacked 3x3) followed
        by a 1x1 conv to ``out_channels``. When ``double_channels`` is
        given, a second stage of the same shape follows and ends in a 1x1
        conv to ``double_channels``. The result is added to a shortcut
        (the input itself, or a pooled and 1x1-projected input when
        ``stride`` is 2) with a ReLU.

        Args:
            input: input feature variable.
            in_channels: filter and group count of the stage-1 depthwise
                conv(s).
            out_channels: filter count of the stage-1 1x1 conv.
            double_channels: filter count of the stage-2 1x1 conv; ``None``
                disables stage 2.
            stride: 1 or 2 (asserted); applied by the depthwise conv of
                stage 1 (the second 3x3 when two are stacked).
            use_5x5kernel: choose a single 5x5 depthwise conv instead of two
                3x3 ones.
            name: prefix for layer names. Although it defaults to ``None``,
                it is concatenated with string suffixes, so a string must
                be supplied.

        Returns:
            Output of the final ``elementwise_add`` with ReLU.
        """
        assert stride in [1, 2]
        downsample = stride != 1
        two_stage = double_channels is not None
        # The first pointwise conv is activated only when a second stage follows.
        stage1_act = 'relu' if two_stage else None
        cudnn_for_dw = mixed_precision_global_state() is not None

        # ---- stage 1: depthwise ----
        if not use_5x5kernel:
            dw_first = self._conv_norm(
                input=input,
                filter_size=3,
                num_filters=in_channels,
                stride=1,
                padding=1,
                num_groups=in_channels,
                use_cudnn=cudnn_for_dw,
                name=name + "1_dw_1")
            dw_out = self._conv_norm(
                input=dw_first,
                filter_size=3,
                num_filters=in_channels,
                stride=stride,
                padding=1,
                num_groups=in_channels,
                use_cudnn=cudnn_for_dw,
                name=name + "1_dw_2")
        else:
            dw_out = self._conv_norm(
                input=input,
                filter_size=5,
                num_filters=in_channels,
                stride=stride,
                padding=2,
                num_groups=in_channels,
                use_cudnn=cudnn_for_dw,
                name=name + "1_dw")

        # ---- stage 1: pointwise ----
        block_out = self._conv_norm(
            input=dw_out,
            filter_size=1,
            num_filters=out_channels,
            stride=1,
            padding=0,
            act=stage1_act,
            name=name + "1_sep")

        if two_stage:
            # ---- stage 2: depthwise ----
            if not use_5x5kernel:
                dw_first = self._conv_norm(
                    input=block_out,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=cudnn_for_dw,
                    name=name + "2_dw_1")
                dw_out = self._conv_norm(
                    input=dw_first,
                    filter_size=3,
                    num_filters=out_channels,
                    stride=1,
                    padding=1,
                    num_groups=out_channels,
                    use_cudnn=cudnn_for_dw,
                    name=name + "2_dw_2")
            else:
                # NOTE(review): unlike every other depthwise conv in this
                # block, no num_groups is passed here, so _conv_norm's
                # default applies. Kept as-is because changing it would
                # alter parameter shapes -- confirm whether it is intended.
                dw_out = self._conv_norm(
                    input=block_out,
                    filter_size=5,
                    num_filters=out_channels,
                    stride=1,
                    padding=2,
                    use_cudnn=cudnn_for_dw,
                    name=name + "2_dw")

            # ---- stage 2: pointwise ----
            block_out = self._conv_norm(
                input=dw_out,
                filter_size=1,
                num_filters=double_channels,
                stride=1,
                padding=0,
                name=name + "2_sep")

        # ---- shortcut ----
        shortcut = input
        if downsample:
            # Pool the input and project it to the block's output width.
            shortcut_channel = double_channels or out_channels
            pooled = self._pooling_block(input, stride, stride)
            shortcut = self._conv_norm(
                input=pooled,
                filter_size=1,
                num_filters=shortcut_channel,
                stride=1,
                padding=0,
                name="shortcut" + name)
        return fluid.layers.elementwise_add(
            x=shortcut, y=block_out, act='relu')
Ejemplo n.º 18
0
    def build(self, feed_vars, mode='train'):
        """Build the three-stage cascade detector with a mask branch.

        Pipeline: backbone -> optional FPN -> RPN proposals -> three
        refinement stages (assign / extract RoI features / bbox head) ->
        mask branch on the third-stage proposals.

        Args:
            feed_vars: mapping of input names to variables. The fields in
                ``required_fields`` are validated through
                ``self._input_check``; ``'image'`` and ``'im_info'`` are
                always read, ``'im_shape'`` at inference, and
                ``'gt_bbox'``, ``'is_crowd'``, ``'gt_class'`` and
                ``'gt_mask'`` in train mode.
            mode: ``'train'`` for losses, anything else for prediction.

        Returns:
            * train mode: the loss dict (bbox, RPN and mask losses) plus a
              ``'loss'`` entry that sums all of its values;
            * inference with ``self.rpn_only``: ``{'proposal': rois}``;
            * otherwise: ``{'bbox': ..., 'mask': ...}``.
        """
        if mode == 'train':
            required_fields = [
                'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
            ]
        else:
            required_fields = ['im_shape', 'im_info']
        self._input_check(required_fields, feed_vars)

        im = feed_vars['image']
        if mode == 'train':
            gt_bbox = feed_vars['gt_bbox']
            is_crowd = feed_vars['is_crowd']

        im_info = feed_vars['im_info']

        mixed_precision_enabled = mixed_precision_global_state() is not None
        # cast inputs to FP16
        if mixed_precision_enabled:
            im = fluid.layers.cast(im, 'float16')

        # backbone
        body_feats = self.backbone(im)

        # cast features back to FP32
        if mixed_precision_enabled:
            body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
                                     for k, v in body_feats.items())

        # FPN
        # spatial_scale is used unconditionally below, so it must be bound
        # even when no FPN is configured (it used to raise
        # UnboundLocalError in that case).
        # NOTE(review): without an FPN the RoI extractor now receives
        # spatial_scale=None -- confirm it supports that configuration.
        spatial_scale = None
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # rpn proposals
        rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

        if mode == 'train':
            rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
        else:
            if self.rpn_only:
                # Divide the proposals by the slice [2:3] taken on axis 1
                # of im_info (named im_scale here).
                im_scale = fluid.layers.slice(im_info, [1],
                                              starts=[2],
                                              ends=[3])
                im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois)
                rois = rpn_rois / im_scale
                return {'proposal': rois}

        proposal_list = []
        roi_feat_list = []
        rcnn_pred_list = []
        rcnn_target_list = []

        proposals = None
        bbox_pred = None
        # Three cascade stages: each stage after the first refines the
        # previous stage's proposals with that stage's box predictions.
        for i in range(3):
            if i > 0:
                refined_bbox = self._decode_box(
                    proposals,
                    bbox_pred,
                    curr_stage=i - 1,
                )
            else:
                refined_bbox = rpn_rois

            if mode == 'train':
                outs = self.bbox_assigner(input_rois=refined_bbox,
                                          feed_vars=feed_vars,
                                          curr_stage=i)

                proposals = outs[0]
                rcnn_target_list.append(outs)
            else:
                proposals = refined_bbox
            proposal_list.append(proposals)

            # extract roi features
            roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale)
            roi_feat_list.append(roi_feat)

            # bbox head; stages after the first get a '_<stage>' name suffix
            cls_score, bbox_pred = self.bbox_head.get_output(
                roi_feat,
                wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i],
                name=('_' + str(i + 1)) if i > 0 else '')
            rcnn_pred_list.append((cls_score, bbox_pred))

        # get mask rois (proposals of the last stage)
        rois = proposal_list[2]

        if mode == 'train':
            loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list,
                                           self.cascade_rcnn_loss_weight)
            loss.update(rpn_loss)

            # Second output of the last stage's bbox assigner.
            labels_int32 = rcnn_target_list[2][1]

            mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
                rois=rois,
                gt_classes=feed_vars['gt_class'],
                is_crowd=feed_vars['is_crowd'],
                gt_segms=feed_vars['gt_mask'],
                im_info=feed_vars['im_info'],
                labels_int32=labels_int32)

            if self.fpn is None:
                bbox_head_feat = self.bbox_head.get_head_feat()
                feat = fluid.layers.gather(bbox_head_feat, roi_has_mask_int32)
            else:
                feat = self.roi_extractor(body_feats,
                                          mask_rois,
                                          spatial_scale,
                                          is_mask=True)
            mask_loss = self.mask_head.get_loss(feat, mask_int32)
            loss.update(mask_loss)

            total_loss = fluid.layers.sum(list(loss.values()))
            loss.update({'loss': total_loss})
            return loss
        else:
            mask_name = 'mask_pred'
            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats, spatial_scale, im_info, mask_name, bbox_pred,
                roi_feat_list, rcnn_pred_list, proposal_list,
                feed_vars['im_shape'])
            return {'bbox': bbox_pred, 'mask': mask_pred}
Ejemplo n.º 19
0
    def build(self, feed_vars, mode='train'):
        """Build the box-and-mask detection graph for training or inference.

        Args:
            feed_vars (dict): input variables keyed by field name. 'image'
                and 'im_info' are always read. 'gt_class', 'gt_bbox',
                'gt_mask' and 'is_crowd' are required when mode is 'train';
                'im_shape' is required otherwise.
            mode (str): 'train' builds the losses; any other value builds
                the inference outputs.

        Returns:
            dict: in train mode, the merged RPN, bbox-head and mask-head
            losses plus their sum under the key 'loss'. Otherwise
            {'proposal': rois} when self.rpn_only is set, else
            {'bbox': bbox_pred, 'mask': mask_pred}.
        """
        is_train = mode == 'train'
        if is_train:
            required_fields = [
                'gt_class', 'gt_bbox', 'gt_mask', 'is_crowd', 'im_info'
            ]
        else:
            required_fields = ['im_shape', 'im_info']
        self._input_check(required_fields, feed_vars)

        image = feed_vars['image']
        im_info = feed_vars['im_info']

        # Mixed precision: run the backbone on an FP16 image, then bring the
        # resulting feature maps back to FP32 for the heads.
        use_fp16 = mixed_precision_global_state() is not None
        if use_fp16:
            image = fluid.layers.cast(image, 'float16')

        body_feats = self.backbone(image)

        if use_fp16:
            fp32_feats = OrderedDict()
            for feat_name, feat in body_feats.items():
                fp32_feats[feat_name] = fluid.layers.cast(feat, 'float32')
            body_feats = fp32_feats

        # Optional FPN neck; spatial_scale stays None without it.
        has_fpn = self.fpn is not None
        spatial_scale = None
        if has_fpn:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # RPN proposals
        rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

        if not is_train:
            if self.rpn_only:
                # Divide the proposals by the third column of im_info,
                # expanded to match rois.
                im_scale = fluid.layers.slice(
                    im_info, [1], starts=[2], ends=[3])
                im_scale = fluid.layers.sequence_expand(im_scale, rois)
                return {'proposal': rois / im_scale}

            mask_pred, bbox_pred = self.single_scale_eval(
                body_feats, 'mask_pred', rois, im_info,
                feed_vars['im_shape'], spatial_scale)
            return {'bbox': bbox_pred, 'mask': mask_pred}

        # ---- training path ----
        gt_bbox = feed_vars['gt_bbox']
        is_crowd = feed_vars['is_crowd']
        rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)

        gt_class = feed_vars['gt_class']
        assigned = self.bbox_assigner(
            rpn_rois=rois,
            gt_classes=gt_class,
            is_crowd=is_crowd,
            gt_boxes=gt_bbox,
            im_info=im_info)
        rois = assigned[0]
        labels_int32 = assigned[1]

        if has_fpn:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)
        else:
            # Without FPN only the last backbone feature map is used.
            last_name = list(body_feats.keys())[-1]
            roi_feat = self.roi_extractor(body_feats[last_name], rois)

        # Remaining assigner outputs are forwarded to the bbox loss as-is.
        loss = self.bbox_head.get_loss(roi_feat, labels_int32, *assigned[2:])
        loss.update(rpn_loss)

        mask_rois, roi_has_mask_int32, mask_int32 = self.mask_assigner(
            rois=rois,
            gt_classes=gt_class,
            is_crowd=is_crowd,
            gt_segms=feed_vars['gt_mask'],
            im_info=im_info,
            labels_int32=labels_int32)

        if has_fpn:
            mask_feat = self.roi_extractor(
                body_feats, mask_rois, spatial_scale, is_mask=True)
        else:
            # Reuse the bbox head feature, keeping only the rows selected by
            # roi_has_mask_int32.
            head_feat = self.bbox_head.get_head_feat()
            mask_feat = fluid.layers.gather(head_feat, roi_has_mask_int32)

        loss.update(self.mask_head.get_loss(mask_feat, mask_int32))

        loss.update({'loss': fluid.layers.sum(list(loss.values()))})
        return loss
Ejemplo n.º 20
0
    def build(self, feed_vars, mode='train'):
        """Build the two-stage detection graph for training or inference.

        Args:
            feed_vars (dict): input variables keyed by field name. 'image'
                and 'im_info' are always read. 'gt_class', 'gt_bbox' and
                'is_crowd' are required when mode is 'train'; 'im_shape' is
                required otherwise.
            mode (str): 'train' builds the losses; any other value builds
                the inference outputs.

        Returns:
            dict: in train mode, the merged RPN and bbox-head losses plus
            their sum under the key 'loss'. Otherwise {'proposal': rois}
            when self.rpn_only is set, else whatever
            self.bbox_head.get_prediction returns.
        """
        is_train = mode == 'train'
        required_fields = (['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
                           if is_train else ['im_shape', 'im_info'])
        self._input_check(required_fields, feed_vars)

        image = feed_vars['image']
        im_info = feed_vars['im_info']
        if is_train:
            gt_bbox, is_crowd = feed_vars['gt_bbox'], feed_vars['is_crowd']
        else:
            im_shape = feed_vars['im_shape']

        fp16_on = mixed_precision_global_state() is not None

        # cast inputs to FP16
        if fp16_on:
            image = fluid.layers.cast(image, 'float16')

        body_feats = self.backbone(image)
        # Remember the backbone output names before FPN may replace
        # body_feats; the last one is needed on the no-FPN path.
        feat_names = [feat_name for feat_name in body_feats.keys()]

        # cast features back to FP32
        if fp16_on:
            restored = OrderedDict()
            for feat_name, feat in body_feats.items():
                restored[feat_name] = fluid.layers.cast(feat, 'float32')
            body_feats = restored

        without_fpn = self.fpn is None
        if not without_fpn:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

        if is_train:
            rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
            # sampled rpn proposals
            for field in ('gt_class', 'is_crowd', 'gt_bbox', 'im_info'):
                assert field in feed_vars, "{} has no {}".format(
                    feed_vars, field)
            sampled = self.bbox_assigner(
                rpn_rois=rois,
                gt_classes=feed_vars['gt_class'],
                is_crowd=feed_vars['is_crowd'],
                gt_boxes=feed_vars['gt_bbox'],
                im_info=feed_vars['im_info'])

            rois = sampled[0]
            labels_int32 = sampled[1]
            bbox_targets = sampled[2]
            inside_weights = sampled[3]
            outside_weights = sampled[4]
        elif self.rpn_only:
            # Divide the proposals by the third column of im_info, expanded
            # to match rois, and stop here.
            im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
            im_scale = fluid.layers.sequence_expand(im_scale, rois)
            return {'proposal': rois / im_scale}

        if without_fpn:
            # In models without FPN the roi extractor only uses the last
            # level of feature maps; feat_names[-1] is that level's name.
            roi_feat = self.roi_extractor(body_feats[feat_names[-1]], rois)
        else:
            roi_feat = self.roi_extractor(body_feats, rois, spatial_scale)

        if not is_train:
            return self.bbox_head.get_prediction(roi_feat, rois, im_info,
                                                 im_shape)

        loss = self.bbox_head.get_loss(roi_feat, labels_int32, bbox_targets,
                                       inside_weights, outside_weights)
        loss.update(rpn_loss)
        loss.update({'loss': fluid.layers.sum(list(loss.values()))})
        return loss
    def build(self, feed_vars, mode='train'):
        """Build the three-stage cascade detection graph.

        Args:
            feed_vars (dict): input variables keyed by field name. 'image'
                and 'im_info' are always read. 'gt_class', 'gt_bbox' and
                'is_crowd' are required when mode is 'train'; 'im_shape' is
                required otherwise. The whole dict is also forwarded to
                self.bbox_assigner in train mode.
            mode (str): 'train' builds the losses; any other value builds
                the inference outputs.

        Returns:
            dict: in train mode, the merged RPN and cascade bbox-head losses
            plus their sum under the key 'loss'. Otherwise
            {'proposal': rois} when self.rpn_only is set, else whatever
            self.bbox_head.get_prediction returns.
        """
        if mode == 'train':
            required_fields = ['gt_class', 'gt_bbox', 'is_crowd', 'im_info']
        else:
            required_fields = ['im_shape', 'im_info']
        self._input_check(required_fields, feed_vars)

        im = feed_vars['image']
        im_info = feed_vars['im_info']

        if mode == 'train':
            gt_bbox = feed_vars['gt_bbox']
            is_crowd = feed_vars['is_crowd']

        mixed_precision_enabled = mixed_precision_global_state() is not None
        # cast inputs to FP16
        if mixed_precision_enabled:
            im = fluid.layers.cast(im, 'float16')

        # backbone
        body_feats = self.backbone(im)

        # cast features back to FP32
        if mixed_precision_enabled:
            body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
                                     for k, v in body_feats.items())

        # FPN
        # spatial_scale is bound up front so that the RoI extractor call in
        # the stage loop never hits an unbound local when self.fpn is None.
        spatial_scale = None
        if self.fpn is not None:
            body_feats, spatial_scale = self.fpn.get_output(body_feats)

        # rpn proposals
        rpn_rois = self.rpn_head.get_proposals(body_feats, im_info, mode=mode)

        if mode == 'train':
            rpn_loss = self.rpn_head.get_loss(im_info, gt_bbox, is_crowd)
        else:
            if self.rpn_only:
                # Divide the proposals by the third column of im_info,
                # expanded to match rpn_rois, and stop here.
                im_scale = fluid.layers.slice(im_info, [1],
                                              starts=[2],
                                              ends=[3])
                im_scale = fluid.layers.sequence_expand(im_scale, rpn_rois)
                rois = rpn_rois / im_scale
                return {'proposal': rois}

        # Per-stage results, appended in stage order.
        proposal_list = []
        roi_feat_list = []
        rcnn_pred_list = []
        rcnn_target_list = []

        proposals = None
        bbox_pred = None
        # Last output of the previous stage's bbox_assigner call, fed back
        # into the next stage's call (None for the first stage).
        max_overlap = None
        for i in range(3):
            if i > 0:
                # Later stages start from the previous stage's proposals
                # decoded with that stage's box predictions.
                refined_bbox = self._decode_box(
                    proposals,
                    bbox_pred,
                    curr_stage=i - 1,
                )
            else:
                refined_bbox = rpn_rois

            if mode == 'train':
                outs = self.bbox_assigner(input_rois=refined_bbox,
                                          feed_vars=feed_vars,
                                          curr_stage=i,
                                          max_overlap=max_overlap)

                proposals = outs[0]
                max_overlap = outs[-1]
                # Everything except the trailing max_overlap is kept as this
                # stage's targets for the loss.
                rcnn_target_list.append(outs[:-1])
            else:
                proposals = refined_bbox
            proposal_list.append(proposals)

            # extract roi features
            roi_feat = self.roi_extractor(body_feats, proposals, spatial_scale)
            roi_feat_list.append(roi_feat)

            # bbox head: the first stage uses an empty name suffix, later
            # stages use '_2' and '_3'.
            cls_score, bbox_pred = self.bbox_head.get_output(
                roi_feat,
                wb_scalar=1.0 / self.cascade_rcnn_loss_weight[i],
                name=('_' + str(i + 1)) if i > 0 else '')
            rcnn_pred_list.append((cls_score, bbox_pred))

        if mode == 'train':
            loss = self.bbox_head.get_loss(rcnn_pred_list, rcnn_target_list,
                                           self.cascade_rcnn_loss_weight)
            loss.update(rpn_loss)
            total_loss = fluid.layers.sum(list(loss.values()))
            loss.update({'loss': total_loss})
            return loss
        else:
            pred = self.bbox_head.get_prediction(im_info,
                                                 feed_vars['im_shape'],
                                                 roi_feat_list, rcnn_pred_list,
                                                 proposal_list,
                                                 self.cascade_bbox_reg_weights,
                                                 self.cls_agnostic_bbox_reg)
            return pred