Example #1
0
    def __init__(self, classes, class_agnostic):
        """Create the RPN, the proposal-target layer and the RoI operators.

        Args:
            classes: Sized collection of classes; ``len(classes)`` is stored
                as ``n_classes``.
            class_agnostic: When truthy, box regression uses one shared set
                of 4 outputs (``box_num_classes == 1``) rather than one set
                per class.

        ``self.dout_base_model`` is read but never assigned in this method,
        so it must already be resolvable when the constructor runs
        (presumably a class attribute of a subclass -- confirm).
        """
        super(CoupleNet, self).__init__()

        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic

        # Loss placeholders.
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        if class_agnostic:
            self.box_num_classes = 1
        else:
            self.box_num_classes = self.n_classes

        # Region proposal network and proposal/target assignment.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

        pool = cfg.POOLING_SIZE
        scale = 1.0 / 16.0

        # Plain RoI operators.
        self.RCNN_roi_pool = _RoIPooling(pool, pool, scale)
        self.RCNN_roi_align = RoIAlignAvg(pool, pool, scale)
        self.RCNN_roi_crop = _RoICrop()

        # Position-sensitive RoI pooling: one operator for class scores and
        # one for box offsets; they differ only in output dimension.
        psroi_common = dict(spatial_scale=scale, group_size=pool)
        self.RCNN_psroi_pool_cls = PSRoIPool(
            pool, pool, output_dim=self.n_classes, **psroi_common)
        self.RCNN_psroi_pool_loc = PSRoIPool(
            pool, pool, output_dim=self.box_num_classes * 4, **psroi_common)

        self.avg_pooling = nn.AvgPool2d(kernel_size=pool, stride=pool)

        # The crop grid is doubled when the crop is followed by max pooling.
        if cfg.CROP_RESIZE_WITH_MAX_POOL:
            self.grid_size = pool * 2
        else:
            self.grid_size = pool
Example #2
0
    def init_modules(self):
        """Instantiate the backbone, RPN, RoI pooling, heads and losses.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``n_classes``, ``reduce``, ``class_agnostic`` and
        ``use_focal_loss`` from ``self``.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # Classification head.
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

        # Box regression head: the input width depends on ``reduce``, the
        # output width on whether boxes are shared across classes.
        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_outputs)

        # Losses are built with reduce=False, i.e. no reduction is
        # requested from the loss itself.
        self.rcnn_cls_loss = (
            FocalLoss(2) if self.use_focal_loss else
            functools.partial(F.cross_entropy, reduce=False))
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
Example #3
0
    def init_modules(self):
        """Instantiate the backbone, RPN, RoI pooling, heads and losses.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``n_classes``, ``class_agnostic`` and
        ``use_focal_loss`` from ``self``.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # Classification head: 2048 inputs, 2048 outputs per class.
        self.rcnn_cls_pred = nn.Linear(2048, self.n_classes * 2048)

        # Box regression head.
        # NOTE(review): the class-agnostic branch is a 3x3 convolution while
        # the per-class branch is a linear layer, so the two expect
        # differently shaped inputs -- confirm this is intentional.
        if self.class_agnostic:
            self.rcnn_bbox_pred = nn.Conv2d(2048, 4, 3, 1, 1)
        else:
            self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

        # Losses are built with reduce=False, i.e. no reduction is
        # requested from the loss itself.
        self.rcnn_cls_loss = (
            FocalLoss(2) if self.use_focal_loss else
            functools.partial(F.cross_entropy, reduce=False))
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
Example #4
0
    def init_modules(self):
        """Instantiate backbone, RPN, RoI pooling, 2D heads and 3D heads.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``n_classes``, ``reduce``, ``class_agnostic``,
        ``use_focal_loss`` and ``num_bins`` from ``self``.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # Classification head is a 3x3 convolution (stride 1, padding 1).
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        # Box regression head.
        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_outputs)

        # 2D losses, built with reduce=False.
        self.rcnn_cls_loss = (
            FocalLoss(2) if self.use_focal_loss else
            functools.partial(F.cross_entropy, reduce=False))
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # 3D loss.
        self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

        def two_layer_head(out_features):
            # Linear -> ReLU -> Linear with a 256-wide hidden layer.
            return nn.Sequential(
                nn.Linear(in_channels, 256),
                nn.ReLU(),
                nn.Linear(256, out_features))

        # 3D heads: dimensions (3 values), then angle and angle confidence
        # (two values per bin each).
        self.rcnn_dims_pred = two_layer_head(3)
        self.rcnn_angle_pred = two_layer_head(self.num_bins * 2)
        self.rcnn_angle_conf_pred = two_layer_head(self.num_bins * 2)
Example #5
0
    def init_modules(self):
        """Instantiate backbone, RPN, position-sensitive heads and losses.

        Reads ``feature_extractor_config``, ``rpn_config``, ``n_classes``,
        ``pooling_size`` and ``use_focal_loss`` from ``self``.
        """
        self.feature_extractor = FeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # Position-sensitive RoI pooling for class scores and box offsets.
        # NOTE(review): the pooling operators hard-code 7 while the score
        # maps below are sized from self.pooling_size -- presumably the two
        # must agree; confirm.
        self.rcnn_pooling_cls = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        self.rcnn_pooling_loc = PSRoIPool(7, 7, 1.0 / 16, 7, 4)

        # Bias-free 1x1 convolutions producing pooling_size**2 channels per
        # class (scores) and per box coordinate (offsets).
        bins = self.pooling_size * self.pooling_size
        self.rcnn_cls_base = nn.Conv2d(
            1024, self.n_classes * bins, 1, 1, 0, bias=False)
        self.rcnn_bbox_base = nn.Conv2d(1024, 4 * bins, 1, 1, 0, bias=False)

        # Bias-free 1x1 convolution from 2048 to 1024 channels.
        self.rcnn_top = nn.Conv2d(
            in_channels=2048,
            out_channels=1024,
            kernel_size=1,
            stride=1,
            padding=0,
            bias=False)

        # Losses are built with reduce=False, i.e. no reduction is
        # requested from the loss itself.
        self.rcnn_cls_loss = (
            FocalLoss(2) if self.use_focal_loss else
            functools.partial(F.cross_entropy, reduce=False))
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
Example #6
0
    def init_modules(self):
        """Instantiate backbone, RPN, pooling, 2D/3D heads and keypoints.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``n_classes``, ``reduce``, ``class_agnostic`` and
        ``use_focal_loss`` from ``self``, and calls
        ``self.modify_feature_extractor()`` right after the backbone is
        created.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.modify_feature_extractor()

        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator for the box branch.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # Separate 14x14 RoI-align operator.
        self.mask_rcnn_pooling = RoIAlignAvg(14, 14, 1.0 / 16.0)

        # Classification head is a 3x3 convolution (stride 1, padding 1).
        self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

        # Box regression head.
        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_outputs)

        # Losses, all built with reduce=False.
        self.rcnn_cls_loss = (
            FocalLoss(2) if self.use_focal_loss else
            functools.partial(F.cross_entropy, reduce=False))
        # Keypoint loss skips targets equal to -1.
        self.rcnn_kp_loss = functools.partial(
            F.cross_entropy, reduce=False, ignore_index=-1)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # 3D head (3 outputs) and its loss.
        self.rcnn_3d_pred = nn.Linear(in_channels, 3)
        self.rcnn_3d_loss = OrientationLoss(split_loss=True)

        self.keypoint_predictor = KeyPointPredictor2(1024)
Example #7
0
    def init_modules(self):
        """Instantiate backbone, RPN, pooling, heads, losses and extras.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``ndin``, ``n_classes``, ``class_agnostic``,
        ``use_focal_loss`` and ``use_self_attention`` from ``self``.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        # The backbone comes from the builder rather than a fixed class.
        self.feature_extractor = feature_extractors_builder.build(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # Classification and box regression heads share the input width.
        feat_dim = self.ndin
        self.rcnn_cls_pred = nn.Linear(feat_dim, self.n_classes)
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(feat_dim, bbox_outputs)

        # Losses are built with reduce=False, i.e. no reduction is
        # requested from the loss itself.
        if self.use_focal_loss:
            focal_kwargs = dict(gamma=2, alpha=0.2, auto_alpha=False)
            self.rcnn_cls_loss = FocalLoss(2, **focal_kwargs)
        else:
            self.rcnn_cls_loss = functools.partial(
                F.cross_entropy, reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

        # Optional single-channel 3x3 attention convolution.
        if self.use_self_attention:
            self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1)

        # Second RoI-align operator at scale 1/8, plus a 1x1 convolution
        # (512 -> 1024 channels) followed by ReLU.
        self.rcnn_pooling2 = RoIAlignAvg(
            self.pooling_size, self.pooling_size, 1.0 / 8.0)
        self.reduce_pooling = nn.Sequential(
            nn.Conv2d(512, 1024, 1, 1, 0),
            nn.ReLU())
Example #8
0
    def init_modules(self):
        """Instantiate backbone, RPN, pooling, heads and losses.

        Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
        ``pooling_size``, ``n_classes``, ``reduce``, ``class_agnostic`` and
        ``multiple_crop`` from ``self``.

        Raises:
            NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
                ``'deformable_psalign'``.

        Any other unrecognised ``pooling_mode`` leaves ``rcnn_pooling``
        unset.
        """
        self.feature_extractor = ResNetFeatureExtractor(
            self.feature_extractor_config)
        self.rpn_model = RPNModel(self.rpn_config)

        # RoI pooling operator.
        mode = self.pooling_mode
        if mode == 'align':
            size = self.pooling_size
            self.rcnn_pooling = RoIAlignAvg(size, size, 1.0 / 16.0)
        elif mode == 'ps':
            self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
        elif mode in ('psalign', 'deformable_psalign'):
            raise NotImplementedError('have not implemented yet!')

        # The score head always has two outputs, independent of n_classes.
        self.rcnn_cls_pred = nn.Linear(2048, 2)

        # Box regression head.
        in_channels = 2048 if self.reduce else 2048 * 4 * 4
        bbox_outputs = 4 if self.class_agnostic else 4 * self.n_classes
        self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_outputs)

        if self.multiple_crop:
            # Second RoI-align operator at scale 1/8 and a 1x1 fusion
            # convolution (512 -> 1024 channels).
            self.rcnn_pooling2 = RoIAlignAvg(
                self.pooling_size, self.pooling_size, 1.0 / 8.0)
            self.pooled_feat_fusion = nn.Conv2d(512, 1024, 1, 1, 0)

        # The score loss is an MSE built with reduce=False; the focal and
        # cross-entropy alternatives were commented out in the original.
        self.rcnn_cls_loss = nn.MSELoss(reduce=False)
        self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
Example #9
0
    def __init__(self,
                 classes,
                 num_layers=101,
                 pretrained=False,
                 class_agnostic=False,
                 b_save_mid_convs=False):
        """Set up the RPN, RoI operators and the configured detection core.

        Args:
            classes: Sized collection of classes; ``len(classes)`` is stored
                as ``n_classes``. Also forwarded to the parent constructor.
            num_layers: Forwarded to the parent constructor.
            pretrained: Forwarded to the parent constructor.
            class_agnostic: Forwarded to the parent constructor and stored;
                in the R-FCN branch it selects 4 box outputs instead of
                ``4 * n_classes``.
            b_save_mid_convs: Stored on the instance; not otherwise used in
                this method.

        The extra layers created depend on ``cfg.RESNET.CORE_CHOICE.USE``.
        If it matches none of the three options compared below, no
        core-specific layers are created and nothing is raised.
        """
        super(_fasterRCNN, self).__init__(classes, num_layers, pretrained,
                                          class_agnostic)
        self.classes = classes
        self.n_classes = len(classes)
        self.class_agnostic = class_agnostic
        self.b_save_mid_convs = b_save_mid_convs
        # loss
        self.RCNN_loss_cls = 0
        self.RCNN_loss_bbox = 0

        # define rpn
        # NOTE(review): self.dout_base_model is not assigned in this method;
        # presumably the parent constructor or the class provides it.
        self.RCNN_rpn = _RPN(self.dout_base_model)
        self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
        self.RCNN_roi_pool = _RoIPooling(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                         1.0 / 16.0)
        self.RCNN_roi_align = RoIAlignAvg(cfg.POOLING_SIZE, cfg.POOLING_SIZE,
                                          1.0 / 16.0)

        # The crop grid is doubled when CROP_RESIZE_WITH_MAX_POOL is set.
        self.grid_size = cfg.POOLING_SIZE * 2 if cfg.CROP_RESIZE_WITH_MAX_POOL else cfg.POOLING_SIZE
        self.RCNN_roi_crop = _RoICrop()
        # Placeholders, initialised to None here.
        self.Conv_feat_track = None
        self.rpn_rois = None

        if cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.FASTER_RCNN:
            # No additional layers are created for this core.
            print('RCNN uses Faster RCNN core.')
        elif cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.RFCN_LIGHTHEAD:
            print('RCNN uses RFCN Light Head core.')
            # The input channel is set manually since we use resnet101 only.
            # c_out is set to 10*ps*ps. c_mid is set to 256.
            # NOTE(review): the output width below is actually
            # core_depth * POOLING_SIZE**2, so "10" only holds if
            # cfg.RESNET.GLOBAL_CONTEXT_OUT_DEPTH is 10 -- confirm.
            self.relu = nn.ReLU()
            core_depth = cfg.RESNET.GLOBAL_CONTEXT_OUT_DEPTH
            ctx_size = cfg.RESNET.GLOBAL_CONTEXT_RANGE
            self.g_ctx = _global_context_layer(
                2048, core_depth * cfg.POOLING_SIZE * cfg.POOLING_SIZE, 256,
                ctx_size)
            self.RCNN_psroi_pool = PSRoIPool(cfg.POOLING_SIZE,
                                             cfg.POOLING_SIZE, 1.0 / 16.0,
                                             cfg.POOLING_SIZE, core_depth)
            # fc layer for roi-wise prediction.
            # roi_mid_c in the original paper is 2048.
            roi_mid_c = 2048
            self.fc_roi = nn.Linear(
                core_depth * cfg.POOLING_SIZE * cfg.POOLING_SIZE, roi_mid_c)
        elif cfg.RESNET.CORE_CHOICE.USE == cfg.RESNET.CORE_CHOICE.RFCN:
            print('RCNN uses R-FCN core.')
            # define extra convolution layers for psroi input.
            tmp_c_in = 2048
            # 1x1 convolution producing POOLING_SIZE**2 channels per class.
            self.rfcn_cls = nn.Conv2d(tmp_c_in,
                                      self.n_classes * cfg.POOLING_SIZE *
                                      cfg.POOLING_SIZE,
                                      kernel_size=1)
            if self.class_agnostic:
                self.rfcn_bbox = nn.Conv2d(tmp_c_in,
                                           4 * cfg.POOLING_SIZE *
                                           cfg.POOLING_SIZE,
                                           kernel_size=1)
            else:
                # Need to remove the background class for bbox regression.
                # Other circumstances are handled by torch.gather op later.
                # NOTE(review): the layer below is sized with all n_classes,
                # so nothing is removed here -- confirm that the removal
                # mentioned above happens elsewhere.
                self.rfcn_bbox = nn.Conv2d(tmp_c_in,
                                           4 * (self.n_classes) *
                                           cfg.POOLING_SIZE * cfg.POOLING_SIZE,
                                           kernel_size=1)
            # define psroi layers
            self.RCNN_psroi_score = PSRoIPool(cfg.POOLING_SIZE,
                                              cfg.POOLING_SIZE, 1.0 / 16.0,
                                              cfg.POOLING_SIZE, self.n_classes)
            if self.class_agnostic:
                self.RCNN_psroi_bbox = PSRoIPool(cfg.POOLING_SIZE,
                                                 cfg.POOLING_SIZE, 1.0 / 16.0,
                                                 cfg.POOLING_SIZE, 4)
            else:
                self.RCNN_psroi_bbox = PSRoIPool(cfg.POOLING_SIZE,
                                                 cfg.POOLING_SIZE, 1.0 / 16.0,
                                                 cfg.POOLING_SIZE,
                                                 4 * (self.n_classes))
            # define ave_roi_pooling layers.
            # Kernel and stride both equal POOLING_SIZE.
            self.ave_pooling_bbox = nn.AvgPool2d(cfg.POOLING_SIZE,
                                                 stride=cfg.POOLING_SIZE)
            self.ave_pooling_cls = nn.AvgPool2d(cfg.POOLING_SIZE,
                                                stride=cfg.POOLING_SIZE)