Example #1
    def __init__(self, cfg, din, classes=2):
        super(_RPN, self).__init__()

        self.din = din  # depth of the input feature map, e.g., 512
        self.feat_channels = din  # channel count consumed by the heads below
        self.feat_stride = cfg.FEAT_STRIDE[0]
        self.anchor_scales = cfg.ANCHOR_SCALES
        self.anchor_ratios = cfg.ANCHOR_RATIOS
        # one anchor per (scale, ratio) pair at each feature-map location
        self.num_anchors = len(self.anchor_scales) * len(self.anchor_ratios)
        self.cls_out_channels = classes

        # 1x1 heads predicting an anchor location score and per-anchor
        # (w, h) shape offsets
        self.conv_loc = nn.Conv2d(self.feat_channels, 1, 1)
        self.conv_shape = nn.Conv2d(self.feat_channels, self.num_anchors * 2,
                                    1)
        # padding=1 keeps the spatial size unchanged under the 3x3 kernel
        self.feature_adaption = nn.Conv2d(
            self.feat_channels,
            self.feat_channels,
            kernel_size=3,
            padding=1)
        self.conv_cls = nn.Conv2d(self.feat_channels,
                                  self.num_anchors * self.cls_out_channels,
                                  1)
        self.conv_reg = nn.Conv2d(self.feat_channels, self.num_anchors * 4,
                                  1)

        # define proposal layer
        self.RPN_proposal = ProposalLayer(
            cfg, self.feat_stride, self.anchor_scales, self.anchor_ratios)

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0
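
For reference, a minimal shape check of the 1x1 heads built above, using plain nn.Conv2d as in the snippet. The concrete values (din=512, 9 anchors, 2 classes, a 50x38 feature map) are hypothetical, chosen only to make the sketch runnable:

import torch
import torch.nn as nn

feat_channels, num_anchors, cls_out_channels = 512, 9, 2
conv_loc = nn.Conv2d(feat_channels, 1, 1)
conv_shape = nn.Conv2d(feat_channels, num_anchors * 2, 1)
conv_cls = nn.Conv2d(feat_channels, num_anchors * cls_out_channels, 1)
conv_reg = nn.Conv2d(feat_channels, num_anchors * 4, 1)

x = torch.randn(1, feat_channels, 50, 38)  # dummy backbone feature map
print(conv_loc(x).shape)    # torch.Size([1, 1, 50, 38])   anchor location score
print(conv_shape(x).shape)  # torch.Size([1, 18, 50, 38])  (w, h) per anchor
print(conv_cls(x).shape)    # torch.Size([1, 18, 50, 38])  class scores per anchor
print(conv_reg(x).shape)    # torch.Size([1, 36, 50, 38])  4 box deltas per anchor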
Example #2
    def __init__(self, use_xyz=True, mode='TRAIN'):
        super().__init__()
        self.training_mode = (mode == 'TRAIN')

        MODEL = importlib.import_module(cfg.RPN.BACKBONE)
        self.backbone_net = MODEL.get_model(
            input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

        # classification branch
        cls_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]
        for k in range(len(cfg.RPN.CLS_FC)):
            cls_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RPN.CLS_FC[k],
                                bn=cfg.RPN.USE_BN))
            pre_channel = cfg.RPN.CLS_FC[k]
        cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None))
        if cfg.RPN.DP_RATIO >= 0:
            cls_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_cls_layer = nn.Sequential(*cls_layers)

        # regression branch
        per_loc_bin_num = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2
        if cfg.RPN.LOC_XZ_FINE:
            reg_channel = per_loc_bin_num * 4 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
        else:
            reg_channel = per_loc_bin_num * 2 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
        reg_channel += 1  # reg y
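        # Worked example with hypothetical config values (not fixed by this
        # snippet): LOC_SCOPE = 3.0, LOC_BIN_SIZE = 0.5, NUM_HEAD_BIN = 12,
        # LOC_XZ_FINE = True gives per_loc_bin_num = int(3.0 / 0.5) * 2 = 12
        # and reg_channel = 12 * 4 + 12 * 2 + 3 + 1 = 76.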

        reg_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]
        for k in range(len(cfg.RPN.REG_FC)):
            reg_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RPN.REG_FC[k],
                                bn=cfg.RPN.USE_BN))
            pre_channel = cfg.RPN.REG_FC[k]

        reg_layers.append(
            pt_utils.Conv1d(pre_channel, reg_channel, activation=None))

        if cfg.RPN.DP_RATIO >= 0:
            reg_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))

        self.rpn_reg_layer = nn.Sequential(*reg_layers)

        if cfg.RPN.LOSS_CLS == 'DiceLoss':
            self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
        elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss':
            self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
        elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy':
            self.rpn_cls_loss_func = F.binary_cross_entropy
        else:
            raise NotImplementedError

        self.proposal_layer = ProposalLayer(mode=mode)
        self.init_weights()
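
To see what the two Sequential heads above produce, here is a stand-in sketch that swaps pt_utils.Conv1d for plain nn.Conv1d plus ReLU (the wrapper bundles conv, optional BN, and activation) and assumes 128 input channels and reg_channel = 76 as in the worked example; all values are illustrative:

import torch
import torch.nn as nn

B, C, N = 2, 128, 16384  # scenes, per-point features, points per scene
cls_head = nn.Sequential(nn.Conv1d(C, C, 1), nn.ReLU(), nn.Conv1d(C, 1, 1))
reg_head = nn.Sequential(nn.Conv1d(C, C, 1), nn.ReLU(), nn.Conv1d(C, 76, 1))

feats = torch.randn(B, C, N)
print(cls_head(feats).shape)  # torch.Size([2, 1, 16384])   one fg score per point
print(reg_head(feats).shape)  # torch.Size([2, 76, 16384])  box encoding per point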
Example #3
    def __init__(self, din, rpn_din):
        super(_RPN, self).__init__()

        self.din = din  # depth of the input feature map, e.g., 512
        self.anchor_scales = cfg.ANCHOR_SCALES
        self.anchor_ratios = cfg.ANCHOR_RATIOS
        self.feat_stride = cfg.FEAT_STRIDE[0]
        self.rpn_din = rpn_din

        # define the conv + relu layer that processes the input feature map
        self.RPN_Conv = nn.Conv2d(self.din, self.rpn_din, 3, 1, 1, bias=True)

        # define the bg/fg classification score layer
        self.nc_score_out = len(self.anchor_scales) * len(
            self.anchor_ratios) * 2  # 2(bg/fg) * 9 (anchors)
        self.RPN_cls_score = nn.Conv2d(self.rpn_din, self.nc_score_out, 1, 1,
                                       0)

        # define anchor box offset prediction layer
        self.nc_bbox_out = len(self.anchor_scales) * len(
            self.anchor_ratios) * 4  # 4(coords) * 9 (anchors)
        self.RPN_bbox_pred = nn.Conv2d(self.rpn_din, self.nc_bbox_out, 1, 1, 0)
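        # e.g., with a hypothetical 3 scales x 3 ratios: 9 anchors per
        # location, so nc_score_out = 9 * 2 = 18 and nc_bbox_out = 9 * 4 = 36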

        # define proposal layer
        self.RPN_proposal = ProposalLayer(self.feat_stride, self.anchor_scales,
                                          self.anchor_ratios)

        # define anchor target layer
        self.RPN_anchor_target = AnchorTargetLayer(cfg)

        self.rpn_loss_cls = 0
        self.rpn_loss_box = 0
Example #4
    def __init__(self, cfg, classes, pretrained=False, align=False):
        super(faster_rcnn, self).__init__()
        self.classes = classes
        self.rpn_cls_loss = 0
        self.rpn_bbox_loss = 0

        # rpn_inchannels is not defined in this snippet; 512 is an assumed
        # value (it matches a VGG16-style backbone with stride 16)
        self.rpn_inchannels = 512
        self.rpn_regression = rpn_regression(self.rpn_inchannels)
        self.proposallayer = ProposalLayer(cfg, cfg.FEAT_STRIDE[0],
                                           cfg.ANCHOR_SCALES,
                                           cfg.ANCHOR_RATIOS)
        self.proposaltargetlayer = ProposalTargetLayer(self.classes)
        # use RoIAlign when align=True, otherwise plain RoI pooling; the
        # 1.0 / 16.0 spatial scale matches a feature stride of 16
        self.roi_extraction = ROIPoolingLayer(
            (cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0)
        if align:
            self.roi_extraction = ROIAlignLayer(
                (cfg.POOLING_SIZE, cfg.POOLING_SIZE), 1.0 / 16.0, 0)

        self.regressionDim = 512
        self.ROIDim = 256

        self.Regression = nn.Sequential(
            OrderedDict([
                ('fc6',
                 nn.Linear(self.ROIDim * cfg.POOLING_SIZE * cfg.POOLING_SIZE,
                           self.regressionDim)),
                ('fc6_relu', nn.ReLU(inplace=True)),
                ('fc7',
                 nn.Linear(self.regressionDim, self.regressionDim, bias=True)),
                ('fc7_relu', nn.ReLU(inplace=True))
            ]))

        self.cls_predict = nn.Sequential(
            OrderedDict([('fc_cls', nn.Linear(self.regressionDim,
                                              self.classes))]))

        self.bbox_predict = nn.Sequential(
            OrderedDict([('fc_bbox',
                          nn.Linear(self.regressionDim, self.classes * 4))]))

        self.out_sigmoid = nn.Sigmoid()
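
A quick shape walk-through of the head above, with hypothetical values (POOLING_SIZE=7, ROIDim=256, regressionDim=512, 21 classes, 128 RoIs) standing in for the config:

import torch
import torch.nn as nn

pooled = torch.randn(128, 256, 7, 7)        # dummy output of roi_extraction
flat = pooled.flatten(1)                    # (128, 256 * 7 * 7)
regression = nn.Sequential(
    nn.Linear(256 * 7 * 7, 512), nn.ReLU(inplace=True),
    nn.Linear(512, 512), nn.ReLU(inplace=True))
feats = regression(flat)
print(nn.Linear(512, 21)(feats).shape)      # torch.Size([128, 21])  class scores
print(nn.Linear(512, 21 * 4)(feats).shape)  # torch.Size([128, 84])  per-class box deltas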
Example #5
    def __init__(self, use_xyz=True, mode='TRAIN'):
        super().__init__()
        self.training_mode = (mode == 'TRAIN')

        MODEL = importlib.import_module(cfg.RPN.BACKBONE)
        self.backbone_net = MODEL.get_model(
            input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

        # Here Conv1d is almost the same as torch's Conv1d
        # (https://pytorch.org/docs/stable/nn.html#conv1d). We use Conv1d so
        # we can batch at two levels: over scenes and over the points within
        # each scene. The input to both heads is a (B, C, N) tensor, where C
        # is the number of channels (features per point, apparently 128) and
        # N is the number of points in one scene. This way a single Conv1d
        # pass regresses the output values for all points at once. The output
        # shapes are: classification head (B, 1, N), regression head
        # (B, 9, N). Since kernel_size is 1, each output is a linear
        # combination of the channels plus a bias, just like simple linear
        # regression; in the regression head, each of the 9 outputs has its
        # own set of weights and biases. Note the output is the
        # regression/classification result for all the points, not just one.
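        # A quick check of that claim (illustrative only, not part of the
        # model): a Conv1d with kernel_size=1 on a (B, C, N) tensor matches a
        # Linear layer applied to every point independently:
        #
        #   conv = nn.Conv1d(128, 1, kernel_size=1)
        #   lin = nn.Linear(128, 1)
        #   lin.weight.data = conv.weight.data.squeeze(-1)  # (1, 128, 1) -> (1, 128)
        #   lin.bias.data = conv.bias.data
        #   x = torch.randn(2, 128, 16384)
        #   torch.allclose(conv(x),
        #                  lin(x.transpose(1, 2)).transpose(1, 2),
        #                  atol=1e-6)  # True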

        # classification branch
        cls_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]  # = 128
        for k in range(len(cfg.RPN.CLS_FC)):
            # input is 128, output is also 128
            cls_layers.append(
                pt_utils.Conv1d(
                    pre_channel, cfg.RPN.CLS_FC[k],
                    bn=cfg.RPN.USE_BN))  # bn is batch normalization
            pre_channel = cfg.RPN.CLS_FC[k]
        # sigmoid is applied in the loss function, not here
        cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None))
        # this ends up being:
        # 1st layer: 128 inputs to 128 outputs
        # 2nd layer: 128 to 1
        if cfg.RPN.DP_RATIO >= 0:
            # adds a dropout layer (ratio 0.5) after the first conv
            cls_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_cls_layer = nn.Sequential(*cls_layers)

        # regression branch
        # we do a plain regression for all 9 bbox parameters
        # (x, y, z, w, h, l, rx, ry, rz)
        reg_channel = 9

        reg_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]  # = 128
        for k in range(len(cfg.RPN.REG_FC)):  # cfg.RPN.REG_FC = [128]
            reg_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RPN.REG_FC[k],
                                bn=cfg.RPN.USE_BN))
            pre_channel = cfg.RPN.REG_FC[k]

        reg_layers.append(
            pt_utils.Conv1d(pre_channel, reg_channel, activation=None))

        # If binning + classification is used, the activation of this last
        # layer is applied in the loss instead: see "get_reg_loss" in
        # /lib/utils/loss_utils.py, which uses BinaryCrossEntropy and applies
        # a softmax (I need to change this!)

        if cfg.RPN.DP_RATIO >= 0:
            # adds a dropout layer (ratio 0.5) after the first conv
            reg_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_reg_layer = nn.Sequential(*reg_layers)
        # this ends up being:
        # 1st layer: 128 inputs to 128 outputs
        # 2nd layer: 128 to 9 outputs

        if cfg.RPN.LOSS_CLS == 'DiceLoss':
            self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
        elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss':
            self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
        elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy':
            self.rpn_cls_loss_func = F.binary_cross_entropy
        else:
            raise NotImplementedError

        # proposal layer is only used in RCNN and not in RPN
        self.proposal_layer = ProposalLayer(mode=mode)
        self.init_weights()