Example #1
    def __init__(self, num_classes, use_xyz=True, mode='TRAIN'):
        super().__init__()

        assert cfg.RPN.ENABLED or cfg.RCNN.ENABLED

        if cfg.PSP.ENABLED:
            self.psp = PSPNet(n_classes=1)
            # self.psp = PSPNet()

        if cfg.RPN.ENABLED:
            self.rpn = RPN(use_xyz=use_xyz, mode=mode)

        # merge the point-wise xyz features with the image features from PSPNet
        feature_channel = cfg.RPN.FP_MLPS[0][-1]
        self.merge_down = pt_utils.SharedMLP(
            [feature_channel * 2, feature_channel], bn=cfg.RPN.USE_BN)
        # self.merge_down = pt_utils.SharedMLP([feature_channel + 512, feature_channel], bn=cfg.RPN.USE_BN)

        if cfg.RCNN.ENABLED:
            rcnn_input_channels = 128  # channel width of the merged RPN + PSPNet features
            if cfg.RCNN.BACKBONE == 'pointnet':
                self.rcnn_net = RCNNNet(num_classes=num_classes,
                                        input_channels=rcnn_input_channels,
                                        use_xyz=use_xyz)
            elif cfg.RCNN.BACKBONE == 'pointsift':
                pass
            else:
                raise NotImplementedError
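
The merge-down step above concatenates per-point backbone features with image features of matching width, then projects the doubled channel count back down with a shared MLP (a 1x1 convolution applied point-wise). A minimal self-contained sketch of that pattern, using a plain nn.Conv2d in place of pt_utils.SharedMLP and assuming feature_channel = 128 and a (B, C, N, 1) tensor layout (both assumptions, not taken from the example):

import torch
import torch.nn as nn

feature_channel = 128  # assumed value of cfg.RPN.FP_MLPS[0][-1]

# a SharedMLP over points is equivalent to a 1x1 conv over a (B, C, N, 1) tensor
merge_down = nn.Sequential(
    nn.Conv2d(feature_channel * 2, feature_channel, kernel_size=1),
    nn.BatchNorm2d(feature_channel),
    nn.ReLU(inplace=True),
)

point_feats = torch.randn(2, feature_channel, 1024, 1)  # backbone point features
image_feats = torch.randn(2, feature_channel, 1024, 1)  # image features sampled per point
merged = merge_down(torch.cat([point_feats, image_feats], dim=1))
print(merged.shape)  # torch.Size([2, 128, 1024, 1])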
Example #2
    def __init__(self, use_xyz=True, mode='TRAIN'):
        super().__init__()
        self.training_mode = (mode == 'TRAIN')

        MODEL = importlib.import_module(cfg.RPN.BACKBONE)
        input_channels = int(cfg.RPN.USE_INTENSITY) + 3 * int(cfg.RPN.USE_BGR) \
            + 9 * int(cfg.RPN.USE_MEAN_COVARIANCE)
        self.backbone_net = MODEL.get_model(input_channels=input_channels, use_xyz=use_xyz)

        # merge the point-wise xyz features with the image features from PSPNet
        feature_channel = cfg.RPN.FP_MLPS[0][-1]
        self.merge_down_layer = pt_utils.SharedMLP([feature_channel * 2, feature_channel], bn=cfg.RPN.USE_BN)

        # classification branch
        cls_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]
        # pre_channel = cfg.RPN.FP_MLPS[0][-1] + 512
        for k in range(len(cfg.RPN.CLS_FC)):
            cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.CLS_FC[k], bn=cfg.RPN.USE_BN))
            pre_channel = cfg.RPN.CLS_FC[k]
        cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None))
        if cfg.RPN.DP_RATIO >= 0:
            cls_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_cls_layer = nn.Sequential(*cls_layers)

        # regression branch
        per_loc_bin_num = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2
        if cfg.RPN.LOC_XZ_FINE:
            reg_channel = per_loc_bin_num * 4 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
        else:
            reg_channel = per_loc_bin_num * 2 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
        reg_channel += 1  # one extra channel for the direct y-offset regression

        reg_layers = []
        pre_channel = cfg.RPN.FP_MLPS[0][-1]
        # pre_channel = cfg.RPN.FP_MLPS[0][-1] + 512
        for k in range(len(cfg.RPN.REG_FC)):
            reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.REG_FC[k], bn=cfg.RPN.USE_BN))
            pre_channel = cfg.RPN.REG_FC[k]
        reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
        if cfg.RPN.DP_RATIO >= 0:
            reg_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_reg_layer = nn.Sequential(*reg_layers)

        if cfg.RPN.LOSS_CLS == 'DiceLoss':
            self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
        elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss':
            self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=cfg.RPN.FOCAL_ALPHA[0],
                                                                               gamma=cfg.RPN.FOCAL_GAMMA)
        elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy':
            self.rpn_cls_loss_func = F.binary_cross_entropy
        else:
            raise NotImplementedError

        self.proposal_layer = ProposalLayer(mode=mode)
        self.init_weights()
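
The regression head width follows the bin-based box encoding: x and z each get per_loc_bin_num classification bins (plus the same number of intra-bin residuals when LOC_XZ_FINE is set), heading gets NUM_HEAD_BIN bins plus residuals, size gets 3 direct residuals, and y gets 1 direct offset. A worked computation with assumed config values (LOC_SCOPE=3.0, LOC_BIN_SIZE=0.5, NUM_HEAD_BIN=12, LOC_XZ_FINE=True; plausible PointRCNN-style defaults, but assumptions here):

loc_scope, loc_bin_size, num_head_bin = 3.0, 0.5, 12  # assumed config values
loc_xz_fine = True

per_loc_bin_num = int(loc_scope / loc_bin_size) * 2           # 12 bins each for x and z
if loc_xz_fine:
    reg_channel = per_loc_bin_num * 4 + num_head_bin * 2 + 3  # bins + residuals: 48 + 24 + 3
else:
    reg_channel = per_loc_bin_num * 2 + num_head_bin * 2 + 3  # bins only for x/z
reg_channel += 1                                              # direct y offset
print(reg_channel)  # 76 with the values assumed above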
Example #3
    def __init__(self, num_classes, input_channels=0, use_xyz=True):
        super().__init__()

        self.SA_modules = nn.ModuleList()
        channel_in = input_channels

        if cfg.RCNN.USE_RPN_FEATURES:
            self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
            self.xyz_up_layer = pt_utils.SharedMLP([self.rcnn_input_channel] + cfg.RCNN.XYZ_UP_LAYER,
                                                   bn=cfg.RCNN.USE_BN)
            c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
            self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN)

        for k in range(len(cfg.RCNN.SA_CONFIG.NPOINTS)):
            mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]

            npoint = cfg.RCNN.SA_CONFIG.NPOINTS[k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
            self.SA_modules.append(
                PointnetSAModule(
                    npoint=npoint,
                    radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                    nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                    mlp=mlps,
                    use_xyz=use_xyz,
                    bn=cfg.RCNN.USE_BN
                )
            )
            channel_in = mlps[-1]

        # classification layer
        cls_channel = 1 if num_classes == 2 else num_classes
        cls_layers = []
        pre_channel = channel_in
        for k in range(len(cfg.RCNN.CLS_FC)):
            cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.CLS_FC[k], bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.CLS_FC[k]
        cls_layers.append(pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.cls_layer = nn.Sequential(*cls_layers)

        if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
            self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=cfg.RCNN.FOCAL_ALPHA[0],
                                                                           gamma=cfg.RCNN.FOCAL_GAMMA)
        elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
            self.cls_loss_func = F.binary_cross_entropy
        elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
            cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
            # reduction='none' replaces the deprecated reduce=False argument
            self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='none', weight=cls_weight)
        else:
            raise NotImplementedError

        if cfg.USE_IOU_BRANCH:
            iou_branch = []
            iou_branch.append(pt_utils.Conv1d(channel_in, cfg.RCNN.REG_FC[0], bn=cfg.RCNN.USE_BN))
            iou_branch.append(pt_utils.Conv1d(cfg.RCNN.REG_FC[0], cfg.RCNN.REG_FC[1], bn=cfg.RCNN.USE_BN))
            iou_branch.append(pt_utils.Conv1d(cfg.RCNN.REG_FC[1], 1, activation=None))
            if cfg.RCNN.DP_RATIO >= 0:
                iou_branch.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
            self.iou_branch = nn.Sequential(*iou_branch)

        # regression layer
        per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
        loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
        reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
        reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

        reg_layers = []
        pre_channel = channel_in
        for k in range(len(cfg.RCNN.REG_FC)):
            reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.REG_FC[k], bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.REG_FC[k]
        reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.reg_layer = nn.Sequential(*reg_layers)

        self.proposal_target_layer = ProposalTargetLayer()
        self.init_weights(weight_init='xavier')
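
All of the heads above (cls, reg, IoU) follow one pattern: stack 1x1 Conv1d blocks from a config list, finish with an activation-free projection, then insert a Dropout at index 1 so it lands right after the first conv block. A minimal sketch of that pattern with plain torch layers (the channel widths are assumptions, not config values from the example):

import torch
import torch.nn as nn

def build_head(in_ch, fc_channels, out_ch, dp_ratio=0.5):
    """Conv1d stack + final projection, with dropout inserted after block 0."""
    layers = []
    pre = in_ch
    for ch in fc_channels:
        layers.append(nn.Sequential(nn.Conv1d(pre, ch, 1), nn.ReLU(inplace=True)))
        pre = ch
    layers.append(nn.Conv1d(pre, out_ch, 1))  # final projection, no activation
    if dp_ratio >= 0:  # a negative ratio disables dropout, as in the examples
        layers.insert(1, nn.Dropout(dp_ratio))
    return nn.Sequential(*layers)

head = build_head(128, [256, 256], 1)
scores = head(torch.randn(2, 128, 64))  # (B, 128, num_points) -> (B, 1, 64)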
Example #4
    def __init__(self,
                 num_classes,
                 num_point=512,
                 input_channels=0,
                 use_xyz=True):
        super().__init__()

        self.SA_modules = nn.ModuleList()
        self.ATT_modules = nn.ModuleList()
        channel_in = input_channels
        self.MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

        # TODO: derive the feature channel count from the config instead of hard-coding it
        #self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
        self.rcnn_input_channel = 5
        self.input_transformer = Transformer(num_point, 3)
        self.xyz_up_layer = pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                               bn=cfg.RCNN.USE_BN)

        # self.feature_transformer = Transformer(num_point, cfg.RCNN.XYZ_UP_LAYER[-1])

        self.feature_up_layer = pt_utils.SharedMLP(
            [self.rcnn_input_channel - 3] + cfg.RCNN.XYZ_UP_LAYER,
            bn=cfg.RCNN.USE_BN)
        c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
        self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out],
                                                   bn=cfg.RCNN.USE_BN)

        for k in range(len(cfg.RCNN.SA_CONFIG.NPOINTS)):

            if cfg.ATTENTION:
                self.ATT_modules.append(
                    pt_utils.SharedMLP([channel_in],
                                       bn=cfg.RCNN.USE_BN,
                                       activation=nn.ReLU(inplace=True)))

            mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]

            npoint = cfg.RCNN.SA_CONFIG.NPOINTS[k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
            self.SA_modules.append(
                PointnetSAModule(npoint=npoint,
                                 radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                                 nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                                 mlp=mlps,
                                 use_xyz=use_xyz,
                                 bn=cfg.RCNN.USE_BN))
            channel_in = mlps[-1]

        # classification layer
        cls_channel = 1 if num_classes == 2 else num_classes
        cls_layers = []
        pre_channel = channel_in
        for k in range(len(cfg.RCNN.CLS_FC)):
            cls_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RCNN.CLS_FC[k],
                                bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.CLS_FC[k]
        cls_layers.append(
            pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.cls_layer = nn.Sequential(*cls_layers)

        if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
            self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
        elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
            self.cls_loss_func = F.binary_cross_entropy
        elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
            cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
            # reduction='none' replaces the deprecated reduce=False argument
            self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1,
                                                     reduction='none',
                                                     weight=cls_weight)
        else:
            raise NotImplementedError

        # regression layer
        per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
        loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
        reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
        reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

        reg_layers = []
        pre_channel = channel_in
        for k in range(len(cfg.RCNN.REG_FC)):
            reg_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RCNN.REG_FC[k],
                                bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.REG_FC[k]
        reg_layers.append(
            pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.reg_layer = nn.Sequential(*reg_layers)

        # IoU estimation head
        if cfg.IOUN.ENABLED:
            self.cascade = cfg.CASCADE
            self.can_xyz_up_layer = nn.ModuleList()
            self.can_feature_up_layer = nn.ModuleList()
            self.can_merge_down_layer = nn.ModuleList()
            self.SA_score_modules = nn.ModuleList()
            self.ATT_score_modules = nn.ModuleList()
            self.IOU_layer = nn.ModuleList()
            self.ICL_layer = nn.ModuleList()
            self.ref_layer = nn.ModuleList()
            for i in range(self.cascade):
                for p in self.parameters():
                    p.requires_grad = False

                self.can_xyz_up_layer.append(
                    pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                       bn=cfg.RCNN.USE_BN).cuda())
                self.can_feature_up_layer.append(
                    pt_utils.SharedMLP([2] + cfg.RCNN.XYZ_UP_LAYER,
                                       bn=cfg.RCNN.USE_BN).cuda())
                c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
                self.can_merge_down_layer.append(
                    pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN))

                iou_channel_in = input_channels
                for k in range(len(cfg.IOUN.SA_CONFIG.NPOINTS)):

                    mlps = [iou_channel_in] + cfg.IOUN.SA_CONFIG.MLPS[k]

                    if cfg.ATTENTION:
                        self.ATT_score_modules.append(
                            pt_utils.SharedMLP(
                                [iou_channel_in],
                                bn=cfg.RCNN.USE_BN,
                                activation=nn.ELU(inplace=True)))

                    npoint = cfg.IOUN.SA_CONFIG.NPOINTS[k] if cfg.IOUN.SA_CONFIG.NPOINTS[k] != -1 else None
                    self.SA_score_modules.append(
                        PointnetSAModule(npoint=npoint,
                                         radius=cfg.IOUN.SA_CONFIG.RADIUS[k],
                                         nsample=cfg.IOUN.SA_CONFIG.NSAMPLE[k],
                                         mlp=mlps,
                                         use_xyz=use_xyz,
                                         bn=cfg.IOUN.USE_BN).cuda())
                    iou_channel_in = mlps[-1]

                IOU_channel = 1
                IOU_layers = []
                pre_channel = iou_channel_in
                for k in range(len(cfg.IOUN.CLS_FC)):
                    IOU_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.CLS_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.CLS_FC[k]
                IOU_layers.append(
                    pt_utils.Conv1d(pre_channel, IOU_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    IOU_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.IOU_layer.append(nn.Sequential(*IOU_layers).cuda())

                ICL_channel = 1
                ICL_layers = []
                pre_channel = iou_channel_in
                for k in range(len(cfg.IOUN.CLS_FC)):
                    ICL_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.CLS_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.CLS_FC[k]
                ICL_layers.append(
                    pt_utils.Conv1d(pre_channel, ICL_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    ICL_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.ICL_layer.append(nn.Sequential(*ICL_layers).cuda())

                # the refinement head regresses 7 direct box residuals, so no
                # bin bookkeeping is needed here
                ref_channel = 7  # x, y, z, h, w, l, ry

                ref_layers = []
                pre_channel = iou_channel_in
                for k in range(len(cfg.IOUN.REG_FC)):
                    ref_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.REG_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.REG_FC[k]
                ref_layers.append(
                    pt_utils.Conv1d(pre_channel, ref_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    ref_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.ref_layer.append(nn.Sequential(*ref_layers).cuda())

        self.init_weights(weight_init='xavier')
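
Note the freezing step inside the cascade loop above: at the start of each stage, every parameter registered so far (the shared layers plus all earlier stages) has requires_grad set to False, so only the modules created later in that same iteration stay trainable; after the loop, only the final stage trains. A minimal sketch of that behavior (the module names and sizes are placeholders):

import torch.nn as nn

class Cascade(nn.Module):
    def __init__(self, num_stages=3):
        super().__init__()
        self.base = nn.Linear(8, 8)  # stand-in for the shared backbone
        self.stages = nn.ModuleList()
        for _ in range(num_stages):
            # freeze everything registered so far: base + earlier stages
            for p in self.parameters():
                p.requires_grad = False
            self.stages.append(nn.Linear(8, 8))  # this stage stays trainable

m = Cascade()
print([n for n, p in m.named_parameters() if p.requires_grad])
# ['stages.2.weight', 'stages.2.bias'] -- only the last stage remains trainable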