コード例 #1
0
    def __init__(self, use_xyz=True, mode='TRAIN'):
        """Assemble the RPN backbone, its two per-point heads and its loss.

        Args:
            use_xyz: Forwarded unchanged to the backbone's ``get_model``.
            mode: ``self.training_mode`` is True only when this equals
                ``'TRAIN'``; the value is also handed to ``ProposalLayer``.

        Raises:
            NotImplementedError: If ``cfg.RPN.LOSS_CLS`` is not one of
                ``'DiceLoss'``, ``'SigmoidFocalLoss'`` or
                ``'BinaryCrossEntropy'``.
        """
        super().__init__()
        self.training_mode = mode == 'TRAIN'

        # The backbone implementation is picked at runtime from the module
        # path stored in the config.
        backbone_module = importlib.import_module(cfg.RPN.BACKBONE)
        self.backbone_net = backbone_module.get_model(
            input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

        def build_head(hidden_widths, out_width):
            # Chain of Conv1d layers starting from the last width of the
            # first FP_MLPS entry and ending in an activation-free output
            # conv of ``out_width`` channels.
            stack = []
            in_width = cfg.RPN.FP_MLPS[0][-1]
            for width in hidden_widths:
                stack.append(
                    pt_utils.Conv1d(in_width, width, bn=cfg.RPN.USE_BN))
                in_width = width
            stack.append(pt_utils.Conv1d(in_width, out_width, activation=None))
            # Any non-negative ratio (0 included) places a Dropout at index 1.
            if cfg.RPN.DP_RATIO >= 0:
                stack.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
            return nn.Sequential(*stack)

        # classification head: one output channel
        self.rpn_cls_layer = build_head(cfg.RPN.CLS_FC, 1)

        # regression head: the x/z bin groups are doubled when LOC_XZ_FINE is
        # set, then come the heading bins, 3 extra channels and 1 for y.
        per_loc_bin_num = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2
        xz_groups = 4 if cfg.RPN.LOC_XZ_FINE else 2
        reg_channel = (per_loc_bin_num * xz_groups
                       + cfg.RPN.NUM_HEAD_BIN * 2 + 3)
        reg_channel = reg_channel + 1  # reg y
        self.rpn_reg_layer = build_head(cfg.RPN.REG_FC, reg_channel)

        # classification loss selected by name from the config
        loss_name = cfg.RPN.LOSS_CLS
        if loss_name == 'DiceLoss':
            self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
        elif loss_name == 'SigmoidFocalLoss':
            self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
        elif loss_name == 'BinaryCrossEntropy':
            self.rpn_cls_loss_func = F.binary_cross_entropy
        else:
            raise NotImplementedError

        self.proposal_layer = ProposalLayer(mode=mode)
        self.init_weights()
コード例 #2
0
ファイル: neural_ins.py プロジェクト: c-feng/Neuron-Tracking
    def __init__(self, input_channels=0, use_xyz=True, mode="TRAIN"):
        """Create four MSG set-abstraction levels, four FP levels and the head.

        Args:
            input_channels: Feature width fed to the first set-abstraction
                level; also added to the input width of the last
                feature-propagation MLP.
            use_xyz: Forwarded to every ``PointnetSAModuleMSG``.
            mode: Accepted but not read anywhere in this constructor.
        """
        super().__init__()

        # Feature extraction. Each level's output width is the sum of the
        # last widths of its two MLP branches.
        self.SA_module1 = PointnetSAModuleMSG(
            npoint=8192,
            radii=[0.1, 0.5],
            nsamples=[16, 32],
            mlps=[[input_channels, 16, 16, 32], [input_channels, 32, 32, 64]],
            use_xyz=use_xyz,
            bn=True,
        )
        width_l1 = 32 + 64

        self.SA_module2 = PointnetSAModuleMSG(
            npoint=2048,
            radii=[0.5, 1.0],
            nsamples=[16, 32],
            mlps=[[width_l1, 64, 64, 128], [width_l1, 64, 96, 128]],
            use_xyz=use_xyz,
            bn=True,
        )
        width_l2 = 128 + 128

        self.SA_module3 = PointnetSAModuleMSG(
            npoint=512,
            radii=[1.0, 2.0],
            nsamples=[16, 32],
            mlps=[[width_l2, 128, 196, 256], [width_l2, 128, 196, 256]],
            use_xyz=use_xyz,
            bn=True,
        )
        width_l3 = 256 + 256

        self.SA_module4 = PointnetSAModuleMSG(
            npoint=128,
            radii=[2.0, 4.0],
            nsamples=[16, 32],
            mlps=[[width_l3, 256, 256, 512], [width_l3, 256, 384, 512]],
            use_xyz=use_xyz,
            bn=True,
        )
        width_l4 = 512 + 512

        # Feature propagation: each MLP's input width is the previous
        # propagation output plus the width of the matching encoder level.
        self.FP_module1 = PointnetFPModule(
            mlp=[width_l4 + width_l3, 512, 512], bn=True)
        self.FP_module2 = PointnetFPModule(
            mlp=[512 + width_l2, 512, 512], bn=True)
        self.FP_module3 = PointnetFPModule(
            mlp=[512 + width_l1, 256, 256], bn=True)
        self.FP_module4 = PointnetFPModule(
            mlp=[256 + input_channels, 128, 128], bn=True)

        # Per-point head: 128 -> 64 -> 5 channels. The last conv has no
        # batch norm and no activation.
        self.ins_fc1 = pt_utils.Conv1d(128, 64, kernel_size=1, bn=True)
        self.ins_fc2 = pt_utils.Conv1d(
            64, 5, kernel_size=1, bn=False, activation=None)
コード例 #3
0
    def __init__(self):
        """Build the IoU head (512 -> 512 -> 1) and the proposal target layer.

        A ``Dropout`` is placed between the two convolutions whenever
        ``cfg.RCNN.DP_RATIO`` is non-negative (0 included). Weights are then
        initialised through ``init_weights(weight_init='xavier')``.
        """
        super().__init__()

        feat_width = 512
        hidden_conv = pt_utils.Conv1d(
            feat_width, feat_width, bn=cfg.RCNN.USE_BN)
        # Output conv: single channel, no activation.
        score_conv = pt_utils.Conv1d(feat_width, 1, activation=None)

        if cfg.RCNN.DP_RATIO >= 0:
            stages = [hidden_conv, nn.Dropout(cfg.RCNN.DP_RATIO), score_conv]
        else:
            stages = [hidden_conv, score_conv]

        self.iou_layer = nn.Sequential(*stages)
        self.proposal_target_layer = ProposalTargetLayer()
        self.init_weights(weight_init='xavier')
コード例 #4
0
    def __init__(self, input_channels=0, use_xyz=True, mode="TRAIN"):
        """Build the config-driven MSG encoder, FP decoder and per-point head.

        The set-abstraction levels are described by ``cfg.NET.SA_NPOINTS``,
        ``cfg.NET.RADIIS``, ``cfg.NET.NSAMPLES`` and ``cfg.NET.MLPS``. The
        config must describe exactly four levels: the modules are bound to
        ``SA_module1`` .. ``SA_module4`` and the four output widths are
        unpacked individually.

        The per-level MLP specs are built as new lists. The input width is
        never written back into ``cfg.NET.MLPS``, so constructing the network
        more than once in the same process yields the same architecture each
        time.

        Args:
            input_channels: Feature width fed to the first set-abstraction
                level; also added to the input width of the last
                feature-propagation MLP.
            use_xyz: Forwarded to every ``PointnetSAModuleMSG``.
            mode: Accepted but not read anywhere in this constructor.
        """
        super().__init__()

        # Feature extraction
        npoints = cfg.NET.SA_NPOINTS
        radiis = cfg.NET.RADIIS
        nsamples = cfg.NET.NSAMPLES
        mlps = cfg.NET.MLPS

        c_in = input_channels
        c_out = []
        sa_module = []
        for i, npoint in enumerate(npoints):
            # Prefix each branch with this level's input width in a fresh
            # list, leaving the shared config lists untouched.
            level_mlps = [[c_in, *spec] for spec in mlps[i]]
            # A level's output width is the sum of its branches' last widths.
            c_out.append(sum(spec[-1] for spec in level_mlps))
            sa_module.append(
                PointnetSAModuleMSG(npoint=npoint,
                                    radii=radiis[i],
                                    nsamples=nsamples[i],
                                    mlps=level_mlps,
                                    use_xyz=use_xyz,
                                    bn=True))
            c_in = c_out[-1]

        # Bind as individual attributes so the submodule names stay
        # SA_module1 .. SA_module4.
        self.SA_module1 = sa_module[0]
        self.SA_module2 = sa_module[1]
        self.SA_module3 = sa_module[2]
        self.SA_module4 = sa_module[3]
        c_out_1, c_out_2, c_out_3, c_out_4 = c_out

        # Feature propagation: each MLP's input width is the previous
        # propagation output plus the width of the matching encoder level.
        self.FP_module1 = PointnetFPModule(mlp=[c_out_4 + c_out_3, 512, 512],
                                           bn=True)
        self.FP_module2 = PointnetFPModule(mlp=[512 + c_out_2, 512, 512],
                                           bn=True)
        self.FP_module3 = PointnetFPModule(mlp=[512 + c_out_1, 256, 256],
                                           bn=True)
        self.FP_module4 = PointnetFPModule(
            mlp=[256 + input_channels, 128, 128], bn=True)

        # Per-point head: 128 -> 64 -> 8 channels. The last conv has no
        # batch norm and no activation.
        self.ins_fc1 = pt_utils.Conv1d(128, 64, kernel_size=1, bn=True)
        self.ins_fc2 = pt_utils.Conv1d(64,
                                       8,
                                       kernel_size=1,
                                       bn=False,
                                       activation=None)
コード例 #5
0
    def __init__(self, num_classes, input_channels=0, use_xyz=True):
        """Build the RCNN stage: SA stack, heads, loss and target layer.

        Args:
            num_classes: When equal to 2 the classification head emits a
                single channel; otherwise it emits ``num_classes`` channels.
            input_channels: Input width of the first set-abstraction module.
            use_xyz: Forwarded to every ``PointnetSAModule``.

        Raises:
            NotImplementedError: If ``cfg.RCNN.LOSS_CLS`` is not one of
                ``'SigmoidFocalLoss'``, ``'BinaryCrossEntropy'`` or
                ``'CrossEntropy'``.
        """
        super().__init__()

        self.SA_modules = nn.ModuleList()
        channel_in = input_channels

        if cfg.RCNN.USE_RPN_FEATURES:
            # 3 base channels plus one per enabled optional input feature.
            self.rcnn_input_channel = (3
                                       + int(cfg.RCNN.USE_INTENSITY)
                                       + int(cfg.RCNN.USE_MASK)
                                       + int(cfg.RCNN.USE_DEPTH))
            self.xyz_up_layer = pt_utils.SharedMLP(
                [self.rcnn_input_channel] + cfg.RCNN.XYZ_UP_LAYER,
                bn=cfg.RCNN.USE_BN)
            up_width = cfg.RCNN.XYZ_UP_LAYER[-1]
            self.merge_down_layer = pt_utils.SharedMLP(
                [up_width * 2, up_width], bn=cfg.RCNN.USE_BN)

        # Set-abstraction stack; an NPOINTS entry of -1 is passed on as None.
        for k, raw_npoint in enumerate(cfg.RCNN.SA_CONFIG.NPOINTS):
            mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]
            self.SA_modules.append(
                PointnetSAModule(
                    npoint=None if raw_npoint == -1 else raw_npoint,
                    radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                    nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                    mlp=mlps,
                    use_xyz=use_xyz,
                    bn=cfg.RCNN.USE_BN,
                )
            )
            channel_in = mlps[-1]

        def stack_head(in_width, hidden_widths, out_width):
            # Conv1d chain ending in an activation-free output conv. Any
            # non-negative DP_RATIO (0 included) places a Dropout at index 1.
            stack = []
            for width in hidden_widths:
                stack.append(
                    pt_utils.Conv1d(in_width, width, bn=cfg.RCNN.USE_BN))
                in_width = width
            stack.append(pt_utils.Conv1d(in_width, out_width, activation=None))
            if cfg.RCNN.DP_RATIO >= 0:
                stack.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
            return nn.Sequential(*stack)

        # classification layer
        cls_channel = 1 if num_classes == 2 else num_classes
        self.cls_layer = stack_head(channel_in, cfg.RCNN.CLS_FC, cls_channel)

        loss_name = cfg.RCNN.LOSS_CLS
        if loss_name == 'SigmoidFocalLoss':
            self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
        elif loss_name == 'BinaryCrossEntropy':
            self.cls_loss_func = F.binary_cross_entropy
        elif loss_name == 'CrossEntropy':
            cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
            # NOTE(review): `reduce=False` is the deprecated spelling of
            # reduction='none'; kept as-is to avoid assuming a torch version.
            self.cls_loss_func = nn.CrossEntropyLoss(
                ignore_index=-1, reduce=False, weight=cls_weight)
        else:
            raise NotImplementedError

        if cfg.USE_IOU_BRANCH:
            # The IoU branch uses exactly the first two REG_FC widths.
            iou_hidden = [cfg.RCNN.REG_FC[0], cfg.RCNN.REG_FC[1]]
            self.iou_branch = stack_head(channel_in, iou_hidden, 1)

        # regression layer
        per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
        loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
        # y uses 2 * loc_y_bin_num channels when binned, else one channel.
        y_channels = loc_y_bin_num * 2 if cfg.RCNN.LOC_Y_BY_BIN else 1
        reg_channel = (per_loc_bin_num * 4
                       + cfg.RCNN.NUM_HEAD_BIN * 2 + 3)
        reg_channel = reg_channel + y_channels
        self.reg_layer = stack_head(channel_in, cfg.RCNN.REG_FC, reg_channel)

        self.proposal_target_layer = ProposalTargetLayer()
        self.init_weights(weight_init='xavier')
コード例 #6
0
ファイル: rcnn_net.py プロジェクト: wenguanwang/WS3D
    def __init__(self,
                 num_classes,
                 num_point=512,
                 input_channels=0,
                 use_xyz=True):
        """Build the RCNN modules and, optionally, the cascaded IoU stages.

        Creates the input transformer, the xyz/feature up layers, the
        set-abstraction stack (with per-level attention MLPs when
        ``cfg.ATTENTION`` is set), the classification and regression heads,
        and, when ``cfg.IOUN.ENABLED`` is set, ``cfg.CASCADE`` stages of
        IoU / ICL / refinement sub-networks.

        Args:
            num_classes: When equal to 2 the classification head emits a
                single channel; otherwise it emits ``num_classes`` channels.
            num_point: First argument passed to ``Transformer``.
            input_channels: Input width of the first set-abstraction module,
                for both the main stack and each cascade stage's stack.
            use_xyz: Forwarded to every ``PointnetSAModule``.

        Raises:
            NotImplementedError: If ``cfg.RCNN.LOSS_CLS`` is not one of
                ``'SigmoidFocalLoss'``, ``'BinaryCrossEntropy'`` or
                ``'CrossEntropy'``.
        """
        super().__init__()

        self.SA_modules = nn.ModuleList()
        self.ATT_modules = nn.ModuleList()
        channel_in = input_channels
        # NOTE(review): `.cuda()` here means construction requires a CUDA
        # device; the tensor is a plain attribute, not a registered buffer.
        self.MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

        # TODO: derive the feature count from the config instead of
        # hard-coding it; the config-based formula was:
        #self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
        self.rcnn_input_channel = 5
        # NOTE(review): attribute name is misspelled ("tansformer"); renaming
        # it would change the submodule name, so it is left as-is.
        self.input_tansformer = Transformer(num_point, 3)
        # Up layer whose input width is 3.
        self.xyz_up_layer = pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                               bn=cfg.RCNN.USE_BN)

        # Disabled: a second transformer over the up-layer output.
        #self.feature_tansformer = Transformer(num_point, cfg.RCNN.XYZ_UP_LAYER[-1])

        # Up layer for the remaining (rcnn_input_channel - 3) input channels.
        self.feature_up_layer = pt_utils.SharedMLP(
            [self.rcnn_input_channel - 3] + cfg.RCNN.XYZ_UP_LAYER,
            bn=cfg.RCNN.USE_BN)
        c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
        # Maps 2 * c_out channels back down to c_out.
        self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out],
                                                   bn=cfg.RCNN.USE_BN)

        # Set-abstraction stack; an NPOINTS entry of -1 is passed on as None.
        for k in range(cfg.RCNN.SA_CONFIG.NPOINTS.__len__()):

            if cfg.ATTENTION:
                # One attention MLP per SA level, built from the single-entry
                # width list [channel_in].
                self.ATT_modules.append(
                    pt_utils.SharedMLP([channel_in],
                                       bn=cfg.RCNN.USE_BN,
                                       activation=nn.ReLU(inplace=True)))

            mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]

            npoint = cfg.RCNN.SA_CONFIG.NPOINTS[
                k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
            self.SA_modules.append(
                PointnetSAModule(npoint=npoint,
                                 radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                                 nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                                 mlp=mlps,
                                 use_xyz=use_xyz,
                                 bn=cfg.RCNN.USE_BN))
            channel_in = mlps[-1]

            # Reference signature pasted by the original author:
            #   SharedMLP(args: List[int], *, bn: bool = False,
            #             activation=nn.ReLU(inplace=True),
            #             preact: bool = False, first: bool = False,
            #             name: str = "", instance_norm: bool = False)

        # classification layer
        cls_channel = 1 if num_classes == 2 else num_classes
        cls_layers = []
        pre_channel = channel_in
        for k in range(0, cfg.RCNN.CLS_FC.__len__()):
            cls_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RCNN.CLS_FC[k],
                                bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.CLS_FC[k]
        cls_layers.append(
            pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
        # Any non-negative ratio (0 included) places a Dropout at index 1.
        if cfg.RCNN.DP_RATIO >= 0:
            cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.cls_layer = nn.Sequential(*cls_layers)

        # classification loss selected by name from the config
        if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
            self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
        elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
            self.cls_loss_func = F.binary_cross_entropy
        elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
            cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
            # NOTE(review): `reduce=False` is the deprecated spelling of
            # reduction='none' in current PyTorch.
            self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1,
                                                     reduce=False,
                                                     weight=cls_weight)
        else:
            raise NotImplementedError

        # regression layer
        # Channel count: 4 groups of location bins, 2 groups of heading bins,
        # 3 extra channels, plus either 1 channel for y or 2 groups of y bins.
        per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
        loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
        reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
        reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

        reg_layers = []
        pre_channel = channel_in
        for k in range(0, cfg.RCNN.REG_FC.__len__()):
            reg_layers.append(
                pt_utils.Conv1d(pre_channel,
                                cfg.RCNN.REG_FC[k],
                                bn=cfg.RCNN.USE_BN))
            pre_channel = cfg.RCNN.REG_FC[k]
        reg_layers.append(
            pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.reg_layer = nn.Sequential(*reg_layers)

        # IoU estimation: cfg.CASCADE stages, each with its own up layers,
        # SA stack and IoU / ICL / refinement heads.
        if cfg.IOUN.ENABLED:
            self.cascade = cfg.CASCADE
            self.can_xyz_up_layer = nn.ModuleList()
            self.can_feature_up_layer = nn.ModuleList()
            self.can_merge_down_layer = nn.ModuleList()
            self.SA_score_modules = nn.ModuleList()
            self.ATT_score_modules = nn.ModuleList()
            self.IOU_layer = nn.ModuleList()
            self.ICL_layer = nn.ModuleList()
            self.ref_layer = nn.ModuleList()
            for i in range(self.cascade):
                # Freeze every parameter registered so far. Because this runs
                # at the top of each iteration, it also freezes the cascade
                # stages appended by earlier iterations, so only the last
                # stage's parameters keep requires_grad=True.
                # NOTE(review): confirm that freezing earlier stages is
                # intended.
                for p in self.parameters():
                    p.requires_grad = False

                self.can_xyz_up_layer.append(
                    pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                       bn=cfg.RCNN.USE_BN).cuda())
                self.can_feature_up_layer.append(
                    pt_utils.SharedMLP([2] + cfg.RCNN.XYZ_UP_LAYER,
                                       bn=cfg.RCNN.USE_BN).cuda())
                c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
                # NOTE(review): unlike the two up layers above, this module is
                # not moved with `.cuda()` here.
                self.can_merge_down_layer.append(
                    pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN))

                # Each stage's SA stack starts again from input_channels.
                iou_channel_in = input_channels
                for k in range(cfg.IOUN.SA_CONFIG.NPOINTS.__len__()):

                    mlps = [iou_channel_in] + cfg.IOUN.SA_CONFIG.MLPS[k]

                    if cfg.ATTENTION:
                        # Uses ELU here, whereas the main stack uses ReLU;
                        # also not moved with `.cuda()`.
                        self.ATT_score_modules.append(
                            pt_utils.SharedMLP(
                                [iou_channel_in],
                                bn=cfg.RCNN.USE_BN,
                                activation=nn.ELU(inplace=True)))

                    npoint = cfg.IOUN.SA_CONFIG.NPOINTS[
                        k] if cfg.IOUN.SA_CONFIG.NPOINTS[k] != -1 else None
                    self.SA_score_modules.append(
                        PointnetSAModule(npoint=npoint,
                                         radius=cfg.IOUN.SA_CONFIG.RADIUS[k],
                                         nsample=cfg.IOUN.SA_CONFIG.NSAMPLE[k],
                                         mlp=mlps,
                                         use_xyz=use_xyz,
                                         bn=cfg.IOUN.USE_BN).cuda())
                    iou_channel_in = mlps[-1]

                # IoU head: single output channel.
                IOU_channel = 1
                IOU_layers = []
                pre_channel = iou_channel_in
                for k in range(0, cfg.IOUN.CLS_FC.__len__()):
                    IOU_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.CLS_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.CLS_FC[k]
                IOU_layers.append(
                    pt_utils.Conv1d(pre_channel, IOU_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    IOU_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.IOU_layer.append(nn.Sequential(*IOU_layers).cuda())

                # ICL head: same layout as the IoU head (CLS_FC widths, one
                # output channel).
                ICL_channel = 1
                ICL_layers = []
                pre_channel = iou_channel_in
                for k in range(0, cfg.IOUN.CLS_FC.__len__()):
                    ICL_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.CLS_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.CLS_FC[k]
                ICL_layers.append(
                    pt_utils.Conv1d(pre_channel, ICL_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    ICL_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.ICL_layer.append(nn.Sequential(*ICL_layers).cuda())

                # NOTE(review): the two bin counts below are computed but not
                # used afterwards; the refinement head has a fixed 7 channels.
                per_loc_bin_num = int(
                    cfg.IOUN.LOC_SCOPE / cfg.IOUN.LOC_BIN_SIZE) * 2
                loc_y_bin_num = int(
                    cfg.IOUN.LOC_Y_SCOPE / cfg.IOUN.LOC_Y_BIN_SIZE) * 2
                ref_channel = 7

                # Refinement head: REG_FC widths, ref_channel outputs.
                ref_layers = []
                pre_channel = iou_channel_in
                for k in range(0, cfg.IOUN.REG_FC.__len__()):
                    ref_layers.append(
                        pt_utils.Conv1d(pre_channel,
                                        cfg.IOUN.REG_FC[k],
                                        bn=cfg.IOUN.USE_BN))
                    pre_channel = cfg.IOUN.REG_FC[k]
                ref_layers.append(
                    pt_utils.Conv1d(pre_channel, ref_channel, activation=None))
                if cfg.IOUN.DP_RATIO >= 0:
                    ref_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
                self.ref_layer.append(nn.Sequential(*ref_layers).cuda())

        self.init_weights(weight_init='xavier')
コード例 #7
0
    def __init__(self, use_xyz=True, mode='TRAIN'):
        """Assemble the RPN with a direct 9-channel regression head.

        Both heads are stacks of ``pt_utils.Conv1d`` layers whose input width
        is the last entry of ``cfg.RPN.FP_MLPS[0]``. The classification head
        ends in 1 channel and the regression head in 9 channels; neither
        output conv has an activation.

        Args:
            use_xyz: Forwarded unchanged to the backbone's ``get_model``.
            mode: ``self.training_mode`` is True only when this equals
                ``'TRAIN'``; the value is also handed to ``ProposalLayer``.

        Raises:
            NotImplementedError: If ``cfg.RPN.LOSS_CLS`` is not one of
                ``'DiceLoss'``, ``'SigmoidFocalLoss'`` or
                ``'BinaryCrossEntropy'``.
        """
        super().__init__()
        self.training_mode = mode == 'TRAIN'

        # The backbone implementation is picked at runtime from the module
        # path stored in the config.
        backbone_module = importlib.import_module(cfg.RPN.BACKBONE)
        self.backbone_net = backbone_module.get_model(
            input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

        # Original author's notes (not verifiable from this block, confirm
        # against the config and pt_utils): heads take (B, C, N) input, C is
        # 128 with the default config, and pt_utils.Conv1d is used with
        # kernel size 1 so every point is handled in one pass.

        # classification branch
        cls_widths = [cfg.RPN.FP_MLPS[0][-1]] + list(cfg.RPN.CLS_FC)
        cls_stack = [
            pt_utils.Conv1d(w_in, w_out, bn=cfg.RPN.USE_BN)
            for w_in, w_out in zip(cls_widths[:-1], cls_widths[1:])
        ]
        # No activation on the output conv; per the original author the
        # sigmoid is applied inside the loss (TODO confirm).
        cls_stack.append(pt_utils.Conv1d(cls_widths[-1], 1, activation=None))
        # Any non-negative ratio (0 included) places a Dropout at index 1.
        if cfg.RPN.DP_RATIO >= 0:
            cls_stack.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_cls_layer = nn.Sequential(*cls_stack)

        # regression branch: plain regression of 9 box parameters, described
        # by the original author as (x, y, z, w, h, l, rx, ry, rz).
        reg_channel = 9
        reg_widths = [cfg.RPN.FP_MLPS[0][-1]] + list(cfg.RPN.REG_FC)
        reg_stack = [
            pt_utils.Conv1d(w_in, w_out, bn=cfg.RPN.USE_BN)
            for w_in, w_out in zip(reg_widths[:-1], reg_widths[1:])
        ]
        reg_stack.append(
            pt_utils.Conv1d(reg_widths[-1], reg_channel, activation=None))
        # NOTE(review): the original author flagged that a binned variant
        # applies the activation in the loss (lib/utils/loss_utils.py,
        # "get_reg_loss") and marked this for change; verify before relying
        # on it.
        if cfg.RPN.DP_RATIO >= 0:
            reg_stack.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        self.rpn_reg_layer = nn.Sequential(*reg_stack)

        # classification loss selected by name from the config
        loss_name = cfg.RPN.LOSS_CLS
        if loss_name == 'DiceLoss':
            self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
        elif loss_name == 'SigmoidFocalLoss':
            self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
                alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
        elif loss_name == 'BinaryCrossEntropy':
            self.rpn_cls_loss_func = F.binary_cross_entropy
        else:
            raise NotImplementedError

        # Per the original author, the proposal layer is consumed by the RCNN
        # stage rather than by the RPN itself (TODO confirm).
        self.proposal_layer = ProposalLayer(mode=mode)
        self.init_weights()