def __init__(self, num_classes, use_xyz=True, mode='TRAIN'):
    super().__init__()
    assert cfg.RPN.ENABLED or cfg.RCNN.ENABLED

    if cfg.PSP.ENABLED:
        self.psp = PSPNet(n_classes=1)
        # self.psp = PSPNet()

    if cfg.RPN.ENABLED:
        self.rpn = RPN(use_xyz=use_xyz, mode=mode)
        # merge the xyz features with the image features from PSPNet
        feature_channel = cfg.RPN.FP_MLPS[0][-1]
        self.merge_down = pt_utils.SharedMLP([feature_channel * 2, feature_channel], bn=cfg.RPN.USE_BN)
        # self.merge_down = pt_utils.SharedMLP([feature_channel + 512, feature_channel], bn=cfg.RPN.USE_BN)

    if cfg.RCNN.ENABLED:
        rcnn_input_channels = 128  # channel count of the merged RPN and PSPNet features
        if cfg.RCNN.BACKBONE == 'pointnet':
            self.rcnn_net = RCNNNet(num_classes=num_classes, input_channels=rcnn_input_channels, use_xyz=use_xyz)
        elif cfg.RCNN.BACKBONE == 'pointsift':
            pass
        else:
            raise NotImplementedError
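# A minimal usage sketch (hedged: the enclosing class name 'PointRCNN' and a fully
# populated cfg are assumptions, since only the __init__ body appears above):
#
#   net = PointRCNN(num_classes=2, use_xyz=True, mode='TRAIN')
#   net.cuda().train()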
def __init__(self, use_xyz=True, mode='TRAIN'):
    super().__init__()
    self.training_mode = (mode == 'TRAIN')

    MODEL = importlib.import_module(cfg.RPN.BACKBONE)
    self.backbone_net = MODEL.get_model(
        input_channels=int(cfg.RPN.USE_INTENSITY) + 3 * int(cfg.RPN.USE_BGR) + 9 * int(cfg.RPN.USE_MEAN_COVARIANCE),
        use_xyz=use_xyz)

    # merge the xyz features with the image features from PSPNet
    feature_channel = cfg.RPN.FP_MLPS[0][-1]
    self.merge_down_layer = pt_utils.SharedMLP([feature_channel * 2, feature_channel], bn=cfg.RPN.USE_BN)

    # classification branch
    cls_layers = []
    pre_channel = cfg.RPN.FP_MLPS[0][-1]
    # pre_channel = cfg.RPN.FP_MLPS[0][-1] + 512
    for k in range(len(cfg.RPN.CLS_FC)):
        cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.CLS_FC[k], bn=cfg.RPN.USE_BN))
        pre_channel = cfg.RPN.CLS_FC[k]
    cls_layers.append(pt_utils.Conv1d(pre_channel, 1, activation=None))
    if cfg.RPN.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
    self.rpn_cls_layer = nn.Sequential(*cls_layers)

    # regression branch
    per_loc_bin_num = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2
    if cfg.RPN.LOC_XZ_FINE:
        reg_channel = per_loc_bin_num * 4 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
    else:
        reg_channel = per_loc_bin_num * 2 + cfg.RPN.NUM_HEAD_BIN * 2 + 3
    reg_channel += 1  # extra channel for the direct y regression

    reg_layers = []
    pre_channel = cfg.RPN.FP_MLPS[0][-1]
    # pre_channel = cfg.RPN.FP_MLPS[0][-1] + 512
    for k in range(len(cfg.RPN.REG_FC)):
        reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RPN.REG_FC[k], bn=cfg.RPN.USE_BN))
        pre_channel = cfg.RPN.REG_FC[k]
    reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
    if cfg.RPN.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
    self.rpn_reg_layer = nn.Sequential(*reg_layers)

    # classification loss
    if cfg.RPN.LOSS_CLS == 'DiceLoss':
        self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
    elif cfg.RPN.LOSS_CLS == 'SigmoidFocalLoss':
        self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
    elif cfg.RPN.LOSS_CLS == 'BinaryCrossEntropy':
        self.rpn_cls_loss_func = F.binary_cross_entropy
    else:
        raise NotImplementedError

    self.proposal_layer = ProposalLayer(mode=mode)
    self.init_weights()
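# Worked example of the regression-channel arithmetic above, assuming PointRCNN's
# usual KITTI defaults (LOC_SCOPE=3.0, LOC_BIN_SIZE=0.5, NUM_HEAD_BIN=12,
# LOC_XZ_FINE=True; these values are assumptions, not read from this file):
#
#   per_loc_bin_num = int(3.0 / 0.5) * 2   # = 12
#   reg_channel = 12 * 4 + 12 * 2 + 3      # = 75 (x/z bins + residuals, heading bins + residuals, 3 size dims)
#   reg_channel += 1                       # = 76 (direct y regression)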
def __init__(self, num_classes, input_channels=0, use_xyz=True):
    super().__init__()
    self.SA_modules = nn.ModuleList()
    channel_in = input_channels

    if cfg.RCNN.USE_RPN_FEATURES:
        self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
        self.xyz_up_layer = pt_utils.SharedMLP([self.rcnn_input_channel] + cfg.RCNN.XYZ_UP_LAYER, bn=cfg.RCNN.USE_BN)
        c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
        self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN)

    for k in range(len(cfg.RCNN.SA_CONFIG.NPOINTS)):
        mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]
        npoint = cfg.RCNN.SA_CONFIG.NPOINTS[k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
        self.SA_modules.append(
            PointnetSAModule(
                npoint=npoint,
                radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                mlp=mlps,
                use_xyz=use_xyz,
                bn=cfg.RCNN.USE_BN
            )
        )
        channel_in = mlps[-1]

    # classification layer
    cls_channel = 1 if num_classes == 2 else num_classes
    cls_layers = []
    pre_channel = channel_in
    for k in range(len(cfg.RCNN.CLS_FC)):
        cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.CLS_FC[k], bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.CLS_FC[k]
    cls_layers.append(pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
    if cfg.RCNN.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.cls_layer = nn.Sequential(*cls_layers)

    if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
        self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
    elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
        self.cls_loss_func = F.binary_cross_entropy
    elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
        cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
        self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='none', weight=cls_weight)
    else:
        raise NotImplementedError

    # optional IoU estimation branch
    if cfg.USE_IOU_BRANCH:
        iou_branch = []
        iou_branch.append(pt_utils.Conv1d(channel_in, cfg.RCNN.REG_FC[0], bn=cfg.RCNN.USE_BN))
        iou_branch.append(pt_utils.Conv1d(cfg.RCNN.REG_FC[0], cfg.RCNN.REG_FC[1], bn=cfg.RCNN.USE_BN))
        iou_branch.append(pt_utils.Conv1d(cfg.RCNN.REG_FC[1], 1, activation=None))
        if cfg.RCNN.DP_RATIO >= 0:
            iou_branch.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.iou_branch = nn.Sequential(*iou_branch)

    # regression layer
    per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
    loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
    reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
    reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

    reg_layers = []
    pre_channel = channel_in
    for k in range(len(cfg.RCNN.REG_FC)):
        reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.REG_FC[k], bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.REG_FC[k]
    reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
    if cfg.RCNN.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.reg_layer = nn.Sequential(*reg_layers)

    self.proposal_target_layer = ProposalTargetLayer()
    self.init_weights(weight_init='xavier')
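# Note on the NPOINTS == -1 convention used above: passing npoint=None to
# PointnetSAModule makes it group all remaining points into a single feature
# vector (a global set-abstraction layer), which is how the final SA stage
# collapses each RoI to one descriptor before the Conv1d heads.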
def __init__(self, num_classes, num_point=512, input_channels=0, use_xyz=True):
    super().__init__()
    self.SA_modules = nn.ModuleList()
    self.ATT_modules = nn.ModuleList()
    channel_in = input_channels
    self.MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()

    # TODO: derive this from the configured input features instead of hard-coding it
    # self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
    self.rcnn_input_channel = 5

    self.input_tansformer = Transformer(num_point, 3)
    self.xyz_up_layer = pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER, bn=cfg.RCNN.USE_BN)
    # self.feature_tansformer = Transformer(num_point, cfg.RCNN.XYZ_UP_LAYER[-1])
    self.feature_up_layer = pt_utils.SharedMLP([self.rcnn_input_channel - 3] + cfg.RCNN.XYZ_UP_LAYER, bn=cfg.RCNN.USE_BN)
    c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
    self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN)

    for k in range(len(cfg.RCNN.SA_CONFIG.NPOINTS)):
        if cfg.ATTENTION:
            self.ATT_modules.append(
                pt_utils.SharedMLP([channel_in], bn=cfg.RCNN.USE_BN, activation=nn.ReLU(inplace=True)))
        mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]
        npoint = cfg.RCNN.SA_CONFIG.NPOINTS[k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
        self.SA_modules.append(
            PointnetSAModule(npoint=npoint,
                             radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                             nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                             mlp=mlps,
                             use_xyz=use_xyz,
                             bn=cfg.RCNN.USE_BN))
        channel_in = mlps[-1]

    # (signature reference: pt_utils.SharedMLP(args, *, bn=False, activation=nn.ReLU(inplace=True),
    #  preact=False, first=False, name="", instance_norm=False))

    # classification layer
    cls_channel = 1 if num_classes == 2 else num_classes
    cls_layers = []
    pre_channel = channel_in
    for k in range(len(cfg.RCNN.CLS_FC)):
        cls_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.CLS_FC[k], bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.CLS_FC[k]
    cls_layers.append(pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
    if cfg.RCNN.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.cls_layer = nn.Sequential(*cls_layers)

    if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
        self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
    elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
        self.cls_loss_func = F.binary_cross_entropy
    elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
        cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
        self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='none', weight=cls_weight)
    else:
        raise NotImplementedError

    # regression layer
    per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
    loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
    reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
    reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

    reg_layers = []
    pre_channel = channel_in
    for k in range(len(cfg.RCNN.REG_FC)):
        reg_layers.append(pt_utils.Conv1d(pre_channel, cfg.RCNN.REG_FC[k], bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.REG_FC[k]
    reg_layers.append(pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
    if cfg.RCNN.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.reg_layer = nn.Sequential(*reg_layers)

    # IoU estimation head (cascaded refinement)
    if cfg.IOUN.ENABLED:
        self.cascade = cfg.CASCADE
        self.can_xyz_up_layer = nn.ModuleList()
        self.can_feature_up_layer = nn.ModuleList()
        self.can_merge_down_layer = nn.ModuleList()
        self.SA_score_modules = nn.ModuleList()
        self.ATT_score_modules = nn.ModuleList()
        self.IOU_layer = nn.ModuleList()
        self.ICL_layer = nn.ModuleList()
        self.ref_layer = nn.ModuleList()

        for i in range(self.cascade):
            # freeze every parameter built so far, so only the newest cascade stage trains
            for p in self.parameters():
                p.requires_grad = False

            self.can_xyz_up_layer.append(
                pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER, bn=cfg.RCNN.USE_BN).cuda())
            self.can_feature_up_layer.append(
                pt_utils.SharedMLP([2] + cfg.RCNN.XYZ_UP_LAYER, bn=cfg.RCNN.USE_BN).cuda())
            c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
            self.can_merge_down_layer.append(
                pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN))

            iou_channel_in = input_channels
            for k in range(len(cfg.IOUN.SA_CONFIG.NPOINTS)):
                mlps = [iou_channel_in] + cfg.IOUN.SA_CONFIG.MLPS[k]
                if cfg.ATTENTION:
                    self.ATT_score_modules.append(
                        pt_utils.SharedMLP([iou_channel_in], bn=cfg.RCNN.USE_BN, activation=nn.ELU(inplace=True)))
                npoint = cfg.IOUN.SA_CONFIG.NPOINTS[k] if cfg.IOUN.SA_CONFIG.NPOINTS[k] != -1 else None
                self.SA_score_modules.append(
                    PointnetSAModule(npoint=npoint,
                                     radius=cfg.IOUN.SA_CONFIG.RADIUS[k],
                                     nsample=cfg.IOUN.SA_CONFIG.NSAMPLE[k],
                                     mlp=mlps,
                                     use_xyz=use_xyz,
                                     bn=cfg.IOUN.USE_BN).cuda())
                iou_channel_in = mlps[-1]

            # IoU regression head
            IOU_channel = 1
            IOU_layers = []
            pre_channel = iou_channel_in
            for k in range(len(cfg.IOUN.CLS_FC)):
                IOU_layers.append(pt_utils.Conv1d(pre_channel, cfg.IOUN.CLS_FC[k], bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.CLS_FC[k]
            IOU_layers.append(pt_utils.Conv1d(pre_channel, IOU_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                IOU_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.IOU_layer.append(nn.Sequential(*IOU_layers).cuda())

            # IoU classification head
            ICL_channel = 1
            ICL_layers = []
            pre_channel = iou_channel_in
            for k in range(len(cfg.IOUN.CLS_FC)):
                ICL_layers.append(pt_utils.Conv1d(pre_channel, cfg.IOUN.CLS_FC[k], bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.CLS_FC[k]
            ICL_layers.append(pt_utils.Conv1d(pre_channel, ICL_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                ICL_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.ICL_layer.append(nn.Sequential(*ICL_layers).cuda())

            # box refinement head: regresses 7 values directly
            # (the bin counts below are computed but not used by this head)
            per_loc_bin_num = int(cfg.IOUN.LOC_SCOPE / cfg.IOUN.LOC_BIN_SIZE) * 2
            loc_y_bin_num = int(cfg.IOUN.LOC_Y_SCOPE / cfg.IOUN.LOC_Y_BIN_SIZE) * 2
            ref_channel = 7
            ref_layers = []
            pre_channel = iou_channel_in
            for k in range(len(cfg.IOUN.REG_FC)):
                ref_layers.append(pt_utils.Conv1d(pre_channel, cfg.IOUN.REG_FC[k], bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.REG_FC[k]
            ref_layers.append(pt_utils.Conv1d(pre_channel, ref_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                ref_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.ref_layer.append(nn.Sequential(*ref_layers).cuda())

    self.init_weights(weight_init='xavier')
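# Sketch of the cascade freezing behaviour above (an illustrative check, not code
# from the repo): at the start of stage i, every parameter created so far -- the
# base RCNN layers and all earlier cascade stages -- gets requires_grad = False,
# so after the loop only the modules appended in the final stage remain trainable.
#
#   trainable = [n for n, p in model.named_parameters() if p.requires_grad]
#   # expect only entries created in cascade stage self.cascade - 1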