def __init__(self, use_xyz=True, mode='TRAIN'):
    """Assemble the RPN: backbone, per-point heads, classification loss and proposal layer.

    Args:
        use_xyz: Forwarded unchanged to the backbone's ``get_model``.
        mode: ``self.training_mode`` is True only when this equals ``'TRAIN'``;
            the value is also handed to ``ProposalLayer``.

    Raises:
        NotImplementedError: If ``cfg.RPN.LOSS_CLS`` is not one of
            ``'DiceLoss'``, ``'SigmoidFocalLoss'`` or ``'BinaryCrossEntropy'``.
    """
    super().__init__()
    self.training_mode = (mode == 'TRAIN')

    # The backbone implementation is looked up by module name from the config.
    backbone_module = importlib.import_module(cfg.RPN.BACKBONE)
    self.backbone_net = backbone_module.get_model(
        input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

    def build_head(fc_widths, out_channels):
        """Stack Conv1d layers over the backbone feature width, ending in `out_channels`."""
        width_in = cfg.RPN.FP_MLPS[0][-1]
        stack = []
        for width_out in fc_widths:
            stack.append(
                pt_utils.Conv1d(width_in, width_out, bn=cfg.RPN.USE_BN))
            width_in = width_out
        # Final projection carries no activation.
        stack.append(pt_utils.Conv1d(width_in, out_channels, activation=None))
        # Dropout always lands at position 1, i.e. right after the first layer.
        if cfg.RPN.DP_RATIO >= 0:
            stack.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        return nn.Sequential(*stack)

    # classification branch: one output channel per point
    self.rpn_cls_layer = build_head(cfg.RPN.CLS_FC, 1)

    # regression branch: bin-based location terms, heading bins, 3 extra
    # channels, plus one channel for y
    bins_per_axis = int(cfg.RPN.LOC_SCOPE / cfg.RPN.LOC_BIN_SIZE) * 2
    loc_multiplier = 4 if cfg.RPN.LOC_XZ_FINE else 2
    reg_channel = bins_per_axis * loc_multiplier + cfg.RPN.NUM_HEAD_BIN * 2 + 3
    reg_channel += 1  # reg y
    self.rpn_reg_layer = build_head(cfg.RPN.REG_FC, reg_channel)

    # classification loss selected by name
    loss_name = cfg.RPN.LOSS_CLS
    if loss_name == 'DiceLoss':
        self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
    elif loss_name == 'SigmoidFocalLoss':
        self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
    elif loss_name == 'BinaryCrossEntropy':
        self.rpn_cls_loss_func = F.binary_cross_entropy
    else:
        raise NotImplementedError

    self.proposal_layer = ProposalLayer(mode=mode)
    self.init_weights()
def __init__(self, input_channels=0, use_xyz=True, mode="TRAIN"):
    """Build a fixed four-level multi-scale backbone with a 5-channel per-point head.

    Args:
        input_channels: Number of feature channels fed to the first
            set-abstraction level and to the last feature-propagation level.
        use_xyz: Forwarded to every ``PointnetSAModuleMSG``.
        mode: Accepted for interface compatibility; not read in this constructor.
    """
    super().__init__()

    # Feature extraction.
    # (npoint, radii, hidden widths per scale); every level samples [16, 32]
    # neighbours per scale.
    level_specs = (
        (8192, [0.1, 0.5], ([16, 16, 32], [32, 32, 64])),
        (2048, [0.5, 1.], ([64, 64, 128], [64, 96, 128])),
        (512, [1.0, 2.0], ([128, 196, 256], [128, 196, 256])),
        (128, [2.0, 4.0], ([256, 256, 512], [256, 384, 512])),
    )

    feat_in = input_channels
    sa_levels = []
    level_widths = []
    for npoint, radii, scale_widths in level_specs:
        sa_levels.append(
            PointnetSAModuleMSG(
                npoint=npoint,
                radii=radii,
                nsamples=[16, 32],
                mlps=[[feat_in] + list(widths) for widths in scale_widths],
                use_xyz=use_xyz,
                bn=True))
        # The next level consumes the concatenated outputs of all scales.
        feat_in = sum(widths[-1] for widths in scale_widths)
        level_widths.append(feat_in)

    # Attribute assignment runs left to right, keeping registration order 1..4.
    (self.SA_module1, self.SA_module2,
     self.SA_module3, self.SA_module4) = sa_levels
    width_1, width_2, width_3, width_4 = level_widths

    # Feature propagation, deepest level first.
    self.FP_module1 = PointnetFPModule(mlp=[width_4 + width_3, 512, 512], bn=True)
    self.FP_module2 = PointnetFPModule(mlp=[512 + width_2, 512, 512], bn=True)
    self.FP_module3 = PointnetFPModule(mlp=[512 + width_1, 256, 256], bn=True)
    self.FP_module4 = PointnetFPModule(mlp=[256 + input_channels, 128, 128], bn=True)

    # Per-point head: 128 -> 64 -> 5, the last layer without BN or activation.
    self.ins_fc1 = pt_utils.Conv1d(128, 64, kernel_size=1, bn=True)
    self.ins_fc2 = pt_utils.Conv1d(64, 5, kernel_size=1, bn=False, activation=None)
def __init__(self):
    """Build a two-layer IoU head over 512-channel features and the proposal target layer."""
    super().__init__()

    feature_width = 512
    hidden_conv = pt_utils.Conv1d(feature_width, feature_width, bn=cfg.RCNN.USE_BN)
    # Single-channel output with no activation.
    score_conv = pt_utils.Conv1d(feature_width, 1, activation=None)

    if cfg.RCNN.DP_RATIO >= 0:
        # Dropout sits between the hidden layer and the output layer.
        head = [hidden_conv, nn.Dropout(cfg.RCNN.DP_RATIO), score_conv]
    else:
        head = [hidden_conv, score_conv]
    self.iou_layer = nn.Sequential(*head)

    self.proposal_target_layer = ProposalTargetLayer()
    self.init_weights(weight_init='xavier')
def __init__(self, input_channels=0, use_xyz=True, mode="TRAIN"):
    """Build a four-level multi-scale backbone from ``cfg.NET`` with an 8-channel per-point head.

    The set-abstraction levels are described by ``cfg.NET.SA_NPOINTS``,
    ``cfg.NET.RADIIS``, ``cfg.NET.NSAMPLES`` and ``cfg.NET.MLPS`` (hidden
    widths per scale, without the input width). The config lists are only
    read: each level gets freshly built width lists, so constructing the
    network more than once in a process yields the same architecture.

    Args:
        input_channels: Number of feature channels fed to the first
            set-abstraction level and to the last feature-propagation level.
        use_xyz: Forwarded to every ``PointnetSAModuleMSG``.
        mode: Accepted for interface compatibility; not read in this constructor.

    Raises:
        ValueError: If ``cfg.NET.SA_NPOINTS`` does not describe exactly four
            levels (the feature-propagation wiring below is fixed at four).
    """
    super().__init__()

    # Feature extraction.
    npoints = cfg.NET.SA_NPOINTS
    radiis = cfg.NET.RADIIS
    nsamples = cfg.NET.NSAMPLES
    mlps = cfg.NET.MLPS

    if len(npoints) != 4:
        raise ValueError(
            'cfg.NET.SA_NPOINTS must define exactly 4 set-abstraction levels, '
            'got %d' % len(npoints))

    c_in = input_channels
    c_out = []
    sa_modules = []
    for i, npoint in enumerate(npoints):
        # Prepend the input width on copies; writing back into cfg.NET.MLPS
        # would prepend it again on every later construction.
        level_mlps = [[c_in, *widths] for widths in mlps[i]]
        # Output width is the concatenation of every scale's last layer.
        # Computed before the module is built, as that may edit the lists.
        level_out = sum(spec[-1] for spec in level_mlps)
        c_out.append(level_out)
        sa_modules.append(
            PointnetSAModuleMSG(npoint=npoint,
                                radii=radiis[i],
                                nsamples=nsamples[i],
                                mlps=level_mlps,
                                use_xyz=use_xyz,
                                bn=True))
        c_in = level_out

    self.SA_module1 = sa_modules[0]
    self.SA_module2 = sa_modules[1]
    self.SA_module3 = sa_modules[2]
    self.SA_module4 = sa_modules[3]
    c_out_1, c_out_2, c_out_3, c_out_4 = c_out

    # Feature propagation, deepest level first.
    self.FP_module1 = PointnetFPModule(mlp=[c_out_4 + c_out_3, 512, 512], bn=True)
    self.FP_module2 = PointnetFPModule(mlp=[512 + c_out_2, 512, 512], bn=True)
    self.FP_module3 = PointnetFPModule(mlp=[512 + c_out_1, 256, 256], bn=True)
    self.FP_module4 = PointnetFPModule(mlp=[256 + input_channels, 128, 128], bn=True)

    # Per-point head: 128 -> 64 -> 8, the last layer without BN or activation.
    self.ins_fc1 = pt_utils.Conv1d(128, 64, kernel_size=1, bn=True)
    self.ins_fc2 = pt_utils.Conv1d(64, 8, kernel_size=1, bn=False, activation=None)
def __init__(self, num_classes, input_channels=0, use_xyz=True):
    """Build the RCNN head: SA stack, classification / regression heads and optional IoU branch.

    Args:
        num_classes: Number of classes. A value of 2 produces a single
            classification channel; any other value produces one channel
            per class.
        input_channels: Input width of the first set-abstraction module.
        use_xyz: Forwarded to every ``PointnetSAModule``.

    Raises:
        NotImplementedError: If ``cfg.RCNN.LOSS_CLS`` is not one of
            ``'SigmoidFocalLoss'``, ``'BinaryCrossEntropy'`` or ``'CrossEntropy'``.
    """
    super().__init__()

    self.SA_modules = nn.ModuleList()
    channel_in = input_channels

    if cfg.RCNN.USE_RPN_FEATURES:
        # Raw per-point input: 3 coordinates plus the optional extras.
        self.rcnn_input_channel = 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
        self.xyz_up_layer = pt_utils.SharedMLP([self.rcnn_input_channel] + cfg.RCNN.XYZ_UP_LAYER,
                                               bn=cfg.RCNN.USE_BN)
        c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
        self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN)

    sa_cfg = cfg.RCNN.SA_CONFIG
    for k in range(len(sa_cfg.NPOINTS)):
        mlps = [channel_in] + sa_cfg.MLPS[k]
        # -1 in the config means "no sampling" and is passed on as None.
        npoint = sa_cfg.NPOINTS[k] if sa_cfg.NPOINTS[k] != -1 else None
        self.SA_modules.append(
            PointnetSAModule(
                npoint=npoint,
                radius=sa_cfg.RADIUS[k],
                nsample=sa_cfg.NSAMPLE[k],
                mlp=mlps,
                use_xyz=use_xyz,
                bn=cfg.RCNN.USE_BN
            )
        )
        channel_in = mlps[-1]

    def build_head(fc_widths, out_channels):
        """Stack Conv1d layers from the SA output width down to `out_channels`."""
        layers = []
        pre_channel = channel_in
        for width in fc_widths:
            layers.append(pt_utils.Conv1d(pre_channel, width, bn=cfg.RCNN.USE_BN))
            pre_channel = width
        layers.append(pt_utils.Conv1d(pre_channel, out_channels, activation=None))
        # Dropout is always placed right after the first layer.
        if cfg.RCNN.DP_RATIO >= 0:
            layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        return nn.Sequential(*layers)

    # classification layer
    cls_channel = 1 if num_classes == 2 else num_classes
    self.cls_layer = build_head(cfg.RCNN.CLS_FC, cls_channel)

    if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
        self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(alpha=cfg.RCNN.FOCAL_ALPHA[0],
                                                                       gamma=cfg.RCNN.FOCAL_GAMMA)
    elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
        self.cls_loss_func = F.binary_cross_entropy
    elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
        cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
        # reduction='none' keeps per-element losses; it replaces the
        # deprecated reduce=False spelling with identical behavior.
        self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1, reduction='none', weight=cls_weight)
    else:
        raise NotImplementedError(
            'unsupported cfg.RCNN.LOSS_CLS: {!r}'.format(cfg.RCNN.LOSS_CLS))

    if cfg.USE_IOU_BRANCH:
        # Three-layer IoU branch sized by the first two REG_FC widths.
        iou_branch = [
            pt_utils.Conv1d(channel_in, cfg.RCNN.REG_FC[0], bn=cfg.RCNN.USE_BN),
            pt_utils.Conv1d(cfg.RCNN.REG_FC[0], cfg.RCNN.REG_FC[1], bn=cfg.RCNN.USE_BN),
            pt_utils.Conv1d(cfg.RCNN.REG_FC[1], 1, activation=None),
        ]
        if cfg.RCNN.DP_RATIO >= 0:
            iou_branch.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
        self.iou_branch = nn.Sequential(*iou_branch)

    # regression layer
    per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
    loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
    reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
    # y is either one directly regressed value or its own set of bin channels.
    reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)
    self.reg_layer = build_head(cfg.RCNN.REG_FC, reg_channel)

    self.proposal_target_layer = ProposalTargetLayer()
    self.init_weights(weight_init='xavier')
def __init__(self, num_classes, num_point=512, input_channels=0, use_xyz=True):
    """Build the RCNN head with optional attention modules and cascaded IoU stages.

    Args:
        num_classes: Number of classes. A value of 2 produces a single
            classification channel; any other value produces one channel
            per class.
        num_point: Passed as the first argument to ``Transformer``.
        input_channels: Input width of the first set-abstraction module, for
            both the main stack and each IoU stage's stack.
        use_xyz: Forwarded to every ``PointnetSAModule``.

    Raises:
        NotImplementedError: If ``cfg.RCNN.LOSS_CLS`` is not one of
            ``'SigmoidFocalLoss'``, ``'BinaryCrossEntropy'`` or ``'CrossEntropy'``.
    """
    super().__init__()
    self.SA_modules = nn.ModuleList()
    self.ATT_modules = nn.ModuleList()
    channel_in = input_channels
    # Moved to the GPU immediately, so construction needs CUDA to be available.
    self.MEAN_SIZE = torch.from_numpy(cfg.CLS_MEAN_SIZE[0]).cuda()
    # TODO (original author): "use statics feature num". The config-derived
    # count that the hard-coded 5 below replaces was:
    # 3 + int(cfg.RCNN.USE_INTENSITY) + int(cfg.RCNN.USE_MASK) + int(cfg.RCNN.USE_DEPTH)
    self.rcnn_input_channel = 5
    self.input_tansformer = Transformer(num_point, 3)
    # Coordinates (3 channels) and the remaining input channels are lifted by
    # separate MLPs to the same width.
    self.xyz_up_layer = pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                           bn=cfg.RCNN.USE_BN)
    # A second Transformer over the lifted features is disabled:
    # self.feature_tansformer = Transformer(num_point, cfg.RCNN.XYZ_UP_LAYER[-1])
    self.feature_up_layer = pt_utils.SharedMLP(
        [self.rcnn_input_channel - 3] + cfg.RCNN.XYZ_UP_LAYER,
        bn=cfg.RCNN.USE_BN)
    c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
    self.merge_down_layer = pt_utils.SharedMLP([c_out * 2, c_out],
                                               bn=cfg.RCNN.USE_BN)

    for k in range(cfg.RCNN.SA_CONFIG.NPOINTS.__len__()):
        if cfg.ATTENTION:
            # One attention module per SA level, built from the level's input width.
            self.ATT_modules.append(
                pt_utils.SharedMLP([channel_in],
                                   bn=cfg.RCNN.USE_BN,
                                   activation=nn.ReLU(inplace=True)))
        mlps = [channel_in] + cfg.RCNN.SA_CONFIG.MLPS[k]
        # -1 in the config is passed on as None.
        npoint = cfg.RCNN.SA_CONFIG.NPOINTS[
            k] if cfg.RCNN.SA_CONFIG.NPOINTS[k] != -1 else None
        self.SA_modules.append(
            PointnetSAModule(npoint=npoint,
                             radius=cfg.RCNN.SA_CONFIG.RADIUS[k],
                             nsample=cfg.RCNN.SA_CONFIG.NSAMPLE[k],
                             mlp=mlps,
                             use_xyz=use_xyz,
                             bn=cfg.RCNN.USE_BN))
        channel_in = mlps[-1]

    # Reference signature noted by the original author:
    # SharedMLP(args: List[int], *, bn=False, activation=nn.ReLU(inplace=True),
    #           preact=False, first=False, name="", instance_norm=False)

    # classification layer
    cls_channel = 1 if num_classes == 2 else num_classes
    cls_layers = []
    pre_channel = channel_in
    for k in range(0, cfg.RCNN.CLS_FC.__len__()):
        cls_layers.append(
            pt_utils.Conv1d(pre_channel,
                            cfg.RCNN.CLS_FC[k],
                            bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.CLS_FC[k]
    cls_layers.append(
        pt_utils.Conv1d(pre_channel, cls_channel, activation=None))
    # Dropout is always inserted at position 1, i.e. after the first layer.
    if cfg.RCNN.DP_RATIO >= 0:
        cls_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.cls_layer = nn.Sequential(*cls_layers)

    if cfg.RCNN.LOSS_CLS == 'SigmoidFocalLoss':
        self.cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RCNN.FOCAL_ALPHA[0], gamma=cfg.RCNN.FOCAL_GAMMA)
    elif cfg.RCNN.LOSS_CLS == 'BinaryCrossEntropy':
        self.cls_loss_func = F.binary_cross_entropy
    elif cfg.RCNN.LOSS_CLS == 'CrossEntropy':
        cls_weight = torch.from_numpy(cfg.RCNN.CLS_WEIGHT).float()
        # NOTE(review): `reduce` is a deprecated argument in current PyTorch;
        # `reduction='none'` is the documented replacement.
        self.cls_loss_func = nn.CrossEntropyLoss(ignore_index=-1,
                                                 reduce=False,
                                                 weight=cls_weight)
    else:
        raise NotImplementedError

    # regression layer
    per_loc_bin_num = int(cfg.RCNN.LOC_SCOPE / cfg.RCNN.LOC_BIN_SIZE) * 2
    loc_y_bin_num = int(cfg.RCNN.LOC_Y_SCOPE / cfg.RCNN.LOC_Y_BIN_SIZE) * 2
    reg_channel = per_loc_bin_num * 4 + cfg.RCNN.NUM_HEAD_BIN * 2 + 3
    # y adds either one channel or its own set of bin channels.
    reg_channel += (1 if not cfg.RCNN.LOC_Y_BY_BIN else loc_y_bin_num * 2)

    reg_layers = []
    pre_channel = channel_in
    for k in range(0, cfg.RCNN.REG_FC.__len__()):
        reg_layers.append(
            pt_utils.Conv1d(pre_channel,
                            cfg.RCNN.REG_FC[k],
                            bn=cfg.RCNN.USE_BN))
        pre_channel = cfg.RCNN.REG_FC[k]
    reg_layers.append(
        pt_utils.Conv1d(pre_channel, reg_channel, activation=None))
    if cfg.RCNN.DP_RATIO >= 0:
        reg_layers.insert(1, nn.Dropout(cfg.RCNN.DP_RATIO))
    self.reg_layer = nn.Sequential(*reg_layers)

    # IOU estimation
    # IOU layer
    if cfg.IOUN.ENABLED:
        self.cascade = cfg.CASCADE
        # Every list below receives one entry per cascade stage, except the
        # SA / attention lists, which receive one entry per SA level per stage.
        self.can_xyz_up_layer = nn.ModuleList()
        self.can_feature_up_layer = nn.ModuleList()
        self.can_merge_down_layer = nn.ModuleList()
        self.SA_score_modules = nn.ModuleList()
        self.ATT_score_modules = nn.ModuleList()
        self.IOU_layer = nn.ModuleList()
        self.ICL_layer = nn.ModuleList()
        self.ref_layer = nn.ModuleList()

        for i in range(self.cascade):
            # Freeze every parameter registered so far. Because this runs at
            # the top of each stage, it covers the main head and all earlier
            # stages, so only the last stage's new parameters keep
            # requires_grad=True.
            for p in self.parameters():
                p.requires_grad = False

            self.can_xyz_up_layer.append(
                pt_utils.SharedMLP([3] + cfg.RCNN.XYZ_UP_LAYER,
                                   bn=cfg.RCNN.USE_BN).cuda())
            # 2 input channels here, matching rcnn_input_channel - 3 above.
            self.can_feature_up_layer.append(
                pt_utils.SharedMLP([2] + cfg.RCNN.XYZ_UP_LAYER,
                                   bn=cfg.RCNN.USE_BN).cuda())
            c_out = cfg.RCNN.XYZ_UP_LAYER[-1]
            # NOTE(review): unlike its two siblings above, this module is not
            # moved with .cuda() here — confirm that is intentional.
            self.can_merge_down_layer.append(
                pt_utils.SharedMLP([c_out * 2, c_out], bn=cfg.RCNN.USE_BN))

            iou_channel_in = input_channels
            for k in range(cfg.IOUN.SA_CONFIG.NPOINTS.__len__()):
                mlps = [iou_channel_in] + cfg.IOUN.SA_CONFIG.MLPS[k]
                if cfg.ATTENTION:
                    # Uses ELU, whereas the main-stack attention uses ReLU.
                    # Reads cfg.RCNN.USE_BN although the rest of this stage
                    # reads cfg.IOUN.USE_BN.
                    self.ATT_score_modules.append(
                        pt_utils.SharedMLP(
                            [iou_channel_in],
                            bn=cfg.RCNN.USE_BN,
                            activation=nn.ELU(inplace=True)))
                npoint = cfg.IOUN.SA_CONFIG.NPOINTS[
                    k] if cfg.IOUN.SA_CONFIG.NPOINTS[k] != -1 else None
                self.SA_score_modules.append(
                    PointnetSAModule(npoint=npoint,
                                     radius=cfg.IOUN.SA_CONFIG.RADIUS[k],
                                     nsample=cfg.IOUN.SA_CONFIG.NSAMPLE[k],
                                     mlp=mlps,
                                     use_xyz=use_xyz,
                                     bn=cfg.IOUN.USE_BN).cuda())
                iou_channel_in = mlps[-1]

            # Single-channel IoU head for this stage.
            IOU_channel = 1
            IOU_layers = []
            pre_channel = iou_channel_in
            for k in range(0, cfg.IOUN.CLS_FC.__len__()):
                IOU_layers.append(
                    pt_utils.Conv1d(pre_channel,
                                    cfg.IOUN.CLS_FC[k],
                                    bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.CLS_FC[k]
            IOU_layers.append(
                pt_utils.Conv1d(pre_channel, IOU_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                IOU_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.IOU_layer.append(nn.Sequential(*IOU_layers).cuda())

            # Single-channel ICL head, built the same way as the IoU head.
            ICL_channel = 1
            ICL_layers = []
            pre_channel = iou_channel_in
            for k in range(0, cfg.IOUN.CLS_FC.__len__()):
                ICL_layers.append(
                    pt_utils.Conv1d(pre_channel,
                                    cfg.IOUN.CLS_FC[k],
                                    bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.CLS_FC[k]
            ICL_layers.append(
                pt_utils.Conv1d(pre_channel, ICL_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                ICL_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.ICL_layer.append(nn.Sequential(*ICL_layers).cuda())

            # The two bin counts below are computed but not read again in
            # this constructor; the refinement head has a fixed 7 channels.
            per_loc_bin_num = int(
                cfg.IOUN.LOC_SCOPE / cfg.IOUN.LOC_BIN_SIZE) * 2
            loc_y_bin_num = int(
                cfg.IOUN.LOC_Y_SCOPE / cfg.IOUN.LOC_Y_BIN_SIZE) * 2
            ref_channel = 7
            ref_layers = []
            pre_channel = iou_channel_in
            for k in range(0, cfg.IOUN.REG_FC.__len__()):
                ref_layers.append(
                    pt_utils.Conv1d(pre_channel,
                                    cfg.IOUN.REG_FC[k],
                                    bn=cfg.IOUN.USE_BN))
                pre_channel = cfg.IOUN.REG_FC[k]
            ref_layers.append(
                pt_utils.Conv1d(pre_channel, ref_channel, activation=None))
            if cfg.IOUN.DP_RATIO >= 0:
                ref_layers.insert(1, nn.Dropout(cfg.IOUN.DP_RATIO))
            self.ref_layer.append(nn.Sequential(*ref_layers).cuda())

    # NOTE(review): assumed to run whether or not cfg.IOUN.ENABLED is set,
    # matching where init_weights usually sits in a constructor — confirm.
    self.init_weights(weight_init='xavier')
def __init__(self, use_xyz=True, mode='TRAIN'):
    """Assemble the RPN with a direct 9-value regression head.

    Args:
        use_xyz: Forwarded unchanged to the backbone's ``get_model``.
        mode: ``self.training_mode`` is True only when this equals ``'TRAIN'``;
            the value is also handed to ``ProposalLayer``.

    Raises:
        NotImplementedError: If ``cfg.RPN.LOSS_CLS`` is not one of
            ``'DiceLoss'``, ``'SigmoidFocalLoss'`` or ``'BinaryCrossEntropy'``.
    """
    super().__init__()
    self.training_mode = (mode == 'TRAIN')

    # The backbone implementation is looked up by module name from the config.
    backbone_module = importlib.import_module(cfg.RPN.BACKBONE)
    self.backbone_net = backbone_module.get_model(
        input_channels=int(cfg.RPN.USE_INTENSITY), use_xyz=use_xyz)

    # Original author's notes on the heads (not verifiable from this block):
    # - pt_utils.Conv1d is described as close to torch.nn.Conv1d
    #   (https://pytorch.org/docs/stable/nn.html#conv1d); it is used so one
    #   call covers both the batch of scenes and the points of each scene.
    # - Both heads take a (B, C, N) tensor, with C the per-point feature
    #   count (apparently 128) and N the number of points in a scene.
    # - Outputs are (B, 1, N) for classification and (B, 9, N) for
    #   regression, i.e. results for every point at once.
    # - With kernel_size 1 each output is a linear combination of the input
    #   channels plus a bias; each regression output has its own weights.
    def build_head(fc_widths, out_channels):
        """Stack Conv1d layers over the backbone feature width, ending in `out_channels`."""
        width_in = cfg.RPN.FP_MLPS[0][-1]  # 128 per the original notes
        stack = []
        for width_out in fc_widths:
            stack.append(
                pt_utils.Conv1d(width_in, width_out, bn=cfg.RPN.USE_BN))
            width_in = width_out
        # No activation on the final layer. For classification the original
        # notes say the sigmoid is applied inside the loss function.
        stack.append(pt_utils.Conv1d(width_in, out_channels, activation=None))
        # Dropout always lands at position 1, right after the first layer
        # (ratio 0.5 per the original notes).
        if cfg.RPN.DP_RATIO >= 0:
            stack.insert(1, nn.Dropout(cfg.RPN.DP_RATIO))
        return nn.Sequential(*stack)

    # classification branch: per the original notes, 128 -> 128 -> 1
    self.rpn_cls_layer = build_head(cfg.RPN.CLS_FC, 1)

    # regression branch: plain regression of all 9 box parameters
    # (x, y, z, w, h, l, rx, ry, rz); per the original notes, 128 -> 128 -> 9
    # with cfg.RPN.REG_FC = [128].
    # TODO (original author): with binning + classification the last layer's
    # activation is applied in the loss instead; see
    # /lib/utils/loss_utils.py "get_reg_loss", flagged there as needing change.
    reg_channel = 9
    self.rpn_reg_layer = build_head(cfg.RPN.REG_FC, reg_channel)

    # classification loss selected by name
    loss_name = cfg.RPN.LOSS_CLS
    if loss_name == 'DiceLoss':
        self.rpn_cls_loss_func = loss_utils.DiceLoss(ignore_target=-1)
    elif loss_name == 'SigmoidFocalLoss':
        self.rpn_cls_loss_func = loss_utils.SigmoidFocalClassificationLoss(
            alpha=cfg.RPN.FOCAL_ALPHA[0], gamma=cfg.RPN.FOCAL_GAMMA)
    elif loss_name == 'BinaryCrossEntropy':
        self.rpn_cls_loss_func = F.binary_cross_entropy
    else:
        raise NotImplementedError

    # Per the original notes, the proposal layer is only used by the RCNN
    # stage, not by the RPN itself.
    self.proposal_layer = ProposalLayer(mode=mode)
    self.init_weights()