def __init__(self, classes, class_agnostic):
    """Create the layers held by a CoupleNet instance.

    Args:
        classes: Sized collection of class labels; its length is stored as
            ``self.n_classes``.
        class_agnostic: When truthy, ``self.box_num_classes`` is 1; otherwise
            it equals ``self.n_classes``.

    Note:
        ``self.dout_base_model`` is read here but never assigned in this
        method, so it has to be resolvable on the instance by the time the
        RPN is built.
    """
    super(CoupleNet, self).__init__()

    pool_size = cfg.POOLING_SIZE
    # Every pooling layer below receives the same 1/16 factor.
    feat_scale = 1.0 / 16.0

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic

    # Loss slots start out at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    if class_agnostic:
        self.box_num_classes = 1
    else:
        self.box_num_classes = self.n_classes

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)

    # RoI pooling variants.
    self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, feat_scale)
    self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, feat_scale)
    self.RCNN_roi_crop = _RoICrop()

    # PS-RoI pooling: one layer for class scores, one for box outputs
    # (four values per box class slot).
    psroi_shared = dict(spatial_scale=feat_scale, group_size=pool_size)
    self.RCNN_psroi_pool_cls = PSRoIPool(
        pool_size, pool_size, output_dim=self.n_classes, **psroi_shared)
    self.RCNN_psroi_pool_loc = PSRoIPool(
        pool_size, pool_size, output_dim=self.box_num_classes * 4,
        **psroi_shared)

    self.avg_pooling = nn.AvgPool2d(kernel_size=pool_size, stride=pool_size)

    # The grid is doubled when cfg.CROP_RESIZE_WITH_MAX_POOL is set.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool_size * 2
    else:
        self.grid_size = pool_size
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, prediction heads and losses.

    Reads ``feature_extractor_config``, ``rpn_config``, ``pooling_mode``,
    ``pooling_size``, ``n_classes``, ``reduce``, ``class_agnostic`` and
    ``use_focal_loss`` from ``self``.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    # Classification head.
    self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

    # Box head: input width depends on `self.reduce`, output width on
    # whether regression is class-agnostic.
    in_channels = 2048 if self.reduce else 2048 * 4 * 4
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_out)

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    self.rcnn_cls_loss = (
        FocalLoss(2) if self.use_focal_loss
        else functools.partial(F.cross_entropy, reduce=False))
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, prediction heads and losses.

    In this variant the classification head is a linear layer producing
    ``n_classes * 2048`` outputs, and the class-agnostic box head is a 3x3
    convolution rather than a linear layer.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    # Classification head. A disabled alternative in the source was
    # nn.Conv2d(2048, self.n_classes, 3, 1, 1).
    self.rcnn_cls_pred = nn.Linear(2048, self.n_classes * 2048)

    if self.class_agnostic:
        # Disabled experiments in the source for this branch: a
        # Linear(2048, 512) / BatchNorm2d(512) / ReLU / Linear(512, 2048)
        # bottleneck, plus a Linear(2048, 4) "top" predictor with a ReLU.
        self.rcnn_bbox_pred = nn.Conv2d(2048, 4, 3, 1, 1)
    else:
        self.rcnn_bbox_pred = nn.Linear(2048, 4 * self.n_classes)

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    self.rcnn_cls_loss = (
        FocalLoss(2) if self.use_focal_loss
        else functools.partial(F.cross_entropy, reduce=False))
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, 2D heads, 3D heads and losses.

    On top of the classification and box heads this variant creates three
    small two-layer predictors (``rcnn_dims_pred``, ``rcnn_angle_pred``,
    ``rcnn_angle_conf_pred``) and a ``MultiBinLoss`` configured with
    ``self.num_bins``.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    # Classification head is convolutional here; the disabled alternative
    # in the source was nn.Linear(2048, self.n_classes).
    self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

    # Box head: input width depends on `self.reduce`, output width on
    # whether regression is class-agnostic.
    in_channels = 2048 if self.reduce else 2048 * 4 * 4
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_out)

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    self.rcnn_cls_loss = (
        FocalLoss(2) if self.use_focal_loss
        else functools.partial(F.cross_entropy, reduce=False))
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # 3D statistics / 2D points projected from 3D.
    # A disabled single-layer alternative in the source was
    # nn.Linear(in_channels, 3 + 4 * self.num_bins).
    self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

    # Dimensions: three outputs.
    self.rcnn_dims_pred = nn.Sequential(
        nn.Linear(in_channels, 256),
        nn.ReLU(),
        nn.Linear(256, 3),
    )

    # Angle: two outputs per bin.
    self.rcnn_angle_pred = nn.Sequential(
        nn.Linear(in_channels, 256),
        nn.ReLU(),
        nn.Linear(256, self.num_bins * 2),
    )

    # Angle confidence: two outputs per bin.
    self.rcnn_angle_conf_pred = nn.Sequential(
        nn.Linear(in_channels, 256),
        nn.ReLU(),
        nn.Linear(256, self.num_bins * 2),
    )
def init_modules(self):
    """Instantiate the backbone, RPN, PS-RoI pooling layers, conv heads and losses.

    This variant has no linear prediction heads: it builds two PS-RoI
    pooling layers (class and location), two bias-free 1x1 convolutions
    whose output widths scale with ``pooling_size ** 2``, and a bias-free
    1x1 convolution from 2048 to 1024 channels.
    """
    self.feature_extractor = FeatureExtractor(self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # NOTE(review): the pooling geometry is hard-coded to 7 here while the
    # convolutions below use `self.pooling_size` - presumably the two are
    # meant to agree; confirm against the forward pass.
    self.rcnn_pooling_cls = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    self.rcnn_pooling_loc = PSRoIPool(7, 7, 1.0 / 16, 7, 4)

    # Both base convolutions are bias-free 1x1 layers on 1024 input channels.
    conv1x1 = dict(kernel_size=1, stride=1, padding=0, bias=False)
    num_bins = self.pooling_size * self.pooling_size
    self.rcnn_cls_base = nn.Conv2d(
        in_channels=1024, out_channels=self.n_classes * num_bins, **conv1x1)
    self.rcnn_bbox_base = nn.Conv2d(
        in_channels=1024, out_channels=4 * num_bins, **conv1x1)

    self.rcnn_top = nn.Conv2d(2048, 1024, 1, 1, 0, bias=False)

    # Disabled in the source: Linear(2048, n_classes) classification head
    # and Linear(2048, 4) / Linear(2048, 4 * n_classes) box heads.

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    self.rcnn_cls_loss = (
        FocalLoss(2) if self.use_focal_loss
        else functools.partial(F.cross_entropy, reduce=False))
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate backbone, RPN, pooling, 2D/3D heads, keypoint head and losses.

    Differences from the plain variant visible in this method: the backbone
    is post-processed by ``self.modify_feature_extractor()``, an extra
    14x14 ``RoIAlignAvg`` is created as ``mask_rcnn_pooling``, and a
    keypoint loss, a 3-output linear 3D head, an ``OrientationLoss`` and a
    ``KeyPointPredictor2`` are added.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.modify_feature_extractor()
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    self.mask_rcnn_pooling = RoIAlignAvg(14, 14, 1.0 / 16.0)

    # Classification head is convolutional here; the disabled alternative
    # in the source was nn.Linear(2048, self.n_classes).
    self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

    # Box head: input width depends on `self.reduce`, output width on
    # whether regression is class-agnostic.
    in_channels = 2048 if self.reduce else 2048 * 4 * 4
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_out)

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    self.rcnn_cls_loss = (
        FocalLoss(2) if self.use_focal_loss
        else functools.partial(F.cross_entropy, reduce=False))
    # Keypoint loss skips targets equal to -1.
    self.rcnn_kp_loss = functools.partial(
        F.cross_entropy, reduce=False, ignore_index=-1)
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # 3D statistics / 2D points projected from 3D.
    self.rcnn_3d_pred = nn.Linear(in_channels, 3)

    # Disabled alternatives in the source: MultiBinLoss and MultiBinRegLoss,
    # both with num_bins=self.num_bins.
    self.rcnn_3d_loss = OrientationLoss(split_loss=True)

    self.keypoint_predictor = KeyPointPredictor2(1024)
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, heads, losses and attention.

    The backbone comes from ``feature_extractors_builder.build`` and both
    prediction heads take ``self.ndin`` input features. A one-channel 3x3
    convolution is added as ``spatial_attention`` when
    ``self.use_self_attention`` is set.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    # Disabled alternatives in the source constructed ResNetFeatureExtractor
    # or MobileNetFeatureExtractor directly from the same config.
    self.feature_extractor = feature_extractors_builder.build(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    self.rcnn_cls_pred = nn.Linear(self.ndin, self.n_classes)

    # Box head; a disabled alternative for the class-agnostic case in the
    # source was nn.Conv2d(2048, 4, 3, 1, 1).
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(self.ndin, bbox_out)

    # Losses. `reduce=False` is the legacy spelling of `reduction='none'`.
    if self.use_focal_loss:
        self.rcnn_cls_loss = FocalLoss(
            2, gamma=2, alpha=0.2, auto_alpha=False)
    else:
        self.rcnn_cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # Attention.
    if self.use_self_attention:
        self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1)

    # NOTE(review): the flattened source does not show whether the two
    # layers below belong inside the `use_self_attention` branch; they are
    # created unconditionally here - verify against the original layout.
    self.rcnn_pooling2 = RoIAlignAvg(self.pooling_size, self.pooling_size,
                                     1.0 / 8.0)
    self.reduce_pooling = nn.Sequential(
        nn.Conv2d(512, 1024, 1, 1, 0),
        nn.ReLU(),
    )
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, heads and losses.

    In this variant the classification head always has two outputs and is
    paired with an element-wise ``nn.MSELoss``. When ``self.multiple_crop``
    is set, a second ``RoIAlignAvg`` (factor 1/8) and a 1x1 fusion
    convolution (512 -> 1024 channels) are created as well.

    Raises:
        NotImplementedError: If ``pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    mode = self.pooling_mode
    if mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')
    if mode == 'align':
        self.rcnn_pooling = RoIAlignAvg(self.pooling_size,
                                        self.pooling_size, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    # NOTE(review): any other mode leaves `rcnn_pooling` unset - confirm
    # that this is intended.

    self.rcnn_cls_pred = nn.Linear(2048, 2)

    # Box head: input width depends on `self.reduce`, output width on
    # whether regression is class-agnostic.
    in_channels = 2048 if self.reduce else 2048 * 4 * 4
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(in_channels, bbox_out)

    if self.multiple_crop:
        self.rcnn_pooling2 = RoIAlignAvg(self.pooling_size,
                                         self.pooling_size, 1.0 / 8.0)
        # 1x1 fusion.
        # NOTE(review): assumed to belong to the `multiple_crop` branch;
        # the flattened source does not show the nesting - verify.
        self.pooled_feat_fusion = nn.Conv2d(512, 1024, 1, 1, 0)

    # Losses. Disabled in the source: an `enable_cls` branch choosing
    # FocalLoss(2, alpha=0.25, gamma=2) or cross-entropy, with the MSE loss
    # under an `enable_iou` branch. `reduce=False` is the legacy spelling
    # of `reduction='none'`.
    self.rcnn_cls_loss = nn.MSELoss(reduce=False)
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def __init__(self, classes, num_layers=101, pretrained=False,
             class_agnostic=False, b_save_mid_convs=False):
    """Create the shared layers plus the head selected by the config.

    Args:
        classes: Sized collection of class labels; its length is stored as
            ``self.n_classes``.
        num_layers: Forwarded unchanged to the parent initializer.
        pretrained: Forwarded unchanged to the parent initializer.
        class_agnostic: When truthy, the R-FCN box layers emit 4 values
            instead of ``4 * n_classes``.
        b_save_mid_convs: Stored on the instance; not otherwise used here.

    The head is chosen by comparing ``cfg.RESNET.CORE_CHOICE.USE`` with the
    ``FASTER_RCNN``, ``RFCN_LIGHTHEAD`` and ``RFCN`` members of the same
    config node. No extra layers are built when none of them matches.

    Note:
        ``self.dout_base_model`` is read here but not assigned in this
        method; presumably the parent initializer or the class provides it.
    """
    super(_fasterRCNN, self).__init__(classes, num_layers, pretrained,
                                      class_agnostic)

    pool_size = cfg.POOLING_SIZE
    # Every pooling layer below receives the same 1/16 factor.
    feat_scale = 1.0 / 16.0

    self.classes = classes
    self.n_classes = len(classes)
    self.class_agnostic = class_agnostic
    self.b_save_mid_convs = b_save_mid_convs

    # Loss slots start out at zero.
    self.RCNN_loss_cls = 0
    self.RCNN_loss_bbox = 0

    # Region proposal network and proposal-target layer.
    self.RCNN_rpn = _RPN(self.dout_base_model)
    self.RCNN_proposal_target = _ProposalTargetLayer(self.n_classes)
    self.RCNN_roi_pool = _RoIPooling(pool_size, pool_size, feat_scale)
    self.RCNN_roi_align = RoIAlignAvg(pool_size, pool_size, feat_scale)

    # The grid is doubled when cfg.CROP_RESIZE_WITH_MAX_POOL is set.
    if cfg.CROP_RESIZE_WITH_MAX_POOL:
        self.grid_size = pool_size * 2
    else:
        self.grid_size = pool_size
    self.RCNN_roi_crop = _RoICrop()

    self.Conv_feat_track = None
    self.rpn_rois = None

    core_choice = cfg.RESNET.CORE_CHOICE
    selected_core = core_choice.USE

    if selected_core == core_choice.FASTER_RCNN:
        print('RCNN uses Faster RCNN core.')

    elif selected_core == core_choice.RFCN_LIGHTHEAD:
        print('RCNN uses RFCN Light Head core.')
        # Input channels are fixed at 2048 by hand (the original note says
        # only resnet101 is used) and the middle width at 256. The output
        # width is core_depth * pool_size**2; the original note quotes a
        # depth of 10, but the value actually comes from the config.
        self.relu = nn.ReLU()
        core_depth = cfg.RESNET.GLOBAL_CONTEXT_OUT_DEPTH
        ctx_size = cfg.RESNET.GLOBAL_CONTEXT_RANGE
        pooled_width = core_depth * pool_size * pool_size
        self.g_ctx = _global_context_layer(2048, pooled_width, 256, ctx_size)
        self.RCNN_psroi_pool = PSRoIPool(pool_size, pool_size, feat_scale,
                                         pool_size, core_depth)
        # Fully connected layer for per-RoI prediction; the original note
        # cites 2048 as the paper's middle width.
        roi_mid_c = 2048
        self.fc_roi = nn.Linear(pooled_width, roi_mid_c)

    elif selected_core == core_choice.RFCN:
        print('RCNN uses R-FCN core.')
        # Extra 1x1 convolutions that feed the PS-RoI pooling layers.
        tmp_c_in = 2048
        bins = pool_size * pool_size
        # NOTE(review): the original comment talks about dropping the
        # background class for box regression, yet the code keeps all
        # `n_classes` (selection is said to happen later via torch.gather).
        # Behaviour is preserved as written.
        if self.class_agnostic:
            bbox_dim = 4
        else:
            bbox_dim = 4 * self.n_classes

        self.rfcn_cls = nn.Conv2d(tmp_c_in, self.n_classes * bins,
                                  kernel_size=1)
        self.rfcn_bbox = nn.Conv2d(tmp_c_in, bbox_dim * bins, kernel_size=1)

        # PS-RoI pooling for scores and boxes.
        self.RCNN_psroi_score = PSRoIPool(pool_size, pool_size, feat_scale,
                                          pool_size, self.n_classes)
        self.RCNN_psroi_bbox = PSRoIPool(pool_size, pool_size, feat_scale,
                                         pool_size, bbox_dim)

        # Average pooling over the pooled grid.
        self.ave_pooling_bbox = nn.AvgPool2d(pool_size, stride=pool_size)
        self.ave_pooling_cls = nn.AvgPool2d(pool_size, stride=pool_size)