def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, prediction heads and losses.

    Every object created here is stored as an attribute on ``self``.

    Raises:
        NotImplementedError: if ``self.pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # RoI pooling operator. A mode that is not listed below leaves
    # ``rcnn_pooling`` unset.
    mode = self.pooling_mode
    if mode == 'align':
        side = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    elif mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')

    # Classification head.
    self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

    # Box head: input width is 2048 when ``self.reduce`` is truthy, otherwise
    # 2048 * 4 * 4; output is 4 values, or 4 per class when not
    # class-agnostic.
    bbox_in = 2048 if self.reduce else 2048 * 4 * 4
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(bbox_in, bbox_out)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate the backbone, RPN, RoI pooling, prediction heads and losses.

    Every object created here is stored as an attribute on ``self``.

    Raises:
        NotImplementedError: if ``self.pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # RoI pooling operator. A mode that is not listed below leaves
    # ``rcnn_pooling`` unset.
    mode = self.pooling_mode
    if mode == 'align':
        side = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    elif mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')

    # Classification head: a linear layer with n_classes * 2048 outputs.
    # (A 3x3 Conv2d classifier was tried here and is currently disabled.)
    self.rcnn_cls_pred = nn.Linear(2048, self.n_classes * 2048)

    # Box head: a 3x3 conv with 4 output channels when class-agnostic,
    # otherwise a linear layer with 4 outputs per class.
    # (A bottleneck MLP variant of the class-agnostic head is disabled.)
    if self.class_agnostic:
        bbox_head = nn.Conv2d(2048, 4, 3, 1, 1)
    else:
        bbox_head = nn.Linear(2048, 4 * self.n_classes)
    self.rcnn_bbox_pred = bbox_head

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Create the RPN convolution layers and loss functions.

    Reads ``in_channels``, ``nc_score_out``, ``nc_bbox_out``,
    ``num_anchors``, ``use_score`` and ``use_focal_loss`` from ``self`` and
    stores the created layers/losses back on ``self``.

    Side effect: when ``self.use_score`` is truthy, ``self.nc_bbox_out`` is
    divided in place by ``self.num_anchors`` before the box layer is built.
    """
    # 3x3 convolution applied to the input feature map
    self.rpn_conv = nn.Conv2d(self.in_channels, 512, 3, 1, 1, bias=True)

    # bg/fg classification score layer (1x1 convolution)
    self.rpn_cls_score = nn.Conv2d(512, self.nc_score_out, 1, 1, 0)

    # anchor box offset prediction layer
    if self.use_score:
        # Two extra input channels are expected on top of the 512 features.
        bbox_feat_channels = 512 + 2
        # Floor division keeps the channel count an int. True division
        # (``/=``) yields a float on Python 3, which nn.Conv2d rejects as
        # ``out_channels``.
        self.nc_bbox_out //= self.num_anchors
    else:
        bbox_feat_channels = 512
    self.rpn_bbox_pred = nn.Conv2d(bbox_feat_channels, self.nc_bbox_out, 1,
                                   1, 0)

    # bbox
    self.rpn_bbox_loss = nn.modules.loss.SmoothL1Loss(reduce=False)

    # cls
    if self.use_focal_loss:
        self.rpn_cls_loss = FocalLoss(2)
    else:
        self.rpn_cls_loss = functools.partial(F.cross_entropy, reduce=False)
def init_modules(self):
    """Instantiate the backbone, IoU-aware RPN, pooling, heads and losses.

    Every object created here is stored as an attribute on ``self``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = IoURPNModel(self.rpn_config)

    side = self.pooling_size
    self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)

    # Classification head: 3x3 conv with one output channel per class.
    self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

    feat_dim = 2048

    # Three linear heads of identical shape, created in this order:
    # rcnn_iou, rcnn_iog, rcnn_iod.
    for head_name in ('rcnn_iou', 'rcnn_iog', 'rcnn_iod'):
        setattr(self, head_name, nn.Linear(feat_dim, self.n_classes))

    # Box head: 4 outputs, or 4 per class when not class-agnostic.
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(feat_dim, bbox_out)

    # loss module (cross-entropy, MSE and SmoothL1 use reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_iou_loss = nn.MSELoss(reduce=False)
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate backbone, RPN, pooling, 2D heads, 3D heads and losses.

    Every object created here is stored as an attribute on ``self``.

    Raises:
        NotImplementedError: if ``self.pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # RoI pooling operator. A mode that is not listed below leaves
    # ``rcnn_pooling`` unset.
    mode = self.pooling_mode
    if mode == 'align':
        side = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    elif mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')

    # Classification head: 3x3 conv (a Linear variant is disabled).
    self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

    # Width of the features fed to the linear heads below.
    feat_dim = 2048 if self.reduce else 2048 * 4 * 4

    # Box head: 4 outputs, or 4 per class when not class-agnostic.
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(feat_dim, bbox_out)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # 3D branch (a single-Linear predictor with 3 + 4 * num_bins outputs
    # is disabled in favour of the separate heads below).
    self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)

    def two_layer_head(n_out):
        # Linear(feat_dim -> 256) -> ReLU -> Linear(256 -> n_out)
        return nn.Sequential(
            nn.Linear(feat_dim, 256), nn.ReLU(), nn.Linear(256, n_out))

    # dims
    self.rcnn_dims_pred = two_layer_head(3)
    # angle
    self.rcnn_angle_pred = two_layer_head(self.num_bins * 2)
    # angle conf
    self.rcnn_angle_conf_pred = two_layer_head(self.num_bins * 2)
def init_modules(self):
    """Instantiate backbone, RPN, pooling, 2D/3D/keypoint heads and losses.

    Every object created here is stored as an attribute on ``self``.

    Raises:
        NotImplementedError: if ``self.pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    # Hook applied right after the backbone is built and before the RPN.
    self.modify_feature_extractor()
    self.rpn_model = RPNModel(self.rpn_config)

    # RoI pooling operator. A mode that is not listed below leaves
    # ``rcnn_pooling`` unset.
    mode = self.pooling_mode
    if mode == 'align':
        side = self.pooling_size
        self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    elif mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')

    # Second RoI-align operator with a fixed 14x14 output.
    self.mask_rcnn_pooling = RoIAlignAvg(14, 14, 1.0 / 16.0)

    # Classification head: 3x3 conv (a Linear variant is disabled).
    self.rcnn_cls_pred = nn.Conv2d(2048, self.n_classes, 3, 1, 1)

    # Width of the features fed to the linear heads below.
    feat_dim = 2048 if self.reduce else 2048 * 4 * 4

    # Box head: 4 outputs, or 4 per class when not class-agnostic.
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(feat_dim, bbox_out)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    # Keypoint loss: cross-entropy that skips targets equal to -1.
    self.rcnn_kp_loss = functools.partial(
        F.cross_entropy, reduce=False, ignore_index=-1)
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # 3D branch: a 3-output linear predictor with an OrientationLoss
    # (MultiBinLoss / MultiBinRegLoss alternatives are disabled).
    self.rcnn_3d_pred = nn.Linear(feat_dim, 3)
    self.rcnn_3d_loss = OrientationLoss(split_loss=True)

    self.keypoint_predictor = KeyPointPredictor2(1024)
# init_modules: builds the detector's sub-modules and stores them on ``self``.
#   * feature extractor from feature_extractors_builder.build(...) and an
#     RPNModel built from self.rpn_config
#   * RoI pooling chosen by self.pooling_mode: 'align' -> RoIAlignAvg,
#     'ps' -> PSRoIPool; 'psalign' and 'deformable_psalign' raise
#     NotImplementedError; any other value leaves rcnn_pooling unset
#   * linear classification and box heads fed with self.ndin features (the
#     box head has 4 outputs when class_agnostic, else 4 * n_classes)
#   * classification loss (FocalLoss, or cross-entropy with reduce=False)
#     and a SmoothL1 box loss with reduce=False
#   * spatial_attention (3x3 conv, ndin -> 1), a second RoIAlignAvg built
#     with 1.0 / 8.0, and reduce_pooling (1x1 conv 512 -> 1024 + ReLU)
# NOTE(review): this definition has lost its line breaks, so the text alone
# does not show whether rcnn_pooling2 and reduce_pooling sit inside the
# `if self.use_self_attention:` branch or after it -- confirm against the
# original file before reformatting.
def init_modules(self): self.feature_extractor = feature_extractors_builder.build( self.feature_extractor_config) # self.feature_extractor = ResNetFeatureExtractor( # self.feature_extractor_config) # self.feature_extractor = MobileNetFeatureExtractor( # self.feature_extractor_config) self.rpn_model = RPNModel(self.rpn_config) if self.pooling_mode == 'align': self.rcnn_pooling = RoIAlignAvg(self.pooling_size, self.pooling_size, 1.0 / 16.0) elif self.pooling_mode == 'ps': self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes) elif self.pooling_mode == 'psalign': raise NotImplementedError('have not implemented yet!') elif self.pooling_mode == 'deformable_psalign': raise NotImplementedError('have not implemented yet!') self.rcnn_cls_pred = nn.Linear(self.ndin, self.n_classes) if self.class_agnostic: self.rcnn_bbox_pred = nn.Linear(self.ndin, 4) # self.rcnn_bbox_pred = nn.Conv2d(2048,4,3,1,1) else: self.rcnn_bbox_pred = nn.Linear(self.ndin, 4 * self.n_classes) # loss module if self.use_focal_loss: self.rcnn_cls_loss = FocalLoss(2, gamma=2, alpha=0.2, auto_alpha=False) else: self.rcnn_cls_loss = functools.partial(F.cross_entropy, reduce=False) self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False) # attention if self.use_self_attention: self.spatial_attention = nn.Conv2d(self.ndin, 1, 3, 1, 1) self.rcnn_pooling2 = RoIAlignAvg(self.pooling_size, self.pooling_size, 1.0 / 8.0) self.reduce_pooling = nn.Sequential(nn.Conv2d(512, 1024, 1, 1, 0), nn.ReLU())
def init_modules(self):
    """Instantiate backbone, RPN, pooling, 2D heads, the 3D head and losses.

    Every object created here is stored as an attribute on ``self``.

    Raises:
        NotImplementedError: if ``self.pooling_mode`` is ``'psalign'`` or
            ``'deformable_psalign'``.
    """
    self.feature_extractor = ResNetFeatureExtractor(
        self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # RoI pooling operator. A mode that is not listed below leaves
    # ``rcnn_pooling`` unset.
    mode = self.pooling_mode
    if mode == 'align':
        output_size = (self.pooling_size, self.pooling_size)
        self.rcnn_pooling = ROIAlign(output_size, 1.0 / 16.0, 2)
    elif mode == 'ps':
        self.rcnn_pooling = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    elif mode in ('psalign', 'deformable_psalign'):
        raise NotImplementedError('have not implemented yet!')

    # Classification head: linear (a 3x3 Conv2d variant is disabled).
    self.rcnn_cls_preds = nn.Linear(2048, self.n_classes)

    # Width of the features fed to the box and 3D heads.
    feat_dim = 2048 if self.reduce else 2048 * 4 * 4

    # Box head: 4 outputs, or 4 per class when not class-agnostic.
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_preds = nn.Linear(feat_dim, bbox_out)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(self.n_classes)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)

    # 3D head: 4 * num_bins outputs plus either 3 shared values
    # (class_agnostic_3d) or 3 values per class.
    if self.class_agnostic_3d:
        out_3d = 3 + 4 * self.num_bins
    else:
        out_3d = 3 * self.n_classes + 4 * self.num_bins
    self.rcnn_3d_pred = nn.Linear(feat_dim, out_3d)

    # MultiBinLoss is used here (an OrientationLoss variant is disabled).
    self.rcnn_3d_loss = MultiBinLoss(num_bins=self.num_bins)
def init_modules(self):
    """Instantiate the backbone, gated RPN, RoI pooling, heads and losses.

    Every object created here is stored as an attribute on ``self``.
    """
    self.feature_extractor = FeatureExtractor(self.feature_extractor_config)
    self.rpn_model = GateRPNModel(self.rpn_config)

    side = self.pooling_size
    self.rcnn_pooling = RoIAlignAvg(side, side, 1.0 / 16.0)

    # Classification head.
    self.rcnn_cls_pred = nn.Linear(2048, self.n_classes)

    # Box head: 4 outputs, or 4 per class when not class-agnostic.
    bbox_out = 4 if self.class_agnostic else 4 * self.n_classes
    self.rcnn_bbox_pred = nn.Linear(2048, bbox_out)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)
def init_modules(self):
    """Instantiate the pyramid VGG backbone, multibox layers and losses.

    Every object created here is stored as an attribute on ``self``.
    """
    extractor = PyramidVggnetExtractor(self.feature_extractor_config)
    self.feature_extractor = extractor

    # loc layers and conf layers, derived from the backbone's base and
    # extra layers by ``make_multibox``.
    self.loc_layers, self.conf_layers = self.make_multibox(
        extractor.base_feat, extractor.extras_layers)

    # loss layers (SmoothL1 and cross-entropy are created with reduce=False)
    self.loc_loss = nn.SmoothL1Loss(reduce=False)
    if self.use_focal_loss:
        conf_loss = FocalLoss(
            self.n_classes, alpha=0.2, gamma=2, auto_alpha=False)
    else:
        conf_loss = nn.CrossEntropyLoss(reduce=False)
    self.conf_loss = conf_loss
def init_modules(self):
    """Instantiate backbone, RPN, PS-RoI pooling, conv heads and losses.

    Every object created here is stored as an attribute on ``self``.
    """
    self.feature_extractor = FeatureExtractor(self.feature_extractor_config)
    self.rpn_model = RPNModel(self.rpn_config)

    # Separate PSRoIPool operators for classification and localisation;
    # they differ only in the last argument (n_classes vs 4).
    self.rcnn_pooling_cls = PSRoIPool(7, 7, 1.0 / 16, 7, self.n_classes)
    self.rcnn_pooling_loc = PSRoIPool(7, 7, 1.0 / 16, 7, 4)

    # Bias-free 1x1 convs whose channel count is
    # (outputs per location) * pooling_size * pooling_size.
    side = self.pooling_size
    self.rcnn_cls_base = nn.Conv2d(
        1024, self.n_classes * side * side, 1, 1, 0, bias=False)
    self.rcnn_bbox_base = nn.Conv2d(1024, 4 * side * side, 1, 1, 0, bias=False)

    # Bias-free 1x1 conv reducing 2048 channels to 1024.
    self.rcnn_top = nn.Conv2d(2048, 1024, 1, 1, 0, bias=False)

    # (The linear rcnn_cls_pred / rcnn_bbox_pred heads are disabled in this
    # variant.)

    # loss module (cross-entropy and SmoothL1 are created with reduce=False)
    if self.use_focal_loss:
        cls_loss = FocalLoss(2)
    else:
        cls_loss = functools.partial(F.cross_entropy, reduce=False)
    self.rcnn_cls_loss = cls_loss
    self.rcnn_bbox_loss = nn.modules.SmoothL1Loss(reduce=False)