def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             clip_border=True,
                             target_means=[0., 0., 0., 0.],
                             target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=False,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0),
             init_cfg=None):
    """RoI box head with optional classification and regression fc layers.

    Builds the bbox coder and loss modules from their configs, then a
    ``fc_cls``/``fc_reg`` linear layer for each enabled branch.
    """
    super(BBoxHead, self).__init__(init_cfg)
    # At least one of the two branches must be enabled.
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # Average pooling collapses the RoI feature map to a vector;
    # otherwise the map is flattened, multiplying the fc input width.
    fc_in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        fc_in_channels *= self.roi_feat_area
    if self.with_cls:
        # One extra output channel for the background class.
        self.fc_cls = nn.Linear(fc_in_channels, num_classes + 1)
    if self.with_reg:
        out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
        self.fc_reg = nn.Linear(fc_in_channels, out_dim_reg)
    self.debug_imgs = None

    # Default weight initialization when the caller supplies no init_cfg.
    if init_cfg is None:
        self.init_cfg = []
        if self.with_cls:
            self.init_cfg.append(
                dict(type='Normal', std=0.01, override=dict(name='fc_cls')))
        if self.with_reg:
            self.init_cfg.append(
                dict(type='Normal', std=0.001, override=dict(name='fc_reg')))
def __init__(self,
             num_classes=80,
             in_channels=(512, 1024, 512, 256, 256, 256),
             stacked_convs=0,
             feat_channels=256,
             use_depthwise=False,
             conv_cfg=None,
             norm_cfg=None,
             act_cfg=None,
             anchor_generator=dict(
                 type='SSDAnchorGenerator',
                 scale_major=False,
                 input_size=300,
                 strides=[8, 16, 32, 64, 100, 300],
                 ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                 basesize_ratio_range=(0.1, 0.9)),
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[.0, .0, .0, .0],
                 target_stds=[1.0, 1.0, 1.0, 1.0],
             ),
             reg_decoded_bbox=False,
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(
                 type='Xavier',
                 layer='Conv2d',
                 distribution='uniform',
                 bias=0)):
    """SSD-style anchor head.

    Wires up the anchor generator, bbox coder and per-level prediction
    layers (built in ``self._init_layers()``).

    Args:
        num_classes (int): Number of foreground classes; one background
            class is appended for classification.
        in_channels (tuple[int]): Input channels of each feature level.
        stacked_convs (int): Number of extra convs before prediction.
        feat_channels (int): Channels of the intermediate features.
        use_depthwise (bool): Whether to use depthwise-separable convs.
        conv_cfg / norm_cfg / act_cfg (dict | None): Layer configs.
        anchor_generator (dict): Config of the anchor generator.
        bbox_coder (dict): Config of the box encode/decode transform.
        reg_decoded_bbox (bool): Whether regression loss is applied on
            decoded boxes.
        train_cfg / test_cfg (dict | None): Training / testing configs.
        init_cfg (dict): Weight initialization config.
    """
    super(AnchorHead, self).__init__(init_cfg)
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.stacked_convs = stacked_convs
    self.feat_channels = feat_channels
    self.use_depthwise = use_depthwise
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg
    self.cls_out_channels = num_classes + 1  # add background class
    self.anchor_generator = build_anchor_generator(anchor_generator)
    # Per-level anchor counts; SSD levels differ, so keep the full list.
    self.num_anchors = self.anchor_generator.num_base_anchors
    # NOTE: _init_layers() is called here, before the coder/cfg fields
    # below are set — it may only rely on the attributes assigned above.
    self._init_layers()
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.reg_decoded_bbox = reg_decoded_bbox
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    # set sampling=False for anchor_target
    self.sampling = False
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # SSD does not sample (sampling=False), so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False
def loss_odm_single(self, odm_cls_score, odm_bbox_pred, anchors, labels,
                    label_weights, bbox_targets, bbox_weights,
                    num_total_samples, cfg):
    """Compute ODM classification and regression losses for one level.

    Returns:
        tuple: ``(loss_odm_cls, loss_odm_bbox)`` for this feature level.
    """
    # --- classification branch ---
    flat_labels = labels.reshape(-1)
    flat_label_weights = label_weights.reshape(-1)
    # (N, C, H, W) -> (N*H*W, C)
    flat_cls_score = odm_cls_score.permute(0, 2, 3, 1).reshape(
        -1, self.cls_out_channels)
    loss_odm_cls = self.loss_odm_cls(flat_cls_score,
                                     flat_labels,
                                     flat_label_weights,
                                     avg_factor=num_total_samples)
    # --- regression branch (5-parameter rotated boxes) ---
    flat_bbox_targets = bbox_targets.reshape(-1, 5)
    flat_bbox_weights = bbox_weights.reshape(-1, 5)
    flat_bbox_pred = odm_bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)
    reg_decoded_bbox = cfg.get('reg_decoded_bbox', False)
    if reg_decoded_bbox:
        # When the regression loss (e.g. `IouLoss`, `GIouLoss`) is applied
        # directly on decoded bounding boxes, convert the encoded deltas
        # back to absolute coordinates first.
        bbox_coder_cfg = cfg.get('bbox_coder', '')
        if bbox_coder_cfg == '':
            bbox_coder_cfg = dict(type='DeltaXYWHBBoxCoder')
        bbox_coder = build_bbox_coder(bbox_coder_cfg)
        flat_anchors = anchors.reshape(-1, 5)
        flat_bbox_pred = bbox_coder.decode(flat_anchors, flat_bbox_pred)
    loss_odm_bbox = self.loss_odm_bbox(flat_bbox_pred,
                                       flat_bbox_targets,
                                       flat_bbox_weights,
                                       avg_factor=num_total_samples)
    return loss_odm_cls, loss_odm_bbox
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             pred_layer_cfg=None,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             dir_res_loss=None,
             size_res_loss=None,
             semantic_loss=None):
    """Bbox head with direction/size residual and semantic losses.

    Builds the loss modules and bbox coder from their configs, then the
    shared prediction conv head.
    """
    super(BRBboxHead, self).__init__()
    self.num_classes = num_classes
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    # Loss modules and box coder, built from config dicts.
    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)
    self.bbox_coder = build_bbox_coder(bbox_coder)

    # Bbox classification and regression head; output widths come from
    # the coder-dependent helpers.
    self.conv_pred = BaseConvBboxHead(
        **pred_layer_cfg,
        num_cls_out_channels=self._get_cls_out_channels(),
        num_reg_out_channels=self._get_reg_out_channels())
def test_centerpoint_bbox_coder():
    """Smoke-test CenterPointBBoxCoder.decode output shapes.

    Decodes random heatmaps/regression maps for a batch of 2 and checks
    that each per-sample prediction dict has `max_num` (500) entries.
    """
    bbox_coder_cfg = dict(
        type='CenterPointBBoxCoder',
        post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
        max_num=500,
        score_threshold=0.1,
        pc_range=[-51.2, -51.2],
        out_size_factor=4,
        voxel_size=[0.2, 0.2])

    bbox_coder = build_bbox_coder(bbox_coder_cfg)
    batch_dim = torch.rand([2, 3, 128, 128])
    batch_hei = torch.rand([2, 1, 128, 128])
    batch_hm = torch.rand([2, 2, 128, 128])
    batch_reg = torch.rand([2, 2, 128, 128])
    batch_rotc = torch.rand([2, 1, 128, 128])
    batch_rots = torch.rand([2, 1, 128, 128])
    batch_vel = torch.rand([2, 2, 128, 128])

    temp = bbox_coder.decode(batch_hm, batch_rots, batch_rotc, batch_hei,
                             batch_dim, batch_vel, batch_reg, 5)
    # Iterate the predictions directly instead of indexing by range(len(...)).
    for pred in temp:
        assert pred['bboxes'].shape == torch.Size([500, 9])
        assert pred['scores'].shape == torch.Size([500])
        assert pred['labels'].shape == torch.Size([500])
def __init__(self,
             *args,
             anchor_angles=[
                 0.,
             ],
             bbox_coder=dict(type='DeltaXYWHABBoxCoder',
                             target_means=(.0, .0, .0, .0, .0),
                             target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
             **kwargs):
    """Rotated anchor head: adds anchor angles to the base anchor head.

    Rebuilds the bbox coder for 5-parameter (x, y, w, h, angle) boxes and
    one rotated anchor generator per base size.
    """
    super(AnchorHeadRotated, self).__init__(*args, **kwargs)
    self.anchor_angles = anchor_angles
    self.reg_decoded_bbox = False
    self.use_vfl = True
    self.bbox_coder = build_bbox_coder(bbox_coder)

    # One rotated generator per base anchor size, all sharing the same
    # scales/ratios/angles.
    self.anchor_generators = [
        AnchorGeneratorRotated(anchor_base,
                               self.anchor_scales,
                               self.anchor_ratios,
                               angles=anchor_angles)
        for anchor_base in self.anchor_base_sizes
    ]

    # Total anchors per location = ratios x scales x angles.
    self.num_anchors = (len(self.anchor_ratios) * len(self.anchor_scales) *
                        len(self.anchor_angles))
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             anchor_generator=dict(type='AnchorGenerator',
                                   scales=[8, 16, 32],
                                   ratios=[0.5, 1.0, 2.0],
                                   strides=[4, 8, 16, 32, 64]),
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             clip_border=True,
                             target_means=(.0, .0, .0, .0),
                             target_stds=(1.0, 1.0, 1.0, 1.0)),
             reg_decoded_bbox=False,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=True,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss',
                            beta=1.0 / 9.0,
                            loss_weight=1.0),
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):
    """Anchor-based dense detection head.

    Args:
        num_classes (int): Number of foreground classes.
        in_channels (int): Channels of the input feature map.
        feat_channels (int): Channels of intermediate features.
        anchor_generator (dict): Anchor generator config.
        bbox_coder (dict): Box encode/decode transform config.
        reg_decoded_bbox (bool): Whether the regression loss is applied
            on decoded boxes.
        loss_cls (dict): Classification loss config.
        loss_bbox (dict): Regression loss config.
        train_cfg / test_cfg (dict | None): Training / testing configs.
        init_cfg (dict): Weight initialization config.

    Raises:
        ValueError: If the classification output channel count is <= 0.
    """
    super(AnchorHead, self).__init__(init_cfg)
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # TODO better way to determine whether sample or not
    # These loss types already reweight samples internally, so no sampler.
    self.sampling = loss_cls['type'] not in [
        'FocalLoss', 'GHMC', 'QualityFocalLoss'
    ]
    # Sigmoid heads predict only foreground classes; softmax heads add
    # an explicit background channel.
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes
    else:
        self.cls_out_channels = num_classes + 1
    if self.cls_out_channels <= 0:
        raise ValueError(f'num_classes={num_classes} is too small')
    self.reg_decoded_bbox = reg_decoded_bbox
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # use PseudoSampler when sampling is False
        if self.sampling and hasattr(self.train_cfg, 'sampler'):
            sampler_cfg = self.train_cfg.sampler
        else:
            sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False
    self.anchor_generator = build_anchor_generator(anchor_generator)
    # usually the numbers of anchors for each level are the same
    # except SSD detectors
    self.num_anchors = self.anchor_generator.num_base_anchors[0]
    self._init_layers()
def __init__(self,
             num_classes=80,
             in_channels=(512, 1024, 512, 256, 256, 256),
             anchor_generator=dict(type='SSDAnchorGenerator',
                                   scale_major=False,
                                   input_size=300,
                                   strides=[8, 16, 32, 64, 100, 300],
                                   ratios=([2], [2, 3], [2, 3], [2, 3], [2],
                                           [2]),
                                   basesize_ratio_range=(0.1, 0.9)),
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[.0, .0, .0, .0],
                 target_stds=[1.0, 1.0, 1.0, 1.0],
             ),
             reg_decoded_bbox=False,
             train_cfg=None,
             test_cfg=None):
    """SSD anchor head: one 3x3 cls conv and one 3x3 reg conv per level.

    Args:
        num_classes (int): Number of foreground classes.
        in_channels (tuple[int]): Input channels of each feature level.
        anchor_generator (dict): SSD anchor generator config.
        bbox_coder (dict): Box encode/decode transform config.
        reg_decoded_bbox (bool): Whether regression loss is applied on
            decoded boxes.
        train_cfg / test_cfg (dict | None): Training / testing configs.
    """
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes  # number of foreground classes
    self.in_channels = in_channels  # per-level input channels
    self.cls_out_channels = num_classes + 1  # +1 output channel for background
    self.anchor_generator = build_anchor_generator(
        anchor_generator)  # build the anchor generator
    # Per-level number of base anchors (differs per SSD level).
    num_anchors = self.anchor_generator.num_base_anchors
    reg_convs = []  # regression convs, one per level
    cls_convs = []  # classification convs, one per level
    for i in range(len(in_channels)):
        reg_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * 4,
                      kernel_size=3,
                      padding=1))
        cls_convs.append(
            nn.Conv2d(in_channels[i],
                      num_anchors[i] * (num_classes + 1),
                      kernel_size=3,
                      padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)
    self.bbox_coder = build_bbox_coder(bbox_coder)  # build the bbox coder
    self.reg_decoded_bbox = reg_decoded_bbox
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    # set sampling=False for anchor_target
    self.sampling = False
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # SSD does not sample (sampling=False), so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False
def __init__(self,
             in_channels=[128],
             tasks=None,
             train_cfg=None,
             test_cfg=None,
             bbox_coder=None,
             common_heads=dict(),
             loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
             loss_bbox=dict(type='L1Loss',
                            reduction='none',
                            loss_weight=0.25),
             separate_head=dict(type='SeparateHead',
                                init_bias=-2.19,
                                final_kernel=3),
             share_conv_channel=64,
             num_heatmap_convs=2,
             conv_cfg=dict(type='Conv2d'),
             norm_cfg=dict(type='BN2d'),
             bias='auto',
             norm_bbox=True):
    """CenterPoint-style center head with one task head per class group.

    Args:
        in_channels (list[int] | int): Channels of the input feature map.
        tasks (list[dict]): Task specs; each dict carries 'class_names'.
        train_cfg / test_cfg (dict | None): Training / testing configs.
        bbox_coder (dict): Box coder config.
        common_heads (dict): Conv head spec shared by all tasks.
        loss_cls (dict): Heatmap classification loss config.
        loss_bbox (dict): Regression loss config.
        separate_head (dict): Per-task head template config.
        share_conv_channel (int): Channels of the shared conv.
        num_heatmap_convs (int): Number of convs for the heatmap branch.
        conv_cfg / norm_cfg (dict): Layer configs for the shared conv.
        bias (str | bool): Bias setting for the shared conv.
        norm_bbox (bool): Whether box sizes are normalized.
    """
    super(CenterHead, self).__init__()

    num_classes = [len(t['class_names']) for t in tasks]
    self.class_names = [t['class_names'] for t in tasks]
    # Feng Xiang code
    # code begin
    # num_attr = [len(t['attr_names']) for t in tasks]
    # self.attr_names = [t['attr_names'] for t in tasks]
    # code end
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.norm_bbox = norm_bbox

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.num_anchor_per_locs = [n for n in num_classes]
    self.fp16_enabled = False

    # a shared convolution
    self.shared_conv = ConvModule(in_channels,
                                  share_conv_channel,
                                  kernel_size=3,
                                  padding=1,
                                  conv_cfg=conv_cfg,
                                  norm_cfg=norm_cfg,
                                  bias=bias)

    self.task_heads = nn.ModuleList()

    for num_cls in num_classes:
        heads = copy.deepcopy(common_heads)
        heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))
        # BUGFIX: update a per-task copy instead of mutating the
        # `separate_head` argument in place — mutating it would pollute
        # the shared default dict across CenterHead instantiations.
        task_head_cfg = copy.deepcopy(separate_head)
        task_head_cfg.update(in_channels=share_conv_channel,
                             heads=heads,
                             num_cls=num_cls)
        self.task_heads.append(builder.build_head(task_head_cfg))
def __init__(
        self,
        with_avg_pool=False,
        num_shared_fcs=2,
        roi_feat_size=7,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=15,
        reg_class_agnostic=False,
        ratio_thr=0.8,
        bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                        target_means=[0., 0., 0., 0.],
                        target_stds=[0.1, 0.1, 0.2, 0.2]),
        fix_coder=dict(type='GVFixCoder'),
        ratio_coder=dict(type='GVRatioCoder'),
        loss_cls=dict(type='CrossEntropyLoss',
                      use_sigmoid=False,
                      loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_fix=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_ratio=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=16.0),
):
    """Gliding-vertex bbox head: hbb regression plus fix/ratio branches.

    Consumes horizontal boxes ('hbb') and produces polygons ('poly');
    three coders (bbox, fix, ratio) drive the three regression targets.
    Layers themselves are created in ``self._init_layers()``.
    """
    super(GVBBoxHead, self).__init__()
    self.with_avg_pool = with_avg_pool
    self.num_shared_fcs = num_shared_fcs
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.fc_out_channels = fc_out_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    # Threshold on the predicted ratio used downstream to decide
    # between polygon and horizontal-box output.
    self.ratio_thr = ratio_thr
    self.fp16_enabled = False

    # Input boxes are horizontal; outputs are polygons.
    self.start_bbox_type = 'hbb'
    self.end_bbox_type = 'poly'

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.fix_coder = build_bbox_coder(fix_coder)
    self.ratio_coder = build_bbox_coder(ratio_coder)

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_fix = build_loss(loss_fix)
    self.loss_ratio = build_loss(loss_ratio)

    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             feat_channels=256,
             stacked_convs=4,
             strides=(4, 8, 16, 32, 64),
             dcn_on_last_conv=False,
             conv_bias='auto',
             loss_cls=dict(type='FocalLoss',
                           use_sigmoid=True,
                           gamma=2.0,
                           alpha=0.25,
                           loss_weight=1.0),
             loss_bbox=dict(type='IoULoss', loss_weight=1.0),
             bbox_coder=dict(type='DistancePointBBoxCoder'),
             conv_cfg=None,
             norm_cfg=None,
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(type='Normal',
                           layer='Conv2d',
                           std=0.01,
                           override=dict(type='Normal',
                                         name='conv_cls',
                                         std=0.01,
                                         bias_prob=0.01))):
    """Anchor-free dense head using per-location point priors.

    Args:
        num_classes (int): Number of foreground classes.
        in_channels (int): Channels of the input feature map.
        feat_channels (int): Channels of intermediate features.
        stacked_convs (int): Number of stacked convs in each branch.
        strides (tuple[int]): Downsample strides of the input levels.
        dcn_on_last_conv (bool): Use DCN on the last conv if True.
        conv_bias (str | bool): Conv bias setting; 'auto' or a bool.
        loss_cls / loss_bbox (dict): Loss configs.
        bbox_coder (dict): Distance-point box coder config.
        conv_cfg / norm_cfg (dict | None): Layer configs.
        train_cfg / test_cfg (dict | None): Training / testing configs.
        init_cfg (dict): Weight initialization config.
    """
    super(AnchorFreeHead, self).__init__(init_cfg)
    self.num_classes = num_classes
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Sigmoid heads predict only foreground classes; softmax heads add
    # an explicit background channel.
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes
    else:
        self.cls_out_channels = num_classes + 1
    self.in_channels = in_channels
    self.feat_channels = feat_channels
    self.stacked_convs = stacked_convs
    self.strides = strides
    self.dcn_on_last_conv = dcn_on_last_conv
    assert conv_bias == 'auto' or isinstance(conv_bias, bool)
    self.conv_bias = conv_bias
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.bbox_coder = build_bbox_coder(bbox_coder)

    self.prior_generator = MlvlPointGenerator(strides)

    # In order to keep a more general interface and be consistent with
    # anchor_head. We can think of point like one anchor
    self.num_base_priors = self.prior_generator.num_base_priors[0]

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.fp16_enabled = False
    self._init_layers()
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             start_bbox_type='hbb',
             end_bbox_type='hbb',
             reg_dim=None,
             roi_feat_size=7,
             in_channels=256,
             num_classes=15,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             target_means=[0., 0., 0., 0.],
                             target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=False,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Oriented-box RoI head supporting hbb/obb/poly box types.

    The regression width (``reg_dim``) defaults to the dimensionality of
    the output box type (via ``get_bbox_dim``) unless given explicitly.
    """
    super(OBBoxHead, self).__init__()
    # At least one of the two branches must be enabled.
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    # Input and output box representations.
    self.start_bbox_type = start_bbox_type
    self.end_bbox_type = end_bbox_type
    assert self.start_bbox_type in ['hbb', 'obb', 'poly']
    assert self.end_bbox_type in ['hbb', 'obb', 'poly']
    self.reg_dim = get_bbox_dim(self.end_bbox_type) \
        if reg_dim is None else reg_dim

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # Pooling collapses the RoI feature map; otherwise it is flattened.
    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # need to add background class
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    if self.with_reg:
        out_dim_reg = self.reg_dim if reg_class_agnostic else \
            self.reg_dim * num_classes
        self.fc_reg = nn.Linear(in_channels, out_dim_reg)
    self.debug_imgs = None
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             score_type='normal',
             head_config=[True, False, False],
             init_type='normal',
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 clip_border=True,
                 target_means=[0., 0., 0., 0.],
                 target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Classification-only attention bbox head.

    NOTE(review): the ``with_reg`` argument is accepted but immediately
    overridden to False below, so the regression branch is always
    disabled and the ``out_dim_reg`` computation at the bottom is dead
    code (no ``fc_reg`` is ever created) — presumably intentional for
    this classification-focused head; confirm with the author.
    """
    super(AttentionLogoBBoxHead, self).__init__()
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    # Regression branch force-disabled regardless of the argument.
    self.with_reg = False
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    # How the score is produced (see score_type) and which sub-heads
    # are enabled / how weights are initialized.
    self.score_type = score_type
    self.head_config = head_config
    self.init_type = init_type

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # need to add background class
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    if self.with_reg:
        # Dead branch: with_reg is always False here (see note above).
        out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
    self.debug_imgs = None
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             clip_border=True,
                             target_means=[0., 0., 0., 0.],
                             target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=False,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Classification-only bbox head that also indexes per-file styles.

    NOTE(review): ``with_reg`` is accepted but overridden to False, so
    no ``fc_reg`` layer is ever created — confirm this is intentional.
    NOTE(review): the dataset path below is hardcoded to one machine and
    every annotation XML is parsed at construction time; consider making
    the path configurable.
    """
    super(LogoDCBBoxHead, self).__init__()
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    # Regression branch force-disabled regardless of the argument.
    self.with_reg = False
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # need to add background class
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    if self.with_reg:
        # Dead branch: with_reg is always False here (see note above).
        out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
    self.debug_imgs = None

    # Map each annotation file stem to its 'style' tag, read from the
    # first <object> element of every VOC annotation XML.
    self.file_to_style = {}
    self.data_path = '/data/zhaozhiyuan/tb_variation/VOCdevkit_all'
    self.anno_path = os.path.join(self.data_path, 'VOC2007', 'Annotations')
    """init style and class index"""
    for anno in os.listdir(self.anno_path):
        anno_file = ET.parse(os.path.join(self.anno_path, anno))
        name = anno_file.find('object').find('name').text
        style = anno_file.find('object').find('style').text
        self.file_to_style[anno.split('.')[0]] = style
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             vote_module_cfg=None,
             vote_aggregation_cfg=None,
             pred_layer_cfg=None,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             center_loss_mse=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None,
             iou_loss=None):
    """VoteNet-style proposal head: voting, aggregation and prediction.

    Loss configs given as None for the optional losses (size_class,
    semantic, iou, center, center_mse) leave the corresponding attribute
    unset (or None for iou), so callers must check before use.
    """
    super(VoteHead, self).__init__()
    self.num_classes = num_classes
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.gt_per_seed = vote_module_cfg['gt_per_seed']
    self.num_proposal = vote_aggregation_cfg['num_point']

    # Mandatory losses.
    self.objectness_loss = build_loss(objectness_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.size_res_loss = build_loss(size_res_loss)
    # Optional losses: only built when a config is provided.
    if size_class_loss is not None:
        self.size_class_loss = build_loss(size_class_loss)
    if semantic_loss is not None:
        self.semantic_loss = build_loss(semantic_loss)
    if iou_loss is not None:
        self.iou_loss = build_loss(iou_loss)
    else:
        self.iou_loss = None
    if center_loss is not None:
        self.center_loss = build_loss(center_loss)
    if center_loss_mse is not None:
        self.center_loss_mse = build_loss(center_loss_mse)

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.num_sizes = self.bbox_coder.num_sizes
    self.num_dir_bins = self.bbox_coder.num_dir_bins

    self.vote_module = VoteModule(**vote_module_cfg)
    self.vote_aggregation = build_sa_module(vote_aggregation_cfg)
    self.fp16_enabled = False

    # Bbox classification and regression
    self.conv_pred = BaseConvBboxHead(
        **pred_layer_cfg,
        num_cls_out_channels=self._get_cls_out_channels(),
        num_reg_out_channels=self._get_reg_out_channels())
def __init__(self, loss_weight, box_coder=None):
    """3D box decoding loss wrapper.

    Args:
        loss_weight (float): Scalar weight applied to the loss.
        box_coder (dict | None): Config for the box coder used to decode
            predictions before the loss is computed.
    """
    super(Boxes3dDecodeLoss, self).__init__()
    self.weight = loss_weight
    # The old configurable loss-type selection (smooth_l1 / l1 /
    # balanced_l1) was removed as dead commented-out code.
    self.box_coder = build_bbox_coder(box_coder)
def __init__(self,
             roi_feat_size=7,
             in_channels=256,
             num_convs=4,
             num_fcs=2,
             reg_num=2,
             conv_out_channels=256,
             fc_out_channels=1024,
             offset_coordinate='rectangle',
             offset_coder=dict(
                 type='DeltaXYOffsetCoder',
                 target_means=[0.0, 0.0],
                 target_stds=[0.5, 0.5]),
             reg_decoded_offset=False,
             conv_cfg=None,
             norm_cfg=None,
             loss_offset=dict(type='MSELoss', loss_weight=1.0)):
    """RoI offset head: convs + fcs regressing ``reg_num`` offset values.

    Args:
        roi_feat_size (int): Spatial size of the RoI feature map.
        in_channels (int): Channels of the input RoI features.
        num_convs (int): Number of 3x3 convs before the fc layers.
        num_fcs (int): Number of fc layers before the final regressor.
        reg_num (int): Number of offset values to regress.
        conv_out_channels / fc_out_channels (int): Branch widths.
        offset_coordinate (str): Coordinate system of the offsets.
        offset_coder (dict): Offset encode/decode transform config.
        reg_decoded_offset (bool): Whether loss is applied on decoded
            offsets.
        conv_cfg / norm_cfg (dict | None): Layer configs.
        loss_offset (dict): Offset regression loss config.
    """
    super(OffsetHead, self).__init__()
    self.in_channels = in_channels
    self.conv_out_channels = conv_out_channels
    self.fc_out_channels = fc_out_channels
    self.offset_coordinate = offset_coordinate
    self.reg_decoded_offset = reg_decoded_offset
    self.reg_num = reg_num
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.offset_coder = build_bbox_coder(offset_coder)
    # Built exactly once here (a redundant duplicate build at the end of
    # the original constructor was removed).
    self.loss_offset = build_loss(loss_offset)
    self.convs = nn.ModuleList()
    for i in range(num_convs):
        # First conv consumes the RoI features, the rest are channel-
        # preserving.
        in_channels = (self.in_channels
                       if i == 0 else self.conv_out_channels)
        self.convs.append(
            Conv2d(in_channels, self.conv_out_channels, 3, padding=1))
    roi_feat_size = _pair(roi_feat_size)
    roi_feat_area = roi_feat_size[0] * roi_feat_size[1]
    self.fcs = nn.ModuleList()
    for i in range(num_fcs):
        # First fc flattens the conv feature map.
        in_channels = (self.conv_out_channels * roi_feat_area
                       if i == 0 else self.fc_out_channels)
        self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))
    self.fc_offset = nn.Linear(self.fc_out_channels, self.reg_num)
    self.relu = nn.ReLU()
def __init__(self,
             C,
             in_channels,
             feat_channels=256,
             anchor_generator=dict(type='AnchorGenerator',
                                   scales=[8, 16, 32],
                                   ratios=[0.5, 1.0, 2.0],
                                   strides=[4, 8, 16, 32, 64]),
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             target_means=(.0, .0, .0, .0),
                             target_stds=(1.0, 1.0, 1.0, 1.0)),
             reg_decoded_bbox=False,
             background_label=None,
             FL=dict(type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
             SmoothL1=dict(type='SmoothL1Loss',
                           beta=1.0 / 9.0,
                           loss_weight=1.0),
             train_cfg=None,
             test_cfg=None):
    """MI-AOD anchor head (active learning variant of AnchorHead).

    Args:
        C (int): Number of foreground classes.
        in_channels (int): Channels of the input feature map.
        feat_channels (int): Channels of intermediate features.
        anchor_generator (dict): Anchor generator config.
        bbox_coder (dict): Box encode/decode transform config.
        reg_decoded_bbox (bool): Whether regression loss is applied on
            decoded boxes.
        background_label (int | None): Background label index; defaults
            to ``C`` and must be either 0 or ``C``.
        FL (dict): Classification loss config (focal-loss slot).
        SmoothL1 (dict): Regression loss config.
        train_cfg / test_cfg (dict | None): Training / testing configs.

    Raises:
        ValueError: If the classification output channel count is <= 0.
    """
    super(MIAODHead, self).__init__()
    if train_cfg is not None:
        # Trade-off weight used by the MI-AOD objective.
        self.param_lambda = train_cfg.param_lambda
    self.in_channels = in_channels
    self.C = C
    self.feat_channels = feat_channels
    self.use_sigmoid_cls = FL.get('use_sigmoid', False)
    # TODO better way to determine whether sample or not
    # These loss types reweight samples internally, so no sampler.
    self.sampling = FL['type'] not in ['FocalLoss', 'GHMC',
                                       'QualityFocalLoss']
    # Sigmoid heads predict only foreground classes; softmax heads add
    # an explicit background channel.
    if self.use_sigmoid_cls:
        self.cls_out_channels = C
    else:
        self.cls_out_channels = C + 1
    if self.cls_out_channels <= 0:
        raise ValueError(f'C={C} is too small')
    self.reg_decoded_bbox = reg_decoded_bbox
    self.background_label = (C if background_label is None else
                             background_label)
    # background_label should be either 0 or C
    assert (self.background_label == 0 or self.background_label == C)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.FL = build_loss(FL)
    self.SmoothL1 = build_loss(SmoothL1)
    # Image-level multi-label classification loss.
    self.l_imgcls = nn.BCELoss()
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # use PseudoSampler when sampling is False
        if self.sampling and hasattr(self.train_cfg, 'sampler'):
            sampler_cfg = self.train_cfg.sampler
        else:
            sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False
    self.anchor_generator = build_anchor_generator(anchor_generator)
    # usually the numbers of anchors for each level are the same
    # except SSD detectors
    self.N = self.anchor_generator.num_base_anchors[0]
    self._init_layers()
def __init__(self,
             C=20,
             in_channels=(512, 1024, 512, 256, 256, 256),
             anchor_generator=dict(type='SSDAnchorGenerator',
                                   scale_major=False,
                                   input_size=300,
                                   strides=[8, 16, 32, 64, 100, 300],
                                   ratios=([2], [2, 3], [2, 3], [2, 3], [2],
                                           [2]),
                                   basesize_ratio_range=(0.1, 0.9)),
             background_label=20,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             target_means=[.0, .0, .0, .0],
                             target_stds=[1.0, 1.0, 1.0, 1.0]),
             reg_decoded_bbox=False,
             train_cfg=None,
             test_cfg=None):
    """MI-AOD SSD head: paired classifiers f_1/f_2, regressor f_r and a
    multiple-instance-learning branch f_mil, one conv per feature level.

    Args:
        C (int): Number of foreground classes.
        in_channels (tuple[int]): Input channels of each feature level.
        anchor_generator (dict): SSD anchor generator config.
        background_label (int | None): Background label; defaults to
            ``C`` and must be either 0 or ``C``.
        bbox_coder (dict): Box encode/decode transform config.
        reg_decoded_bbox (bool): Whether regression loss is applied on
            decoded boxes.
        train_cfg / test_cfg (dict | None): Training / testing configs.
    """
    super(MIAODHead, self).__init__()
    if train_cfg is not None:
        # Trade-off weight used by the MI-AOD objective.
        self.param_lambda = train_cfg.param_lambda
    self.in_channels = in_channels
    self.C = C
    self.cls_out_channels = C + 1  # add background class
    self.anchor_generator = build_anchor_generator(anchor_generator)
    # Per-level number of base anchors (differs per SSD level).
    N = self.anchor_generator.num_base_anchors
    # Image-level multi-label classification loss.
    self.l_imgcls = nn.BCELoss()
    f_r_convs = []    # regression convs
    f_1_convs = []    # first classifier convs
    f_2_convs = []    # second classifier convs
    f_mil_convs = []  # MIL branch convs
    for i in range(len(in_channels)):
        f_r_convs.append(
            nn.Conv2d(in_channels[i], N[i] * 4, kernel_size=3, padding=1))
        f_1_convs.append(
            nn.Conv2d(in_channels[i],
                      N[i] * (C + 1),
                      kernel_size=3,
                      padding=1))
        f_2_convs.append(
            nn.Conv2d(in_channels[i],
                      N[i] * (C + 1),
                      kernel_size=3,
                      padding=1))
        f_mil_convs.append(
            nn.Conv2d(in_channels[i],
                      N[i] * (C + 1),
                      kernel_size=3,
                      padding=1))
    self.f_r_convs = nn.ModuleList(f_r_convs)
    self.f_1_convs = nn.ModuleList(f_1_convs)
    self.f_2_convs = nn.ModuleList(f_2_convs)
    self.f_mil_convs = nn.ModuleList(f_mil_convs)
    self.background_label = (C if background_label is None else
                             background_label)
    # background_label should be either 0 or C
    assert (self.background_label == 0 or self.background_label == C)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.reg_decoded_bbox = reg_decoded_bbox
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    # set sampling=False for anchor_target
    self.sampling = False
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # SSD does not sample (sampling=False), so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False
def __init__(self,
             with_avg_pool=False,
             with_cls=True,
             with_reg=True,
             roi_feat_size=7,
             in_channels=256,
             num_classes=80,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             target_means=[0., 0., 0., 0.],
                             target_stds=[0.1, 0.1, 0.2, 0.2]),
             reg_class_agnostic=False,
             reg_decoded_bbox=False,
             loss_cls=dict(type='CrossEntropyLoss',
                           use_sigmoid=False,
                           loss_weight=1.0),
             loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
    """Bbox head whose regression branch ends with a BatchNorm1d layer.

    Same as the plain bbox head, except ``fc_reg`` is a
    ``Linear -> BatchNorm1d`` sequence.
    """
    super(BBoxHeadBN, self).__init__()
    # At least one of the two branches must be enabled.
    assert with_cls or with_reg
    self.with_avg_pool = with_avg_pool
    self.with_cls = with_cls
    self.with_reg = with_reg
    self.roi_feat_size = _pair(roi_feat_size)
    self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.reg_class_agnostic = reg_class_agnostic
    self.reg_decoded_bbox = reg_decoded_bbox
    self.fp16_enabled = False

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    in_channels = self.in_channels
    if self.with_avg_pool:
        self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
    else:
        in_channels *= self.roi_feat_area
    if self.with_cls:
        # need to add background class
        self.fc_cls = nn.Linear(in_channels, num_classes + 1)
    if self.with_reg:
        out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
        # BUGFIX: the BatchNorm1d width must match the regression output.
        # The original hard-coded BatchNorm1d(4), which only works when
        # reg_class_agnostic=True; with class-specific regression the
        # linear layer emits 4 * num_classes features and the 4-channel
        # BN layer fails at forward time.
        self.fc_reg = nn.Sequential(
            nn.Linear(self.in_channels, out_dim_reg),
            nn.BatchNorm1d(out_dim_reg))
    self.debug_imgs = None
def __init__(self,
             anchor_generator,
             in_channels,
             kernel_size=3,
             norm_cfg=dict(type='BN'),
             weighted_sum=False,
             bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                             target_means=[0., 0., 0., 0.],
                             target_stds=[1., 1., 1., 1.]),
             loss_cls=dict(type='CrossEntropyLoss',
                           reduction='sum',
                           loss_weight=1.0),
             loss_bbox=dict(type='L1Loss',
                            reduction='sum',
                            loss_weight=1.2),
             train_cfg=None,
             test_cfg=None,
             init_cfg=None,
             *args,
             **kwargs):
    """Siamese RPN head with per-scale correlation cls/reg sub-heads.

    NOTE(review): ``self.train_cfg.assigner`` is accessed unconditionally
    below, so constructing this head with the default ``train_cfg=None``
    raises AttributeError — confirm whether a ``if self.train_cfg`` guard
    (as used by the other heads in this codebase) is intended.
    """
    super(SiameseRPNHead, self).__init__(init_cfg)
    self.anchor_generator = build_prior_generator(anchor_generator)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.assigner = build_assigner(self.train_cfg.assigner)
    self.sampler = build_sampler(self.train_cfg.sampler)
    self.fp16_enabled = False

    # One correlation head pair (cls + reg) per input scale: 2 channels
    # per anchor for classification, 4 per anchor for regression.
    self.cls_heads = nn.ModuleList()
    self.reg_heads = nn.ModuleList()
    for i in range(len(in_channels)):
        self.cls_heads.append(
            CorrelationHead(in_channels[i], in_channels[i],
                            2 * self.anchor_generator.num_base_anchors[0],
                            kernel_size, norm_cfg))
        self.reg_heads.append(
            CorrelationHead(in_channels[i], in_channels[i],
                            4 * self.anchor_generator.num_base_anchors[0],
                            kernel_size, norm_cfg))

    # Optional learnable weights for fusing the per-scale outputs.
    self.weighted_sum = weighted_sum
    if self.weighted_sum:
        self.cls_weight = nn.Parameter(torch.ones(len(in_channels)))
        self.reg_weight = nn.Parameter(torch.ones(len(in_channels)))

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
def __init__(self,
             num_classes,
             cls_in_channels=256,
             reg_in_channels=256,
             roi_feat_size=7,
             reg_feat_up_ratio=2,
             reg_pre_kernel=3,
             reg_post_kernel=3,
             reg_pre_num=2,
             reg_post_num=1,
             cls_out_channels=1024,
             reg_offset_out_channels=256,
             reg_cls_out_channels=256,
             num_cls_fcs=1,
             num_reg_fcs=0,
             reg_class_agnostic=True,
             norm_cfg=None,
             bbox_coder=dict(
                 type='BucketingBBoxCoder',
                 num_buckets=14,
                 scale_factor=1.7),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=False,
                 loss_weight=1.0),
             loss_bbox_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.0),
             loss_bbox_reg=dict(
                 type='SmoothL1Loss', beta=0.1, loss_weight=1.0)):
    """Build the SABL RoI head: a classification FC branch plus a bucketing
    regression branch (pre convs, 1-D upsampling, x/y attention convs, and
    per-bucket cls/offset FCs).

    Args:
        num_classes (int): Number of foreground classes.
        cls_in_channels (int): Input channels of the cls branch.
        reg_in_channels (int): Input channels of the reg branch.
        roi_feat_size (int): Spatial size of the RoI feature map.
        reg_feat_up_ratio (int): Upsample ratio of reg features; the
            upsampled size must equal `bbox_coder['num_buckets']`.
        reg_pre_kernel (int): Kernel size of convs before upsampling.
        reg_post_kernel (int): Kernel size of the 1-D convs after
            upsampling (applied separately along x and y).
        reg_pre_num (int): Number of convs before upsampling.
        reg_post_num (int): Number of convs after upsampling.
        cls_out_channels (int): Hidden dim of the cls FC branch.
        reg_offset_out_channels (int): Hidden dim of the offset FC branch.
        reg_cls_out_channels (int): Hidden dim of the bucket-cls FC branch.
        num_cls_fcs (int): Number of FCs in the cls branch.
        num_reg_fcs (int): Number of FCs in each reg sub-branch.
        reg_class_agnostic (bool): Must be True; SABL regression is
            class-agnostic by design (asserted below).
        norm_cfg (dict, optional): Norm config for ConvModules.
        bbox_coder (dict): Bucketing box coder config.
        loss_cls (dict): Classification loss config.
        loss_bbox_cls (dict): Bucket-classification loss config.
        loss_bbox_reg (dict): Bucket-offset regression loss config.
    """
    super(SABLHead, self).__init__()
    self.cls_in_channels = cls_in_channels
    self.reg_in_channels = reg_in_channels
    self.roi_feat_size = roi_feat_size
    self.reg_feat_up_ratio = int(reg_feat_up_ratio)
    self.num_buckets = bbox_coder['num_buckets']
    # Upsample ratio must be at least 2 (ConvTranspose1d stride below).
    assert self.reg_feat_up_ratio // 2 >= 1
    self.up_reg_feat_size = roi_feat_size * self.reg_feat_up_ratio
    # Each upsampled position corresponds to one bucket.
    assert self.up_reg_feat_size == bbox_coder['num_buckets']
    self.reg_pre_kernel = reg_pre_kernel
    self.reg_post_kernel = reg_post_kernel
    self.reg_pre_num = reg_pre_num
    self.reg_post_num = reg_post_num
    self.num_classes = num_classes
    self.cls_out_channels = cls_out_channels
    self.reg_offset_out_channels = reg_offset_out_channels
    self.reg_cls_out_channels = reg_cls_out_channels
    self.num_cls_fcs = num_cls_fcs
    self.num_reg_fcs = num_reg_fcs
    self.reg_class_agnostic = reg_class_agnostic
    assert self.reg_class_agnostic
    self.norm_cfg = norm_cfg

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox_cls = build_loss(loss_bbox_cls)
    self.loss_bbox_reg = build_loss(loss_bbox_reg)

    # Classification FC branch over the full RoI feature map.
    self.cls_fcs = self._add_fc_branch(self.num_cls_fcs,
                                       self.cls_in_channels,
                                       self.roi_feat_size,
                                       self.cls_out_channels)

    # Buckets per side (left/right or top/bottom).
    self.side_num = int(np.ceil(self.num_buckets / 2))

    # 1-D transposed convs upsample reg features along x and y separately.
    if self.reg_feat_up_ratio > 1:
        self.upsample_x = nn.ConvTranspose1d(
            reg_in_channels,
            reg_in_channels,
            self.reg_feat_up_ratio,
            stride=self.reg_feat_up_ratio)
        self.upsample_y = nn.ConvTranspose1d(
            reg_in_channels,
            reg_in_channels,
            self.reg_feat_up_ratio,
            stride=self.reg_feat_up_ratio)

    # Square convs applied before upsampling.
    self.reg_pre_convs = nn.ModuleList()
    for i in range(self.reg_pre_num):
        reg_pre_conv = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=reg_pre_kernel,
            padding=reg_pre_kernel // 2,
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_pre_convs.append(reg_pre_conv)

    # 1xK convs: aggregate along x after upsampling.
    self.reg_post_conv_xs = nn.ModuleList()
    for i in range(self.reg_post_num):
        reg_post_conv_x = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=(1, reg_post_kernel),
            padding=(0, reg_post_kernel // 2),
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_post_conv_xs.append(reg_post_conv_x)
    # Kx1 convs: aggregate along y after upsampling.
    self.reg_post_conv_ys = nn.ModuleList()
    for i in range(self.reg_post_num):
        reg_post_conv_y = ConvModule(
            reg_in_channels,
            reg_in_channels,
            kernel_size=(reg_post_kernel, 1),
            padding=(reg_post_kernel // 2, 0),
            norm_cfg=norm_cfg,
            act_cfg=dict(type='ReLU'))
        self.reg_post_conv_ys.append(reg_post_conv_y)

    # 1x1 convs producing per-axis attention maps.
    self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1)
    self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1)

    # Final classifier includes the background class.
    self.fc_cls = nn.Linear(self.cls_out_channels, self.num_classes + 1)
    self.relu = nn.ReLU(inplace=True)

    # Per-bucket classification and offset FC branches (feat size 1).
    self.reg_cls_fcs = self._add_fc_branch(self.num_reg_fcs,
                                           self.reg_in_channels, 1,
                                           self.reg_cls_out_channels)
    self.reg_offset_fcs = self._add_fc_branch(self.num_reg_fcs,
                                              self.reg_in_channels, 1,
                                              self.reg_offset_out_channels)

    self.fc_reg_cls = nn.Linear(self.reg_cls_out_channels, 1)
    self.fc_reg_offset = nn.Linear(self.reg_offset_out_channels, 1)
def __init__(self,
             num_classes,
             seg_in_channels,
             part_in_channels,
             seg_conv_channels=None,
             part_conv_channels=None,
             merge_conv_channels=None,
             down_conv_channels=None,
             shared_fc_channels=None,
             cls_channels=None,
             reg_channels=None,
             dropout_ratio=0.1,
             roi_feat_size=14,
             with_corner_loss=True,
             bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 reduction='none',
                 loss_weight=1.0)):
    """Build the PartA2 bbox head: sparse-conv encoders for the part and
    segmentation feature streams, a merge/downsample stage, shared FCs,
    and 1x1-conv classification/regression towers.

    Args:
        num_classes (int): Number of classes.
        seg_in_channels (int): Input channels of the segmentation stream.
        part_in_channels (int): Input channels of the part stream.
        seg_conv_channels (list[int], optional): Out channels of each
            sparse submanifold conv in the seg stream.
        part_conv_channels (list[int], optional): Likewise for the part
            stream.
        merge_conv_channels (list[int], optional): Channels of convs after
            concatenating part+seg features.
        down_conv_channels (list[int], optional): Channels of convs after
            the sparse max-pool; down_conv_channels[-1] must equal
            shared_fc_channels[0] (asserted below).
        shared_fc_channels (list[int], optional): Shared FC channels.
        cls_channels (list[int], optional): Hidden channels of the cls
            tower.
        reg_channels (list[int], optional): Hidden channels of the reg
            tower.
        dropout_ratio (float): Dropout probability inserted into the FC
            and head towers.
        roi_feat_size (int): RoI grid size; pooled by 2 before the FCs.
        with_corner_loss (bool): Whether corner loss is enabled.
        bbox_coder (dict): Box coder config.
        conv_cfg (dict): Conv config for the dense 1x1 ConvModules.
        norm_cfg (dict): Norm config for both sparse and dense convs.
        loss_bbox (dict): Regression loss config.
        loss_cls (dict): Classification loss config.
    """
    super(PartA2BboxHead, self).__init__()
    self.num_classes = num_classes
    self.with_corner_loss = with_corner_loss
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_cls = build_loss(loss_cls)
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

    # The downsampled sparse features are flattened straight into the
    # shared FC stack, so channel counts must line up.
    assert down_conv_channels[-1] == shared_fc_channels[0]

    # init layers
    # Part stream: stack of sparse submanifold 3x3x3 convs.
    part_channel_last = part_in_channels
    part_conv = []
    for i, channel in enumerate(part_conv_channels):
        part_conv.append(
            make_sparse_convmodule(
                part_channel_last,
                channel,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key=f'rcnn_part{i}',
                conv_type='SubMConv3d'))
        part_channel_last = channel
    self.part_conv = spconv.SparseSequential(*part_conv)

    # Segmentation stream: same structure as the part stream.
    seg_channel_last = seg_in_channels
    seg_conv = []
    for i, channel in enumerate(seg_conv_channels):
        seg_conv.append(
            make_sparse_convmodule(
                seg_channel_last,
                channel,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key=f'rcnn_seg{i}',
                conv_type='SubMConv3d'))
        seg_channel_last = channel
    self.seg_conv = spconv.SparseSequential(*seg_conv)

    self.conv_down = spconv.SparseSequential()

    # Merge convs operate on the concatenated part+seg features.
    merge_conv_channel_last = part_channel_last + seg_channel_last
    merge_conv = []
    for i, channel in enumerate(merge_conv_channels):
        merge_conv.append(
            make_sparse_convmodule(
                merge_conv_channel_last,
                channel,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key='rcnn_down0'))
        merge_conv_channel_last = channel

    down_conv_channel_last = merge_conv_channel_last
    conv_down = []
    for i, channel in enumerate(down_conv_channels):
        conv_down.append(
            make_sparse_convmodule(
                down_conv_channel_last,
                channel,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key='rcnn_down1'))
        down_conv_channel_last = channel

    # merge -> 2x sparse max-pool -> down convs.
    self.conv_down.add_module('merge_conv',
                              spconv.SparseSequential(*merge_conv))
    self.conv_down.add_module(
        'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))
    self.conv_down.add_module('down_conv',
                              spconv.SparseSequential(*conv_down))

    # Shared FC stack over the flattened pooled volume
    # (pool_size**3 voxels after the stride-2 max-pool).
    shared_fc_list = []
    pool_size = roi_feat_size // 2
    pre_channel = shared_fc_channels[0] * pool_size**3
    for k in range(1, len(shared_fc_channels)):
        shared_fc_list.append(
            ConvModule(
                pre_channel,
                shared_fc_channels[k],
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        pre_channel = shared_fc_channels[k]

        # Dropout between hidden FCs (not after the last one).
        if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
            shared_fc_list.append(nn.Dropout(dropout_ratio))

    self.shared_fc = nn.Sequential(*shared_fc_list)

    # Classification layer
    channel_in = shared_fc_channels[-1]
    cls_channel = 1  # binary objectness-style output
    cls_layers = []
    pre_channel = channel_in
    for k in range(0, len(cls_channels)):
        cls_layers.append(
            ConvModule(
                pre_channel,
                cls_channels[k],
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        pre_channel = cls_channels[k]
    cls_layers.append(
        ConvModule(
            pre_channel,
            cls_channel,
            1,
            padding=0,
            conv_cfg=conv_cfg,
            act_cfg=None))
    # NOTE(review): `>= 0` inserts an (inert) Dropout even when
    # dropout_ratio == 0 — presumably intentional upstream; confirm.
    if dropout_ratio >= 0:
        cls_layers.insert(1, nn.Dropout(dropout_ratio))

    self.conv_cls = nn.Sequential(*cls_layers)

    # Regression layer
    reg_layers = []
    pre_channel = channel_in
    for k in range(0, len(reg_channels)):
        reg_layers.append(
            ConvModule(
                pre_channel,
                reg_channels[k],
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        pre_channel = reg_channels[k]
    reg_layers.append(
        ConvModule(
            pre_channel,
            self.bbox_coder.code_size,
            1,
            padding=0,
            conv_cfg=conv_cfg,
            act_cfg=None))
    if dropout_ratio >= 0:
        reg_layers.insert(1, nn.Dropout(dropout_ratio))

    self.conv_reg = nn.Sequential(*reg_layers)
    self.init_weights()
def __init__(
        self,
        num_classes,
        in_channels,
        feat_channels=256,
        approx_anchor_generator=dict(
            type='AnchorGenerator',
            octave_base_scale=8,
            scales_per_octave=3,
            ratios=[0.5, 1.0, 2.0],
            strides=[4, 8, 16, 32, 64]),
        square_anchor_generator=dict(
            type='AnchorGenerator',
            ratios=[1.0],
            scales=[8],
            strides=[4, 8, 16, 32, 64]),
        anchor_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]
        ),
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[.0, .0, .0, .0],
            target_stds=[1.0, 1.0, 1.0, 1.0]
        ),
        reg_decoded_bbox=False,
        deform_groups=4,
        loc_filter_thr=0.01,
        train_cfg=None,
        test_cfg=None,
        loss_loc=dict(
            type='FocalLoss',
            use_sigmoid=True,
            gamma=2.0,
            alpha=0.25,
            loss_weight=1.0),
        loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                       loss_weight=1.0),
        init_cfg=dict(
            type='Normal',
            layer='Conv2d',
            std=0.01,
            override=dict(
                type='Normal', name='conv_loc', std=0.01,
                bias_prob=0.01))):  # yapf: disable
    """Guided-anchoring style head: predicts anchor location/shape with a
    dedicated loc/shape branch, then classifies and regresses boxes on the
    guided anchors.

    Builds two prior generators (dense "approx" anchors for shape targets
    and one square anchor per location), two box coders (anchor deltas and
    bbox deltas), four losses, and the optional train-time assigners and
    samplers (both the regular ones and the guided-anchor `ga_*` pair).
    """
    super(AnchorHead, self).__init__(init_cfg)
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.deform_groups = deform_groups
    self.loc_filter_thr = loc_filter_thr

    # build approx_anchor_generator and square_anchor_generator
    # The square anchor must coincide with the base scale/strides of the
    # approx anchors so their targets are comparable.
    assert (approx_anchor_generator['octave_base_scale'] ==
            square_anchor_generator['scales'][0])
    assert (approx_anchor_generator['strides'] ==
            square_anchor_generator['strides'])
    self.approx_anchor_generator = build_prior_generator(
        approx_anchor_generator)
    self.square_anchor_generator = build_prior_generator(
        square_anchor_generator)
    self.approxs_per_octave = self.approx_anchor_generator \
        .num_base_priors[0]

    self.reg_decoded_bbox = reg_decoded_bbox

    # one anchor per location
    self.num_base_priors = self.square_anchor_generator.num_base_priors[0]

    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    self.loc_focal_loss = loss_loc['type'] in ['FocalLoss']
    # FocalLoss-based classification does not need sampling.
    self.sampling = loss_cls['type'] not in ['FocalLoss']
    self.ga_sampling = train_cfg is not None and hasattr(
        train_cfg, 'ga_sampler')
    if self.use_sigmoid_cls:
        self.cls_out_channels = self.num_classes
    else:
        # Softmax classification needs an explicit background channel.
        self.cls_out_channels = self.num_classes + 1

    # build bbox_coder
    self.anchor_coder = build_bbox_coder(anchor_coder)
    self.bbox_coder = build_bbox_coder(bbox_coder)

    # build losses
    self.loss_loc = build_loss(loss_loc)
    self.loss_shape = build_loss(loss_shape)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # use PseudoSampler when sampling is False
        if self.sampling and hasattr(self.train_cfg, 'sampler'):
            sampler_cfg = self.train_cfg.sampler
        else:
            sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)

        # Separate assigner/sampler pair for the guided-anchor targets.
        self.ga_assigner = build_assigner(self.train_cfg.ga_assigner)
        if self.ga_sampling:
            ga_sampler_cfg = self.train_cfg.ga_sampler
        else:
            ga_sampler_cfg = dict(type='PseudoSampler')
        self.ga_sampler = build_sampler(ga_sampler_cfg, context=self)

    self.fp16_enabled = False

    self._init_layers()
def test_partial_bin_based_box_coder():
    """Unit test for PartialBinBasedBBoxCoder: encode, decode, split_pred."""
    box_coder_cfg = dict(
        type='PartialBinBasedBBoxCoder',
        num_sizes=10,
        num_dir_bins=12,
        with_rot=True,
        mean_sizes=[[2.114256, 1.620300, 0.927272],
                    [0.791118, 1.279516, 0.718182],
                    [0.923508, 1.867419, 0.845495],
                    [0.591958, 0.552978, 0.827272],
                    [0.699104, 0.454178, 0.75625],
                    [0.69519, 1.346299, 0.736364],
                    [0.528526, 1.002642, 1.172878],
                    [0.500618, 0.632163, 0.683424],
                    [0.404671, 1.071108, 1.688889],
                    [0.76584, 1.398258, 0.472728]])
    box_coder = build_bbox_coder(box_coder_cfg)

    # test encode
    gt_bboxes = DepthInstance3DBoxes(
        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])
    gt_labels = torch.tensor([0, 1, 2])
    center_target, size_class_target, size_res_target, dir_class_target, \
        dir_res_target = box_coder.encode(gt_bboxes, gt_labels)
    expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413],
                                           [2.3002, 4.8149, -0.7687],
                                           [-1.1477, 1.8090, -0.1444]])
    expected_size_class_target = torch.tensor([0, 1, 2])
    expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972],
                                             [-0.2193, -0.4166, 0.2328],
                                             [-0.2270, -0.3401, 1.2108]])
    expected_dir_class_target = torch.tensor([3, 3, 0])
    expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552])
    assert torch.allclose(center_target, expected_center_target, atol=1e-4)
    assert torch.all(size_class_target == expected_size_class_target)
    assert torch.allclose(size_res_target, expected_size_res_target, atol=1e-4)
    assert torch.all(dir_class_target == expected_dir_class_target)
    assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4)

    # test decode
    # Predictions for a batch of 1 with 5 proposals.
    center = torch.tensor([[[0.8014, 3.4134, -0.6133],
                            [2.6375, 8.4191, 2.0438],
                            [4.2017, 5.2504, -0.7851],
                            [-1.0088, 5.4107, 1.6293],
                            [1.4837, 4.0268, 0.6222]]])

    # Per-proposal logits over the 10 size classes.
    size_class = torch.tensor([[[
        -1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642,
        -7.5886, -4.8627, -5.0437
    ],
                                [
                                    -2.2058, -0.3527, -1.9976, 0.8815,
                                    -2.7980, -1.9053, -0.5097, -2.0232,
                                    -1.4242, -4.1192
                                ],
                                [
                                    -1.4783, -0.1009, -1.1537, 0.3052,
                                    -4.3147, -2.6529, 0.2729, -0.3755,
                                    -2.6479, -3.7548
                                ],
                                [
                                    -6.1809, -3.5024, -8.3273, 1.1252,
                                    -4.3315, -7.8288, -4.6091, -5.8153,
                                    0.7480, -10.1396
                                ],
                                [
                                    -9.0424, -3.7883, -6.0788, -1.8855,
                                    -10.2493, -9.7164, -1.0658, -4.1713,
                                    1.1173, -10.6204
                                ]]])

    # Per-proposal, per-size-class residuals (xyz).
    size_res = torch.tensor([[[[-9.8976e-02, -5.2152e-01, -7.6421e-02],
                               [1.4593e-01, 5.6099e-01, 8.9421e-02],
                               [5.1481e-02, 3.9280e-01, 1.2705e-01],
                               [3.6869e-01, 7.0558e-01, 1.4647e-01],
                               [4.7683e-01, 3.3644e-01, 2.3481e-01],
                               [8.7346e-02, 8.4987e-01, 3.3265e-01],
                               [2.1393e-01, 8.5585e-01, 9.8948e-02],
                               [7.8530e-02, 5.9694e-02, -8.7211e-02],
                               [1.8551e-01, 1.1308e+00, -5.1864e-01],
                               [3.6485e-01, 7.3757e-01, 1.5264e-01]],
                              [[-9.5593e-01, -5.0455e-01, 1.9554e-01],
                               [-1.0870e-01, 1.8025e-01, 1.0228e-01],
                               [-8.2882e-02, -4.3771e-01, 9.2135e-02],
                               [-4.0840e-02, -5.9841e-02, 1.1982e-01],
                               [7.3448e-02, 5.2045e-02, 1.7301e-01],
                               [-4.0440e-02, 4.9532e-02, 1.1266e-01],
                               [3.5857e-02, 1.3564e-02, 1.0212e-01],
                               [-1.0407e-01, -5.9321e-02, 9.2622e-02],
                               [7.4691e-03, 9.3080e-02, -4.4077e-01],
                               [-6.0121e-02, -1.3381e-01, -6.8083e-02]],
                              [[-9.3970e-01, -9.7823e-01, -5.1075e-02],
                               [-1.2843e-01, -1.8381e-01, 7.1327e-02],
                               [-1.2247e-01, -8.1115e-01, 3.6495e-02],
                               [4.9154e-02, -4.5440e-02, 8.9520e-02],
                               [1.5653e-01, 3.5990e-02, 1.6414e-01],
                               [-5.9621e-02, 4.9357e-03, 1.4264e-01],
                               [8.5235e-04, -1.0030e-01, -3.0712e-02],
                               [-3.7255e-02, 2.8996e-02, 5.5545e-02],
                               [3.9298e-02, -4.7420e-02, -4.9147e-01],
                               [-1.1548e-01, -1.5895e-01, -3.9155e-02]],
                              [[-1.8725e+00, -7.4102e-01, 1.0524e+00],
                               [-3.3210e-01, 4.7828e-02, -3.2666e-02],
                               [-2.7949e-01, 5.5541e-02, -1.0059e-01],
                               [-8.5533e-02, 1.4870e-01, -1.6709e-01],
                               [3.8283e-01, 2.6609e-01, 2.1361e-01],
                               [-4.2156e-01, 3.2455e-01, 6.7309e-01],
                               [-2.4336e-02, -8.3366e-02, 3.9913e-01],
                               [8.2142e-03, 4.8323e-02, -1.5247e-01],
                               [-4.8142e-02, -3.0074e-01, -1.6829e-01],
                               [1.3274e-01, -2.3825e-01, -1.8127e-01]],
                              [[-1.2576e+00, -6.1550e-01, 7.9430e-01],
                               [-4.7222e-01, 1.5634e+00, -5.9460e-02],
                               [-3.5367e-01, 1.3616e+00, -1.6421e-01],
                               [-1.6611e-02, 2.4231e-01, -9.6188e-02],
                               [5.4486e-01, 4.6833e-01, 5.1151e-01],
                               [-6.1755e-01, 1.0292e+00, 1.2458e+00],
                               [-6.8152e-02, 2.4786e-01, 9.5088e-01],
                               [-4.8745e-02, 1.5134e-01, -9.9962e-02],
                               [2.4485e-03, -7.5991e-02, 1.3545e-01],
                               [4.1608e-01, -1.2093e-01, -3.1643e-01]]]])

    # Per-proposal logits over the 12 direction bins.
    dir_class = torch.tensor([[[
        -1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640, -4.0630,
        -5.2940, 0.8245, -3.1869, -6.1743
    ],
                               [
                                   -1.9503, -1.6940, -0.8716, -1.1494,
                                   -0.8196, 0.2862, -0.2921, -0.7894, -0.2481,
                                   -0.9916, -1.4304, -1.2466
                               ],
                               [
                                   -1.7435, -1.2043, -0.1265, 0.5083, -0.0717,
                                   -0.9560, -1.6171, -2.6463, -2.3863,
                                   -2.1358, -1.8812, -2.3117
                               ],
                               [
                                   -1.9282, 0.3792, -1.8426, -1.4587, -0.8582,
                                   -3.4639, -3.2133, -3.7867, -7.6781,
                                   -6.4459, -6.2455, -5.4797
                               ],
                               [
                                   -3.1869, 0.4456, -0.5824, 0.9994, -1.0554,
                                   -8.4232, -7.7019, -7.1382, -10.2724,
                                   -7.8229, -8.1860, -8.6194
                               ]]])

    # Per-proposal, per-bin direction residuals.
    dir_res = torch.tensor(
        [[[
            1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01,
            1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01,
            -2.6864e-01, 1.1290e-01
        ],
          [
              -1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02,
              -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02,
              3.6889e-02, -1.5609e-03
          ],
          [
              1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02,
              -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02,
              1.2647e-05, 2.4814e-02
          ],
          [
              1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02,
              4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02,
              -1.7390e-02, 9.0267e-02
          ],
          [
              1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01,
              2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02,
              -1.8109e-02, 1.1229e-01
          ]]])
    bbox_out = dict(
        center=center,
        size_class=size_class,
        size_res=size_res,
        dir_class=dir_class,
        dir_res=dir_res)

    bbox3d = box_coder.decode(bbox_out)
    expected_bbox3d = torch.tensor(
        [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926],
          [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149],
          [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839],
          [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605],
          [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]])
    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)

    # test split_pred
    # 79 = 2 (obj) + 3 (center) + 12*2 (dir cls+res) + 10*4 (size cls+res)
    # + 10 (sem); verify each split's shape.
    box_preds = torch.rand(2, 79, 256)
    base_xyz = torch.rand(2, 256, 3)
    results = box_coder.split_pred(box_preds, base_xyz)
    obj_scores = results['obj_scores']
    center = results['center']
    dir_class = results['dir_class']
    dir_res_norm = results['dir_res_norm']
    dir_res = results['dir_res']
    size_class = results['size_class']
    size_res_norm = results['size_res_norm']
    size_res = results['size_res']
    sem_scores = results['sem_scores']
    assert obj_scores.shape == torch.Size([2, 256, 2])
    assert center.shape == torch.Size([2, 256, 3])
    assert dir_class.shape == torch.Size([2, 256, 12])
    assert dir_res_norm.shape == torch.Size([2, 256, 12])
    assert dir_res.shape == torch.Size([2, 256, 12])
    assert size_class.shape == torch.Size([2, 256, 10])
    assert size_res_norm.shape == torch.Size([2, 256, 10, 3])
    assert size_res.shape == torch.Size([2, 256, 10, 3])
    assert sem_scores.shape == torch.Size([2, 256, 10])
def __init__(self,
             num_classes=80,
             in_channels=(512, 1024, 512, 256, 256, 256),
             anchor_generator=dict(
                 type='SSDAnchorGenerator',
                 scale_major=False,
                 input_size=300,
                 strides=[8, 16, 32, 64, 100, 300],
                 ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                 basesize_ratio_range=(0.1, 0.9)),
             background_label=None,
             bbox_coder=dict(
                 type='DeltaXYWHBBoxCoder',
                 target_means=[.0, .0, .0, .0],
                 target_stds=[1.0, 1.0, 1.0, 1.0],
             ),
             reg_decoded_bbox=False,
             depthwise_heads=False,
             depthwise_heads_activations='relu6',
             loss_balancing=False,
             train_cfg=None,
             test_cfg=None):
    """SSD head: one reg conv and one cls conv per feature level, with an
    optional depthwise-separable variant and optional learned loss
    balancing between cls and reg.

    Args:
        num_classes (int): Number of foreground classes.
        in_channels (tuple[int]): Input channels per feature level.
        anchor_generator (dict): SSD anchor generator config.
        background_label (int, optional): Index of the background class;
            defaults to `num_classes` (must be 0 or num_classes).
        bbox_coder (dict): Box coder config.
        reg_decoded_bbox (bool): If True, the regression loss is applied
            on decoded boxes.
        depthwise_heads (bool): If True, use depthwise 3x3 + BN + act +
            pointwise 1x1 heads instead of plain 3x3 convs.
        depthwise_heads_activations (str): 'relu' or 'relu6'; activation
            used inside the depthwise heads.
        loss_balancing (bool): If True, learn two loss-balancing weights.
        train_cfg (dict, optional): Training config.
        test_cfg (dict, optional): Testing config.
    """
    super(AnchorHead, self).__init__()
    self.num_classes = num_classes
    self.in_channels = in_channels
    self.cls_out_channels = num_classes + 1  # add background class
    self.anchor_generator = build_anchor_generator(anchor_generator)
    num_anchors = self.anchor_generator.num_base_anchors

    reg_convs = []
    cls_convs = []
    for i in range(len(in_channels)):
        if depthwise_heads:
            activation_class = {
                'relu': nn.ReLU,
                'relu6': nn.ReLU6,
            }[depthwise_heads_activations]
            # Depthwise 3x3 (groups == channels) + BN + act, then a
            # pointwise 1x1 projection to the prediction channels.
            reg_convs.append(
                nn.Sequential(
                    nn.Conv2d(
                        in_channels[i],
                        in_channels[i],
                        kernel_size=3,
                        padding=1,
                        groups=in_channels[i]),
                    nn.BatchNorm2d(in_channels[i]),
                    activation_class(inplace=True),
                    nn.Conv2d(
                        in_channels[i],
                        num_anchors[i] * 4,
                        kernel_size=1,
                        padding=0)))
            cls_convs.append(
                nn.Sequential(
                    nn.Conv2d(
                        in_channels[i],
                        in_channels[i],
                        kernel_size=3,
                        padding=1,
                        groups=in_channels[i]),
                    nn.BatchNorm2d(in_channels[i]),
                    activation_class(inplace=True),
                    nn.Conv2d(
                        in_channels[i],
                        num_anchors[i] * (num_classes + 1),
                        kernel_size=1,
                        padding=0)))
        else:
            reg_convs.append(
                nn.Conv2d(
                    in_channels[i],
                    num_anchors[i] * 4,
                    kernel_size=3,
                    padding=1))
            cls_convs.append(
                nn.Conv2d(
                    in_channels[i],
                    num_anchors[i] * (num_classes + 1),
                    kernel_size=3,
                    padding=1))
    self.reg_convs = nn.ModuleList(reg_convs)
    self.cls_convs = nn.ModuleList(cls_convs)

    self.background_label = (
        num_classes if background_label is None else background_label)
    # background_label should be either 0 or num_classes
    assert (self.background_label == 0
            or self.background_label == num_classes)

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.reg_decoded_bbox = reg_decoded_bbox
    self.use_sigmoid_cls = False
    self.cls_focal_loss = False
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    # set sampling=False for anchor_target
    self.sampling = False
    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # SSD sampling=False so use PseudoSampler
        sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)
    self.fp16_enabled = False

    self.loss_balancing = loss_balancing
    if self.loss_balancing:
        # Two learnable log-variance style weights (cls, reg), init to 0.
        self.loss_weights = torch.nn.Parameter(torch.FloatTensor(2))
        for i in range(2):
            self.loss_weights.data[i] = 0.
def __init__(self,
             num_classes,
             in_channels,
             out_channels=(1024, 512, 256),
             anchor_generator=dict(
                 type='YOLOAnchorGenerator',
                 base_sizes=[[(116, 90), (156, 198), (373, 326)],
                             [(30, 61), (62, 45), (59, 119)],
                             [(10, 13), (16, 30), (33, 23)]],
                 strides=[32, 16, 8]),
             bbox_coder=dict(type='YOLOBBoxCoder'),
             featmap_strides=[32, 16, 8],
             one_hot_smoother=0.,
             conv_cfg=None,
             norm_cfg=dict(type='BN', requires_grad=True),
             act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
             loss_cls=dict(
                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
             loss_conf=dict(
                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
             loss_xy=dict(
                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
             loss_wh=dict(type='MSELoss', loss_weight=1.0),
             train_cfg=None,
             test_cfg=None,
             init_cfg=dict(
                 type='Normal', std=0.01,
                 override=dict(name='convs_pred'))):
    """Set up the YOLOv3 detection head.

    Stores the per-level channel/stride configuration, builds the box
    coder, the anchor generator and the four YOLO losses (cls, conf,
    xy, wh), prepares the train-time assigner/sampler when a training
    config is supplied, and finally builds the conv layers.
    """
    super(YOLOV3Head, self).__init__(init_cfg)
    # The three per-level sequences must describe the same pyramid.
    assert (len(in_channels) == len(out_channels) == len(featmap_strides))

    self.num_classes = num_classes
    self.in_channels = in_channels
    self.out_channels = out_channels
    self.featmap_strides = featmap_strides
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # Fall back to a PseudoSampler unless the training config
        # explicitly provides one.
        sampler_cfg = getattr(self.train_cfg, 'sampler',
                              dict(type='PseudoSampler'))
        self.sampler = build_sampler(sampler_cfg, context=self)

    self.fp16_enabled = False
    self.one_hot_smoother = one_hot_smoother
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg
    self.act_cfg = act_cfg

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.anchor_generator = build_anchor_generator(anchor_generator)

    self.loss_cls = build_loss(loss_cls)
    self.loss_conf = build_loss(loss_conf)
    self.loss_xy = build_loss(loss_xy)
    self.loss_wh = build_loss(loss_wh)

    # usually the numbers of anchors for each level are the same
    # except SSD detectors
    self.num_anchors = self.anchor_generator.num_base_anchors[0]
    assert len(
        self.anchor_generator.num_base_anchors) == len(featmap_strides)
    self._init_layers()
def __init__(self,
             num_classes,
             in_channels,
             stacked_convs=4,
             feat_channels=256,
             approx_anchor_generator=dict(
                 type='AnchorGenerator',
                 octave_base_scale=4,
                 scales_per_octave=3,
                 ratios=[0.5, 1.0, 2.0],
                 strides=[8, 16, 32, 64, 128]),
             square_anchor_generator=dict(
                 type='AnchorGenerator',
                 ratios=[1.0],
                 scales=[4],
                 strides=[8, 16, 32, 64, 128]),
             conv_cfg=None,
             norm_cfg=None,
             bbox_coder=dict(
                 type='BucketingBBoxCoder',
                 num_buckets=14,
                 scale_factor=3.0),
             reg_decoded_bbox=False,
             background_label=None,
             train_cfg=None,
             test_cfg=None,
             loss_cls=dict(
                 type='FocalLoss',
                 use_sigmoid=True,
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
             loss_bbox_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 loss_weight=1.5),
             loss_bbox_reg=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)):
    """SABL RetinaNet head: single-stage head with side-aware bucketing
    regression (a bucket-classification loss plus a bucket-offset loss)
    on top of square anchors, with dense "approx" anchors for target
    assignment.
    """
    super(SABLRetinaHead, self).__init__()
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.feat_channels = feat_channels
    self.num_buckets = bbox_coder['num_buckets']
    # Buckets per side (left/right or top/bottom).
    self.side_num = int(np.ceil(self.num_buckets / 2))

    # Square anchors must share base scale and strides with the approx
    # anchors so the two generators describe the same locations.
    assert (approx_anchor_generator['octave_base_scale'] ==
            square_anchor_generator['scales'][0])
    assert (approx_anchor_generator['strides'] ==
            square_anchor_generator['strides'])

    self.approx_anchor_generator = build_anchor_generator(
        approx_anchor_generator)
    self.square_anchor_generator = build_anchor_generator(
        square_anchor_generator)
    self.approxs_per_octave = (
        self.approx_anchor_generator.num_base_anchors[0])

    # one anchor per location
    self.num_anchors = 1
    self.stacked_convs = stacked_convs
    self.conv_cfg = conv_cfg
    self.norm_cfg = norm_cfg

    self.reg_decoded_bbox = reg_decoded_bbox

    # Background index defaults to num_classes.
    self.background_label = (
        num_classes if background_label is None else background_label)

    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
    # Focal-style losses handle the fg/bg imbalance themselves and need
    # no sampling.
    self.sampling = loss_cls['type'] not in [
        'FocalLoss', 'GHMC', 'QualityFocalLoss'
    ]
    if self.use_sigmoid_cls:
        self.cls_out_channels = num_classes
    else:
        # Softmax classification needs an explicit background channel.
        self.cls_out_channels = num_classes + 1

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox_cls = build_loss(loss_bbox_cls)
    self.loss_bbox_reg = build_loss(loss_bbox_reg)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg

    if self.train_cfg:
        self.assigner = build_assigner(self.train_cfg.assigner)
        # use PseudoSampler when sampling is False
        if self.sampling and hasattr(self.train_cfg, 'sampler'):
            sampler_cfg = self.train_cfg.sampler
        else:
            sampler_cfg = dict(type='PseudoSampler')
        self.sampler = build_sampler(sampler_cfg, context=self)

    self.fp16_enabled = False
    self._init_layers()
def __init__(self,
             num_classes,
             suface_matching_cfg,
             line_matching_cfg,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             gt_per_seed=1,
             num_proposal=256,
             feat_channels=(128, 128),
             primitive_feat_refine_streams=2,
             primitive_refine_channels=[128, 128, 128],
             upper_thresh=100.0,
             surface_thresh=0.5,
             line_thresh=0.5,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None,
             cues_objectness_loss=None,
             cues_semantic_loss=None,
             proposal_objectness_loss=None,
             primitive_center_loss=None):
    """H3D bbox head: matches surface/line primitive centers to proposals
    and refines boxes from the aggregated primitive features.

    NOTE(review): `suface_matching_cfg` is a typo for "surface" in the
    public parameter name; it cannot be renamed here without breaking
    existing configs/callers.

    Args:
        num_classes (int): Number of classes.
        suface_matching_cfg (dict): SA-module config for surface center
            matching; its last mlp channel must equal that of
            `line_matching_cfg` (asserted below).
        line_matching_cfg (dict): SA-module config for line center
            matching.
        bbox_coder (dict): Box coder config; must carry `with_rot`,
            `num_dir_bins` and `num_sizes`.
        train_cfg / test_cfg (dict, optional): Train/test configs.
        gt_per_seed (int): Number of GT votes per seed point.
        num_proposal (int): Number of proposals.
        feat_channels (tuple[int]): Feature channels (stored by caller
            convention; not read in this constructor).
        primitive_feat_refine_streams (int): Number of refine convs in
            each (surface, line) aggregation stream.
        primitive_refine_channels (list[int]): Hidden channels of the
            final box-prediction tower.
        upper_thresh (float): Upper threshold used in matching.
        surface_thresh (float): Surface matching score threshold.
        line_thresh (float): Line matching score threshold.
        conv_cfg / norm_cfg (dict): Conv/norm configs for ConvModules.
        *_loss (dict, optional): Loss configs built via build_loss.
    """
    super(H3DBboxHead, self).__init__()
    self.num_classes = num_classes
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.gt_per_seed = gt_per_seed
    self.num_proposal = num_proposal
    self.with_angle = bbox_coder['with_rot']
    self.upper_thresh = upper_thresh
    self.surface_thresh = surface_thresh
    self.line_thresh = line_thresh

    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_class_loss = build_loss(size_class_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)

    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.num_sizes = self.bbox_coder.num_sizes
    self.num_dir_bins = self.bbox_coder.num_dir_bins

    self.cues_objectness_loss = build_loss(cues_objectness_loss)
    self.cues_semantic_loss = build_loss(cues_semantic_loss)
    self.proposal_objectness_loss = build_loss(proposal_objectness_loss)
    self.primitive_center_loss = build_loss(primitive_center_loss)

    # Both matchers must emit features of the same width, since they feed
    # the same matching convs below.
    assert suface_matching_cfg['mlp_channels'][-1] == \
        line_matching_cfg['mlp_channels'][-1]

    # surface center matching
    self.surface_center_matcher = build_sa_module(suface_matching_cfg)
    # line center matching
    self.line_center_matcher = build_sa_module(line_matching_cfg)

    # Compute the matching scores
    matching_feat_dims = suface_matching_cfg['mlp_channels'][-1]
    self.matching_conv = ConvModule(
        matching_feat_dims,
        matching_feat_dims,
        1,
        padding=0,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        bias=True,
        inplace=True)
    self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

    # Compute the semantic matching scores
    self.semantic_matching_conv = ConvModule(
        matching_feat_dims,
        matching_feat_dims,
        1,
        padding=0,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        bias=True,
        inplace=True)
    self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

    # Surface feature aggregation
    self.surface_feats_aggregation = list()
    for k in range(primitive_feat_refine_streams):
        self.surface_feats_aggregation.append(
            ConvModule(
                matching_feat_dims,
                matching_feat_dims,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
    self.surface_feats_aggregation = nn.Sequential(
        *self.surface_feats_aggregation)

    # Line feature aggregation
    self.line_feats_aggregation = list()
    for k in range(primitive_feat_refine_streams):
        self.line_feats_aggregation.append(
            ConvModule(
                matching_feat_dims,
                matching_feat_dims,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
    self.line_feats_aggregation = nn.Sequential(
        *self.line_feats_aggregation)

    # surface center(6) + line center(12)
    prev_channel = 18 * matching_feat_dims
    self.bbox_pred = nn.ModuleList()
    for k in range(len(primitive_refine_channels)):
        self.bbox_pred.append(
            ConvModule(
                prev_channel,
                primitive_refine_channels[k],
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=False))
        prev_channel = primitive_refine_channels[k]

    # Final object detection
    # Objectness scores (2), center residual (3),
    # heading class+residual (num_heading_bin*2), size class +
    # residual(num_size_cluster*4)
    conv_out_channel = (2 + 3 + bbox_coder['num_dir_bins'] * 2 +
                        bbox_coder['num_sizes'] * 4 + self.num_classes)
    self.bbox_pred.append(nn.Conv1d(prev_channel, conv_out_channel, 1))
def __init__(self,
             num_classes,
             in_channels,
             bbox_coder,
             num_decoder_layers,
             transformerlayers,
             decoder_self_posembeds=dict(
                 type='ConvBNPositionalEncoding',
                 input_channel=6,
                 num_pos_feats=288),
             decoder_cross_posembeds=dict(
                 type='ConvBNPositionalEncoding',
                 input_channel=3,
                 num_pos_feats=288),
             train_cfg=None,
             test_cfg=None,
             num_proposal=128,
             pred_layer_cfg=None,
             size_cls_agnostic=True,
             gt_per_seed=3,
             sampling_objectness_loss=None,
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             size_reg_loss=None,
             semantic_loss=None,
             init_cfg=None):
    """Group-Free 3D head: initial candidate sampling plus a stack of
    transformer decoder layers, each with its own positional encodings
    and its own box-prediction head.

    Args:
        num_classes (int): Number of classes.
        in_channels (int): Channels of the input seed features.
        bbox_coder (dict): Box coder config; provides `num_sizes` and
            `num_dir_bins`.
        num_decoder_layers (int): Number of transformer decoder layers.
        transformerlayers (dict | list[dict]): Config(s) of the decoder
            layers; a single dict is replicated per layer.
        decoder_self_posembeds (dict): Self-attention positional encoding
            config; `num_pos_feats` must match the decoder embed dims.
        decoder_cross_posembeds (dict): Cross-attention positional
            encoding config; same constraint.
        train_cfg / test_cfg (dict, optional): Train/test configs.
        num_proposal (int): Number of object candidates to sample.
        pred_layer_cfg (dict, optional): Shared kwargs for the
            BaseConvBboxHead prediction heads.
        size_cls_agnostic (bool): If True, regress a single size directly
            (size_reg_loss); otherwise use size class + residual losses.
        gt_per_seed (int): Number of GT votes per seed point.
        *_loss (dict, optional): Loss configs built via build_loss.
        init_cfg (dict, optional): Initialization config.
    """
    super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)
    self.num_classes = num_classes
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.num_proposal = num_proposal
    self.in_channels = in_channels
    self.num_decoder_layers = num_decoder_layers
    self.size_cls_agnostic = size_cls_agnostic
    self.gt_per_seed = gt_per_seed

    # Transformer decoder layers
    # A single layer config is deep-copied once per decoder layer.
    if isinstance(transformerlayers, ConfigDict):
        transformerlayers = [
            copy.deepcopy(transformerlayers)
            for _ in range(num_decoder_layers)
        ]
    else:
        assert isinstance(transformerlayers, list) and \
            len(transformerlayers) == num_decoder_layers
    self.decoder_layers = nn.ModuleList()
    for i in range(self.num_decoder_layers):
        self.decoder_layers.append(
            build_transformer_layer(transformerlayers[i]))
    self.embed_dims = self.decoder_layers[0].embed_dims
    # Positional encodings must produce vectors of the decoder width.
    assert self.embed_dims == decoder_self_posembeds['num_pos_feats']
    assert self.embed_dims == decoder_cross_posembeds['num_pos_feats']

    # bbox_coder
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.num_sizes = self.bbox_coder.num_sizes
    self.num_dir_bins = self.bbox_coder.num_dir_bins

    # Initial object candidate sampling
    self.gsample_module = GeneralSamplingModule()
    self.fps_module = Points_Sampler([self.num_proposal])
    self.points_obj_cls = PointsObjClsModule(self.in_channels)

    self.fp16_enabled = False

    # initial candidate prediction
    self.conv_pred = BaseConvBboxHead(
        **pred_layer_cfg,
        num_cls_out_channels=self._get_cls_out_channels(),
        num_reg_out_channels=self._get_reg_out_channels())

    # query proj and key proj
    self.decoder_query_proj = nn.Conv1d(
        self.embed_dims, self.embed_dims, kernel_size=1)
    self.decoder_key_proj = nn.Conv1d(
        self.embed_dims, self.embed_dims, kernel_size=1)

    # query position embed
    self.decoder_self_posembeds = nn.ModuleList()
    for _ in range(self.num_decoder_layers):
        self.decoder_self_posembeds.append(
            build_positional_encoding(decoder_self_posembeds))
    # key position embed
    self.decoder_cross_posembeds = nn.ModuleList()
    for _ in range(self.num_decoder_layers):
        self.decoder_cross_posembeds.append(
            build_positional_encoding(decoder_cross_posembeds))

    # Prediction Head
    # One box-prediction head per decoder layer (deep supervision).
    self.prediction_heads = nn.ModuleList()
    for i in range(self.num_decoder_layers):
        self.prediction_heads.append(
            BaseConvBboxHead(
                **pred_layer_cfg,
                num_cls_out_channels=self._get_cls_out_channels(),
                num_reg_out_channels=self._get_reg_out_channels()))

    self.sampling_objectness_loss = build_loss(sampling_objectness_loss)
    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.semantic_loss = build_loss(semantic_loss)
    if self.size_cls_agnostic:
        self.size_reg_loss = build_loss(size_reg_loss)
    else:
        self.size_res_loss = build_loss(size_res_loss)
        self.size_class_loss = build_loss(size_class_loss)