Exemple #1
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[0., 0., 0., 0.],
                     target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0, loss_weight=1.0),
                 init_cfg=None):
        """Build the box coder, the losses and the final FC layers.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC layers take ``in_channels``
                inputs; otherwise they take ``in_channels * roi_feat_area``.
            with_cls (bool): Create the classification layer ``fc_cls``.
            with_reg (bool): Create the regression layer ``fc_reg``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming features.
            num_classes (int): Class count; ``fc_cls`` has one extra output
                for the background class.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): When true ``fc_reg`` has 4 outputs,
                otherwise ``4 * num_classes``.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            init_cfg (dict | list | None): Forwarded to the parent
                constructor. When ``None``, ``Normal`` init overrides are
                installed for whichever FC layers were created.
        """
        super(BBoxHead, self).__init__(init_cfg)
        assert with_cls or with_reg

        # Plain configuration values.
        self.with_avg_pool = with_avg_pool
        self.with_cls, self.with_reg = with_cls, with_reg
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        feat_h, feat_w = _pair(roi_feat_size)
        self.roi_feat_size = (feat_h, feat_w)
        self.roi_feat_area = feat_h * feat_w

        # Components built from their configs.
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # Width of the flattened feature fed to the FC layers.
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
            fc_in_channels = self.in_channels
        else:
            fc_in_channels = self.in_channels * self.roi_feat_area

        if self.with_cls:
            # One extra logit for the background class.
            self.fc_cls = nn.Linear(fc_in_channels, num_classes + 1)
        if self.with_reg:
            if reg_class_agnostic:
                reg_outputs = 4
            else:
                reg_outputs = 4 * num_classes
            self.fc_reg = nn.Linear(fc_in_channels, reg_outputs)
        self.debug_imgs = None

        if init_cfg is None:
            # No explicit init config: fall back to Normal init per FC layer.
            default_init = []
            if self.with_cls:
                default_init.append(
                    dict(type='Normal', std=0.01,
                         override=dict(name='fc_cls')))
            if self.with_reg:
                default_init.append(
                    dict(type='Normal', std=0.001,
                         override=dict(name='fc_reg')))
            self.init_cfg = default_init
Exemple #2
0
    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 stacked_convs=0,
                 feat_channels=256,
                 use_depthwise=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 act_cfg=None,
                 anchor_generator=dict(
                     type='SSDAnchorGenerator',
                     scale_major=False,
                     input_size=300,
                     strides=[8, 16, 32, 64, 100, 300],
                     ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                     basesize_ratio_range=(0.1, 0.9)),
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[.0, .0, .0, .0],
                     target_stds=[1.0, 1.0, 1.0, 1.0],
                 ),
                 reg_decoded_bbox=False,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(
                     type='Xavier',
                     layer='Conv2d',
                     distribution='uniform',
                     bias=0)):
        """Configure the head, build its layers, coder and training helpers.

        Args:
            num_classes (int): Class count; ``cls_out_channels`` is
                ``num_classes + 1`` to include the background class.
            in_channels (tuple): Stored as-is for ``_init_layers``.
            stacked_convs (int): Stored as-is for ``_init_layers``.
            feat_channels (int): Stored as-is for ``_init_layers``.
            use_depthwise (bool): Stored as-is for ``_init_layers``.
            conv_cfg, norm_cfg, act_cfg (dict | None): Stored as-is.
            anchor_generator (dict): Config for ``build_anchor_generator``.
            bbox_coder (dict): Config for ``build_bbox_coder``.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            train_cfg: When truthy, ``train_cfg.assigner`` is built and a
                ``PseudoSampler`` is attached.
            test_cfg: Stored on the instance unchanged.
            init_cfg (dict): Forwarded to the parent constructor.
        """
        super(AnchorHead, self).__init__(init_cfg)

        # Everything below must exist before ``_init_layers`` runs.
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.stacked_convs = stacked_convs
        self.feat_channels = feat_channels
        self.use_depthwise = use_depthwise
        self.conv_cfg, self.norm_cfg, self.act_cfg = (conv_cfg, norm_cfg,
                                                      act_cfg)

        # Background gets its own output channel.
        self.cls_out_channels = num_classes + 1
        self.anchor_generator = build_anchor_generator(anchor_generator)
        self.num_anchors = self.anchor_generator.num_base_anchors

        self._init_layers()

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.reg_decoded_bbox = reg_decoded_bbox
        self.use_sigmoid_cls = False
        self.cls_focal_loss = False
        self.train_cfg, self.test_cfg = train_cfg, test_cfg
        # Sampling is disabled for anchor target computation.
        self.sampling = False
        self.fp16_enabled = False

        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # With sampling disabled the head always uses PseudoSampler.
            self.sampler = build_sampler(
                dict(type='PseudoSampler'), context=self)
Exemple #3
0
    def loss_odm_single(self, odm_cls_score, odm_bbox_pred, anchors, labels,
                        label_weights, bbox_targets, bbox_weights,
                        num_total_samples, cfg):
        """Compute the ODM classification and box losses for one input set.

        Score and box maps are permuted with ``(0, 2, 3, 1)`` and flattened
        to ``(-1, self.cls_out_channels)`` and ``(-1, 5)`` respectively;
        targets and weights are flattened to match.

        Args:
            odm_cls_score: Classification map (4-D, permuted as above).
            odm_bbox_pred: Box regression map (4-D, permuted as above).
            anchors: Anchors, only used when ``cfg['reg_decoded_bbox']`` is
                truthy; reshaped to ``(-1, 5)`` before decoding.
            labels: Classification targets, flattened to 1-D.
            label_weights: Per-label weights, flattened to 1-D.
            bbox_targets: Box targets, reshaped to ``(-1, 5)``.
            bbox_weights: Box weights, reshaped to ``(-1, 5)``.
            num_total_samples: Passed to both losses as ``avg_factor``.
            cfg: Mapping read with ``.get`` for ``'reg_decoded_bbox'`` and
                ``'bbox_coder'``.

        Returns:
            tuple: ``(loss_odm_cls, loss_odm_bbox)``.
        """
        # ---- classification ----
        flat_cls_score = odm_cls_score.permute(0, 2, 3, 1)
        flat_cls_score = flat_cls_score.reshape(-1, self.cls_out_channels)
        loss_odm_cls = self.loss_odm_cls(
            flat_cls_score,
            labels.reshape(-1),
            label_weights.reshape(-1),
            avg_factor=num_total_samples)

        # ---- regression ----
        flat_targets = bbox_targets.reshape(-1, 5)
        flat_weights = bbox_weights.reshape(-1, 5)
        flat_bbox_pred = odm_bbox_pred.permute(0, 2, 3, 1).reshape(-1, 5)

        if cfg.get('reg_decoded_bbox', False):
            # The regression loss (e.g. `IouLoss`, `GIouLoss`) is applied to
            # decoded boxes, so turn the encoded predictions into absolute
            # coordinates first.
            coder_cfg = cfg.get('bbox_coder', '')
            if coder_cfg == '':
                coder_cfg = dict(type='DeltaXYWHBBoxCoder')
            decoder = build_bbox_coder(coder_cfg)
            flat_bbox_pred = decoder.decode(
                anchors.reshape(-1, 5), flat_bbox_pred)

        loss_odm_bbox = self.loss_odm_bbox(
            flat_bbox_pred,
            flat_targets,
            flat_weights,
            avg_factor=num_total_samples)
        return loss_odm_cls, loss_odm_bbox
Exemple #4
0
    def __init__(self,
                 num_classes,
                 bbox_coder,
                 train_cfg=None,
                 test_cfg=None,
                 pred_layer_cfg=None,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 dir_res_loss=None,
                 size_res_loss=None,
                 semantic_loss=None):
        """Build the losses, the box coder and the prediction conv head.

        Args:
            num_classes (int): Stored on the instance.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            train_cfg: Stored on the instance unchanged.
            test_cfg: Stored on the instance unchanged.
            pred_layer_cfg (dict): Unpacked as keyword arguments into
                ``BaseConvBboxHead``; must be a mapping despite the
                ``None`` default.
            conv_cfg (dict): Accepted but not read in this constructor.
            norm_cfg (dict): Accepted but not read in this constructor.
            dir_res_loss: Config handed to ``build_loss``.
            size_res_loss: Config handed to ``build_loss``.
            semantic_loss: Config handed to ``build_loss``.
        """
        super(BRBboxHead, self).__init__()
        self.num_classes = num_classes
        self.train_cfg, self.test_cfg = train_cfg, test_cfg

        # Loss modules, one per supervised quantity.
        self.dir_res_loss = build_loss(dir_res_loss)
        self.size_res_loss = build_loss(size_res_loss)
        self.semantic_loss = build_loss(semantic_loss)

        self.bbox_coder = build_bbox_coder(bbox_coder)

        # Shared conv head producing classification and regression outputs;
        # the channel counts come from the class's own helper methods.
        self.conv_pred = BaseConvBboxHead(
            **pred_layer_cfg,
            num_cls_out_channels=self._get_cls_out_channels(),
            num_reg_out_channels=self._get_reg_out_channels(),
        )
Exemple #5
0
def test_centerpoint_bbox_coder():
    """Check output shapes of ``CenterPointBBoxCoder.decode`` on random maps."""
    coder = build_bbox_coder(
        dict(
            type='CenterPointBBoxCoder',
            post_center_range=[-61.2, -61.2, -10.0, 61.2, 61.2, 10.0],
            max_num=500,
            score_threshold=0.1,
            pc_range=[-51.2, -51.2],
            out_size_factor=4,
            voxel_size=[0.2, 0.2]))

    # Random prediction maps for a batch of 2 on a 128x128 grid; only the
    # channel count differs between them.
    batch_dim = torch.rand([2, 3, 128, 128])
    batch_hei = torch.rand([2, 1, 128, 128])
    batch_hm = torch.rand([2, 2, 128, 128])
    batch_reg = torch.rand([2, 2, 128, 128])
    batch_rotc = torch.rand([2, 1, 128, 128])
    batch_rots = torch.rand([2, 1, 128, 128])
    batch_vel = torch.rand([2, 2, 128, 128])

    decoded = coder.decode(batch_hm, batch_rots, batch_rotc, batch_hei,
                           batch_dim, batch_vel, batch_reg, 5)

    # Every per-sample result must hold exactly ``max_num`` (500) entries.
    expected_shapes = {
        'bboxes': torch.Size([500, 9]),
        'scores': torch.Size([500]),
        'labels': torch.Size([500]),
    }
    for sample in decoded:
        for key, shape in expected_shapes.items():
            assert sample[key].shape == shape
    def __init__(self,
                 *args,
                 anchor_angles=[
                     0.,
                 ],
                 bbox_coder=dict(type='DeltaXYWHABBoxCoder',
                                 target_means=(.0, .0, .0, .0, .0),
                                 target_stds=(1.0, 1.0, 1.0, 1.0, 1.0)),
                 **kargs):
        """Extend the parent anchor head with rotated anchors.

        Args:
            *args: Forwarded to the parent constructor.
            anchor_angles (Sequence[float]): Anchor angles. The value is
                copied into a fresh list, so instances never share (or
                modify) the default list or the caller's sequence.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            **kargs: Forwarded to the parent constructor.
        """
        super(AnchorHeadRotated, self).__init__(*args, **kargs)

        # Per-instance copy: the default argument is one list object shared
        # by every call, so storing it directly would alias all instances.
        self.anchor_angles = list(anchor_angles)
        self.reg_decoded_bbox = False
        self.use_vfl = True
        self.bbox_coder = build_bbox_coder(bbox_coder)

        # One rotated anchor generator per base size; the base sizes, scales
        # and ratios are attributes expected to be set by the parent
        # constructor.
        self.anchor_generators = [
            AnchorGeneratorRotated(anchor_base,
                                   self.anchor_scales,
                                   self.anchor_ratios,
                                   angles=self.anchor_angles)
            for anchor_base in self.anchor_base_sizes
        ]

        self.num_anchors = len(self.anchor_ratios) * \
            len(self.anchor_scales) * len(self.anchor_angles)

        # Build the layers with the rotated anchor count now in place.
        self._init_layers()
Exemple #7
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 feat_channels=256,
                 anchor_generator=dict(
                     type='AnchorGenerator',
                     scales=[8, 16, 32],
                     ratios=[0.5, 1.0, 2.0],
                     strides=[4, 8, 16, 32, 64]),
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=(.0, .0, .0, .0),
                     target_stds=(1.0, 1.0, 1.0, 1.0)),
                 reg_decoded_bbox=False,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss',
                     beta=1.0 / 9.0,
                     loss_weight=1.0),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(type='Normal', layer='Conv2d', std=0.01)):
        """Configure an anchor-based head and build all its components.

        Args:
            num_classes (int): Class count. ``cls_out_channels`` equals it
                when ``loss_cls`` uses sigmoid, else ``num_classes + 1``.
            in_channels (int): Stored on the instance.
            feat_channels (int): Stored on the instance.
            anchor_generator (dict): Config for ``build_anchor_generator``.
            bbox_coder (dict): Config for ``build_bbox_coder``.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            loss_cls (dict): Config for ``build_loss``; its ``type`` and
                ``use_sigmoid`` entries also drive sampling and the output
                channel count.
            loss_bbox (dict): Config for ``build_loss``.
            train_cfg: When truthy, an assigner and a sampler are built.
            test_cfg: Stored on the instance unchanged.
            init_cfg (dict): Forwarded to the parent constructor.

        Raises:
            ValueError: If the resulting ``cls_out_channels`` is not
                positive.
        """
        super(AnchorHead, self).__init__(init_cfg)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels

        sigmoid_cls = loss_cls.get('use_sigmoid', False)
        self.use_sigmoid_cls = sigmoid_cls
        # TODO better way to determine whether sample or not
        losses_without_sampling = ('FocalLoss', 'GHMC', 'QualityFocalLoss')
        self.sampling = loss_cls['type'] not in losses_without_sampling
        self.cls_out_channels = (
            num_classes if sigmoid_cls else num_classes + 1)

        if self.cls_out_channels <= 0:
            raise ValueError(f'num_classes={num_classes} is too small')
        self.reg_decoded_bbox = reg_decoded_bbox

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.train_cfg, self.test_cfg = train_cfg, test_cfg
        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # Fall back to PseudoSampler unless sampling is on and the
            # training config provides its own sampler.
            has_cfg_sampler = self.sampling and hasattr(
                self.train_cfg, 'sampler')
            sampler_cfg = (self.train_cfg.sampler
                           if has_cfg_sampler else dict(type='PseudoSampler'))
            self.sampler = build_sampler(sampler_cfg, context=self)
        self.fp16_enabled = False

        self.anchor_generator = build_anchor_generator(anchor_generator)
        # The anchor count is usually identical on every level (SSD
        # detectors are the exception), so the first level's value is used.
        self.num_anchors = self.anchor_generator.num_base_anchors[0]
        self._init_layers()
    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_generator=dict(
                     type='SSDAnchorGenerator',
                     scale_major=False,
                     input_size=300,
                     strides=[8, 16, 32, 64, 100, 300],
                     ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                     basesize_ratio_range=(0.1, 0.9)),
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[.0, .0, .0, .0],
                     target_stds=[1.0, 1.0, 1.0, 1.0],
                 ),
                 reg_decoded_bbox=False,
                 train_cfg=None,
                 test_cfg=None):
        """Build one regression and one classification conv per input level.

        Args:
            num_classes (int): Class count; one background channel is added
                on top of it.
            in_channels (Sequence[int]): Input channel count of each level.
            anchor_generator (dict): Config for ``build_anchor_generator``.
            bbox_coder (dict): Config for ``build_bbox_coder``.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            train_cfg: When truthy, ``train_cfg.assigner`` is built and a
                ``PseudoSampler`` is attached.
            test_cfg: Stored on the instance unchanged.
        """
        super(AnchorHead, self).__init__()
        self.num_classes = num_classes  # number of classes
        # Input channels per level (compare with the network diagram).
        self.in_channels = in_channels
        # Output channels: the classes plus one background class.
        self.cls_out_channels = num_classes + 1
        # Build the anchor generator object from its config.
        self.anchor_generator = build_anchor_generator(anchor_generator)
        num_anchors = self.anchor_generator.num_base_anchors

        reg_layers = []  # regression convs
        cls_layers = []  # classification convs
        # The two convs of a level are created back to back, level by level.
        for level, level_channels in enumerate(in_channels):
            anchors_here = num_anchors[level]
            reg_layers.append(
                nn.Conv2d(level_channels,
                          anchors_here * 4,
                          kernel_size=3,
                          padding=1))
            cls_layers.append(
                nn.Conv2d(level_channels,
                          anchors_here * (num_classes + 1),
                          kernel_size=3,
                          padding=1))
        self.reg_convs = nn.ModuleList(reg_layers)
        self.cls_convs = nn.ModuleList(cls_layers)

        # Create the bbox coder object.
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.reg_decoded_bbox = reg_decoded_bbox
        self.use_sigmoid_cls = False
        self.cls_focal_loss = False
        self.train_cfg, self.test_cfg = train_cfg, test_cfg
        # Sampling is disabled for anchor target computation.
        self.sampling = False
        self.fp16_enabled = False

        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # With sampling disabled the head always uses PseudoSampler.
            self.sampler = build_sampler(
                dict(type='PseudoSampler'), context=self)
    def __init__(self,
                 in_channels=[128],
                 tasks=None,
                 train_cfg=None,
                 test_cfg=None,
                 bbox_coder=None,
                 common_heads=dict(),
                 loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
                 loss_bbox=dict(type='L1Loss',
                                reduction='none',
                                loss_weight=0.25),
                 separate_head=dict(type='SeparateHead',
                                    init_bias=-2.19,
                                    final_kernel=3),
                 share_conv_channel=64,
                 num_heatmap_convs=2,
                 conv_cfg=dict(type='Conv2d'),
                 norm_cfg=dict(type='BN2d'),
                 bias='auto',
                 norm_bbox=True):
        """Build the shared conv and one separate head per task.

        Args:
            in_channels: Input channels handed to the shared ``ConvModule``.
            tasks (list[dict]): One entry per task, each with a
                ``'class_names'`` sequence. Must be iterable despite the
                ``None`` default.
            train_cfg: Stored on the instance unchanged.
            test_cfg: Stored on the instance unchanged.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            common_heads (dict): Head entries shared by every task; it is
                deep-copied per task before the ``heatmap`` entry is added.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            separate_head (dict): Base config of each task head. It is
                copied per task and never modified in place.
            share_conv_channel (int): Output channels of the shared conv and
                input channels of each task head.
            num_heatmap_convs (int): Second element of each task's
                ``heatmap`` head entry.
            conv_cfg (dict): Conv config of the shared ``ConvModule``.
            norm_cfg (dict): Norm config of the shared ``ConvModule``.
            bias: ``bias`` argument of the shared ``ConvModule``.
            norm_bbox (bool): Stored on the instance unchanged.
        """
        super(CenterHead, self).__init__()

        num_classes = [len(t['class_names']) for t in tasks]
        self.class_names = [t['class_names'] for t in tasks]
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.norm_bbox = norm_bbox

        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.num_anchor_per_locs = list(num_classes)
        self.fp16_enabled = False

        # a shared convolution
        self.shared_conv = ConvModule(in_channels,
                                      share_conv_channel,
                                      kernel_size=3,
                                      padding=1,
                                      conv_cfg=conv_cfg,
                                      norm_cfg=norm_cfg,
                                      bias=bias)

        self.task_heads = nn.ModuleList()

        for num_cls in num_classes:
            heads = copy.deepcopy(common_heads)
            heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))
            # Work on a copy: updating ``separate_head`` itself would write
            # per-task values into the shared default dict (or into the
            # caller's config) and leak them into later constructions.
            head_cfg = copy.copy(separate_head)
            head_cfg.update(in_channels=share_conv_channel,
                            heads=heads,
                            num_cls=num_cls)
            self.task_heads.append(builder.build_head(head_cfg))
    def __init__(
        self,
        with_avg_pool=False,
        num_shared_fcs=2,
        roi_feat_size=7,
        in_channels=256,
        fc_out_channels=1024,
        num_classes=15,
        reg_class_agnostic=False,
        ratio_thr=0.8,
        bbox_coder=dict(
            type='DeltaXYWHBBoxCoder',
            target_means=[0., 0., 0., 0.],
            target_stds=[0.1, 0.1, 0.2, 0.2]),
        fix_coder=dict(type='GVFixCoder'),
        ratio_coder=dict(type='GVRatioCoder'),
        loss_cls=dict(
            type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0),
        loss_bbox=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_fix=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=1.0),
        loss_ratio=dict(type='SmoothL1Loss', beta=1. / 3., loss_weight=16.0),
    ):
        """Store the settings, build three coders and four losses, then layers.

        Args:
            with_avg_pool (bool): Stored for ``_init_layers``.
            num_shared_fcs (int): Stored for ``_init_layers``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Stored on the instance.
            fc_out_channels (int): Stored on the instance.
            num_classes (int): Stored on the instance.
            reg_class_agnostic (bool): Stored on the instance.
            ratio_thr (float): Stored on the instance.
            bbox_coder, fix_coder, ratio_coder (dict): Configs handed to
                ``build_bbox_coder``.
            loss_cls, loss_bbox, loss_fix, loss_ratio (dict): Configs handed
                to ``build_loss``.
        """
        super(GVBBoxHead, self).__init__()

        # Scalar settings.
        self.with_avg_pool = with_avg_pool
        self.num_shared_fcs = num_shared_fcs
        self.in_channels = in_channels
        self.fc_out_channels = fc_out_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.ratio_thr = ratio_thr
        self.fp16_enabled = False

        feat_h, feat_w = _pair(roi_feat_size)
        self.roi_feat_size = (feat_h, feat_w)
        self.roi_feat_area = feat_h * feat_w

        # Fixed box types at the two ends of this head.
        self.start_bbox_type, self.end_bbox_type = 'hbb', 'poly'

        # Coders.
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.fix_coder = build_bbox_coder(fix_coder)
        self.ratio_coder = build_bbox_coder(ratio_coder)

        # Losses.
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_fix = build_loss(loss_fix)
        self.loss_ratio = build_loss(loss_ratio)

        self._init_layers()
Exemple #11
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 feat_channels=256,
                 stacked_convs=4,
                 strides=(4, 8, 16, 32, 64),
                 dcn_on_last_conv=False,
                 conv_bias='auto',
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox=dict(type='IoULoss', loss_weight=1.0),
                 bbox_coder=dict(type='DistancePointBBoxCoder'),
                 conv_cfg=None,
                 norm_cfg=None,
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(
                     type='Normal',
                     layer='Conv2d',
                     std=0.01,
                     override=dict(
                         type='Normal',
                         name='conv_cls',
                         std=0.01,
                         bias_prob=0.01))):
        """Configure an anchor-free head and build its layers.

        Args:
            num_classes (int): Class count. ``cls_out_channels`` equals it
                when ``loss_cls`` uses sigmoid, else ``num_classes + 1``.
            in_channels (int): Stored on the instance.
            feat_channels (int): Stored on the instance.
            stacked_convs (int): Stored on the instance.
            strides (tuple): Stored and handed to ``MlvlPointGenerator``.
            dcn_on_last_conv (bool): Stored on the instance.
            conv_bias (bool | str): Must be a bool or the string ``'auto'``.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            conv_cfg, norm_cfg (dict | None): Stored on the instance.
            train_cfg, test_cfg: Stored on the instance.
            init_cfg (dict): Forwarded to the parent constructor.
        """
        super(AnchorFreeHead, self).__init__(init_cfg)
        self.num_classes = num_classes

        sigmoid_cls = loss_cls.get('use_sigmoid', False)
        self.use_sigmoid_cls = sigmoid_cls
        self.cls_out_channels = (
            num_classes if sigmoid_cls else num_classes + 1)

        self.in_channels = in_channels
        self.feat_channels = feat_channels
        self.stacked_convs = stacked_convs
        self.strides = strides
        self.dcn_on_last_conv = dcn_on_last_conv
        assert conv_bias == 'auto' or isinstance(conv_bias, bool)
        self.conv_bias = conv_bias

        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
        self.bbox_coder = build_bbox_coder(bbox_coder)

        self.prior_generator = MlvlPointGenerator(strides)
        # Mirror the anchor-head interface: a point is treated as a single
        # anchor, so take the first level's prior count.
        self.num_base_priors = self.prior_generator.num_base_priors[0]

        self.train_cfg, self.test_cfg = train_cfg, test_cfg
        self.conv_cfg, self.norm_cfg = conv_cfg, norm_cfg
        self.fp16_enabled = False

        self._init_layers()
Exemple #12
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 start_bbox_type='hbb',
                 end_bbox_type='hbb',
                 reg_dim=None,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=15,
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     target_means=[0., 0., 0., 0.],
                     target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
        """Build the coder, the losses and the final FC layers.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and the FC layers take ``in_channels``
                inputs; otherwise ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls``.
            with_reg (bool): Create ``fc_reg``.
            start_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
            end_bbox_type (str): One of ``'hbb'``, ``'obb'``, ``'poly'``.
            reg_dim (int | None): Regression width per box; when ``None`` it
                is ``get_bbox_dim(end_bbox_type)``.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming features.
            num_classes (int): Class count; ``fc_cls`` has one extra output
                for the background class.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): When true ``fc_reg`` has ``reg_dim``
                outputs, otherwise ``reg_dim * num_classes``.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(OBBoxHead, self).__init__()
        assert with_cls or with_reg

        self.with_avg_pool = with_avg_pool
        self.with_cls, self.with_reg = with_cls, with_reg
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        feat_h, feat_w = _pair(roi_feat_size)
        self.roi_feat_size = (feat_h, feat_w)
        self.roi_feat_area = feat_h * feat_w

        self.start_bbox_type = start_bbox_type
        self.end_bbox_type = end_bbox_type
        assert self.start_bbox_type in ['hbb', 'obb', 'poly']
        assert self.end_bbox_type in ['hbb', 'obb', 'poly']
        # An explicit ``reg_dim`` wins over the one implied by the box type.
        if reg_dim is None:
            self.reg_dim = get_bbox_dim(self.end_bbox_type)
        else:
            self.reg_dim = reg_dim

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # Width of the flattened feature fed to the FC layers.
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
            fc_in_channels = self.in_channels
        else:
            fc_in_channels = self.in_channels * self.roi_feat_area

        if self.with_cls:
            # One extra logit for the background class.
            self.fc_cls = nn.Linear(fc_in_channels, num_classes + 1)
        if self.with_reg:
            if reg_class_agnostic:
                reg_outputs = self.reg_dim
            else:
                reg_outputs = self.reg_dim * num_classes
            self.fc_reg = nn.Linear(fc_in_channels, reg_outputs)
        self.debug_imgs = None
Exemple #13
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 score_type='normal',
                 head_config=[True,False,False],
                 init_type='normal',
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     clip_border=True,
                     target_means=[0., 0., 0., 0.],
                     target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_bbox=dict(
                     type='SmoothL1Loss', beta=1.0, loss_weight=1.0)):
        """Build a classification-only box head.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and ``fc_cls`` takes ``in_channels``
                inputs; otherwise ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls``.
            with_reg (bool): Only checked by the assertion below;
                ``self.with_reg`` is always ``False`` and no regression
                layer is created.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming features.
            num_classes (int): Class count; ``fc_cls`` has one extra output
                for the background class.
            score_type (str): Stored on the instance unchanged.
            head_config (Sequence[bool]): Copied into a fresh list so that
                instances never share the default list.
            init_type (str): Stored on the instance unchanged.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): Stored on the instance unchanged.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
        """
        super(AttentionLogoBBoxHead, self).__init__()
        # NOTE(review): this passes for with_cls=False, with_reg=True, which
        # yields a head with no FC layer at all because regression is forced
        # off below - confirm whether that combination should be rejected.
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        # Regression is always disabled, whatever ``with_reg`` was passed.
        self.with_reg = False
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        # How the score is regressed.
        self.score_type = score_type
        # Per-instance copy: the default list is a single shared object.
        self.head_config = list(head_config)
        self.init_type = init_type

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        # No regression branch: ``self.with_reg`` is False, so the former
        # ``if self.with_reg`` block could never run and was removed.
        self.debug_imgs = None
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 clip_border=True,
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0),
                 data_path='/data/zhaozhiyuan/tb_variation/VOCdevkit_all'):
        """Build a classification-only box head and load the style index.

        Args:
            with_avg_pool (bool): When true an ``nn.AvgPool2d`` over the RoI
                window is created and ``fc_cls`` takes ``in_channels``
                inputs; otherwise ``in_channels * roi_feat_area``.
            with_cls (bool): Create ``fc_cls``.
            with_reg (bool): Only checked by the assertion below;
                ``self.with_reg`` is always ``False`` and no regression
                layer is created.
            roi_feat_size (int | tuple): Expanded to a pair with ``_pair``.
            in_channels (int): Channel count of the incoming features.
            num_classes (int): Class count; ``fc_cls`` has one extra output
                for the background class.
            bbox_coder (dict): Config handed to ``build_bbox_coder``.
            reg_class_agnostic (bool): Stored on the instance unchanged.
            reg_decoded_bbox (bool): Stored on the instance unchanged.
            loss_cls (dict): Config handed to ``build_loss``.
            loss_bbox (dict): Config handed to ``build_loss``.
            data_path (str): Dataset root. Annotations are read from
                ``<data_path>/VOC2007/Annotations``. Defaults to the path
                that used to be hard-coded here.
        """
        super(LogoDCBBoxHead, self).__init__()
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        # Regression is always disabled, whatever ``with_reg`` was passed.
        self.with_reg = False
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        # No regression branch: ``self.with_reg`` is False, so the former
        # ``if self.with_reg`` block could never run and was removed.
        self.debug_imgs = None

        self.file_to_style = {}
        self.data_path = data_path
        self.anno_path = os.path.join(self.data_path, 'VOC2007', 'Annotations')
        # Map each annotation file name (the text before its first '.') to
        # the <style> text of the first <object> element in that file.
        for anno in os.listdir(self.anno_path):
            anno_file = ET.parse(os.path.join(self.anno_path, anno))
            first_object = anno_file.find('object')
            self.file_to_style[anno.split('.')[0]] = \
                first_object.find('style').text
Exemple #15
0
    def __init__(self,
                 num_classes,
                 bbox_coder,
                 train_cfg=None,
                 test_cfg=None,
                 vote_module_cfg=None,
                 vote_aggregation_cfg=None,
                 pred_layer_cfg=None,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 objectness_loss=None,
                 center_loss=None,
                 center_loss_mse=None,
                 dir_class_loss=None,
                 dir_res_loss=None,
                 size_class_loss=None,
                 size_res_loss=None,
                 semantic_loss=None,
                 iou_loss=None):
        """Build the losses, the coder, the vote modules and the conv head.

        Args:
            num_classes (int): Stored on the instance.
            bbox_coder (dict): Config handed to ``build_bbox_coder``; the
                built coder must expose ``num_sizes`` and ``num_dir_bins``.
            train_cfg, test_cfg: Stored on the instance unchanged.
            vote_module_cfg (dict): Keyword arguments of ``VoteModule``;
                must contain ``'gt_per_seed'``.
            vote_aggregation_cfg (dict): Config handed to
                ``build_sa_module``; must contain ``'num_point'``.
            pred_layer_cfg (dict): Unpacked as keyword arguments into
                ``BaseConvBboxHead``.
            conv_cfg (dict): Accepted but not read in this constructor.
            norm_cfg (dict): Accepted but not read in this constructor.
            objectness_loss, dir_class_loss, dir_res_loss, size_res_loss:
                Configs always handed to ``build_loss``.
            size_class_loss, semantic_loss, center_loss, center_loss_mse:
                Optional loss configs; the matching attribute is only
                created when the config is not ``None``.
            iou_loss: Optional loss config; ``self.iou_loss`` is ``None``
                when it is not given.
        """
        super(VoteHead, self).__init__()
        self.num_classes = num_classes
        self.train_cfg, self.test_cfg = train_cfg, test_cfg
        self.gt_per_seed = vote_module_cfg['gt_per_seed']
        self.num_proposal = vote_aggregation_cfg['num_point']

        # Losses that are always built.
        self.objectness_loss = build_loss(objectness_loss)
        self.dir_res_loss = build_loss(dir_res_loss)
        self.dir_class_loss = build_loss(dir_class_loss)
        self.size_res_loss = build_loss(size_res_loss)

        # Optional losses: the attribute stays undefined without a config.
        for attr_name, loss_cfg in (('size_class_loss', size_class_loss),
                                    ('semantic_loss', semantic_loss)):
            if loss_cfg is not None:
                setattr(self, attr_name, build_loss(loss_cfg))

        # ``iou_loss`` is the one optional loss that is always defined.
        self.iou_loss = build_loss(iou_loss) if iou_loss is not None else None

        for attr_name, loss_cfg in (('center_loss', center_loss),
                                    ('center_loss_mse', center_loss_mse)):
            if loss_cfg is not None:
                setattr(self, attr_name, build_loss(loss_cfg))

        coder = build_bbox_coder(bbox_coder)
        self.bbox_coder = coder
        self.num_sizes = coder.num_sizes
        self.num_dir_bins = coder.num_dir_bins

        self.vote_module = VoteModule(**vote_module_cfg)
        self.vote_aggregation = build_sa_module(vote_aggregation_cfg)
        self.fp16_enabled = False

        # Shared conv head producing classification and regression outputs.
        self.conv_pred = BaseConvBboxHead(
            **pred_layer_cfg,
            num_cls_out_channels=self._get_cls_out_channels(),
            num_reg_out_channels=self._get_reg_out_channels(),
        )
Exemple #16
0
 def __init__(self, loss_weight, box_coder=None):
     """Store the loss weight and build the box coder.

     Args:
         loss_weight: Kept on the instance as ``self.weight``.
         box_coder: Config handed to ``build_bbox_coder``; the built coder
             is kept as ``self.box_coder``.
     """
     super(Boxes3dDecodeLoss, self).__init__()
     self.box_coder = build_bbox_coder(box_coder)
     self.weight = loss_weight
Exemple #17
0
    def __init__(self,
                 roi_feat_size=7,
                 in_channels=256,
                 num_convs=4,
                 num_fcs=2,
                 reg_num=2,
                 conv_out_channels=256,
                 fc_out_channels=1024,
                 offset_coordinate='rectangle',
                 offset_coder=dict(
                    type='DeltaXYOffsetCoder',
                    target_means=[0.0, 0.0],
                    target_stds=[0.5, 0.5]),
                 reg_decoded_offset=False,
                 conv_cfg=None,
                 norm_cfg=None,
                 loss_offset=dict(type='MSELoss', loss_weight=1.0)):
        """Build the conv stack, FC stack and final offset regressor.

        Args:
            roi_feat_size: RoI feature size; expanded with ``_pair`` and used
                to size the first fully connected layer.
            in_channels: Input channels of the first convolution.
            num_convs: Number of 3x3 convolutions (padding 1) to stack.
            num_fcs: Number of fully connected layers after the convs.
            reg_num: Output dimension of the final ``fc_offset`` layer.
            conv_out_channels: Output channels of every convolution.
            fc_out_channels: Output features of every fully connected layer.
            offset_coordinate: Stored as ``self.offset_coordinate``.
            offset_coder: Config passed to ``build_bbox_coder``.
            reg_decoded_offset: Stored as ``self.reg_decoded_offset``.
            conv_cfg: Stored as ``self.conv_cfg``; not used when the layers
                are built here.
            norm_cfg: Stored as ``self.norm_cfg``; not used when the layers
                are built here.
            loss_offset: Config passed to ``build_loss``.
        """
        super(OffsetHead, self).__init__()
        self.in_channels = in_channels
        self.conv_out_channels = conv_out_channels
        self.fc_out_channels = fc_out_channels
        self.offset_coordinate = offset_coordinate
        self.reg_decoded_offset = reg_decoded_offset
        self.reg_num = reg_num
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        self.offset_coder = build_bbox_coder(offset_coder)
        # The loss is built exactly once; a second, redundant build at the
        # end of this method used to overwrite this attribute.
        self.loss_offset = build_loss(loss_offset)

        self.convs = nn.ModuleList()
        for i in range(num_convs):
            # Only the first conv sees the raw input channel count.
            in_channels = (self.in_channels if i == 0 else self.conv_out_channels)
            self.convs.append(
                Conv2d(
                    in_channels,
                    self.conv_out_channels,
                    3,
                    padding=1))

        roi_feat_size = _pair(roi_feat_size)
        roi_feat_area = roi_feat_size[0] * roi_feat_size[1]
        self.fcs = nn.ModuleList()
        for i in range(num_fcs):
            # The first FC layer is sized for conv_out_channels * area
            # input features.
            in_channels = (
                self.conv_out_channels *
                roi_feat_area if i == 0 else self.fc_out_channels)
            self.fcs.append(nn.Linear(in_channels, self.fc_out_channels))

        self.fc_offset = nn.Linear(self.fc_out_channels, self.reg_num)
        self.relu = nn.ReLU()
Exemple #18
0
 def __init__(self, C, in_channels, feat_channels=256,
              anchor_generator=dict(type='AnchorGenerator', scales=[8, 16, 32],
                                    ratios=[0.5, 1.0, 2.0], strides=[4, 8, 16, 32, 64]),
              bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=(.0, .0, .0, .0),
                              target_stds=(1.0, 1.0, 1.0, 1.0)),
              reg_decoded_bbox=False,
              background_label=None,
              FL=dict(type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
              SmoothL1=dict(type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.0),
              train_cfg=None,
              test_cfg=None):
     """Configure the head: losses, coder, assigner/sampler and anchors.

     Args:
         C: Number of classes; the classification output has ``C``
             channels with a sigmoid loss and ``C + 1`` otherwise.
         in_channels: Stored as ``self.in_channels``.
         feat_channels: Stored as ``self.feat_channels``.
         anchor_generator: Config passed to ``build_anchor_generator``.
         bbox_coder: Config passed to ``build_bbox_coder``.
         reg_decoded_bbox: Stored as ``self.reg_decoded_bbox``.
         background_label: Background class index; ``None`` means ``C``.
             Must end up equal to ``0`` or ``C``.
         FL: Classification loss config passed to ``build_loss``.
         SmoothL1: Regression loss config passed to ``build_loss``.
         train_cfg: Training config; when given it supplies
             ``param_lambda``, ``assigner`` and optionally ``sampler``.
         test_cfg: Stored as ``self.test_cfg``.

     Raises:
         ValueError: If the resulting classification channel count is
             not positive.
     """
     super(MIAODHead, self).__init__()
     if train_cfg is not None:
         self.param_lambda = train_cfg.param_lambda
     self.in_channels = in_channels
     self.C = C
     self.feat_channels = feat_channels
     self.use_sigmoid_cls = FL.get('use_sigmoid', False)
     # TODO better way to determine whether sample or not
     losses_without_sampling = ('FocalLoss', 'GHMC', 'QualityFocalLoss')
     self.sampling = FL['type'] not in losses_without_sampling
     # Non-sigmoid classification gets one extra output channel.
     self.cls_out_channels = C if self.use_sigmoid_cls else C + 1
     if self.cls_out_channels <= 0:
         raise ValueError(f'C={C} is too small')
     self.reg_decoded_bbox = reg_decoded_bbox
     if background_label is None:
         background_label = C
     self.background_label = background_label
     # background_label should be either 0 or C
     assert self.background_label == 0 or self.background_label == C
     self.bbox_coder = build_bbox_coder(bbox_coder)
     self.FL = build_loss(FL)
     self.SmoothL1 = build_loss(SmoothL1)
     self.l_imgcls = nn.BCELoss()
     self.train_cfg = train_cfg
     self.test_cfg = test_cfg
     if self.train_cfg:
         self.assigner = build_assigner(self.train_cfg.assigner)
         # use PseudoSampler when sampling is False
         use_cfg_sampler = self.sampling and hasattr(self.train_cfg, 'sampler')
         sampler_cfg = (self.train_cfg.sampler
                        if use_cfg_sampler else dict(type='PseudoSampler'))
         self.sampler = build_sampler(sampler_cfg, context=self)
     self.fp16_enabled = False
     self.anchor_generator = build_anchor_generator(anchor_generator)
     # usually the numbers of anchors for each level are the same
     # except SSD detectors
     self.N = self.anchor_generator.num_base_anchors[0]
     self._init_layers()
Exemple #19
0
    def __init__(self, C=20, in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_generator=dict(type='SSDAnchorGenerator', scale_major=False, input_size=300,
                                       strides=[8, 16, 32, 64, 100, 300],
                                       ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                                       basesize_ratio_range=(0.1, 0.9)),
                 background_label=20,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0],
                                 target_stds=[1.0, 1.0, 1.0, 1.0]),
                 reg_decoded_bbox=False, train_cfg=None, test_cfg=None):
        """Build four per-level 3x3 conv branches plus the training helpers.

        For every entry of ``in_channels`` one regression conv
        (``N[i] * 4`` outputs) and three classification-style convs
        (``N[i] * (C + 1)`` outputs each) are created, where ``N`` is
        ``anchor_generator.num_base_anchors``.

        Args:
            C: Number of classes; one extra background channel is added.
            in_channels: Input channel count of each feature level.
            anchor_generator: Config passed to ``build_anchor_generator``.
            background_label: Background class index; ``None`` means ``C``.
                Must end up equal to ``0`` or ``C``.
            bbox_coder: Config passed to ``build_bbox_coder``.
            reg_decoded_bbox: Stored as ``self.reg_decoded_bbox``.
            train_cfg: Training config; when given it supplies
                ``param_lambda`` and ``assigner``.
            test_cfg: Stored as ``self.test_cfg``.
        """
        super(MIAODHead, self).__init__()
        if train_cfg is not None:
            self.param_lambda = train_cfg.param_lambda
        self.in_channels = in_channels
        self.C = C
        self.cls_out_channels = C + 1  # add background class
        self.anchor_generator = build_anchor_generator(anchor_generator)
        anchors_per_level = self.anchor_generator.num_base_anchors
        self.l_imgcls = nn.BCELoss()

        def _conv3x3(level, outputs_per_anchor):
            """Return a 3x3, padding-1 conv for one feature level."""
            return nn.Conv2d(in_channels[level],
                             anchors_per_level[level] * outputs_per_anchor,
                             kernel_size=3, padding=1)

        reg_branch, cls_branch_1, cls_branch_2, mil_branch = [], [], [], []
        # The four convs of one level are created back to back, level by
        # level, before any ModuleList is registered.
        for level in range(len(in_channels)):
            reg_branch.append(_conv3x3(level, 4))
            cls_branch_1.append(_conv3x3(level, C + 1))
            cls_branch_2.append(_conv3x3(level, C + 1))
            mil_branch.append(_conv3x3(level, C + 1))
        self.f_r_convs = nn.ModuleList(reg_branch)
        self.f_1_convs = nn.ModuleList(cls_branch_1)
        self.f_2_convs = nn.ModuleList(cls_branch_2)
        self.f_mil_convs = nn.ModuleList(mil_branch)

        if background_label is None:
            background_label = C
        self.background_label = background_label
        # background_label should be either 0 or C
        assert self.background_label == 0 or self.background_label == C
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.reg_decoded_bbox = reg_decoded_bbox
        self.use_sigmoid_cls = False
        self.cls_focal_loss = False
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # set sampling=False for anchor_target
        self.sampling = False
        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # SSD sampling=False so use PseudoSampler
            self.sampler = build_sampler(dict(type='PseudoSampler'),
                                         context=self)
        self.fp16_enabled = False
Exemple #20
0
    def __init__(self,
                 with_avg_pool=False,
                 with_cls=True,
                 with_reg=True,
                 roi_feat_size=7,
                 in_channels=256,
                 num_classes=80,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[0.1, 0.1, 0.2, 0.2]),
                 reg_class_agnostic=False,
                 reg_decoded_bbox=False,
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=False,
                               loss_weight=1.0),
                 loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                                loss_weight=1.0)):
        """Build the classification FC and the batch-normalised regression FC.

        Args:
            with_avg_pool: If true, an ``nn.AvgPool2d`` over the RoI feature
                size is created and the FC layers take ``in_channels``
                inputs; otherwise they take ``in_channels * roi_feat_area``.
            with_cls: Whether to create ``fc_cls``.
            with_reg: Whether to create ``fc_reg``.
            roi_feat_size: RoI feature size, expanded with ``_pair``.
            in_channels: Channel count of the incoming RoI features.
            num_classes: Number of classes; ``fc_cls`` has one more output.
            bbox_coder: Config passed to ``build_bbox_coder``.
            reg_class_agnostic: If true the regression branch has 4 outputs,
                otherwise ``4 * num_classes``.
            reg_decoded_bbox: Stored as ``self.reg_decoded_bbox``.
            loss_cls: Config passed to ``build_loss``.
            loss_bbox: Config passed to ``build_loss``.

        Raises:
            AssertionError: If both ``with_cls`` and ``with_reg`` are false.
        """
        super(BBoxHeadBN, self).__init__()
        assert with_cls or with_reg
        self.with_avg_pool = with_avg_pool
        self.with_cls = with_cls
        self.with_reg = with_reg
        self.roi_feat_size = _pair(roi_feat_size)
        self.roi_feat_area = self.roi_feat_size[0] * self.roi_feat_size[1]
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.reg_class_agnostic = reg_class_agnostic
        self.reg_decoded_bbox = reg_decoded_bbox
        self.fp16_enabled = False

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        in_channels = self.in_channels
        if self.with_avg_pool:
            self.avg_pool = nn.AvgPool2d(self.roi_feat_size)
        else:
            in_channels *= self.roi_feat_area
        if self.with_cls:
            # need to add background class
            self.fc_cls = nn.Linear(in_channels, num_classes + 1)
        if self.with_reg:
            out_dim_reg = 4 if reg_class_agnostic else 4 * num_classes
            # Both layers are sized from the computed dimensions:
            #  * the Linear takes the same input width as ``fc_cls``
            #    (assumes both branches receive the same features --
            #    TODO confirm against forward());
            #  * BatchNorm1d must normalise ``out_dim_reg`` features, which
            #    is 4 only in the class-agnostic case.
            self.fc_reg = nn.Sequential(
                nn.Linear(in_channels, out_dim_reg),
                nn.BatchNorm1d(out_dim_reg))
        self.debug_imgs = None
    def __init__(self,
                 anchor_generator,
                 in_channels,
                 kernel_size=3,
                 norm_cfg=dict(type='BN'),
                 weighted_sum=False,
                 bbox_coder=dict(type='DeltaXYWHBBoxCoder',
                                 target_means=[0., 0., 0., 0.],
                                 target_stds=[1., 1., 1., 1.]),
                 loss_cls=dict(type='CrossEntropyLoss',
                               reduction='sum',
                               loss_weight=1.0),
                 loss_bbox=dict(type='L1Loss',
                                reduction='sum',
                                loss_weight=1.2),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=None,
                 *args,
                 **kwargs):
        """Build one classification and one regression head per input level.

        Args:
            anchor_generator: Config passed to ``build_prior_generator``.
            in_channels: Sequence of channel counts, one per feature level.
            kernel_size: Kernel size forwarded to every ``CorrelationHead``.
            norm_cfg: Norm config forwarded to every ``CorrelationHead``.
            weighted_sum: If true, learnable per-level weights
                (``cls_weight`` / ``reg_weight``, initialised to ones) are
                created.
            bbox_coder: Config passed to ``build_bbox_coder``.
            loss_cls: Config passed to ``build_loss``.
            loss_bbox: Config passed to ``build_loss``.
            train_cfg: Training config providing ``assigner`` and
                ``sampler``. When it is ``None`` (or otherwise falsy) no
                assigner/sampler is built, so the head can be constructed
                without a training config.
            test_cfg: Stored as ``self.test_cfg``.
            init_cfg: Forwarded to the parent initialiser.
            *args: Accepted but not used by this method.
            **kwargs: Accepted but not used by this method.
        """
        super(SiameseRPNHead, self).__init__(init_cfg)
        self.anchor_generator = build_prior_generator(anchor_generator)
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # ``train_cfg`` defaults to None; dereferencing it unconditionally
        # raised AttributeError, so the training helpers are only built when
        # a config is actually supplied.
        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            self.sampler = build_sampler(self.train_cfg.sampler)
        self.fp16_enabled = False

        self.cls_heads = nn.ModuleList()
        self.reg_heads = nn.ModuleList()
        for channels in in_channels:
            # 2 outputs per base anchor for classification, 4 for regression.
            self.cls_heads.append(
                CorrelationHead(channels, channels,
                                2 * self.anchor_generator.num_base_anchors[0],
                                kernel_size, norm_cfg))
            self.reg_heads.append(
                CorrelationHead(channels, channels,
                                4 * self.anchor_generator.num_base_anchors[0],
                                kernel_size, norm_cfg))

        self.weighted_sum = weighted_sum
        if self.weighted_sum:
            self.cls_weight = nn.Parameter(torch.ones(len(in_channels)))
            self.reg_weight = nn.Parameter(torch.ones(len(in_channels)))

        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)
    def __init__(self,
                 num_classes,
                 cls_in_channels=256,
                 reg_in_channels=256,
                 roi_feat_size=7,
                 reg_feat_up_ratio=2,
                 reg_pre_kernel=3,
                 reg_post_kernel=3,
                 reg_pre_num=2,
                 reg_post_num=1,
                 cls_out_channels=1024,
                 reg_offset_out_channels=256,
                 reg_cls_out_channels=256,
                 num_cls_fcs=1,
                 num_reg_fcs=0,
                 reg_class_agnostic=True,
                 norm_cfg=None,
                 bbox_coder=dict(
                     type='BucketingBBoxCoder',
                     num_buckets=14,
                     scale_factor=1.7),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=False,
                     loss_weight=1.0),
                 loss_bbox_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_bbox_reg=dict(
                     type='SmoothL1Loss', beta=0.1, loss_weight=1.0)):
        """Assemble the classification branch and the bucketed regression branch.

        Args:
            num_classes: Number of classes; ``fc_cls`` has one more output.
            cls_in_channels: Input channels of the classification FC branch.
            reg_in_channels: Channels used by every regression conv.
            roi_feat_size: RoI feature size; multiplied by the up-sampling
                ratio it must equal ``bbox_coder['num_buckets']``.
            reg_feat_up_ratio: Up-sampling ratio (cast to ``int``); must be
                at least 2.
            reg_pre_kernel: Kernel size of the pre-convolutions.
            reg_post_kernel: Long-side kernel size of the 1xK / Kx1
                post-convolutions.
            reg_pre_num: Number of pre-convolutions.
            reg_post_num: Number of post-convolutions per direction.
            cls_out_channels: Output width of the classification FC branch.
            reg_offset_out_channels: Output width of the offset FC branch.
            reg_cls_out_channels: Output width of the bucket-cls FC branch.
            num_cls_fcs: Number of FCs in the classification branch.
            num_reg_fcs: Number of FCs in each regression FC branch.
            reg_class_agnostic: Must be true (asserted).
            norm_cfg: Norm config forwarded to every ``ConvModule``.
            bbox_coder: Config passed to ``build_bbox_coder``.
            loss_cls: Config passed to ``build_loss``.
            loss_bbox_cls: Config passed to ``build_loss``.
            loss_bbox_reg: Config passed to ``build_loss``.
        """
        super(SABLHead, self).__init__()
        up_ratio = int(reg_feat_up_ratio)
        self.cls_in_channels = cls_in_channels
        self.reg_in_channels = reg_in_channels
        self.roi_feat_size = roi_feat_size
        self.reg_feat_up_ratio = up_ratio
        bucket_count = bbox_coder['num_buckets']
        self.num_buckets = bucket_count
        assert up_ratio // 2 >= 1
        self.up_reg_feat_size = roi_feat_size * up_ratio
        assert self.up_reg_feat_size == bbox_coder['num_buckets']
        self.reg_pre_kernel = reg_pre_kernel
        self.reg_post_kernel = reg_post_kernel
        self.reg_pre_num = reg_pre_num
        self.reg_post_num = reg_post_num
        self.num_classes = num_classes
        self.cls_out_channels = cls_out_channels
        self.reg_offset_out_channels = reg_offset_out_channels
        self.reg_cls_out_channels = reg_cls_out_channels
        self.num_cls_fcs = num_cls_fcs
        self.num_reg_fcs = num_reg_fcs
        self.reg_class_agnostic = reg_class_agnostic
        assert self.reg_class_agnostic
        self.norm_cfg = norm_cfg

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox_cls = build_loss(loss_bbox_cls)
        self.loss_bbox_reg = build_loss(loss_bbox_reg)

        self.cls_fcs = self._add_fc_branch(
            num_cls_fcs, cls_in_channels, roi_feat_size, cls_out_channels)

        self.side_num = int(np.ceil(bucket_count / 2))

        def _upsampler():
            """Return a 1-D transposed conv with kernel == stride == ratio."""
            return nn.ConvTranspose1d(
                reg_in_channels, reg_in_channels, up_ratio, stride=up_ratio)

        def _relu_conv(kernel, pad):
            """Return a channel-preserving ConvModule followed by ReLU."""
            return ConvModule(
                reg_in_channels,
                reg_in_channels,
                kernel_size=kernel,
                padding=pad,
                norm_cfg=norm_cfg,
                act_cfg=dict(type='ReLU'))

        if up_ratio > 1:
            self.upsample_x = _upsampler()
            self.upsample_y = _upsampler()

        self.reg_pre_convs = nn.ModuleList([
            _relu_conv(reg_pre_kernel, reg_pre_kernel // 2)
            for _ in range(reg_pre_num)
        ])

        # Direction-specific post convs: 1xK along x, Kx1 along y.
        self.reg_post_conv_xs = nn.ModuleList([
            _relu_conv((1, reg_post_kernel), (0, reg_post_kernel // 2))
            for _ in range(reg_post_num)
        ])
        self.reg_post_conv_ys = nn.ModuleList([
            _relu_conv((reg_post_kernel, 1), (reg_post_kernel // 2, 0))
            for _ in range(reg_post_num)
        ])

        self.reg_conv_att_x = nn.Conv2d(reg_in_channels, 1, 1)
        self.reg_conv_att_y = nn.Conv2d(reg_in_channels, 1, 1)

        self.fc_cls = nn.Linear(cls_out_channels, num_classes + 1)
        self.relu = nn.ReLU(inplace=True)

        self.reg_cls_fcs = self._add_fc_branch(
            num_reg_fcs, reg_in_channels, 1, reg_cls_out_channels)
        self.reg_offset_fcs = self._add_fc_branch(
            num_reg_fcs, reg_in_channels, 1, reg_offset_out_channels)
        self.fc_reg_cls = nn.Linear(reg_cls_out_channels, 1)
        self.fc_reg_offset = nn.Linear(reg_offset_out_channels, 1)
    def __init__(self,
                 num_classes,
                 seg_in_channels,
                 part_in_channels,
                 seg_conv_channels=None,
                 part_conv_channels=None,
                 merge_conv_channels=None,
                 down_conv_channels=None,
                 shared_fc_channels=None,
                 cls_channels=None,
                 reg_channels=None,
                 dropout_ratio=0.1,
                 roi_feat_size=14,
                 with_corner_loss=True,
                 bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
                 loss_bbox=dict(type='SmoothL1Loss',
                                beta=1.0 / 9.0,
                                loss_weight=2.0),
                 loss_cls=dict(type='CrossEntropyLoss',
                               use_sigmoid=True,
                               reduction='none',
                               loss_weight=1.0)):
        """Build the sparse-conv trunk, shared FCs and cls/reg branches.

        Args:
            num_classes: Stored as ``self.num_classes``.
            seg_in_channels: Input channels of the first seg conv.
            part_in_channels: Input channels of the first part conv.
            seg_conv_channels: Output channels of each seg conv, in order.
            part_conv_channels: Output channels of each part conv, in order.
            merge_conv_channels: Output channels of each merge conv.
            down_conv_channels: Output channels of each down conv; the last
                entry must equal ``shared_fc_channels[0]`` (asserted).
            shared_fc_channels: Channel list of the shared FC stack; entry 0
                sizes the stack's input, entries 1.. are layer outputs.
            cls_channels: Hidden channels of the classification branch.
            reg_channels: Hidden channels of the regression branch.
            dropout_ratio: Dropout probability used in the shared FC stack
                and in both branches.
            roi_feat_size: RoI feature size; halved (integer division) to
                size the shared FC input.
            with_corner_loss: Stored as ``self.with_corner_loss``.
            bbox_coder: Config passed to ``build_bbox_coder``; its
                ``code_size`` is the regression output width.
            conv_cfg: Conv config for the ``ConvModule`` layers.
            norm_cfg: Norm config for sparse conv modules and ``ConvModule``
                layers.
            loss_bbox: Config passed to ``build_loss``.
            loss_cls: Config passed to ``build_loss``; its ``use_sigmoid``
                entry is stored as ``self.use_sigmoid_cls``.

        NOTE(review): the ``*_channels`` arguments default to ``None`` but
        are indexed / iterated below, so callers appear to be required to
        pass real sequences -- confirm.
        """
        super(PartA2BboxHead, self).__init__()
        self.num_classes = num_classes
        self.with_corner_loss = with_corner_loss
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_bbox = build_loss(loss_bbox)
        self.loss_cls = build_loss(loss_cls)
        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

        # The down-conv output feeds the shared FC stack, so widths must match.
        assert down_conv_channels[-1] == shared_fc_channels[0]

        # init layers
        # Part branch: chain of SubMConv3d modules, each with its own
        # indice_key ('rcnn_part<i>').
        part_channel_last = part_in_channels
        part_conv = []
        for i, channel in enumerate(part_conv_channels):
            part_conv.append(
                make_sparse_convmodule(part_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key=f'rcnn_part{i}',
                                       conv_type='SubMConv3d'))
            part_channel_last = channel
        self.part_conv = spconv.SparseSequential(*part_conv)

        # Seg branch: same structure as the part branch, keyed 'rcnn_seg<i>'.
        seg_channel_last = seg_in_channels
        seg_conv = []
        for i, channel in enumerate(seg_conv_channels):
            seg_conv.append(
                make_sparse_convmodule(seg_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key=f'rcnn_seg{i}',
                                       conv_type='SubMConv3d'))
            seg_channel_last = channel
        self.seg_conv = spconv.SparseSequential(*seg_conv)

        # Container registered now; its children are added further below.
        self.conv_down = spconv.SparseSequential()

        # Merge convs take part + seg channels as input (sum of the two
        # branch widths). All of them share the indice_key 'rcnn_down0'.
        merge_conv_channel_last = part_channel_last + seg_channel_last
        merge_conv = []
        for i, channel in enumerate(merge_conv_channels):
            merge_conv.append(
                make_sparse_convmodule(merge_conv_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key='rcnn_down0'))
            merge_conv_channel_last = channel

        # Down convs continue from the last merge width and all share the
        # indice_key 'rcnn_down1'.
        down_conv_channel_last = merge_conv_channel_last
        conv_down = []
        for i, channel in enumerate(down_conv_channels):
            conv_down.append(
                make_sparse_convmodule(down_conv_channel_last,
                                       channel,
                                       3,
                                       padding=1,
                                       norm_cfg=norm_cfg,
                                       indice_key='rcnn_down1'))
            down_conv_channel_last = channel

        # conv_down = merge convs -> sparse 3D max pool (k=2, s=2) -> down
        # convs, registered under these exact child names.
        self.conv_down.add_module('merge_conv',
                                  spconv.SparseSequential(*merge_conv))
        self.conv_down.add_module(
            'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))
        self.conv_down.add_module('down_conv',
                                  spconv.SparseSequential(*conv_down))

        shared_fc_list = []
        # presumably halved because of the stride-2 max pool above -- the
        # shared FC input is channels * pool_size^3.
        pool_size = roi_feat_size // 2
        pre_channel = shared_fc_channels[0] * pool_size**3
        for k in range(1, len(shared_fc_channels)):
            shared_fc_list.append(
                ConvModule(pre_channel,
                           shared_fc_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = shared_fc_channels[k]

            # Dropout goes between shared FC layers, never after the last
            # one, and only for a strictly positive ratio.
            if k != len(shared_fc_channels) - 1 and dropout_ratio > 0:
                shared_fc_list.append(nn.Dropout(dropout_ratio))

        self.shared_fc = nn.Sequential(*shared_fc_list)

        # Classification layer
        channel_in = shared_fc_channels[-1]
        cls_channel = 1  # single-channel classification output
        cls_layers = []
        pre_channel = channel_in
        for k in range(0, len(cls_channels)):
            cls_layers.append(
                ConvModule(pre_channel,
                           cls_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = cls_channels[k]
        # Final projection: no norm_cfg passed and no activation.
        cls_layers.append(
            ConvModule(pre_channel,
                       cls_channel,
                       1,
                       padding=0,
                       conv_cfg=conv_cfg,
                       act_cfg=None))
        # Note the ``>=``: unlike the shared FC stack, a Dropout is inserted
        # at index 1 even when dropout_ratio is 0, which fixes the layer
        # indices inside the Sequential.
        if dropout_ratio >= 0:
            cls_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_cls = nn.Sequential(*cls_layers)

        # Regression layer
        reg_layers = []
        pre_channel = channel_in
        for k in range(0, len(reg_channels)):
            reg_layers.append(
                ConvModule(pre_channel,
                           reg_channels[k],
                           1,
                           padding=0,
                           conv_cfg=conv_cfg,
                           norm_cfg=norm_cfg,
                           inplace=True))
            pre_channel = reg_channels[k]
        # Final projection to the coder's code size: no norm_cfg passed and
        # no activation.
        reg_layers.append(
            ConvModule(pre_channel,
                       self.bbox_coder.code_size,
                       1,
                       padding=0,
                       conv_cfg=conv_cfg,
                       act_cfg=None))
        # Same ``>=`` behaviour as the classification branch.
        if dropout_ratio >= 0:
            reg_layers.insert(1, nn.Dropout(dropout_ratio))

        self.conv_reg = nn.Sequential(*reg_layers)

        self.init_weights()
    def __init__(
            self,
            num_classes,
            in_channels,
            feat_channels=256,
            approx_anchor_generator=dict(
                type='AnchorGenerator',
                octave_base_scale=8,
                scales_per_octave=3,
                ratios=[0.5, 1.0, 2.0],
                strides=[4, 8, 16, 32, 64]),
            square_anchor_generator=dict(
                type='AnchorGenerator',
                ratios=[1.0],
                scales=[8],
                strides=[4, 8, 16, 32, 64]),
            anchor_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0]
            ),
            bbox_coder=dict(
                type='DeltaXYWHBBoxCoder',
                target_means=[.0, .0, .0, .0],
                target_stds=[1.0, 1.0, 1.0, 1.0]
            ),
            reg_decoded_bbox=False,
            deform_groups=4,
            loc_filter_thr=0.01,
            train_cfg=None,
            test_cfg=None,
            loss_loc=dict(
                type='FocalLoss',
                use_sigmoid=True,
                gamma=2.0,
                alpha=0.25,
                loss_weight=1.0),
            loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0),
            loss_cls=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0),
            loss_bbox=dict(type='SmoothL1Loss', beta=1.0,
                           loss_weight=1.0),
            init_cfg=dict(type='Normal', layer='Conv2d', std=0.01,
                          override=dict(type='Normal',
                                        name='conv_loc',
                                        std=0.01,
                                        bias_prob=0.01))):  # yapf: disable
        """Set up anchor generators, coders, losses and assigners/samplers.

        Args:
            num_classes: Number of classes; the classification output has
                ``num_classes`` channels with a sigmoid loss and one more
                otherwise.
            in_channels: Stored as ``self.in_channels``.
            feat_channels: Stored as ``self.feat_channels``.
            approx_anchor_generator: Config of the approx anchor generator;
                its ``octave_base_scale`` and ``strides`` must match the
                square generator's first scale and strides (asserted).
            square_anchor_generator: Config of the square anchor generator.
            anchor_coder: Config passed to ``build_bbox_coder``.
            bbox_coder: Config passed to ``build_bbox_coder``.
            reg_decoded_bbox: Stored as ``self.reg_decoded_bbox``.
            deform_groups: Stored as ``self.deform_groups``.
            loc_filter_thr: Stored as ``self.loc_filter_thr``.
            train_cfg: Training config; when given it supplies ``assigner``,
                ``ga_assigner`` and optionally ``sampler`` / ``ga_sampler``.
            test_cfg: Stored as ``self.test_cfg``.
            loss_loc: Config passed to ``build_loss``.
            loss_shape: Config passed to ``build_loss``.
            loss_cls: Config passed to ``build_loss``.
            loss_bbox: Config passed to ``build_loss``.
            init_cfg: Forwarded to the initialiser reached through
                ``super(AnchorHead, self)``.
        """
        # Deliberately resolved relative to AnchorHead (not this class).
        super(AnchorHead, self).__init__(init_cfg)
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels
        self.deform_groups = deform_groups
        self.loc_filter_thr = loc_filter_thr

        # build approx_anchor_generator and square_anchor_generator
        approx_cfg, square_cfg = approx_anchor_generator, square_anchor_generator
        assert approx_cfg['octave_base_scale'] == square_cfg['scales'][0]
        assert approx_cfg['strides'] == square_cfg['strides']
        self.approx_anchor_generator = build_prior_generator(approx_cfg)
        self.square_anchor_generator = build_prior_generator(square_cfg)
        approx_priors = self.approx_anchor_generator.num_base_priors
        self.approxs_per_octave = approx_priors[0]

        self.reg_decoded_bbox = reg_decoded_bbox

        # one anchor per location
        square_priors = self.square_anchor_generator.num_base_priors
        self.num_base_priors = square_priors[0]

        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        self.loc_focal_loss = loss_loc['type'] in ('FocalLoss', )
        self.sampling = loss_cls['type'] not in ('FocalLoss', )
        self.ga_sampling = train_cfg is not None and hasattr(
            train_cfg, 'ga_sampler')
        extra_channels = 0 if self.use_sigmoid_cls else 1
        self.cls_out_channels = self.num_classes + extra_channels

        # build bbox_coder
        self.anchor_coder = build_bbox_coder(anchor_coder)
        self.bbox_coder = build_bbox_coder(bbox_coder)

        # build losses
        self.loss_loc = build_loss(loss_loc)
        self.loss_shape = build_loss(loss_shape)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox = build_loss(loss_bbox)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        if self.train_cfg:
            self.assigner = build_assigner(self.train_cfg.assigner)
            # use PseudoSampler when sampling is False
            use_cfg_sampler = self.sampling and hasattr(
                self.train_cfg, 'sampler')
            sampler_cfg = (self.train_cfg.sampler
                           if use_cfg_sampler else dict(type='PseudoSampler'))
            self.sampler = build_sampler(sampler_cfg, context=self)

            self.ga_assigner = build_assigner(self.train_cfg.ga_assigner)
            ga_sampler_cfg = (self.train_cfg.ga_sampler if self.ga_sampling
                              else dict(type='PseudoSampler'))
            self.ga_sampler = build_sampler(ga_sampler_cfg, context=self)

        self.fp16_enabled = False

        self._init_layers()
def test_partial_bin_based_box_coder():
    """Check encode, decode and split_pred of a PartialBinBasedBBoxCoder.

    The coder is built from a config with 10 size classes and 12 direction
    bins. Each stage compares the coder output against hard-coded reference
    tensors (absolute tolerance 1e-4) or against expected output shapes.
    """
    # mean_sizes holds one 3-value row per size class (10 rows, num_sizes=10).
    box_coder_cfg = dict(type='PartialBinBasedBBoxCoder',
                         num_sizes=10,
                         num_dir_bins=12,
                         with_rot=True,
                         mean_sizes=[[2.114256, 1.620300, 0.927272],
                                     [0.791118, 1.279516, 0.718182],
                                     [0.923508, 1.867419, 0.845495],
                                     [0.591958, 0.552978, 0.827272],
                                     [0.699104, 0.454178, 0.75625],
                                     [0.69519, 1.346299, 0.736364],
                                     [0.528526, 1.002642, 1.172878],
                                     [0.500618, 0.632163, 0.683424],
                                     [0.404671, 1.071108, 1.688889],
                                     [0.76584, 1.398258, 0.472728]])
    box_coder = build_bbox_coder(box_coder_cfg)

    # test encode
    # Three ground-truth boxes with 7 values each.
    # NOTE(review): presumably (x, y, z, dx, dy, dz, yaw) - confirm against
    # DepthInstance3DBoxes.
    gt_bboxes = DepthInstance3DBoxes(
        [[0.8308, 4.1168, -1.2035, 2.2493, 1.8444, 1.9245, 1.6486],
         [2.3002, 4.8149, -1.2442, 0.5718, 0.8629, 0.9510, 1.6030],
         [-1.1477, 1.8090, -1.1725, 0.6965, 1.5273, 2.0563, 0.0552]])

    # One label per ground-truth box; the expected size class targets below
    # are identical to these labels.
    gt_labels = torch.tensor([0, 1, 2])
    center_target, size_class_target, size_res_target, dir_class_target, \
        dir_res_target = box_coder.encode(gt_bboxes, gt_labels)
    expected_center_target = torch.tensor([[0.8308, 4.1168, -0.2413],
                                           [2.3002, 4.8149, -0.7687],
                                           [-1.1477, 1.8090, -0.1444]])
    expected_size_class_target = torch.tensor([0, 1, 2])
    expected_size_res_target = torch.tensor([[0.1350, 0.2241, 0.9972],
                                             [-0.2193, -0.4166, 0.2328],
                                             [-0.2270, -0.3401, 1.2108]])
    expected_dir_class_target = torch.tensor([3, 3, 0])
    expected_dir_res_target = torch.tensor([0.0778, 0.0322, 0.0552])
    # Float targets are compared with a tolerance, integer class targets
    # must match exactly.
    assert torch.allclose(center_target, expected_center_target, atol=1e-4)
    assert torch.all(size_class_target == expected_size_class_target)
    assert torch.allclose(size_res_target, expected_size_res_target, atol=1e-4)
    assert torch.all(dir_class_target == expected_dir_class_target)
    assert torch.allclose(dir_res_target, expected_dir_res_target, atol=1e-4)

    # test decode
    # Inputs describe 1 sample with 5 proposals:
    #   center (1, 5, 3), size_class (1, 5, 10), size_res (1, 5, 10, 3),
    #   dir_class (1, 5, 12), dir_res (1, 5, 12).
    center = torch.tensor([[[0.8014, 3.4134,
                             -0.6133], [2.6375, 8.4191, 2.0438],
                            [4.2017, 5.2504,
                             -0.7851], [-1.0088, 5.4107, 1.6293],
                            [1.4837, 4.0268, 0.6222]]])

    # One score per size class for each proposal.
    size_class = torch.tensor([[[
        -1.0061, -2.2788, 1.1322, -4.4380, -11.0526, -2.8113, -2.0642, -7.5886,
        -4.8627, -5.0437
    ],
                                [
                                    -2.2058, -0.3527, -1.9976, 0.8815, -2.7980,
                                    -1.9053, -0.5097, -2.0232, -1.4242, -4.1192
                                ],
                                [
                                    -1.4783, -0.1009, -1.1537, 0.3052, -4.3147,
                                    -2.6529, 0.2729, -0.3755, -2.6479, -3.7548
                                ],
                                [
                                    -6.1809, -3.5024, -8.3273, 1.1252, -4.3315,
                                    -7.8288, -4.6091, -5.8153, 0.7480, -10.1396
                                ],
                                [
                                    -9.0424, -3.7883, -6.0788, -1.8855,
                                    -10.2493, -9.7164, -1.0658, -4.1713,
                                    1.1173, -10.6204
                                ]]])

    # A 3-value size residual for every (proposal, size class) pair.
    size_res = torch.tensor([[[[-9.8976e-02, -5.2152e-01, -7.6421e-02],
                               [1.4593e-01, 5.6099e-01, 8.9421e-02],
                               [5.1481e-02, 3.9280e-01, 1.2705e-01],
                               [3.6869e-01, 7.0558e-01, 1.4647e-01],
                               [4.7683e-01, 3.3644e-01, 2.3481e-01],
                               [8.7346e-02, 8.4987e-01, 3.3265e-01],
                               [2.1393e-01, 8.5585e-01, 9.8948e-02],
                               [7.8530e-02, 5.9694e-02, -8.7211e-02],
                               [1.8551e-01, 1.1308e+00, -5.1864e-01],
                               [3.6485e-01, 7.3757e-01, 1.5264e-01]],
                              [[-9.5593e-01, -5.0455e-01, 1.9554e-01],
                               [-1.0870e-01, 1.8025e-01, 1.0228e-01],
                               [-8.2882e-02, -4.3771e-01, 9.2135e-02],
                               [-4.0840e-02, -5.9841e-02, 1.1982e-01],
                               [7.3448e-02, 5.2045e-02, 1.7301e-01],
                               [-4.0440e-02, 4.9532e-02, 1.1266e-01],
                               [3.5857e-02, 1.3564e-02, 1.0212e-01],
                               [-1.0407e-01, -5.9321e-02, 9.2622e-02],
                               [7.4691e-03, 9.3080e-02, -4.4077e-01],
                               [-6.0121e-02, -1.3381e-01, -6.8083e-02]],
                              [[-9.3970e-01, -9.7823e-01, -5.1075e-02],
                               [-1.2843e-01, -1.8381e-01, 7.1327e-02],
                               [-1.2247e-01, -8.1115e-01, 3.6495e-02],
                               [4.9154e-02, -4.5440e-02, 8.9520e-02],
                               [1.5653e-01, 3.5990e-02, 1.6414e-01],
                               [-5.9621e-02, 4.9357e-03, 1.4264e-01],
                               [8.5235e-04, -1.0030e-01, -3.0712e-02],
                               [-3.7255e-02, 2.8996e-02, 5.5545e-02],
                               [3.9298e-02, -4.7420e-02, -4.9147e-01],
                               [-1.1548e-01, -1.5895e-01, -3.9155e-02]],
                              [[-1.8725e+00, -7.4102e-01, 1.0524e+00],
                               [-3.3210e-01, 4.7828e-02, -3.2666e-02],
                               [-2.7949e-01, 5.5541e-02, -1.0059e-01],
                               [-8.5533e-02, 1.4870e-01, -1.6709e-01],
                               [3.8283e-01, 2.6609e-01, 2.1361e-01],
                               [-4.2156e-01, 3.2455e-01, 6.7309e-01],
                               [-2.4336e-02, -8.3366e-02, 3.9913e-01],
                               [8.2142e-03, 4.8323e-02, -1.5247e-01],
                               [-4.8142e-02, -3.0074e-01, -1.6829e-01],
                               [1.3274e-01, -2.3825e-01, -1.8127e-01]],
                              [[-1.2576e+00, -6.1550e-01, 7.9430e-01],
                               [-4.7222e-01, 1.5634e+00, -5.9460e-02],
                               [-3.5367e-01, 1.3616e+00, -1.6421e-01],
                               [-1.6611e-02, 2.4231e-01, -9.6188e-02],
                               [5.4486e-01, 4.6833e-01, 5.1151e-01],
                               [-6.1755e-01, 1.0292e+00, 1.2458e+00],
                               [-6.8152e-02, 2.4786e-01, 9.5088e-01],
                               [-4.8745e-02, 1.5134e-01, -9.9962e-02],
                               [2.4485e-03, -7.5991e-02, 1.3545e-01],
                               [4.1608e-01, -1.2093e-01, -3.1643e-01]]]])

    # One score per direction bin for each proposal.
    dir_class = torch.tensor([[[
        -1.0230, -5.1965, -5.2195, 2.4030, -2.7661, -7.3399, -1.1640, -4.0630,
        -5.2940, 0.8245, -3.1869, -6.1743
    ],
                               [
                                   -1.9503, -1.6940, -0.8716, -1.1494, -0.8196,
                                   0.2862, -0.2921, -0.7894, -0.2481, -0.9916,
                                   -1.4304, -1.2466
                               ],
                               [
                                   -1.7435, -1.2043, -0.1265, 0.5083, -0.0717,
                                   -0.9560, -1.6171, -2.6463, -2.3863, -2.1358,
                                   -1.8812, -2.3117
                               ],
                               [
                                   -1.9282, 0.3792, -1.8426, -1.4587, -0.8582,
                                   -3.4639, -3.2133, -3.7867, -7.6781, -6.4459,
                                   -6.2455, -5.4797
                               ],
                               [
                                   -3.1869, 0.4456, -0.5824, 0.9994, -1.0554,
                                   -8.4232, -7.7019, -7.1382, -10.2724,
                                   -7.8229, -8.1860, -8.6194
                               ]]])

    # One direction residual per direction bin for each proposal.
    dir_res = torch.tensor(
        [[[
            1.1022e-01, -2.3750e-01, 2.0381e-01, 1.2177e-01, -2.8501e-01,
            1.5351e-01, 1.2218e-01, -2.0677e-01, 1.4468e-01, 1.1593e-01,
            -2.6864e-01, 1.1290e-01
        ],
          [
              -1.5788e-02, 4.1538e-02, -2.2857e-04, -1.4011e-02, 4.2560e-02,
              -3.1186e-03, -5.0343e-02, 6.8110e-03, -2.6728e-02, -3.2781e-02,
              3.6889e-02, -1.5609e-03
          ],
          [
              1.9004e-02, 5.7105e-03, 6.0329e-02, 1.3074e-02, -2.5546e-02,
              -1.1456e-02, -3.2484e-02, -3.3487e-02, 1.6609e-03, 1.7095e-02,
              1.2647e-05, 2.4814e-02
          ],
          [
              1.4482e-01, -6.3083e-02, 5.8307e-02, 9.1396e-02, -8.4571e-02,
              4.5890e-02, 5.6243e-02, -1.2448e-01, -9.5244e-02, 4.5746e-02,
              -1.7390e-02, 9.0267e-02
          ],
          [
              1.8065e-01, -2.0078e-02, 8.5401e-02, 1.0784e-01, -1.2495e-01,
              2.2796e-02, 1.1310e-01, -8.4364e-02, -1.1904e-01, 6.1180e-02,
              -1.8109e-02, 1.1229e-01
          ]]])
    # decode takes all predictions bundled in a single dict.
    bbox_out = dict(center=center,
                    size_class=size_class,
                    size_res=size_res,
                    dir_class=dir_class,
                    dir_res=dir_res)

    bbox3d = box_coder.decode(bbox_out)
    # Expected result has shape (1, 5, 7); its first three columns equal the
    # input centers.
    expected_bbox3d = torch.tensor(
        [[[0.8014, 3.4134, -0.6133, 0.9750, 2.2602, 0.9725, 1.6926],
          [2.6375, 8.4191, 2.0438, 0.5511, 0.4931, 0.9471, 2.6149],
          [4.2017, 5.2504, -0.7851, 0.6411, 0.5075, 0.9168, 1.5839],
          [-1.0088, 5.4107, 1.6293, 0.5064, 0.7017, 0.6602, 0.4605],
          [1.4837, 4.0268, 0.6222, 0.4071, 0.9951, 1.8243, 1.6786]]])
    assert torch.allclose(bbox3d, expected_bbox3d, atol=1e-4)

    # test split_pred
    # Random input: only the output shapes are checked here. The 79 channels
    # match the sum of the asserted widths:
    # 2 (obj) + 3 (center) + 12 + 12 (dir) + 10 + 10 * 3 (size) + 10 (sem).
    box_preds = torch.rand(2, 79, 256)
    base_xyz = torch.rand(2, 256, 3)
    results = box_coder.split_pred(box_preds, base_xyz)
    obj_scores = results['obj_scores']
    center = results['center']
    dir_class = results['dir_class']
    dir_res_norm = results['dir_res_norm']
    dir_res = results['dir_res']
    size_class = results['size_class']
    size_res_norm = results['size_res_norm']
    size_res = results['size_res']
    sem_scores = results['sem_scores']
    # The 256-long last axis of box_preds becomes the second axis of every
    # output.
    assert obj_scores.shape == torch.Size([2, 256, 2])
    assert center.shape == torch.Size([2, 256, 3])
    assert dir_class.shape == torch.Size([2, 256, 12])
    assert dir_res_norm.shape == torch.Size([2, 256, 12])
    assert dir_res.shape == torch.Size([2, 256, 12])
    assert size_class.shape == torch.Size([2, 256, 10])
    assert size_res_norm.shape == torch.Size([2, 256, 10, 3])
    assert size_res.shape == torch.Size([2, 256, 10, 3])
    assert sem_scores.shape == torch.Size([2, 256, 10])
Exemple #26
0
    def __init__(self,
                 num_classes=80,
                 in_channels=(512, 1024, 512, 256, 256, 256),
                 anchor_generator=dict(
                     type='SSDAnchorGenerator',
                     scale_major=False,
                     input_size=300,
                     strides=[8, 16, 32, 64, 100, 300],
                     ratios=([2], [2, 3], [2, 3], [2, 3], [2], [2]),
                     basesize_ratio_range=(0.1, 0.9)),
                 background_label=None,
                 bbox_coder=dict(
                     type='DeltaXYWHBBoxCoder',
                     target_means=[.0, .0, .0, .0],
                     target_stds=[1.0, 1.0, 1.0, 1.0],
                 ),
                 reg_decoded_bbox=False,
                 depthwise_heads=False,
                 depthwise_heads_activations='relu6',
                 loss_balancing=False,
                 train_cfg=None,
                 test_cfg=None):
        """Build one regression and one classification branch per level.

        Args:
            num_classes (int): Number of foreground classes; one extra
                background channel is added to the classification output.
            in_channels (Sequence[int]): Input channels of each feature
                level. One pair of branches is created per entry.
            anchor_generator (dict): Config passed to
                ``build_anchor_generator``; its ``num_base_anchors`` gives
                the per-level anchor count.
            background_label (int | None): Label of the background class.
                ``None`` means ``num_classes``. Must be 0 or ``num_classes``.
            bbox_coder (dict): Config passed to ``build_bbox_coder``.
            reg_decoded_bbox (bool): Stored as-is on the instance.
            depthwise_heads (bool): If True, every branch is a depthwise
                3x3 conv + BatchNorm + activation + 1x1 conv stack instead
                of a single 3x3 conv.
            depthwise_heads_activations (str): ``'relu'`` or ``'relu6'``;
                only looked up when ``depthwise_heads`` is True.
            loss_balancing (bool): If True, a 2-element parameter
                ``loss_weights`` initialised to zero is registered.
            train_cfg (dict | None): Training config. When truthy, its
                ``assigner`` entry is built and a PseudoSampler is used.
            test_cfg (dict | None): Testing config, stored as-is.
        """
        # The lookup starts after AnchorHead in the MRO, so
        # AnchorHead.__init__ itself is not executed.
        super(AnchorHead, self).__init__()
        self.num_classes = num_classes
        self.in_channels = in_channels
        self.cls_out_channels = num_classes + 1  # add background class
        self.anchor_generator = build_anchor_generator(anchor_generator)
        num_anchors = self.anchor_generator.num_base_anchors

        def make_branch(level, outputs_per_anchor):
            """Create the prediction branch of one level."""
            if not depthwise_heads:
                return nn.Conv2d(in_channels[level],
                                 num_anchors[level] * outputs_per_anchor,
                                 kernel_size=3,
                                 padding=1)
            activation_class = {
                'relu': nn.ReLU,
                'relu6': nn.ReLU6,
            }[depthwise_heads_activations]
            channels = in_channels[level]
            return nn.Sequential(
                # depthwise 3x3: one group per input channel
                nn.Conv2d(channels,
                          channels,
                          kernel_size=3,
                          padding=1,
                          groups=channels),
                nn.BatchNorm2d(channels),
                activation_class(inplace=True),
                # pointwise 1x1 projection to the prediction channels
                nn.Conv2d(channels,
                          num_anchors[level] * outputs_per_anchor,
                          kernel_size=1,
                          padding=0))

        # Branches are created level by level, regression first, so the
        # order in which layers get initialised stays the same.
        reg_convs, cls_convs = [], []
        for level, _ in enumerate(in_channels):
            reg_convs.append(make_branch(level, 4))
            cls_convs.append(make_branch(level, num_classes + 1))
        self.reg_convs = nn.ModuleList(reg_convs)
        self.cls_convs = nn.ModuleList(cls_convs)

        if background_label is None:
            self.background_label = num_classes
        else:
            self.background_label = background_label
        # background_label should be either 0 or num_classes
        assert (self.background_label == 0
                or self.background_label == num_classes)

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.reg_decoded_bbox = reg_decoded_bbox
        self.use_sigmoid_cls = False
        self.cls_focal_loss = False
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        # no sampling for anchor targets
        self.sampling = False
        if train_cfg:
            self.assigner = build_assigner(train_cfg.assigner)
            # sampling is disabled, so a PseudoSampler is always used
            self.sampler = build_sampler(
                dict(type='PseudoSampler'), context=self)
        self.fp16_enabled = False
        self.loss_balancing = loss_balancing
        if loss_balancing:
            # the tensor starts uninitialised, so zero both entries
            self.loss_weights = torch.nn.Parameter(torch.FloatTensor(2))
            self.loss_weights.data.zero_()
Exemple #27
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 out_channels=(1024, 512, 256),
                 anchor_generator=dict(
                     type='YOLOAnchorGenerator',
                     base_sizes=[[(116, 90), (156, 198), (373, 326)],
                                 [(30, 61), (62, 45), (59, 119)],
                                 [(10, 13), (16, 30), (33, 23)]],
                     strides=[32, 16, 8]),
                 bbox_coder=dict(type='YOLOBBoxCoder'),
                 featmap_strides=[32, 16, 8],
                 one_hot_smoother=0.,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 loss_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_conf=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_xy=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.0),
                 loss_wh=dict(type='MSELoss', loss_weight=1.0),
                 train_cfg=None,
                 test_cfg=None,
                 init_cfg=dict(
                     type='Normal',
                     std=0.01,
                     override=dict(name='convs_pred'))):
        """Store the head configuration and build its components.

        Args:
            num_classes (int): Number of classes, stored as-is.
            in_channels (Sequence[int]): Input channels per level.
            out_channels (Sequence[int]): Output channels per level.
            anchor_generator (dict): Config passed to
                ``build_anchor_generator``.
            bbox_coder (dict): Config passed to ``build_bbox_coder``.
            featmap_strides (Sequence[int]): One stride per level.
                ``in_channels``, ``out_channels``, ``featmap_strides`` and
                the generator's ``num_base_anchors`` must share one length.
            one_hot_smoother (float): Stored as-is.
            conv_cfg (dict | None): Stored as-is.
            norm_cfg (dict): Stored as-is.
            act_cfg (dict): Stored as-is.
            loss_cls (dict): Config passed to ``build_loss``.
            loss_conf (dict): Config passed to ``build_loss``.
            loss_xy (dict): Config passed to ``build_loss``.
            loss_wh (dict): Config passed to ``build_loss``.
            train_cfg (dict | None): Training config. When truthy, its
                ``assigner`` is built, and its ``sampler`` if present
                (PseudoSampler otherwise).
            test_cfg (dict | None): Testing config, stored as-is.
            init_cfg (dict): Forwarded to the parent constructor.
        """
        super(YOLOV3Head, self).__init__(init_cfg)
        # every per-level argument must describe the same number of levels
        assert len(in_channels) == len(out_channels) == len(featmap_strides)

        self.num_classes = num_classes
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.featmap_strides = featmap_strides
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        if train_cfg:
            self.assigner = build_assigner(train_cfg.assigner)
            # fall back to PseudoSampler when no sampler is configured
            sampler_cfg = (
                train_cfg.sampler if hasattr(train_cfg, 'sampler') else
                dict(type='PseudoSampler'))
            self.sampler = build_sampler(sampler_cfg, context=self)
        self.fp16_enabled = False

        self.one_hot_smoother = one_hot_smoother

        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg
        self.act_cfg = act_cfg

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.anchor_generator = build_anchor_generator(anchor_generator)

        self.loss_cls = build_loss(loss_cls)
        self.loss_conf = build_loss(loss_conf)
        self.loss_xy = build_loss(loss_xy)
        self.loss_wh = build_loss(loss_wh)

        # The first level's anchor count is used for all levels; the counts
        # are usually equal across levels (SSD detectors are the exception).
        anchors_per_level = self.anchor_generator.num_base_anchors
        self.num_anchors = anchors_per_level[0]
        assert len(anchors_per_level) == len(featmap_strides)
        self._init_layers()
Exemple #28
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 stacked_convs=4,
                 feat_channels=256,
                 approx_anchor_generator=dict(
                     type='AnchorGenerator',
                     octave_base_scale=4,
                     scales_per_octave=3,
                     ratios=[0.5, 1.0, 2.0],
                     strides=[8, 16, 32, 64, 128]),
                 square_anchor_generator=dict(
                     type='AnchorGenerator',
                     ratios=[1.0],
                     scales=[4],
                     strides=[8, 16, 32, 64, 128]),
                 conv_cfg=None,
                 norm_cfg=None,
                 bbox_coder=dict(
                     type='BucketingBBoxCoder',
                     num_buckets=14,
                     scale_factor=3.0),
                 reg_decoded_bbox=False,
                 background_label=None,
                 train_cfg=None,
                 test_cfg=None,
                 loss_cls=dict(
                     type='FocalLoss',
                     use_sigmoid=True,
                     gamma=2.0,
                     alpha=0.25,
                     loss_weight=1.0),
                 loss_bbox_cls=dict(
                     type='CrossEntropyLoss',
                     use_sigmoid=True,
                     loss_weight=1.5),
                 loss_bbox_reg=dict(
                     type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=1.5)):
        """Store the head configuration and build its components.

        Args:
            num_classes (int): Number of classes.
            in_channels (int): Stored as-is.
            stacked_convs (int): Stored as-is.
            feat_channels (int): Stored as-is.
            approx_anchor_generator (dict): Config passed to
                ``build_anchor_generator``. Its ``octave_base_scale`` must
                equal the first scale of ``square_anchor_generator`` and
                both must use the same ``strides``.
            square_anchor_generator (dict): Config passed to
                ``build_anchor_generator``.
            conv_cfg (dict | None): Stored as-is.
            norm_cfg (dict | None): Stored as-is.
            bbox_coder (dict): Config passed to ``build_bbox_coder``; its
                ``num_buckets`` entry is also read directly.
            reg_decoded_bbox (bool): Stored as-is.
            background_label (int | None): ``None`` means ``num_classes``.
            train_cfg (dict | None): Training config. When truthy, its
                ``assigner`` is built together with a sampler.
            test_cfg (dict | None): Testing config, stored as-is.
            loss_cls (dict): Config passed to ``build_loss``. Its ``type``
                and ``use_sigmoid`` entries also control sampling and the
                number of classification channels.
            loss_bbox_cls (dict): Config passed to ``build_loss``.
            loss_bbox_reg (dict): Config passed to ``build_loss``.
        """
        super(SABLRetinaHead, self).__init__()
        self.in_channels = in_channels
        self.num_classes = num_classes
        self.feat_channels = feat_channels
        self.num_buckets = bbox_coder['num_buckets']
        # half the bucket count, rounded up
        self.side_num = int(np.ceil(self.num_buckets / 2))

        # both generators have to agree on base scale and strides
        assert (approx_anchor_generator['octave_base_scale'] ==
                square_anchor_generator['scales'][0])
        assert (approx_anchor_generator['strides'] ==
                square_anchor_generator['strides'])

        self.approx_anchor_generator = build_anchor_generator(
            approx_anchor_generator)
        self.square_anchor_generator = build_anchor_generator(
            square_anchor_generator)
        approx_counts = self.approx_anchor_generator.num_base_anchors
        self.approxs_per_octave = approx_counts[0]

        # one anchor per location
        self.num_anchors = 1
        self.stacked_convs = stacked_convs
        self.conv_cfg = conv_cfg
        self.norm_cfg = norm_cfg

        self.reg_decoded_bbox = reg_decoded_bbox
        if background_label is None:
            self.background_label = num_classes
        else:
            self.background_label = background_label

        self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)
        # these classification losses are used without sampling
        self.sampling = loss_cls['type'] not in (
            'FocalLoss', 'GHMC', 'QualityFocalLoss')
        # a non-sigmoid classifier gets one extra output channel
        self.cls_out_channels = (
            num_classes if self.use_sigmoid_cls else num_classes + 1)

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.loss_cls = build_loss(loss_cls)
        self.loss_bbox_cls = build_loss(loss_bbox_cls)
        self.loss_bbox_reg = build_loss(loss_bbox_reg)

        self.train_cfg = train_cfg
        self.test_cfg = test_cfg

        if train_cfg:
            self.assigner = build_assigner(train_cfg.assigner)
            # the configured sampler is only honoured when sampling is on;
            # otherwise a PseudoSampler is used
            use_cfg_sampler = self.sampling and hasattr(train_cfg, 'sampler')
            sampler_cfg = (
                train_cfg.sampler
                if use_cfg_sampler else dict(type='PseudoSampler'))
            self.sampler = build_sampler(sampler_cfg, context=self)

        self.fp16_enabled = False
        self._init_layers()
Exemple #29
0
    def __init__(self,
                 num_classes,
                 suface_matching_cfg,
                 line_matching_cfg,
                 bbox_coder,
                 train_cfg=None,
                 test_cfg=None,
                 gt_per_seed=1,
                 num_proposal=256,
                 feat_channels=(128, 128),
                 primitive_feat_refine_streams=2,
                 primitive_refine_channels=[128, 128, 128],
                 upper_thresh=100.0,
                 surface_thresh=0.5,
                 line_thresh=0.5,
                 conv_cfg=dict(type='Conv1d'),
                 norm_cfg=dict(type='BN1d'),
                 objectness_loss=None,
                 center_loss=None,
                 dir_class_loss=None,
                 dir_res_loss=None,
                 size_class_loss=None,
                 size_res_loss=None,
                 semantic_loss=None,
                 cues_objectness_loss=None,
                 cues_semantic_loss=None,
                 proposal_objectness_loss=None,
                 primitive_center_loss=None):
        """Build the losses, matchers and prediction layers of the head.

        Args:
            num_classes (int): Number of classes; also the number of
                class channels in the final prediction layer.
            suface_matching_cfg (dict): Config passed to
                ``build_sa_module`` for the surface center matcher. The
                last entry of its ``mlp_channels`` sets the width of the
                matching features.
            line_matching_cfg (dict): Config passed to ``build_sa_module``
                for the line center matcher. The last entry of its
                ``mlp_channels`` must equal the surface one.
            bbox_coder (dict): Config passed to ``build_bbox_coder``; its
                ``with_rot``, ``num_dir_bins`` and ``num_sizes`` entries
                are also read directly.
            train_cfg (dict | None): Stored as-is.
            test_cfg (dict | None): Stored as-is.
            gt_per_seed (int): Stored as-is.
            num_proposal (int): Stored as-is.
            feat_channels (tuple[int]): Not used in this constructor.
            primitive_feat_refine_streams (int): Number of conv layers in
                each of the two feature aggregation stacks.
            primitive_refine_channels (list[int]): Output channels of the
                conv layers that precede the final prediction layer.
            upper_thresh (float): Stored as-is.
            surface_thresh (float): Stored as-is.
            line_thresh (float): Stored as-is.
            conv_cfg (dict): Conv config given to every ``ConvModule``.
            norm_cfg (dict): Norm config given to every ``ConvModule``.
            objectness_loss, center_loss, dir_class_loss, dir_res_loss,
            size_class_loss, size_res_loss, semantic_loss,
            cues_objectness_loss, cues_semantic_loss,
            proposal_objectness_loss, primitive_center_loss (dict):
                Loss configs, each passed to ``build_loss``.
        """
        super(H3DBboxHead, self).__init__()
        self.num_classes = num_classes
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.gt_per_seed = gt_per_seed
        self.num_proposal = num_proposal
        self.with_angle = bbox_coder['with_rot']
        self.upper_thresh = upper_thresh
        self.surface_thresh = surface_thresh
        self.line_thresh = line_thresh

        self.objectness_loss = build_loss(objectness_loss)
        self.center_loss = build_loss(center_loss)
        self.dir_class_loss = build_loss(dir_class_loss)
        self.dir_res_loss = build_loss(dir_res_loss)
        self.size_class_loss = build_loss(size_class_loss)
        self.size_res_loss = build_loss(size_res_loss)
        self.semantic_loss = build_loss(semantic_loss)

        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.num_sizes = self.bbox_coder.num_sizes
        self.num_dir_bins = self.bbox_coder.num_dir_bins

        self.cues_objectness_loss = build_loss(cues_objectness_loss)
        self.cues_semantic_loss = build_loss(cues_semantic_loss)
        self.proposal_objectness_loss = build_loss(proposal_objectness_loss)
        self.primitive_center_loss = build_loss(primitive_center_loss)

        # both matchers must produce features of the same width
        assert suface_matching_cfg['mlp_channels'][-1] == \
            line_matching_cfg['mlp_channels'][-1]

        # surface center matching
        self.surface_center_matcher = build_sa_module(suface_matching_cfg)
        # line center matching
        self.line_center_matcher = build_sa_module(line_matching_cfg)

        matching_feat_dims = suface_matching_cfg['mlp_channels'][-1]

        def pointwise_conv(in_dims, out_dims, inplace=True):
            """Kernel-size-1 ConvModule sharing the head's conv/norm cfg."""
            return ConvModule(in_dims,
                              out_dims,
                              1,
                              padding=0,
                              conv_cfg=conv_cfg,
                              norm_cfg=norm_cfg,
                              bias=True,
                              inplace=inplace)

        # Compute the matching scores
        self.matching_conv = pointwise_conv(matching_feat_dims,
                                            matching_feat_dims)
        self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

        # Compute the semantic matching scores
        self.semantic_matching_conv = pointwise_conv(matching_feat_dims,
                                                     matching_feat_dims)
        self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

        # Surface feature aggregation
        surface_layers = [
            pointwise_conv(matching_feat_dims, matching_feat_dims)
            for _ in range(primitive_feat_refine_streams)
        ]
        self.surface_feats_aggregation = nn.Sequential(*surface_layers)

        # Line feature aggregation
        line_layers = [
            pointwise_conv(matching_feat_dims, matching_feat_dims)
            for _ in range(primitive_feat_refine_streams)
        ]
        self.line_feats_aggregation = nn.Sequential(*line_layers)

        # surface center(6) + line center(12)
        prev_channel = 18 * matching_feat_dims
        refine_layers = []
        for refine_channel in primitive_refine_channels:
            refine_layers.append(
                pointwise_conv(prev_channel, refine_channel, inplace=False))
            prev_channel = refine_channel

        # Final object detection layer. Output channels:
        # 2 + 3 + 2 per direction bin + 4 per size class + num_classes
        # (objectness scores, center residual, heading class + residual,
        # size class + residual, class scores).
        conv_out_channel = (2 + 3 + bbox_coder['num_dir_bins'] * 2 +
                            bbox_coder['num_sizes'] * 4 + self.num_classes)
        refine_layers.append(nn.Conv1d(prev_channel, conv_out_channel, 1))
        self.bbox_pred = nn.ModuleList(refine_layers)
Exemple #30
0
    def __init__(self,
                 num_classes,
                 in_channels,
                 bbox_coder,
                 num_decoder_layers,
                 transformerlayers,
                 decoder_self_posembeds=dict(type='ConvBNPositionalEncoding',
                                             input_channel=6,
                                             num_pos_feats=288),
                 decoder_cross_posembeds=dict(type='ConvBNPositionalEncoding',
                                              input_channel=3,
                                              num_pos_feats=288),
                 train_cfg=None,
                 test_cfg=None,
                 num_proposal=128,
                 pred_layer_cfg=None,
                 size_cls_agnostic=True,
                 gt_per_seed=3,
                 sampling_objectness_loss=None,
                 objectness_loss=None,
                 center_loss=None,
                 dir_class_loss=None,
                 dir_res_loss=None,
                 size_class_loss=None,
                 size_res_loss=None,
                 size_reg_loss=None,
                 semantic_loss=None,
                 init_cfg=None):
        super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)
        self.num_classes = num_classes
        self.train_cfg = train_cfg
        self.test_cfg = test_cfg
        self.num_proposal = num_proposal
        self.in_channels = in_channels
        self.num_decoder_layers = num_decoder_layers
        self.size_cls_agnostic = size_cls_agnostic
        self.gt_per_seed = gt_per_seed

        # Transformer decoder layers
        if isinstance(transformerlayers, ConfigDict):
            transformerlayers = [
                copy.deepcopy(transformerlayers)
                for _ in range(num_decoder_layers)
            ]
        else:
            assert isinstance(transformerlayers, list) and \
                   len(transformerlayers) == num_decoder_layers
        self.decoder_layers = nn.ModuleList()
        for i in range(self.num_decoder_layers):
            self.decoder_layers.append(
                build_transformer_layer(transformerlayers[i]))
        self.embed_dims = self.decoder_layers[0].embed_dims
        assert self.embed_dims == decoder_self_posembeds['num_pos_feats']
        assert self.embed_dims == decoder_cross_posembeds['num_pos_feats']

        # bbox_coder
        self.bbox_coder = build_bbox_coder(bbox_coder)
        self.num_sizes = self.bbox_coder.num_sizes
        self.num_dir_bins = self.bbox_coder.num_dir_bins

        # Initial object candidate sampling
        self.gsample_module = GeneralSamplingModule()
        self.fps_module = Points_Sampler([self.num_proposal])
        self.points_obj_cls = PointsObjClsModule(self.in_channels)

        self.fp16_enabled = False

        # initial candidate prediction
        self.conv_pred = BaseConvBboxHead(
            **pred_layer_cfg,
            num_cls_out_channels=self._get_cls_out_channels(),
            num_reg_out_channels=self._get_reg_out_channels())

        # query proj and key proj
        self.decoder_query_proj = nn.Conv1d(self.embed_dims,
                                            self.embed_dims,
                                            kernel_size=1)
        self.decoder_key_proj = nn.Conv1d(self.embed_dims,
                                          self.embed_dims,
                                          kernel_size=1)

        # query position embed
        self.decoder_self_posembeds = nn.ModuleList()
        for _ in range(self.num_decoder_layers):
            self.decoder_self_posembeds.append(
                build_positional_encoding(decoder_self_posembeds))
        # key position embed
        self.decoder_cross_posembeds = nn.ModuleList()
        for _ in range(self.num_decoder_layers):
            self.decoder_cross_posembeds.append(
                build_positional_encoding(decoder_cross_posembeds))

        # Prediction Head
        self.prediction_heads = nn.ModuleList()
        for i in range(self.num_decoder_layers):
            self.prediction_heads.append(
                BaseConvBboxHead(
                    **pred_layer_cfg,
                    num_cls_out_channels=self._get_cls_out_channels(),
                    num_reg_out_channels=self._get_reg_out_channels()))

        self.sampling_objectness_loss = build_loss(sampling_objectness_loss)
        self.objectness_loss = build_loss(objectness_loss)
        self.center_loss = build_loss(center_loss)
        self.dir_res_loss = build_loss(dir_res_loss)
        self.dir_class_loss = build_loss(dir_class_loss)
        self.semantic_loss = build_loss(semantic_loss)
        if self.size_cls_agnostic:
            self.size_reg_loss = build_loss(size_reg_loss)
        else:
            self.size_res_loss = build_loss(size_res_loss)
            self.size_class_loss = build_loss(size_class_loss)