def __init__(self,
             in_channels,
             num_classes=3,
             extra_width=0.2,
             seg_score_thr=0.3,
             init_cfg=None,
             loss_seg=dict(
                 type='FocalLoss',
                 use_sigmoid=True,
                 reduction='sum',
                 gamma=2.0,
                 alpha=0.25,
                 loss_weight=1.0),
             loss_part=dict(
                 type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0)):
    """Set up the point-wise semantic head.

    Args:
        in_channels (int): Input feature width of both linear layers.
        num_classes (int): Stored on the instance as ``num_classes``.
        extra_width (float): Stored on the instance as ``extra_width``.
        seg_score_thr (float): Stored on the instance as
            ``seg_score_thr``.
        init_cfg (dict, optional): Forwarded to the parent constructor.
        loss_seg (dict): Config handed to ``build_loss`` for ``loss_seg``.
        loss_part (dict): Config handed to ``build_loss`` for
            ``loss_part``.
    """
    super(PointwiseSemanticHead, self).__init__(init_cfg=init_cfg)

    # Scalar hyper-parameters are only recorded here.
    self.num_classes = num_classes
    self.seg_score_thr = seg_score_thr
    self.extra_width = extra_width

    # Two linear predictors on the same input: 1 output channel and
    # 3 output channels respectively.
    cls_out_dim, reg_out_dim = 1, 3
    self.seg_cls_layer = nn.Linear(in_channels, cls_out_dim, bias=True)
    self.seg_reg_layer = nn.Linear(in_channels, reg_out_dim, bias=True)

    # Loss modules are built from their configs.
    self.loss_seg = build_loss(loss_seg)
    self.loss_part = build_loss(loss_part)
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             pred_layer_cfg=None,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             dir_res_loss=None,
             size_res_loss=None,
             semantic_loss=None):
    """Build the losses, box coder and prediction layers of the head.

    Args:
        num_classes (int): Stored as ``self.num_classes``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        pred_layer_cfg (dict): Keyword arguments unpacked into
            ``BaseConvBboxHead``.
        conv_cfg (dict): Not used by this constructor.
        norm_cfg (dict): Not used by this constructor.
        dir_res_loss (dict): Config passed to ``build_loss``.
        size_res_loss (dict): Config passed to ``build_loss``.
        semantic_loss (dict): Config passed to ``build_loss``.
    """
    super(BRBboxHead, self).__init__()

    self.num_classes = num_classes
    self.train_cfg, self.test_cfg = train_cfg, test_cfg

    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)

    self.bbox_coder = build_bbox_coder(bbox_coder)

    # Bbox classification and regression: output widths come from the
    # class's own helper methods.
    cls_out_channels = self._get_cls_out_channels()
    reg_out_channels = self._get_reg_out_channels()
    self.conv_pred = BaseConvBboxHead(
        num_cls_out_channels=cls_out_channels,
        num_reg_out_channels=reg_out_channels,
        **pred_layer_cfg)
def __init__(self,
             in_channels=[128],
             tasks=None,
             train_cfg=None,
             test_cfg=None,
             bbox_coder=None,
             common_heads=dict(),
             loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
             loss_bbox=dict(
                 type='L1Loss', reduction='none', loss_weight=0.25),
             separate_head=dict(
                 type='SeparateHead', init_bias=-2.19, final_kernel=3),
             share_conv_channel=64,
             num_heatmap_convs=2,
             conv_cfg=dict(type='Conv2d'),
             norm_cfg=dict(type='BN2d'),
             bias='auto',
             norm_bbox=True):
    """Build the shared convolution and one task head per task.

    Args:
        in_channels (list[int] | int): Input channels passed to the
            shared ``ConvModule``.
        tasks (list[dict]): One dict per task; each must provide a
            ``'class_names'`` entry whose length gives that task's number
            of classes.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``.
        common_heads (dict): Head entries shared by all tasks; deep-copied
            for every task before the ``'heatmap'`` entry is added.
        loss_cls (dict): Config passed to ``build_loss``.
        loss_bbox (dict): Config passed to ``build_loss``.
        separate_head (dict): Base config of each task head. It is
            deep-copied per task, so neither the caller's dict nor the
            shared default is modified.
        share_conv_channel (int): Output channels of the shared conv and
            input channels of every task head.
        num_heatmap_convs (int): Second element of the ``'heatmap'`` head
            entry.
        conv_cfg (dict): Conv config of the shared ``ConvModule``.
        norm_cfg (dict): Norm config of the shared ``ConvModule``.
        bias (str | bool): Bias setting of the shared ``ConvModule``.
        norm_bbox (bool): Stored as ``self.norm_bbox``.
    """
    super(CenterHead, self).__init__()

    num_classes = [len(t['class_names']) for t in tasks]
    self.class_names = [t['class_names'] for t in tasks]
    # Feng Xiang code
    # code begin
    # num_attr = [len(t['attr_names']) for t in tasks]
    # self.attr_names = [t['attr_names'] for t in tasks]
    # code end
    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.norm_bbox = norm_bbox

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    # Independent copy of the per-task class counts.
    self.num_anchor_per_locs = list(num_classes)
    self.fp16_enabled = False

    # a shared convolution
    self.shared_conv = ConvModule(
        in_channels,
        share_conv_channel,
        kernel_size=3,
        padding=1,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        bias=bias)

    self.task_heads = nn.ModuleList()
    for num_cls in num_classes:
        heads = copy.deepcopy(common_heads)
        heads.update(dict(heatmap=(num_cls, num_heatmap_convs)))
        # Work on a private copy: updating ``separate_head`` in place
        # would leak per-task state into the default argument (shared by
        # every instance) and into the caller's config object.
        head_cfg = copy.deepcopy(separate_head)
        head_cfg.update(
            in_channels=share_conv_channel, heads=heads, num_cls=num_cls)
        self.task_heads.append(builder.build_head(head_cfg))
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             vote_module_cfg=None,
             vote_aggregation_cfg=None,
             pred_layer_cfg=None,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             center_loss_mse=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None,
             iou_loss=None):
    """Assemble losses, box coder, voting modules and prediction layers.

    Args:
        num_classes (int): Stored as ``self.num_classes``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``; the
            built coder supplies ``num_sizes`` and ``num_dir_bins``.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        vote_module_cfg (dict): Keyword arguments for ``VoteModule``; its
            ``'gt_per_seed'`` entry is also stored on the head.
        vote_aggregation_cfg (dict): Config passed to ``build_sa_module``;
            its ``'num_point'`` entry is stored as ``self.num_proposal``.
        pred_layer_cfg (dict): Keyword arguments unpacked into
            ``BaseConvBboxHead``.
        conv_cfg (dict): Not used by this constructor.
        norm_cfg (dict): Not used by this constructor.
        objectness_loss (dict): Config passed to ``build_loss``.
        center_loss (dict, optional): Built only when not ``None``.
        center_loss_mse (dict, optional): Built only when not ``None``.
        dir_class_loss (dict): Config passed to ``build_loss``.
        dir_res_loss (dict): Config passed to ``build_loss``.
        size_class_loss (dict, optional): Built only when not ``None``.
        size_res_loss (dict): Config passed to ``build_loss``.
        semantic_loss (dict, optional): Built only when not ``None``.
        iou_loss (dict, optional): Built when given; otherwise
            ``self.iou_loss`` is set to ``None``.
    """
    super(VoteHead, self).__init__()

    self.num_classes = num_classes
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.gt_per_seed = vote_module_cfg['gt_per_seed']
    self.num_proposal = vote_aggregation_cfg['num_point']

    # Losses that are always built.
    self.objectness_loss = build_loss(objectness_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.size_res_loss = build_loss(size_res_loss)

    # Optional losses: the attribute is created only if a config is given.
    for attr_name, loss_cfg in (('size_class_loss', size_class_loss),
                                ('semantic_loss', semantic_loss)):
        if loss_cfg is not None:
            setattr(self, attr_name, build_loss(loss_cfg))

    # ``iou_loss`` is the one optional loss whose attribute always exists.
    self.iou_loss = None if iou_loss is None else build_loss(iou_loss)

    for attr_name, loss_cfg in (('center_loss', center_loss),
                                ('center_loss_mse', center_loss_mse)):
        if loss_cfg is not None:
            setattr(self, attr_name, build_loss(loss_cfg))

    coder = build_bbox_coder(bbox_coder)
    self.bbox_coder = coder
    self.num_sizes = coder.num_sizes
    self.num_dir_bins = coder.num_dir_bins

    self.vote_module = VoteModule(**vote_module_cfg)
    self.vote_aggregation = build_sa_module(vote_aggregation_cfg)
    self.fp16_enabled = False

    # Bbox classification and regression
    cls_out_channels = self._get_cls_out_channels()
    reg_out_channels = self._get_reg_out_channels()
    self.conv_pred = BaseConvBboxHead(
        num_cls_out_channels=cls_out_channels,
        num_reg_out_channels=reg_out_channels,
        **pred_layer_cfg)
def __init__(
        self,
        img_backbone=None,
        pts_backbone=None,
        num_classes=None,
        prelogits_dim=None,
        class_weights=None,
        pretrained=None,
        contrast_criterion=None,
        max_pts=1024,  # max_pts_per_group
        groups=1,  # number of groups per sample
        lambda_contrast=0.1,
        img_fcs=(64, 64, 16),
        pts_fcs=(16, 16, 16),
        train_cfg=None,
        test_cfg=None):
    """Build backbones, segmentation head and contrastive projections.

    Args:
        img_backbone (dict, optional): Backbone config; built only when
            truthy.
        pts_backbone (dict, optional): Backbone config; built only when
            truthy.
        num_classes (int): Output width of the linear ``seg_head``.
        prelogits_dim (int): Input width of the linear ``seg_head``.
        class_weights: Values wrapped with ``torch.tensor`` and stored as
            ``self.class_weights``.
        pretrained: Forwarded to ``self.init_weights``.
        contrast_criterion (dict, optional): Loss config; built only when
            truthy.
        max_pts (int): Stored as ``self.max_pts``.
        groups (int): Stored as ``self.groups``.
        lambda_contrast (float): Stored as ``self.lambda_contrast``.
        img_fcs (tuple[int]): Argument of ``build_mlp`` for ``img_fc``.
        pts_fcs (tuple[int]): Argument of ``build_mlp`` for ``pts_fc``.
        train_cfg (dict, optional): Not used by this constructor.
        test_cfg (dict, optional): Not used by this constructor.
    """
    super(SegFusionContra, self).__init__()

    # Backbones are optional; the attributes exist only when configured.
    if img_backbone:
        self.img_backbone = builder.build_backbone(img_backbone)
    if pts_backbone:
        self.pts_backbone = builder.build_backbone(pts_backbone)

    # NOTE: weights are initialised before the layers below are created.
    self.init_weights(pretrained=pretrained)

    self.seg_head = nn.Linear(prelogits_dim, num_classes)
    self.class_weights = torch.tensor(class_weights)

    if contrast_criterion:
        self.contrast_criterion = builder.build_loss(contrast_criterion)

    self.max_pts, self.groups = max_pts, groups
    self.lambda_contrast = lambda_contrast

    self.img_fc = build_mlp(img_fcs)
    self.pts_fc = build_mlp(pts_fcs)
def __init__(self,
             in_channels,
             vote_per_seed=1,
             gt_per_seed=3,
             conv_channels=(16, 16),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             norm_feats=True,
             vote_loss=None):
    """Create the vote convolution stack and its output convolution.

    Args:
        in_channels (int): Input channels of the first conv layer.
        vote_per_seed (int): Multiplier of the output channel count.
        gt_per_seed (int): Stored as ``self.gt_per_seed``.
        conv_channels (tuple[int]): Output channels of each stacked
            ``ConvModule``.
        conv_cfg (dict): Conv config of each ``ConvModule``.
        norm_cfg (dict): Norm config of each ``ConvModule``.
        norm_feats (bool): Stored as ``self.norm_feats``.
        vote_loss (dict): Config passed to ``build_loss``.
    """
    super().__init__()

    self.in_channels = in_channels
    self.vote_per_seed = vote_per_seed
    self.gt_per_seed = gt_per_seed
    self.norm_feats = norm_feats
    self.vote_loss = build_loss(vote_loss)

    # Chain kernel-size-1 conv modules, each feeding the next.
    stack = []
    channels = in_channels
    for width in conv_channels:
        stack.append(
            ConvModule(
                channels,
                width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
        channels = width
    self.vote_conv = nn.Sequential(*stack)

    # conv_out predicts coordinate and residual features
    self.conv_out = nn.Conv1d(channels,
                              self.vote_per_seed * (in_channels + 3), 1)
def __init__(self,
             img_backbone=None,
             pts_backbone=None,
             num_classes=None,
             prelogits_dim=None,
             class_weights=None,
             pretrained=None,
             contrast_criterion=None,
             max_pts=1024,
             lambda_contrast=0.1,
             train_cfg=None,
             test_cfg=None):
    """Build backbones, segmentation head and two projection MLPs.

    Args:
        img_backbone (dict, optional): Backbone config; built only when
            truthy.
        pts_backbone (dict, optional): Backbone config; built only when
            truthy.
        num_classes (int): Output width of the linear ``seg_head``.
        prelogits_dim (int): Input width of the linear ``seg_head``.
        class_weights: Values wrapped with ``torch.tensor`` and stored as
            ``self.class_weights``.
        pretrained: Forwarded to ``self.init_weights``.
        contrast_criterion (dict, optional): Loss config; built only when
            truthy.
        max_pts (int): Stored as ``self.max_pts``.
        lambda_contrast (float): Stored as ``self.lambda_contrast``.
        train_cfg (dict, optional): Not used by this constructor.
        test_cfg (dict, optional): Not used by this constructor.
    """
    super(SegFusionV2, self).__init__()

    if img_backbone:
        self.img_backbone = builder.build_backbone(img_backbone)
    if pts_backbone:
        self.pts_backbone = builder.build_backbone(pts_backbone)

    # NOTE: weights are initialised before the layers below are created.
    self.init_weights(pretrained=pretrained)

    self.seg_head = nn.Linear(prelogits_dim, num_classes)
    self.class_weights = torch.tensor(class_weights)

    if contrast_criterion:
        self.contrast_criterion = builder.build_loss(contrast_criterion)

    self.max_pts = max_pts
    self.lambda_contrast = lambda_contrast

    # Two-layer MLPs with fixed widths: 64 -> 64 -> 16 and 16 -> 16 -> 16.
    g1_layers = [
        nn.Linear(64, 64),
        nn.ReLU(inplace=True),
        nn.Linear(64, 16),
    ]
    self.g1 = nn.Sequential(*g1_layers)
    g2_layers = [
        nn.Linear(16, 16),
        nn.ReLU(inplace=True),
        nn.Linear(16, 16),
    ]
    self.g2 = nn.Sequential(*g2_layers)
def __init__(self,
             pts_voxel_layer=None,
             pts_voxel_encoder=None,
             pts_middle_encoder=None,
             img_backbone=None,
             img_seg_head=None,
             pts_backbone=None,
             pts_neck=None,
             pts_bbox_head=None,
             train_cfg=None,
             test_cfg=None,
             pretrained=None,
             pts_fc=[],
             contrast_criterion=None,
             max_pts=4096,
             lambda_contrast=0.1):
    """Build the optional sub-modules of the fusion detector.

    Every component config is optional; the matching attribute is only
    created when its config is truthy.

    Args:
        pts_voxel_layer (dict, optional): Keyword arguments for
            ``Voxelization``.
        pts_voxel_encoder (dict, optional): Voxel encoder config.
        pts_middle_encoder (dict, optional): Middle encoder config.
        img_backbone (dict, optional): Image backbone config.
        img_seg_head (dict, optional): Image segmentation head config.
        pts_backbone (dict, optional): Point backbone config.
        pts_neck (dict, optional): Point neck config.
        pts_bbox_head (dict, optional): Box head config. It is updated in
            place with ``train_cfg``/``test_cfg`` entries taken from the
            ``pts`` attribute of the respective config (``None`` when the
            config is falsy).
        train_cfg: Stored as ``self.train_cfg``.
        test_cfg: Stored as ``self.test_cfg``.
        pretrained: Forwarded to ``self.init_weights``.
        pts_fc (list[int]): Layer widths of the linear stack
            ``self.fc_layers``.
        contrast_criterion (dict, optional): Loss config.
        max_pts (int): Stored as ``self.max_pts``.
        lambda_contrast (float): Stored as ``self.lambda_contrast``.
    """
    super(FusionContrastV2, self).__init__()

    if img_backbone:
        self.img_backbone = builder.build_backbone(img_backbone)
    if img_seg_head:
        self.img_seg_head = builder.build_head(img_seg_head)
    if pts_voxel_layer:
        self.pts_voxel_layer = Voxelization(**pts_voxel_layer)
    if pts_voxel_encoder:
        self.pts_voxel_encoder = builder.build_voxel_encoder(
            pts_voxel_encoder)
    if pts_middle_encoder:
        self.pts_middle_encoder = builder.build_middle_encoder(
            pts_middle_encoder)
    if pts_backbone:
        self.pts_backbone = builder.build_backbone(pts_backbone)
    if pts_neck:
        self.pts_neck = builder.build_neck(pts_neck)
    if pts_bbox_head:
        # Inject the point-branch train/test settings into the head config.
        pts_bbox_head.update(
            train_cfg=train_cfg.pts if train_cfg else None)
        pts_bbox_head.update(test_cfg=test_cfg.pts if test_cfg else None)
        self.pts_bbox_head = builder.build_head(pts_bbox_head)
    if contrast_criterion:
        self.contrast_criterion = builder.build_loss(contrast_criterion)

    self.max_pts = max_pts
    self.lambda_contrast = lambda_contrast

    # Linear layers between consecutive widths, with a ReLU after every
    # layer except the last one.
    width_pairs = list(zip(pts_fc[:-1], pts_fc[1:]))
    stack = []
    for position, (width_in, width_out) in enumerate(width_pairs, start=1):
        stack.append(nn.Linear(width_in, width_out))
        if position != len(width_pairs):
            stack.append(nn.ReLU(inplace=True))
    self.fc_layers = nn.Sequential(*stack)

    self.train_cfg = train_cfg
    self.test_cfg = test_cfg
    self.init_weights(pretrained=pretrained)
def __init__(self,
             num_classes,
             bbox_coder,
             in_channels=256,
             train_cfg=None,
             test_cfg=None,
             vote_module_cfg=None,
             vote_aggregation_cfg=None,
             prop_reasoning_cfg=None,
             pred_layer_cfg=None,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             act_cfg=dict(type='ReLU'),
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_res_loss=None,
             corner_loss=None,
             vote_loss=None):
    """Initialise the parent head, then add the reasoning module and the
    extra losses.

    Args:
        num_classes (int): Forwarded to the parent constructor.
        bbox_coder (dict): Forwarded to the parent constructor.
        in_channels (int): Not used by this constructor.
        train_cfg (dict, optional): Forwarded to the parent constructor.
        test_cfg (dict, optional): Forwarded to the parent constructor.
        vote_module_cfg (dict): Forwarded to the parent constructor; its
            ``'num_points'`` entry is stored as ``self.num_candidates``.
        vote_aggregation_cfg (dict): Forwarded to the parent constructor.
        prop_reasoning_cfg (dict): Keyword arguments for ``ProRe``.
        pred_layer_cfg (dict): Forwarded to the parent constructor.
        conv_cfg (dict): Forwarded to the parent constructor.
        norm_cfg (dict): Forwarded to the parent constructor.
        act_cfg (dict): Not used by this constructor.
        objectness_loss (dict): Forwarded to the parent constructor.
        center_loss (dict): Forwarded to the parent constructor.
        dir_class_loss (dict): Forwarded to the parent constructor.
        dir_res_loss (dict): Forwarded to the parent constructor.
        size_res_loss (dict): Forwarded to the parent constructor.
        corner_loss (dict): Config passed to ``build_loss``.
        vote_loss (dict): Config passed to ``build_loss``.
    """
    # The parent is always called with ``size_class_loss`` and
    # ``semantic_loss`` disabled.
    parent_kwargs = dict(
        train_cfg=train_cfg,
        test_cfg=test_cfg,
        vote_module_cfg=vote_module_cfg,
        vote_aggregation_cfg=vote_aggregation_cfg,
        pred_layer_cfg=pred_layer_cfg,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        objectness_loss=objectness_loss,
        center_loss=center_loss,
        dir_class_loss=dir_class_loss,
        dir_res_loss=dir_res_loss,
        size_class_loss=None,
        size_res_loss=size_res_loss,
        semantic_loss=None)
    super(SSD3DHead_ProRe, self).__init__(num_classes, bbox_coder,
                                          **parent_kwargs)

    self.prop_reason = ProRe(**prop_reasoning_cfg)

    self.corner_loss = build_loss(corner_loss)
    self.vote_loss = build_loss(vote_loss)

    self.num_candidates = vote_module_cfg['num_points']
def __init__(self,
             in_channels,
             vote_per_seed=1,
             gt_per_seed=3,
             num_points=-1,
             conv_channels=(16, 16),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             act_cfg=dict(type='ReLU'),
             norm_feats=True,
             with_res_feat=True,
             vote_xyz_range=None,
             vote_loss=None):
    """Create the vote convolution stack and its output convolution.

    Args:
        in_channels (int): Input channels of the first conv layer.
        vote_per_seed (int): Multiplier of the output channel count.
        gt_per_seed (int): Stored as ``self.gt_per_seed``.
        num_points (int): Stored as ``self.num_points``.
        conv_channels (tuple[int]): Output channels of each stacked
            ``ConvModule``.
        conv_cfg (dict): Conv config of each ``ConvModule``.
        norm_cfg (dict): Norm config of each ``ConvModule``.
        act_cfg (dict): Activation config of each ``ConvModule``.
        norm_feats (bool): Stored as ``self.norm_feats``.
        with_res_feat (bool): When true the output conv has
            ``(3 + in_channels) * vote_per_seed`` channels, otherwise
            ``3 * vote_per_seed``.
        vote_xyz_range (tuple[float], optional): Must be ``None`` or a
            tuple of floats (asserted).
        vote_loss (dict, optional): Loss config; ``self.vote_loss`` is
            only created when this is not ``None``.
    """
    super().__init__()

    self.in_channels = in_channels
    self.vote_per_seed = vote_per_seed
    self.gt_per_seed = gt_per_seed
    self.num_points = num_points
    self.norm_feats = norm_feats
    self.with_res_feat = with_res_feat

    assert vote_xyz_range is None or is_tuple_of(vote_xyz_range, float)
    self.vote_xyz_range = vote_xyz_range

    if vote_loss is not None:
        self.vote_loss = build_loss(vote_loss)

    # Chain kernel-size-1 conv modules, each feeding the next.
    stack = []
    channels = in_channels
    for width in conv_channels:
        stack.append(
            ConvModule(
                channels,
                width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                act_cfg=act_cfg,
                bias=True,
                inplace=True))
        channels = width
    self.vote_conv = nn.Sequential(*stack)

    # conv_out predicts coordinate and residual features
    per_vote_channels = 3 + in_channels if with_res_feat else 3
    out_channel = per_vote_channels * self.vote_per_seed
    self.conv_out = nn.Conv1d(channels, out_channel, 1)
def __init__(self,
             model,
             cfg,
             batch_processor=None,
             optimizer=None,
             work_dir=None,
             logger=None,
             meta=None,
             max_iters=None,
             max_epochs=None):
    """Initialise the base runner and read the contrastive settings.

    Args:
        model: Forwarded to the parent constructor.
        cfg: Object providing the attributes ``lambda_contrast``,
            ``max_pts`` and ``contrast_criterion``.
        batch_processor: Forwarded to the parent constructor.
        optimizer: Forwarded to the parent constructor.
        work_dir: Forwarded to the parent constructor.
        logger: Forwarded to the parent constructor.
        meta: Forwarded to the parent constructor.
        max_iters: Forwarded to the parent constructor.
        max_epochs: Forwarded to the parent constructor.
    """
    # The parent takes these positionally, in exactly this order.
    base_args = (model, batch_processor, optimizer, work_dir, logger, meta,
                 max_iters, max_epochs)
    super(ContrastRunnerV0, self).__init__(*base_args)

    # Contrastive-learning settings come from ``cfg``.
    self.lambda_contrast = cfg.lambda_contrast
    self.max_pts = cfg.max_pts
    criterion_cfg = cfg.contrast_criterion
    self.contrast_criterion = build_loss(criterion_cfg)
def __init__(self,
             num_classes,
             seg_in_channels,
             part_in_channels,
             seg_conv_channels=None,
             part_conv_channels=None,
             merge_conv_channels=None,
             down_conv_channels=None,
             shared_fc_channels=None,
             cls_channels=None,
             reg_channels=None,
             dropout_ratio=0.1,
             roi_feat_size=14,
             with_corner_loss=True,
             bbox_coder=dict(type='DeltaXYZWLHRBBoxCoder'),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d', eps=1e-3, momentum=0.01),
             loss_bbox=dict(
                 type='SmoothL1Loss', beta=1.0 / 9.0, loss_weight=2.0),
             loss_cls=dict(
                 type='CrossEntropyLoss',
                 use_sigmoid=True,
                 reduction='none',
                 loss_weight=1.0)):
    """Build sparse conv branches, shared FC layers and both output heads.

    Args:
        num_classes (int): Stored as ``self.num_classes``.
        seg_in_channels (int): Input channels of the ``seg_conv`` branch.
        part_in_channels (int): Input channels of the ``part_conv``
            branch.
        seg_conv_channels (list[int]): Output channels of each layer in
            ``seg_conv``.
        part_conv_channels (list[int]): Output channels of each layer in
            ``part_conv``.
        merge_conv_channels (list[int]): Output channels of each layer in
            the ``merge_conv`` stage.
        down_conv_channels (list[int]): Output channels of each layer in
            the ``down_conv`` stage; the last one must equal
            ``shared_fc_channels[0]`` (asserted).
        shared_fc_channels (list[int]): Channels of the shared FC stack;
            element 0 only sizes the input.
        cls_channels (list[int]): Hidden channels of ``conv_cls``.
        reg_channels (list[int]): Hidden channels of ``conv_reg``.
        dropout_ratio (float): Probability passed to the ``nn.Dropout``
            layers.
        roi_feat_size (int): Halved (integer division) to obtain the
            pooled size used to compute the shared FC input width.
        with_corner_loss (bool): Stored as ``self.with_corner_loss``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``.
        conv_cfg (dict): Conv config of the ``ConvModule`` layers.
        norm_cfg (dict): Norm config of the conv layers.
        loss_bbox (dict): Config passed to ``build_loss``.
        loss_cls (dict): Config passed to ``build_loss``; its
            ``'use_sigmoid'`` entry (default ``False``) is stored as
            ``self.use_sigmoid_cls``.
    """
    super(PartA2BboxHead, self).__init__()

    self.num_classes = num_classes
    self.with_corner_loss = with_corner_loss
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.loss_bbox = build_loss(loss_bbox)
    self.loss_cls = build_loss(loss_cls)
    self.use_sigmoid_cls = loss_cls.get('use_sigmoid', False)

    assert down_conv_channels[-1] == shared_fc_channels[0]

    # init layers
    # --- part branch (submanifold sparse convs) ---
    part_width = part_in_channels
    part_layers = []
    for idx, out_width in enumerate(part_conv_channels):
        part_layers.append(
            make_sparse_convmodule(
                part_width,
                out_width,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key=f'rcnn_part{idx}',
                conv_type='SubMConv3d'))
        part_width = out_width
    self.part_conv = spconv.SparseSequential(*part_layers)

    # --- seg branch (submanifold sparse convs) ---
    seg_width = seg_in_channels
    seg_layers = []
    for idx, out_width in enumerate(seg_conv_channels):
        seg_layers.append(
            make_sparse_convmodule(
                seg_width,
                out_width,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key=f'rcnn_seg{idx}',
                conv_type='SubMConv3d'))
        seg_width = out_width
    self.seg_conv = spconv.SparseSequential(*seg_layers)

    # --- merge + down-sampling stage ---
    self.conv_down = spconv.SparseSequential()

    # The merge stage consumes both branch widths added together.
    running_width = part_width + seg_width
    merge_layers = []
    for out_width in merge_conv_channels:
        merge_layers.append(
            make_sparse_convmodule(
                running_width,
                out_width,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key='rcnn_down0'))
        running_width = out_width

    down_layers = []
    for out_width in down_conv_channels:
        down_layers.append(
            make_sparse_convmodule(
                running_width,
                out_width,
                3,
                padding=1,
                norm_cfg=norm_cfg,
                indice_key='rcnn_down1'))
        running_width = out_width

    self.conv_down.add_module('merge_conv',
                              spconv.SparseSequential(*merge_layers))
    self.conv_down.add_module(
        'max_pool3d', spconv.SparseMaxPool3d(kernel_size=2, stride=2))
    self.conv_down.add_module('down_conv',
                              spconv.SparseSequential(*down_layers))

    # --- shared fully connected stack (kernel-size-1 convs) ---
    pool_size = roi_feat_size // 2
    fc_width = shared_fc_channels[0] * pool_size**3
    final_fc_idx = len(shared_fc_channels) - 1
    shared_layers = []
    for fc_idx, out_width in enumerate(shared_fc_channels[1:], start=1):
        shared_layers.append(
            ConvModule(
                fc_width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        fc_width = out_width
        # Dropout follows every shared layer except the last one.
        if dropout_ratio > 0 and fc_idx != final_fc_idx:
            shared_layers.append(nn.Dropout(dropout_ratio))
    self.shared_fc = nn.Sequential(*shared_layers)

    head_in_width = shared_fc_channels[-1]

    # Classification layer
    cls_out_width = 1
    cls_stack = []
    width = head_in_width
    for out_width in cls_channels:
        cls_stack.append(
            ConvModule(
                width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        width = out_width
    cls_stack.append(
        ConvModule(
            width,
            cls_out_width,
            1,
            padding=0,
            conv_cfg=conv_cfg,
            act_cfg=None))
    # Unlike the shared stack, this check includes a ratio of exactly 0.
    if dropout_ratio >= 0:
        cls_stack.insert(1, nn.Dropout(dropout_ratio))
    self.conv_cls = nn.Sequential(*cls_stack)

    # Regression layer
    reg_stack = []
    width = head_in_width
    for out_width in reg_channels:
        reg_stack.append(
            ConvModule(
                width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                inplace=True))
        width = out_width
    reg_stack.append(
        ConvModule(
            width,
            self.bbox_coder.code_size,
            1,
            padding=0,
            conv_cfg=conv_cfg,
            act_cfg=None))
    if dropout_ratio >= 0:
        reg_stack.insert(1, nn.Dropout(dropout_ratio))
    self.conv_reg = nn.Sequential(*reg_stack)

    self.init_weights()
def __init__(self,
             num_dims,
             num_classes,
             primitive_mode,
             train_cfg=None,
             test_cfg=None,
             vote_module_cfg=None,
             vote_aggregation_cfg=None,
             feat_channels=(128, 128),
             upper_thresh=100.0,
             surface_thresh=0.5,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             semantic_reg_loss=None,
             semantic_cls_loss=None,
             init_cfg=None):
    """Build the flag predictor, voting modules and prediction convs.

    Args:
        num_dims (int): The dimension of primitive semantic information;
            contributes to the output channel count.
        num_classes (int): Contributes to the output channel count.
        primitive_mode (str): One of ``'z'``, ``'xy'`` or ``'line'``
            (asserted).
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        vote_module_cfg (dict): Keyword arguments for ``VoteModule``; its
            ``'gt_per_seed'``, ``'in_channels'`` and ``'conv_channels'``
            entries are read here as well.
        vote_aggregation_cfg (dict): Config passed to ``build_sa_module``;
            its ``'num_point'`` and ``'mlp_channels'`` entries are read
            here as well.
        feat_channels (tuple[int]): Output channels of each prediction
            ``ConvModule``.
        upper_thresh (float): Stored as ``self.upper_thresh``.
        surface_thresh (float): Stored as ``self.surface_thresh``.
        conv_cfg (dict): Conv config of the ``ConvModule`` layers.
        norm_cfg (dict): Norm config of the ``ConvModule`` layers.
        objectness_loss (dict): Config passed to ``build_loss``.
        center_loss (dict): Config passed to ``build_loss``.
        semantic_reg_loss (dict): Config passed to ``build_loss``.
        semantic_cls_loss (dict): Config passed to ``build_loss``.
        init_cfg (dict, optional): Forwarded to the parent constructor.
    """
    super(PrimitiveHead, self).__init__(init_cfg=init_cfg)
    assert primitive_mode in ['z', 'xy', 'line']

    # The dimension of primitive semantic information.
    self.num_dims = num_dims
    self.num_classes = num_classes
    self.primitive_mode = primitive_mode
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.gt_per_seed = vote_module_cfg['gt_per_seed']
    self.num_proposal = vote_aggregation_cfg['num_point']
    self.upper_thresh = upper_thresh
    self.surface_thresh = surface_thresh

    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.semantic_reg_loss = build_loss(semantic_reg_loss)
    self.semantic_cls_loss = build_loss(semantic_cls_loss)

    # The aggregation MLP must start at the vote module's input width.
    assert vote_aggregation_cfg['mlp_channels'][0] == vote_module_cfg[
        'in_channels']

    # Primitive existence flag prediction
    vote_feat_width = vote_module_cfg['conv_channels'][-1]
    self.flag_conv = ConvModule(
        vote_feat_width,
        vote_feat_width // 2,
        1,
        padding=0,
        conv_cfg=conv_cfg,
        norm_cfg=norm_cfg,
        bias=True,
        inplace=True)
    self.flag_pred = torch.nn.Conv1d(vote_feat_width // 2, 2, 1)

    self.vote_module = VoteModule(**vote_module_cfg)
    self.vote_aggregation = build_sa_module(vote_aggregation_cfg)

    # Prediction stack on top of the aggregated features.
    width = vote_aggregation_cfg['mlp_channels'][-1]
    pred_layers = []
    for out_width in feat_channels:
        pred_layers.append(
            ConvModule(
                width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
        width = out_width
    self.conv_pred = nn.Sequential(*pred_layers)

    # Output width: 3 + num_dims + num_classes.
    self.conv_pred.add_module(
        'conv_out', nn.Conv1d(width, 3 + num_dims + num_classes, 1))
def __init__(self,
             num_classes,
             suface_matching_cfg,
             line_matching_cfg,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             gt_per_seed=1,
             num_proposal=256,
             feat_channels=(128, 128),
             primitive_feat_refine_streams=2,
             primitive_refine_channels=[128, 128, 128],
             upper_thresh=100.0,
             surface_thresh=0.5,
             line_thresh=0.5,
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None,
             cues_objectness_loss=None,
             cues_semantic_loss=None,
             proposal_objectness_loss=None,
             primitive_center_loss=None):
    """Build losses, matching modules and the box refinement stack.

    Args:
        num_classes (int): Contributes to the output channel count.
        suface_matching_cfg (dict): Config passed to ``build_sa_module``
            for the surface matcher; the last ``'mlp_channels'`` entry
            sets the matching feature width.
        line_matching_cfg (dict): Config passed to ``build_sa_module``
            for the line matcher; its last ``'mlp_channels'`` entry must
            equal the surface one (asserted).
        bbox_coder (dict): Config passed to ``build_bbox_coder``; its
            ``'with_rot'``, ``'num_dir_bins'`` and ``'num_sizes'`` entries
            are read directly as well.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        gt_per_seed (int): Stored as ``self.gt_per_seed``.
        num_proposal (int): Stored as ``self.num_proposal``.
        feat_channels (tuple[int]): Not used by this constructor.
        primitive_feat_refine_streams (int): Number of conv modules in
            each feature aggregation stack.
        primitive_refine_channels (list[int]): Output channels of each
            refinement ``ConvModule`` in ``bbox_pred``.
        upper_thresh (float): Stored as ``self.upper_thresh``.
        surface_thresh (float): Stored as ``self.surface_thresh``.
        line_thresh (float): Stored as ``self.line_thresh``.
        conv_cfg (dict): Conv config of the ``ConvModule`` layers.
        norm_cfg (dict): Norm config of the ``ConvModule`` layers.
        objectness_loss (dict): Config passed to ``build_loss``.
        center_loss (dict): Config passed to ``build_loss``.
        dir_class_loss (dict): Config passed to ``build_loss``.
        dir_res_loss (dict): Config passed to ``build_loss``.
        size_class_loss (dict): Config passed to ``build_loss``.
        size_res_loss (dict): Config passed to ``build_loss``.
        semantic_loss (dict): Config passed to ``build_loss``.
        cues_objectness_loss (dict): Config passed to ``build_loss``.
        cues_semantic_loss (dict): Config passed to ``build_loss``.
        proposal_objectness_loss (dict): Config passed to ``build_loss``.
        primitive_center_loss (dict): Config passed to ``build_loss``.
    """
    super(H3DBboxHead, self).__init__()

    self.num_classes = num_classes
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.gt_per_seed = gt_per_seed
    self.num_proposal = num_proposal
    self.with_angle = bbox_coder['with_rot']
    self.upper_thresh = upper_thresh
    self.surface_thresh = surface_thresh
    self.line_thresh = line_thresh

    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_class_loss = build_loss(size_class_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)

    coder = build_bbox_coder(bbox_coder)
    self.bbox_coder = coder
    self.num_sizes = coder.num_sizes
    self.num_dir_bins = coder.num_dir_bins

    self.cues_objectness_loss = build_loss(cues_objectness_loss)
    self.cues_semantic_loss = build_loss(cues_semantic_loss)
    self.proposal_objectness_loss = build_loss(proposal_objectness_loss)
    self.primitive_center_loss = build_loss(primitive_center_loss)

    assert suface_matching_cfg['mlp_channels'][-1] == \
        line_matching_cfg['mlp_channels'][-1]

    # surface center matching
    self.surface_center_matcher = build_sa_module(suface_matching_cfg)
    # line center matching
    self.line_center_matcher = build_sa_module(line_matching_cfg)

    matching_feat_dims = suface_matching_cfg['mlp_channels'][-1]

    def _same_width_conv():
        # Kernel-size-1 conv module keeping the matching feature width.
        return ConvModule(
            matching_feat_dims,
            matching_feat_dims,
            1,
            padding=0,
            conv_cfg=conv_cfg,
            norm_cfg=norm_cfg,
            bias=True,
            inplace=True)

    # Compute the matching scores
    self.matching_conv = _same_width_conv()
    self.matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

    # Compute the semantic matching scores
    self.semantic_matching_conv = _same_width_conv()
    self.semantic_matching_pred = nn.Conv1d(matching_feat_dims, 2, 1)

    # Surface feature aggregation
    surface_stack = [
        _same_width_conv() for _ in range(primitive_feat_refine_streams)
    ]
    self.surface_feats_aggregation = nn.Sequential(*surface_stack)

    # Line feature aggregation
    line_stack = [
        _same_width_conv() for _ in range(primitive_feat_refine_streams)
    ]
    self.line_feats_aggregation = nn.Sequential(*line_stack)

    # surface center(6) + line center(12)
    width = 18 * matching_feat_dims
    self.bbox_pred = nn.ModuleList()
    for out_width in primitive_refine_channels:
        self.bbox_pred.append(
            ConvModule(
                width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=False))
        width = out_width

    # Final object detection
    # Objectness scores (2), center residual (3),
    # heading class+residual (num_heading_bin*2), size class +
    # residual(num_size_cluster*4)
    heading_channels = bbox_coder['num_dir_bins'] * 2
    size_channels = bbox_coder['num_sizes'] * 4
    conv_out_channel = (2 + 3 + heading_channels + size_channels +
                        self.num_classes)
    self.bbox_pred.append(nn.Conv1d(width, conv_out_channel, 1))
def __init__(self,
             num_classes,
             in_channels,
             bbox_coder,
             num_decoder_layers,
             transformerlayers,
             decoder_self_posembeds=dict(
                 type='ConvBNPositionalEncoding',
                 input_channel=6,
                 num_pos_feats=288),
             decoder_cross_posembeds=dict(
                 type='ConvBNPositionalEncoding',
                 input_channel=3,
                 num_pos_feats=288),
             train_cfg=None,
             test_cfg=None,
             num_proposal=128,
             pred_layer_cfg=None,
             size_cls_agnostic=True,
             gt_per_seed=3,
             sampling_objectness_loss=None,
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             size_reg_loss=None,
             semantic_loss=None,
             init_cfg=None):
    """Build decoder layers, sampling modules, prediction heads and losses.

    Args:
        num_classes (int): Stored as ``self.num_classes``.
        in_channels (int): Stored and passed to ``PointsObjClsModule``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``.
        num_decoder_layers (int): Number of decoder layers, positional
            embeddings and per-layer prediction heads.
        transformerlayers (ConfigDict | list): A single config (deep-copied
            once per layer) or a list with exactly
            ``num_decoder_layers`` configs (asserted).
        decoder_self_posembeds (dict): Positional encoding config built
            once per layer; ``'num_pos_feats'`` must equal the decoder
            ``embed_dims`` (asserted).
        decoder_cross_posembeds (dict): Positional encoding config built
            once per layer; ``'num_pos_feats'`` must equal the decoder
            ``embed_dims`` (asserted).
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        num_proposal (int): Stored and passed to ``Points_Sampler``.
        pred_layer_cfg (dict): Keyword arguments unpacked into every
            ``BaseConvBboxHead``.
        size_cls_agnostic (bool): Selects which size losses are built.
        gt_per_seed (int): Stored as ``self.gt_per_seed``.
        sampling_objectness_loss (dict): Config passed to ``build_loss``.
        objectness_loss (dict): Config passed to ``build_loss``.
        center_loss (dict): Config passed to ``build_loss``.
        dir_class_loss (dict): Config passed to ``build_loss``.
        dir_res_loss (dict): Config passed to ``build_loss``.
        size_class_loss (dict): Built only when ``size_cls_agnostic`` is
            false.
        size_res_loss (dict): Built only when ``size_cls_agnostic`` is
            false.
        size_reg_loss (dict): Built only when ``size_cls_agnostic`` is
            true.
        semantic_loss (dict): Config passed to ``build_loss``.
        init_cfg (dict, optional): Forwarded to the parent constructor.
    """
    super(GroupFree3DHead, self).__init__(init_cfg=init_cfg)

    self.num_classes = num_classes
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.num_proposal = num_proposal
    self.in_channels = in_channels
    self.num_decoder_layers = num_decoder_layers
    self.size_cls_agnostic = size_cls_agnostic
    self.gt_per_seed = gt_per_seed

    # Transformer decoder layers
    if isinstance(transformerlayers, ConfigDict):
        # One independent copy of the shared config per layer.
        transformerlayers = [
            copy.deepcopy(transformerlayers)
            for _ in range(num_decoder_layers)
        ]
    else:
        assert isinstance(transformerlayers, list) and \
            len(transformerlayers) == num_decoder_layers
    self.decoder_layers = nn.ModuleList([
        build_transformer_layer(transformerlayers[layer_idx])
        for layer_idx in range(self.num_decoder_layers)
    ])
    self.embed_dims = self.decoder_layers[0].embed_dims
    assert self.embed_dims == decoder_self_posembeds['num_pos_feats']
    assert self.embed_dims == decoder_cross_posembeds['num_pos_feats']

    # bbox_coder
    coder = build_bbox_coder(bbox_coder)
    self.bbox_coder = coder
    self.num_sizes = coder.num_sizes
    self.num_dir_bins = coder.num_dir_bins

    # Initial object candidate sampling
    self.gsample_module = GeneralSamplingModule()
    self.fps_module = Points_Sampler([self.num_proposal])
    self.points_obj_cls = PointsObjClsModule(self.in_channels)

    self.fp16_enabled = False

    # initial candidate prediction
    self.conv_pred = BaseConvBboxHead(
        **pred_layer_cfg,
        num_cls_out_channels=self._get_cls_out_channels(),
        num_reg_out_channels=self._get_reg_out_channels())

    # query proj and key proj
    dims = self.embed_dims
    self.decoder_query_proj = nn.Conv1d(dims, dims, kernel_size=1)
    self.decoder_key_proj = nn.Conv1d(dims, dims, kernel_size=1)

    # query position embed
    self.decoder_self_posembeds = nn.ModuleList([
        build_positional_encoding(decoder_self_posembeds)
        for _ in range(self.num_decoder_layers)
    ])
    # key position embed
    self.decoder_cross_posembeds = nn.ModuleList([
        build_positional_encoding(decoder_cross_posembeds)
        for _ in range(self.num_decoder_layers)
    ])

    # Prediction Head
    self.prediction_heads = nn.ModuleList([
        BaseConvBboxHead(
            **pred_layer_cfg,
            num_cls_out_channels=self._get_cls_out_channels(),
            num_reg_out_channels=self._get_reg_out_channels())
        for _ in range(self.num_decoder_layers)
    ])

    self.sampling_objectness_loss = build_loss(sampling_objectness_loss)
    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.semantic_loss = build_loss(semantic_loss)

    # Size losses depend on the class-agnostic switch.
    if not self.size_cls_agnostic:
        self.size_res_loss = build_loss(size_res_loss)
        self.size_class_loss = build_loss(size_class_loss)
    else:
        self.size_reg_loss = build_loss(size_reg_loss)
def __init__(self,
             num_classes,
             bbox_coder,
             train_cfg=None,
             test_cfg=None,
             vote_moudule_cfg=None,
             vote_aggregation_cfg=None,
             feat_channels=(128, 128),
             conv_cfg=dict(type='Conv1d'),
             norm_cfg=dict(type='BN1d'),
             objectness_loss=None,
             center_loss=None,
             dir_class_loss=None,
             dir_res_loss=None,
             size_class_loss=None,
             size_res_loss=None,
             semantic_loss=None):
    """Build losses, box coder, voting modules and the prediction convs.

    Args:
        num_classes (int): Contributes to the output channel count.
        bbox_coder (dict): Config passed to ``build_bbox_coder``; the
            built coder supplies ``num_sizes`` and ``num_dir_bins``.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        vote_moudule_cfg (dict): Keyword arguments for ``VoteModule``; its
            ``'gt_per_seed'`` and ``'in_channels'`` entries are read here
            as well.
        vote_aggregation_cfg (dict): Keyword arguments for
            ``PointSAModule``; its ``'num_point'`` and ``'mlp_channels'``
            entries are read here as well.
        feat_channels (tuple[int]): Output channels of each prediction
            ``ConvModule``.
        conv_cfg (dict): Conv config of the ``ConvModule`` layers.
        norm_cfg (dict): Norm config of the ``ConvModule`` layers.
        objectness_loss (dict): Config passed to ``build_loss``.
        center_loss (dict): Config passed to ``build_loss``.
        dir_class_loss (dict): Config passed to ``build_loss``.
        dir_res_loss (dict): Config passed to ``build_loss``.
        size_class_loss (dict): Config passed to ``build_loss``.
        size_res_loss (dict): Config passed to ``build_loss``.
        semantic_loss (dict): Config passed to ``build_loss``.
    """
    super(VoteHead, self).__init__()

    self.num_classes = num_classes
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.gt_per_seed = vote_moudule_cfg['gt_per_seed']
    self.num_proposal = vote_aggregation_cfg['num_point']

    self.objectness_loss = build_loss(objectness_loss)
    self.center_loss = build_loss(center_loss)
    self.dir_class_loss = build_loss(dir_class_loss)
    self.dir_res_loss = build_loss(dir_res_loss)
    self.size_class_loss = build_loss(size_class_loss)
    self.size_res_loss = build_loss(size_res_loss)
    self.semantic_loss = build_loss(semantic_loss)

    # The aggregation MLP must start at the vote module's input width.
    assert vote_aggregation_cfg['mlp_channels'][0] == vote_moudule_cfg[
        'in_channels']

    coder = build_bbox_coder(bbox_coder)
    self.bbox_coder = coder
    self.num_sizes = coder.num_sizes
    self.num_dir_bins = coder.num_dir_bins

    self.vote_module = VoteModule(**vote_moudule_cfg)
    self.vote_aggregation = PointSAModule(**vote_aggregation_cfg)

    # Prediction stack on top of the aggregated features.
    width = vote_aggregation_cfg['mlp_channels'][-1]
    pred_layers = []
    for out_width in feat_channels:
        pred_layers.append(
            ConvModule(
                width,
                out_width,
                1,
                padding=0,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=True,
                inplace=True))
        width = out_width
    self.conv_pred = nn.Sequential(*pred_layers)

    # Objectness scores (2), center residual (3),
    # heading class+residual (num_dir_bins*2),
    # size class+residual(num_sizes*4)
    heading_channels = self.num_dir_bins * 2
    size_channels = self.num_sizes * 4
    conv_out_channel = (2 + 3 + heading_channels + size_channels +
                        num_classes)
    self.conv_pred.add_module('conv_out',
                              nn.Conv1d(width, conv_out_channel, 1))
def __init__(self,
             in_channels=[128],
             tasks=None,
             train_cfg=None,
             test_cfg=None,
             bbox_coder=None,
             common_heads=dict(),
             loss_cls=dict(type='GaussianFocalLoss', reduction='mean'),
             loss_bbox=dict(
                 type='L1Loss', reduction='none', loss_weight=0.25),
             seperate_head=dict(
                 type='SeparateHead', init_bias=-2.19, final_kernel=3),
             feature_channel=64,
             num_heatmap_convs=2,
             conv_cfg=dict(type='Conv2d'),
             norm_cfg=dict(type='BN2d'),
             bias='auto',
             norm_bbox=True,
             pred_mode=['voxel'],
             num_head_convs=2,
             num_classes=17,
             ignore_label=0,
             grid_size=[480, 360, 32]):
    """Build the semantic convolution stack, classifier and losses.

    Args:
        in_channels (list[int] | int): Stored as ``self.in_channels`` and
            used as input channels of the first semantic conv.
        tasks (list[dict]): One dict per task; each must provide a
            ``'class_names'`` entry.
        train_cfg (dict, optional): Stored as ``self.train_cfg``.
        test_cfg (dict, optional): Stored as ``self.test_cfg``.
        bbox_coder (dict): Config passed to ``build_bbox_coder``.
        common_heads (dict): Not used by this constructor.
        loss_cls (dict): Config passed to ``build_loss``.
        loss_bbox (dict): Config passed to ``build_loss``.
        seperate_head (dict): Not used by this constructor.
        feature_channel (int): Output channels of every semantic conv and
            input channels of the classifier.
        num_heatmap_convs (int): Not used by this constructor.
        conv_cfg (dict): Conv config of the semantic ``ConvModule`` layers.
        norm_cfg (dict): Norm config of the semantic ``ConvModule`` layers.
        bias (str | bool): Not used by this constructor (the semantic
            convs are built with ``bias=False``).
        norm_bbox (bool): Stored as ``self.norm_bbox``.
        pred_mode (list[str]): Must contain at least one of ``'voxel'``,
            ``'point'`` or ``'pillar'`` (asserted).
        num_head_convs (int): Number of semantic conv modules.
        num_classes (int): Output channels of the classifier.
        ignore_label (int): ``ignore_index`` of the cross-entropy loss.
        grid_size (list[int]): Stored as ``self.grid_size``.
    """
    super(SelectivesegHead, self).__init__()

    self.class_names = [task['class_names'] for task in tasks]
    self.train_cfg, self.test_cfg = train_cfg, test_cfg
    self.in_channels = in_channels
    self.num_classes = num_classes
    self.norm_bbox = norm_bbox

    self.loss_cls = build_loss(loss_cls)
    self.loss_bbox = build_loss(loss_bbox)
    self.bbox_coder = build_bbox_coder(bbox_coder)
    self.fp16_enabled = False

    self.pred_mode = pred_mode
    assert ('voxel' in self.pred_mode) or ('point' in self.pred_mode) or (
        'pillar' in self.pred_mode)
    self.ignore_label = ignore_label
    self.grid_size = grid_size

    # Only the first conv sees ``in_channels``; every later one maps
    # ``feature_channel`` to ``feature_channel``.
    conv_stack = []
    stage_in = in_channels
    for _ in range(num_head_convs):
        conv_stack.append(
            ConvModule(
                stage_in,
                feature_channel,
                kernel_size=1,
                conv_cfg=conv_cfg,
                norm_cfg=norm_cfg,
                bias=False))
        stage_in = feature_channel
    self.semantic_conv = nn.Sequential(*conv_stack)

    self.semantic_cls = nn.Conv1d(feature_channel, self.num_classes, 1)
    self.semantic_CE_loss = torch.nn.CrossEntropyLoss(
        ignore_index=ignore_label)