def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 vfe_class_name="VoxelFeatureExtractor",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 middle_class_name="SparseMiddleExtractor",
                 middle_num_input_features=-1,
                 middle_num_filters_d1=[64],
                 middle_num_filters_d2=[64, 64],
                 rpn_class_name="RPN",
                 rpn_num_input_features=-1,
                 rpn_layer_nums=[3, 5, 5],
                 rpn_layer_strides=[2, 2, 2],
                 rpn_num_filters=[128, 128, 256],
                 rpn_upsample_strides=[1, 2, 4],
                 rpn_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_thresholds=None,
                 nms_pre_max_sizes=None,
                 nms_post_max_sizes=None,
                 nms_iou_thresholds=None,
                 target_assigner=None,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 measure_time=False,
                 voxel_generator=None,
                 post_center_range=None,
                 dir_offset=0.0,
                 sin_error_factor=1.0,
                 nms_class_agnostic=False,
                 num_direction_bins=2,
                 direction_limit_offset=0,
                 name='voxelnet'):
        """Store the detector settings and build its sub-networks.

        Builds, in this order, the voxel feature extractor, the middle
        feature extractor and the RPN. Their classes are resolved by name
        through ``voxel_encoder.get_vfe_class``, ``middle.get_middle_class``
        and ``rpn.get_rpn_class``. Afterwards the metric objects, the
        ``global_step`` buffer and the (initially empty) timing
        dictionaries are created.

        Args:
            output_shape: First positional argument of the middle feature
                extractor.
            num_class: Stored as ``_num_class`` and forwarded to the RPN.
            num_input_features: Stored as ``_num_input_features`` and passed
                as first positional argument of the voxel feature extractor.
            vfe_class_name, vfe_num_filters, with_distance: Select and
                configure the voxel feature extractor.
            middle_class_name, middle_num_input_features,
            middle_num_filters_d1, middle_num_filters_d2: Select and
                configure the middle feature extractor.
            rpn_class_name, rpn_num_input_features, rpn_layer_nums,
            rpn_layer_strides, rpn_num_filters, rpn_upsample_strides,
            rpn_num_upsample_filters: Select and configure the RPN.
            use_norm: Forwarded to the voxel and middle feature extractors.
                The RPN is built with ``use_norm=True`` regardless.
            use_groupnorm, num_groups: Forwarded to the RPN.
            use_direction_classifier, encode_background_as_zeros: Stored
                and forwarded to the RPN (the latter also to the metrics).
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            use_rotate_nms, multiclass_nms, nms_score_thresholds,
            nms_pre_max_sizes, nms_post_max_sizes, nms_iou_thresholds,
            nms_class_agnostic: Stored unchanged on the instance.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight, loc_loss_weight, pos_cls_weight,
            neg_cls_weight, direction_loss_weight, loss_norm_type,
            encode_rad_error_by_sin, loc_loss_ftor, cls_loss_ftor,
            dir_offset, sin_error_factor, direction_limit_offset: Stored
                unchanged on the instance.
            measure_time: Stored as ``measure_time``.
            voxel_generator: Required despite the ``None`` default. Must
                expose ``voxel_size`` and ``point_cloud_range``.
            post_center_range: Stored as given, or as ``[]`` when falsy.
            num_direction_bins: Stored and forwarded to the RPN.
            name: Stored as ``name``.

        Raises:
            ValueError: If ``target_assigner`` or ``voxel_generator`` is
                ``None``.
        """
        super().__init__()
        # Both collaborators default to None in the signature but are
        # dereferenced unconditionally below; reject None up front with an
        # explicit message instead of an AttributeError on NoneType.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required (got None): its box_coder and "
                "num_anchors_per_location are needed to build the network")
        if voxel_generator is None:
            raise ValueError(
                "voxel_generator is required (got None): its voxel_size and "
                "point_cloud_range are needed by the voxel feature extractor")

        # --- plain configuration values kept on the instance ---
        self.name = name
        self._sin_error_factor = sin_error_factor
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_thresholds = nms_score_thresholds
        self._nms_pre_max_sizes = nms_pre_max_sizes
        self._nms_post_max_sizes = nms_post_max_sizes
        self._nms_iou_thresholds = nms_iou_thresholds
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_direction_classifier = use_direction_classifier
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self.target_assigner = target_assigner
        self.voxel_generator = voxel_generator
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        # These two loss functors are always created here; they are not
        # configurable through the constructor arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
        self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()
        self._dir_offset = dir_offset
        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight
        # A falsy value (None, empty sequence) is normalised to a new list.
        self._post_center_range = post_center_range or []
        self.measure_time = measure_time
        self._nms_class_agnostic = nms_class_agnostic
        self._num_direction_bins = num_direction_bins
        self._dir_limit_offset = direction_limit_offset

        # --- sub-networks (creation order kept: vfe, middle, rpn) ---
        vfe_class = voxel_encoder.get_vfe_class(vfe_class_name)
        self.voxel_feature_extractor = vfe_class(
            num_input_features,
            use_norm,
            num_filters=vfe_num_filters,
            with_distance=with_distance,
            voxel_size=voxel_generator.voxel_size,
            pc_range=voxel_generator.point_cloud_range,
        )
        middle_class = middle.get_middle_class(middle_class_name)
        self.middle_feature_extractor = middle_class(
            output_shape,
            use_norm,
            num_input_features=middle_num_input_features,
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2)
        rpn_class = rpn.get_rpn_class(rpn_class_name)
        # NOTE(review): use_norm is hard-coded to True here, so the
        # use_norm argument does not reach the RPN - confirm intentional.
        self.rpn = rpn_class(
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_features=rpn_num_input_features,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size,
            num_direction_bins=self._num_direction_bins)

        # --- metric objects ---
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()
        # One-element int64 tensor set to zero, registered as a buffer
        # named "global_step".
        self.register_buffer("global_step", torch.LongTensor(1).zero_())

        # Timing dictionaries start empty; nothing in this constructor
        # fills them.
        self._time_dict = {}
        self._time_total_dict = {}
        self._time_count_dict = {}
Beispiel #2
0
    def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 vfe_class_name="VoxelFeatureExtractor",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 middle_class_name="SparseMiddleExtractor",
                 middle_num_input_features=-1,
                 middle_num_filters_d1=[64],
                 middle_num_filters_d2=[64, 64],
                 rpn_class_name="RPN",
                 rpn_num_input_features=-1,
                 rpn_layer_nums=[3, 5, 5],
                 rpn_layer_strides=[2, 2, 2],
                 rpn_num_filters=[128, 128, 256],
                 rpn_upsample_strides=[1, 2, 4],
                 rpn_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_threshold=0.5,
                 nms_pre_max_size=1000,
                 nms_post_max_size=20,
                 nms_iou_threshold=0.1,
                 target_assigner=None,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 measure_time=False,
                 voxel_generator=None,
                 post_center_range=None,
                 name='voxelnet'):
        """Store the detector settings and build its sub-networks.

        The voxel feature extractor, middle feature extractor and RPN
        classes are looked up by name in dictionaries defined inside this
        constructor and instantiated in that order. Afterwards the metric
        objects, the ``global_step`` buffer and the (initially empty)
        timing dictionaries are created.

        Args:
            output_shape: First positional argument of the middle feature
                extractor.
            num_class: Stored as ``_num_class`` and forwarded to the RPN.
            num_input_features: Stored as ``_num_input_features`` and passed
                as first positional argument of the voxel feature extractor.
            vfe_class_name: Key into the voxel feature extractor table
                (``"VoxelFeatureExtractor"``, ``"VoxelFeatureExtractorV2"``,
                ``"VoxelFeatureExtractorV3"``, ``"SimpleVoxel"``,
                ``"PillarFeatureNet"``).
            vfe_num_filters, with_distance: Forwarded to the voxel feature
                extractor.
            middle_class_name: Key into the middle extractor table built
                below.
            middle_num_input_features, middle_num_filters_d1,
            middle_num_filters_d2: Forwarded to the middle extractor.
            rpn_class_name: One of ``"RPN"``, ``"RPNV2"``, ``"ResNetRPN"``.
            rpn_num_input_features, rpn_layer_nums, rpn_layer_strides,
            rpn_num_filters, rpn_upsample_strides,
            rpn_num_upsample_filters: Forwarded to the RPN.
            use_norm: Forwarded to the voxel and middle feature extractors.
                The RPN is built with ``use_norm=True`` regardless.
            use_groupnorm, num_groups: Forwarded to the RPN.
            use_direction_classifier, encode_background_as_zeros: Stored
                and forwarded to the RPN (the latter also to the metrics).
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            use_rotate_nms, multiclass_nms, nms_score_threshold,
            nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored
                unchanged on the instance.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight, loc_loss_weight, pos_cls_weight,
            neg_cls_weight, direction_loss_weight, loss_norm_type,
            encode_rad_error_by_sin, loc_loss_ftor, cls_loss_ftor: Stored
                unchanged on the instance.
            measure_time: Stored as ``measure_time``.
            voxel_generator: Required despite the ``None`` default. Must
                expose ``voxel_size`` and ``point_cloud_range``.
            post_center_range: Stored as given, or as ``[]`` when falsy.
            name: Stored as ``name``.

        Raises:
            ValueError: If ``target_assigner`` or ``voxel_generator`` is
                ``None``.
            KeyError: If one of the ``*_class_name`` arguments is not a key
                of the corresponding table.
        """
        super().__init__()
        # Both collaborators default to None in the signature but are
        # dereferenced unconditionally below; reject None up front with an
        # explicit message instead of an AttributeError on NoneType.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required (got None): its box_coder and "
                "num_anchors_per_location are needed to build the network")
        if voxel_generator is None:
            raise ValueError(
                "voxel_generator is required (got None): its voxel_size and "
                "point_cloud_range are needed by the voxel feature extractor")

        # --- plain configuration values kept on the instance ---
        self.name = name
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_threshold = nms_score_threshold
        self._nms_pre_max_size = nms_pre_max_size
        self._nms_post_max_size = nms_post_max_size
        self._nms_iou_threshold = nms_iou_threshold
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_direction_classifier = use_direction_classifier
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self.target_assigner = target_assigner
        self.voxel_generator = voxel_generator
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        # These two loss functors are always created here; they are not
        # configurable through the constructor arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
        self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()

        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight
        # A falsy value (None, empty sequence) is normalised to a new list.
        self._post_center_range = post_center_range or []
        self.measure_time = measure_time

        # --- voxel feature extractor, selected by name ---
        vfe_class_dict = {
            "VoxelFeatureExtractor": voxel_encoder.VoxelFeatureExtractor,
            "VoxelFeatureExtractorV2": voxel_encoder.VoxelFeatureExtractorV2,
            "VoxelFeatureExtractorV3": voxel_encoder.VoxelFeatureExtractorV3,
            "SimpleVoxel": voxel_encoder.SimpleVoxel,
            "PillarFeatureNet": pointpillars.PillarFeatureNet,
        }
        vfe_class = vfe_class_dict[vfe_class_name]
        self.voxel_feature_extractor = vfe_class(
            num_input_features,
            use_norm,
            num_filters=vfe_num_filters,
            with_distance=with_distance,
            voxel_size=voxel_generator.voxel_size,
            pc_range=voxel_generator.point_cloud_range,
        )

        # --- middle feature extractor, selected by name ---
        mid_class_dict = {
            "SparseMiddleExtractor": middle.SparseMiddleExtractor,
            "SpMiddleD4HD": middle.SpMiddleD4HD,
            "SpMiddleD8HD": middle.SpMiddleD8HD,
            "SpMiddleFHD": middle.SpMiddleFHD,
            "SpMiddleFHDV2": middle.SpMiddleFHDV2,
            "SpMiddleFHDLarge": middle.SpMiddleFHDLarge,
            "SpMiddleResNetFHD": middle.SpMiddleResNetFHD,
            "SpMiddleD4HDLite": middle.SpMiddleD4HDLite,
            "SpMiddleFHDLite": middle.SpMiddleFHDLite,
            "SpMiddle2K": middle.SpMiddle2K,
            "SpMiddleFHDPeople": middle.SpMiddleFHDPeople,
            "SpMiddle2KPeople": middle.SpMiddle2KPeople,
            "SpMiddleHDLite": middle.SpMiddleHDLite,
            "PointPillarsScatter": pointpillars.PointPillarsScatter,
        }
        mid_class = mid_class_dict[middle_class_name]
        self.middle_feature_extractor = mid_class(
            output_shape,
            use_norm,
            num_input_features=middle_num_input_features,
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2)

        # --- region proposal network, selected by name ---
        rpn_class_dict = {
            "RPN": rpn.RPN,
            "RPNV2": rpn.RPNV2,
            "ResNetRPN": rpn.ResNetRPN,
        }
        rpn_class = rpn_class_dict[rpn_class_name]
        # NOTE(review): use_norm is hard-coded to True here, so the
        # use_norm argument does not reach the RPN - confirm intentional.
        self.rpn = rpn_class(
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_features=rpn_num_input_features,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size)

        # --- metric objects ---
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()
        # One-element int64 tensor set to zero, registered as a buffer
        # named "global_step".
        self.register_buffer("global_step", torch.LongTensor(1).zero_())

        # Timing dictionaries start empty; nothing in this constructor
        # fills them.
        self._time_dict = {}
        self._time_total_dict = {}
        self._time_count_dict = {}
Beispiel #3
0
    def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 vfe_class_name="VoxelFeatureExtractor",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 middle_class_name="SparseMiddleExtractor",
                 middle_num_filters_d1=[64],
                 middle_num_filters_d2=[64, 64],
                 rpn_class_name="RPN",
                 rpn_layer_nums=[3, 5, 5],
                 rpn_layer_strides=[2, 2, 2],
                 rpn_num_filters=[128, 128, 256],
                 rpn_upsample_strides=[1, 2, 4],
                 rpn_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_sparse_rpn=False,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_threshold=0.5,
                 nms_pre_max_size=1000,
                 nms_post_max_size=20,
                 nms_iou_threshold=0.1,
                 target_assigner=None,
                 use_bev=False,
                 lidar_only=False,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 name='voxelnet'):
        """Store the detector settings and build its sub-networks.

        The voxel feature extractor, middle feature extractor and RPN
        classes are looked up by name in dictionaries defined inside this
        constructor and instantiated in that order, followed by the metric
        objects and the ``global_step`` buffer.

        Args:
            output_shape: First positional argument of the middle feature
                extractor.
            num_class: Stored as ``_num_class`` and forwarded to the RPN.
            num_input_features: Stored as ``_num_input_features`` and passed
                as first positional argument of the voxel feature extractor.
            vfe_class_name: ``"VoxelFeatureExtractor"`` or
                ``"VoxelFeatureExtractorV2"``.
            vfe_num_filters: Forwarded to the voxel feature extractor; its
                last element is also the middle extractor's
                ``num_input_features``.
            with_distance: Forwarded to the voxel feature extractor.
            middle_class_name: ``"MiddleExtractor"`` or
                ``"SparseMiddleExtractor"``.
            middle_num_filters_d1, middle_num_filters_d2: Forwarded to the
                middle extractor; also used to derive the RPN input width
                (see the comment in the body).
            rpn_class_name: Only ``"RPN"`` is available.
            rpn_layer_nums, rpn_layer_strides, rpn_num_filters,
            rpn_upsample_strides, rpn_num_upsample_filters: Forwarded to
                the RPN.
            use_norm: Forwarded to the voxel and middle feature extractors.
                The RPN is built with ``use_norm=True`` regardless.
            use_groupnorm, num_groups: Forwarded to the RPN.
            use_sparse_rpn, lidar_only: Stored unchanged on the instance.
            use_bev: Stored and forwarded to the RPN.
            use_direction_classifier, encode_background_as_zeros: Stored
                and forwarded to the RPN (the latter also to the metrics).
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            use_rotate_nms, multiclass_nms, nms_score_threshold,
            nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored
                unchanged on the instance.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight, loc_loss_weight, pos_cls_weight,
            neg_cls_weight, direction_loss_weight, loss_norm_type,
            encode_rad_error_by_sin, loc_loss_ftor, cls_loss_ftor: Stored
                unchanged on the instance.
            name: Stored as ``name``.

        Raises:
            ValueError: If ``target_assigner`` is ``None``.
            KeyError: If one of the ``*_class_name`` arguments is not a key
                of the corresponding table.
        """
        super().__init__()
        # target_assigner defaults to None in the signature but is
        # dereferenced unconditionally below; reject None up front with an
        # explicit message instead of an AttributeError on NoneType.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required (got None): its box_coder and "
                "num_anchors_per_location are needed to build the network")

        # --- plain configuration values kept on the instance ---
        self.name = name
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_threshold = nms_score_threshold
        self._nms_pre_max_size = nms_pre_max_size
        self._nms_post_max_size = nms_post_max_size
        self._nms_iou_threshold = nms_iou_threshold
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_sparse_rpn = use_sparse_rpn
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        # Timing accumulators start at zero; nothing in this constructor
        # updates them.
        self._total_forward_time = 0.0
        self._total_postprocess_time = 0.0
        self._total_inference_count = 0
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self._lidar_only = lidar_only
        self.target_assigner = target_assigner
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        # The direction loss functor is always created here; it is not
        # configurable through the constructor arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()

        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight

        # --- voxel feature extractor, selected by name ---
        vfe_class_dict = {
            "VoxelFeatureExtractor": VoxelFeatureExtractor,
            "VoxelFeatureExtractorV2": VoxelFeatureExtractorV2,
        }
        vfe_class = vfe_class_dict[vfe_class_name]
        self.voxel_feature_extractor = vfe_class(num_input_features,
                                                 use_norm,
                                                 num_filters=vfe_num_filters,
                                                 with_distance=with_distance)

        # --- middle feature extractor, selected by name ---
        mid_class_dict = {
            "MiddleExtractor": MiddleExtractor,
            "SparseMiddleExtractor": SparseMiddleExtractor,
        }
        mid_class = mid_class_dict[middle_class_name]
        self.middle_feature_extractor = mid_class(
            output_shape,
            use_norm,
            num_input_features=vfe_num_filters[-1],
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2)

        # The RPN input width is taken from the last non-empty filter list,
        # preferring down2, then down1, then the voxel feature extractor.
        if len(middle_num_filters_d2) > 0:
            num_rpn_input_filters = middle_num_filters_d2[-1]
        elif len(middle_num_filters_d1) > 0:
            num_rpn_input_filters = middle_num_filters_d1[-1]
        else:
            num_rpn_input_filters = vfe_num_filters[-1]

        # --- region proposal network, selected by name ---
        rpn_class_dict = {
            "RPN": RPN,
        }
        rpn_class = rpn_class_dict[rpn_class_name]
        # NOTE(review): use_norm is hard-coded to True here, so the
        # use_norm argument does not reach the RPN - confirm intentional.
        # The input width is doubled before being handed to the RPN.
        self.rpn = rpn_class(
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_filters=num_rpn_input_filters * 2,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_bev=use_bev,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size)

        # --- metric objects ---
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()
        # One-element int64 tensor set to zero, registered as a buffer
        # named "global_step".
        self.register_buffer("global_step", torch.LongTensor(1).zero_())
Beispiel #4
0
    def __init__(
        self,
        output_shape,
        num_class=2,
        num_input_features=4,
        vfe_class_name="VoxelFeatureExtractor",
        vfe_num_filters=[32, 128],
        with_distance=False,
        middle_class_name="SparseMiddleExtractor",
        middle_num_input_features=-1,
        middle_num_filters_d1=[64],
        middle_num_filters_d2=[64, 64],
        rpn_class_name="RPN",
        rpn_num_input_features=-1,
        rpn_layer_nums=[3, 5, 5],
        rpn_layer_strides=[2, 2, 2],
        rpn_num_filters=[128, 128, 256],
        rpn_upsample_strides=[1, 2, 4],
        rpn_num_upsample_filters=[256, 256, 256],
        use_norm=True,
        use_groupnorm=False,
        num_groups=32,
        use_sparse_rpn=False,
        use_voxel_classifier=False,
        use_direction_classifier=True,
        use_sigmoid_score=False,
        encode_background_as_zeros=True,
        use_rotate_nms=True,
        multiclass_nms=False,
        nms_score_threshold=0.5,
        nms_pre_max_size=1000,
        nms_post_max_size=20,
        nms_iou_threshold=0.1,
        target_assigner=None,
        use_bev=False,
        use_rc_net=False,
        lidar_only=False,
        cls_loss_weight=1.0,
        loc_loss_weight=1.0,
        pos_cls_weight=1.0,
        neg_cls_weight=1.0,
        direction_loss_weight=1.0,
        loss_norm_type=LossNormType.NormByNumPositives,
        encode_rad_error_by_sin=False,
        loc_loss_ftor=None,
        cls_loss_ftor=None,
        measure_time=False,
        name='voxelnet',
        use_iou_branch=False,
        iou_dict=None,
        iou_loss_weight=1.0,
        iou_loss_ftor=None,
        use_iou_param_partaa=False,
    ):
        """Store the detector settings and build its sub-networks.

        The voxel feature extractor is always built. Depending on
        ``use_sparse_rpn`` the constructor then creates either a single
        ``rpn.SparseRPN`` (stored as ``sparse_rpn``) or a middle feature
        extractor plus an RPN (stored as ``middle_feature_extractor`` and
        ``rpn``, with ``rpn_class_name`` also kept on the instance). In the
        sparse case ``middle_feature_extractor``, ``rpn`` and
        ``rpn_class_name`` are not set. Metric objects, the ``global_step``
        buffer and the (initially empty) timing dictionaries follow.

        Args:
            output_shape: First positional argument of ``rpn.SparseRPN`` or
                of the middle feature extractor.
            num_class: Stored as ``_num_class`` and forwarded to the RPN.
            num_input_features: Stored as ``_num_input_features`` and passed
                as first positional argument of the voxel feature extractor.
            vfe_class_name: ``"VoxelFeatureExtractor"``,
                ``"VoxelFeatureExtractorV2"``, ``"VoxelFeatureExtractorV3"``
                or ``"SimpleVoxel"``.
            vfe_num_filters, with_distance: Forwarded to the voxel feature
                extractor.
            middle_class_name, middle_num_input_features: Select and
                configure the middle extractor; read only when
                ``use_sparse_rpn`` is false.
            middle_num_filters_d1, middle_num_filters_d2: Forwarded to the
                sparse RPN or the middle extractor; also used to derive the
                sparse RPN input width.
            rpn_class_name: ``"RPN"``, ``"RPNV2"`` or ``"RPN_FUSION"``;
                read only when ``use_sparse_rpn`` is false.
            rpn_num_input_features: Forwarded to the RPN; read only when
                ``use_sparse_rpn`` is false.
            rpn_layer_nums, rpn_layer_strides, rpn_num_filters,
            rpn_upsample_strides, rpn_num_upsample_filters: Forwarded to
                whichever RPN is built.
            use_norm: Forwarded to the voxel and middle feature extractors.
                Both RPN variants are built with ``use_norm=True``
                regardless.
            use_groupnorm, num_groups: Forwarded to whichever RPN is built.
            use_sparse_rpn: Stored and used to pick the branch described
                above.
            use_bev: Stored and forwarded to whichever RPN is built.
            lidar_only: Stored unchanged on the instance.
            use_direction_classifier, encode_background_as_zeros: Stored
                and forwarded to the RPN (the latter also to the metrics).
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            use_rotate_nms, multiclass_nms, nms_score_threshold,
            nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored
                unchanged on the instance.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight, loc_loss_weight, pos_cls_weight,
            neg_cls_weight, direction_loss_weight, loss_norm_type,
            encode_rad_error_by_sin, loc_loss_ftor, cls_loss_ftor: Stored
                unchanged on the instance.
            measure_time: Stored as ``measure_time``.
            name: Stored as ``name``.
            use_voxel_classifier, use_rc_net, use_iou_branch, iou_dict,
            iou_loss_weight, iou_loss_ftor, use_iou_param_partaa: Accepted
                but not read anywhere in this constructor.

        Raises:
            ValueError: If ``target_assigner`` is ``None``.
            KeyError: If a ``*_class_name`` argument that is looked up is
                not a key of the corresponding table.
        """
        super().__init__()
        # target_assigner defaults to None in the signature but is
        # dereferenced unconditionally below; reject None up front with an
        # explicit message instead of an AttributeError on NoneType.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required (got None): its box_coder and "
                "num_anchors_per_location are needed to build the network")

        # --- plain configuration values kept on the instance ---
        self.name = name
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_threshold = nms_score_threshold
        self._nms_pre_max_size = nms_pre_max_size
        self._nms_post_max_size = nms_post_max_size
        self._nms_iou_threshold = nms_iou_threshold
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_sparse_rpn = use_sparse_rpn
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self._lidar_only = lidar_only
        self.target_assigner = target_assigner
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        # The direction loss functor is always created here; it is not
        # configurable through the constructor arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()

        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight
        self.measure_time = measure_time

        # --- voxel feature extractor, selected by name ---
        vfe_class_dict = {
            "VoxelFeatureExtractor": voxel_encoder.VoxelFeatureExtractor,
            "VoxelFeatureExtractorV2": voxel_encoder.VoxelFeatureExtractorV2,
            "VoxelFeatureExtractorV3": voxel_encoder.VoxelFeatureExtractorV3,
            "SimpleVoxel": voxel_encoder.SimpleVoxel
        }
        vfe_class = vfe_class_dict[vfe_class_name]
        self.voxel_feature_extractor = vfe_class(num_input_features,
                                                 use_norm,
                                                 num_filters=vfe_num_filters,
                                                 with_distance=with_distance)

        # Input width for the sparse RPN: taken from the last non-empty
        # filter list, preferring down2, then down1, then the voxel feature
        # extractor. Computed unconditionally, as in the original, although
        # only the sparse branch reads it.
        if len(middle_num_filters_d2) > 0:
            num_rpn_input_filters = middle_num_filters_d2[-1]
        elif len(middle_num_filters_d1) > 0:
            num_rpn_input_filters = middle_num_filters_d1[-1]
        else:
            num_rpn_input_filters = vfe_num_filters[-1]

        # NOTE(review): both RPN variants below get use_norm=True, so the
        # use_norm argument does not reach them - confirm intentional.
        if use_sparse_rpn:  # don't use this. just for fun.
            self.sparse_rpn = rpn.SparseRPN(
                output_shape,
                # num_input_features=vfe_num_filters[-1],
                num_filters_down1=middle_num_filters_d1,
                num_filters_down2=middle_num_filters_d2,
                use_norm=True,
                num_class=num_class,
                layer_nums=rpn_layer_nums,
                layer_strides=rpn_layer_strides,
                num_filters=rpn_num_filters,
                upsample_strides=rpn_upsample_strides,
                num_upsample_filters=rpn_num_upsample_filters,
                num_input_features=num_rpn_input_filters * 2,
                num_anchor_per_loc=target_assigner.num_anchors_per_location,
                encode_background_as_zeros=encode_background_as_zeros,
                use_direction_classifier=use_direction_classifier,
                use_bev=use_bev,
                use_groupnorm=use_groupnorm,
                num_groups=num_groups,
                box_code_size=target_assigner.box_coder.code_size)
        else:
            # --- middle feature extractor, selected by name ---
            mid_class_dict = {
                "SparseMiddleExtractor": middle.SparseMiddleExtractor,
                "SpMiddleD4HD": middle.SpMiddleD4HD,
                "SpMiddleD8HD": middle.SpMiddleD8HD,
                "SpMiddleFHD": middle.SpMiddleFHD,
                "SpMiddleFHDV2": middle.SpMiddleFHDV2,
                "SpMiddleFHDLarge": middle.SpMiddleFHDLarge,
                "SpMiddleResNetFHD": middle.SpMiddleResNetFHD,
                "SpMiddleD4HDLite": middle.SpMiddleD4HDLite,
                "SpMiddleFHDLite": middle.SpMiddleFHDLite,
                "SpMiddle2K": middle.SpMiddle2K,
            }
            mid_class = mid_class_dict[middle_class_name]
            self.middle_feature_extractor = mid_class(
                output_shape,
                use_norm,
                num_input_features=middle_num_input_features,
                num_filters_down1=middle_num_filters_d1,
                num_filters_down2=middle_num_filters_d2)

            # --- region proposal network, selected by name ---
            rpn_class_dict = {
                "RPN": rpn.RPN,
                "RPNV2": rpn.RPNV2,
                "RPN_FUSION": rpn.RPN_FUSION,
            }
            self.rpn_class_name = rpn_class_name
            rpn_class = rpn_class_dict[self.rpn_class_name]
            self.rpn = rpn_class(
                use_norm=True,
                num_class=num_class,
                layer_nums=rpn_layer_nums,
                layer_strides=rpn_layer_strides,
                num_filters=rpn_num_filters,
                upsample_strides=rpn_upsample_strides,
                num_upsample_filters=rpn_num_upsample_filters,
                num_input_features=rpn_num_input_features,
                num_anchor_per_loc=target_assigner.num_anchors_per_location,
                encode_background_as_zeros=encode_background_as_zeros,
                use_direction_classifier=use_direction_classifier,
                use_bev=use_bev,
                use_groupnorm=use_groupnorm,
                num_groups=num_groups,
                box_code_size=target_assigner.box_coder.code_size)

        # --- metric objects ---
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()
        # One-element int64 tensor set to zero, registered as a buffer
        # named "global_step".
        self.register_buffer("global_step", torch.LongTensor(1).zero_())

        # Timing dictionaries start empty; nothing in this constructor
        # fills them.
        self._time_dict = {}
        self._time_total_dict = {}
        self._time_count_dict = {}
Beispiel #5
0
    def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 voxelization_name="BV",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 tdbn_name="tDBN_1",
                 tdbn_filters_d1=[64],
                 tdbn_filters_d2=[64, 64],
                 det_net_name="det_net",
                 det_net_layer_nums=[3, 5, 5],
                 det_net_layer_strides=[2, 2, 2],
                 det_net_num_filters=[128, 128, 256],
                 det_net_upsample_strides=[1, 2, 4],
                 det_net_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_threshold=0.5,
                 nms_pre_max_size=1000,
                 nms_post_max_size=20,
                 nms_iou_threshold=0.1,
                 target_assigner=None,
                 lidar_only=False,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 name='model_net'):
        """Store the detector settings and build its sub-networks.

        The voxelization module, the tDBN feature extractor and the
        detection network classes are looked up by name in dictionaries
        defined inside this constructor and instantiated in that order,
        followed by the metric objects and the ``global_step`` buffer.

        Args:
            output_shape: First positional argument of the tDBN feature
                extractor.
            num_class: Stored as ``_num_class`` and forwarded to the
                detection network.
            num_input_features: Stored as ``_num_input_features`` and passed
                as first positional argument of the voxelization module.
            voxelization_name: ``"VEF"`` or ``"BV"``.
            vfe_num_filters, with_distance: Forwarded to the voxelization
                module.
            tdbn_name: ``"tDBN_1"``, ``"tDBN_2"``, ``"tDBN_bv_1"`` or
                ``"tDBN_bv_2"``.
            tdbn_filters_d1, tdbn_filters_d2: Forwarded to the tDBN feature
                extractor as ``num_filters_down1`` / ``num_filters_down2``.
            det_net_name: ``"det_net"`` or ``"det_net_2"``.
            det_net_layer_nums, det_net_layer_strides, det_net_num_filters,
            det_net_upsample_strides, det_net_num_upsample_filters:
                Forwarded to the detection network.
            use_norm: Forwarded to the voxelization module and the tDBN
                feature extractor. The detection network is built with
                ``use_norm=True`` regardless.
            use_groupnorm, num_groups: Forwarded to the detection network.
            use_direction_classifier, encode_background_as_zeros: Stored
                and forwarded to the detection network (the latter also to
                the metrics).
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            use_rotate_nms, multiclass_nms, nms_score_threshold,
            nms_pre_max_size, nms_post_max_size, nms_iou_threshold,
            lidar_only: Stored unchanged on the instance.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight, loc_loss_weight, pos_cls_weight,
            neg_cls_weight, direction_loss_weight, loss_norm_type,
            encode_rad_error_by_sin, loc_loss_ftor, cls_loss_ftor: Stored
                unchanged on the instance.
            name: Stored as ``name``.

        Raises:
            ValueError: If ``target_assigner`` is ``None``.
            KeyError: If ``voxelization_name``, ``tdbn_name`` or
                ``det_net_name`` is not a key of the corresponding table.
        """
        super().__init__()
        # target_assigner defaults to None in the signature but is
        # dereferenced unconditionally below; reject None up front with an
        # explicit message instead of an AttributeError on NoneType.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required (got None): its box_coder and "
                "num_anchors_per_location are needed to build the network")

        # --- plain configuration values kept on the instance ---
        self.name = name
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_threshold = nms_score_threshold
        self._nms_pre_max_size = nms_pre_max_size
        self._nms_post_max_size = nms_post_max_size
        self._nms_iou_threshold = nms_iou_threshold
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_direction_classifier = use_direction_classifier
        # Timing accumulators start at zero; nothing in this constructor
        # updates them.
        self._total_forward_time = 0.0
        self._total_postprocess_time = 0.0
        self._total_inference_count = 0
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self._lidar_only = lidar_only
        self.target_assigner = target_assigner
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        # The direction loss functor is always created here; it is not
        # configurable through the constructor arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight

        # --- voxelization module, selected by name ---
        voxelization_class_dict = {
            "VEF": voxelization.VoxelFeatureExtractor,
            "BV": voxelization.BinaryVoxel,
        }
        voxelization_class = voxelization_class_dict[voxelization_name]
        self.voxel_feature_extractor = voxelization_class(
            num_input_features,
            use_norm,
            num_filters=vfe_num_filters,
            with_distance=with_distance)

        # --- tDBN feature extractor, selected by name ---
        tdbn_class_dict = {
            "tDBN_1": tDBN.tDBN_1,
            "tDBN_2": tDBN.tDBN_2,
            "tDBN_bv_1": tDBN.tDBN_bv_1,
            "tDBN_bv_2": tDBN.tDBN_bv_2,
        }
        tdbn_class = tdbn_class_dict[tdbn_name]
        self.tdbn_feature_extractor = tdbn_class(
            output_shape,
            use_norm,
            num_filters_down1=tdbn_filters_d1,
            num_filters_down2=tdbn_filters_d2)

        # --- detection network, selected by name ---
        det_net_class_dict = {
            "det_net": det_net.det_net,
            "det_net_2": det_net.det_net_2,
        }
        det_net_class = det_net_class_dict[det_net_name]
        # NOTE(review): use_norm is hard-coded to True here, so the
        # use_norm argument does not reach the detection network - confirm
        # intentional.
        self.det_net = det_net_class(
            use_norm=True,
            num_class=num_class,
            layer_nums=det_net_layer_nums,
            layer_strides=det_net_layer_strides,
            num_filters=det_net_num_filters,
            upsample_strides=det_net_upsample_strides,
            num_upsample_filters=det_net_num_upsample_filters,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size)

        # --- metric objects ---
        self.det_net_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.det_net_precision = metrics.Precision(dim=-1)
        self.det_net_recall = metrics.Recall(dim=-1)
        self.det_net_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.det_net_cls_loss = metrics.Scalar()
        self.det_net_loc_loss = metrics.Scalar()
        self.det_net_total_loss = metrics.Scalar()
        # One-element int64 tensor set to zero, registered as a buffer
        # named "global_step".
        self.register_buffer("global_step", torch.LongTensor(1).zero_())
Beispiel #6
0
    def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 vfe_class_name="VoxelFeatureExtractor",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 middle_class_name="SparseMiddleExtractor",
                 middle_num_input_features=-1,
                 middle_num_filters_d1=[64],
                 middle_num_filters_d2=[64, 64],
                 rpn_class_name="RPN",
                 rpn_num_input_features=-1,
                 rpn_layer_nums=[3, 5, 5],
                 rpn_layer_strides=[2, 2, 2],
                 rpn_num_filters=[128, 128, 256],
                 rpn_upsample_strides=[1, 2, 4],
                 rpn_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_thresholds=None,
                 nms_pre_max_sizes=None,
                 nms_post_max_sizes=None,
                 nms_iou_thresholds=None,
                 target_assigner=None,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 measure_time=False,
                 voxel_generator=None,
                 post_center_range=None,
                 dir_offset=0.0,
                 sin_error_factor=1.0,
                 nms_class_agnostic=False,
                 num_direction_bins=2,
                 direction_limit_offset=0,
                 name='voxelnet'):
        """Store the configuration and build the three network stages.

        The constructor records every option on the instance, then
        instantiates, in this order, the voxel feature extractor, the middle
        feature extractor and the RPN, followed by the metric objects, the
        ``global_step`` buffer and the timing dictionaries.

        The list-valued defaults in the signature are shared between calls;
        this constructor only forwards or stores them and never mutates them
        itself.

        Args:
            output_shape: First positional argument of the middle feature
                extractor.
            num_class: Stored as ``_num_class`` and forwarded to the RPN.
            num_input_features: Stored and forwarded to the voxel feature
                extractor.
            vfe_class_name: Resolved via ``voxel_encoder.get_vfe_class``.
            vfe_num_filters: Forwarded to the voxel feature extractor as
                ``num_filters``.
            with_distance: Forwarded to the voxel feature extractor.
            middle_class_name: Resolved via ``middle.get_middle_class``.
            middle_num_input_features: Forwarded to the middle extractor as
                ``num_input_features``.
            middle_num_filters_d1: Forwarded to the middle extractor as
                ``num_filters_down1``.
            middle_num_filters_d2: Forwarded to the middle extractor as
                ``num_filters_down2``.
            rpn_class_name: Resolved via ``rpn.get_rpn_class``.
            rpn_num_input_features: Forwarded to the RPN as
                ``num_input_features``.
            rpn_layer_nums: Stored as ``_rpn_layer_nums`` and forwarded to
                the RPN as ``layer_nums``.
            rpn_layer_strides: Forwarded to the RPN as ``layer_strides``.
            rpn_num_filters: Forwarded to the RPN as ``num_filters``.
            rpn_upsample_strides: Forwarded to the RPN as
                ``upsample_strides``.
            rpn_num_upsample_filters: Forwarded to the RPN as
                ``num_upsample_filters``.
            use_norm: Forwarded to the voxel and middle extractors only (see
                the review note at the RPN construction).
            use_groupnorm: Forwarded to the RPN.
            num_groups: Forwarded to the RPN.
            use_direction_classifier: Stored and forwarded to the RPN.
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            encode_background_as_zeros: Stored; forwarded to the RPN and to
                the metrics. When false, ``_num_class_with_bg`` is
                ``num_class + 1``, otherwise ``num_class``.
            use_rotate_nms: Stored as ``_use_rotate_nms``.
            multiclass_nms: Stored as ``_multiclass_nms``.
            nms_score_thresholds: Stored as ``_nms_score_thresholds``.
            nms_pre_max_sizes: Stored as ``_nms_pre_max_sizes``.
            nms_post_max_sizes: Stored as ``_nms_post_max_sizes``.
            nms_iou_thresholds: Stored as ``_nms_iou_thresholds``.
            target_assigner: Required despite the ``None`` default. Must
                expose ``box_coder`` (with ``code_size``) and
                ``num_anchors_per_location``.
            cls_loss_weight: Stored as ``_cls_loss_weight``.
            loc_loss_weight: Stored as ``_loc_loss_weight``.
            pos_cls_weight: Stored as ``_pos_cls_weight``.
            neg_cls_weight: Stored as ``_neg_cls_weight``.
            direction_loss_weight: Stored as ``_direction_loss_weight``.
            loss_norm_type: Stored as ``_loss_norm_type``.
            encode_rad_error_by_sin: Stored as ``_encode_rad_error_by_sin``.
            loc_loss_ftor: Stored as ``_loc_loss_ftor``.
            cls_loss_ftor: Stored as ``_cls_loss_ftor``.
            measure_time: Stored as ``measure_time``.
            voxel_generator: Required despite the ``None`` default. Must
                expose ``voxel_size`` and ``point_cloud_range``, which are
                forwarded to the voxel feature extractor.
            post_center_range: Stored as ``_post_center_range``; any falsy
                value is replaced by an empty list.
            dir_offset: Stored as ``_dir_offset``.
            sin_error_factor: Stored as ``_sin_error_factor``.
            nms_class_agnostic: Stored as ``_nms_class_agnostic``.
            num_direction_bins: Stored and forwarded to the RPN.
            direction_limit_offset: Stored as ``_dir_limit_offset``.
            name: Stored as ``name``.

        Raises:
            ValueError: If ``target_assigner`` or ``voxel_generator`` is
                ``None``.
        """
        # Both objects are dereferenced unconditionally below, so reject the
        # ``None`` defaults up front with a message that names the argument.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required: its box_coder and "
                "num_anchors_per_location are needed to build the network")
        if voxel_generator is None:
            raise ValueError(
                "voxel_generator is required: its voxel_size and "
                "point_cloud_range are needed to build the voxel feature "
                "extractor")

        super().__init__()
        self.name = name
        self._sin_error_factor = sin_error_factor
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_thresholds = nms_score_thresholds
        self._nms_pre_max_sizes = nms_pre_max_sizes
        self._nms_post_max_sizes = nms_post_max_sizes
        self._nms_iou_thresholds = nms_iou_thresholds
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_direction_classifier = use_direction_classifier
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self.target_assigner = target_assigner
        self.voxel_generator = voxel_generator
        self._pos_cls_weight = pos_cls_weight
        self._neg_cls_weight = neg_cls_weight
        self._encode_rad_error_by_sin = encode_rad_error_by_sin
        self._loss_norm_type = loss_norm_type
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
        self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()
        self._dir_offset = dir_offset
        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._direction_loss_weight = direction_loss_weight
        self._cls_loss_weight = cls_loss_weight
        self._loc_loss_weight = loc_loss_weight
        self._post_center_range = post_center_range or []
        self.measure_time = measure_time
        self._nms_class_agnostic = nms_class_agnostic
        self._num_direction_bins = num_direction_bins
        self._dir_limit_offset = direction_limit_offset

        # Stage 1: voxel feature extractor.
        self.voxel_feature_extractor = voxel_encoder.get_vfe_class(vfe_class_name)(
            num_input_features,
            use_norm,
            num_filters=vfe_num_filters,
            with_distance=with_distance,
            voxel_size=self.voxel_generator.voxel_size,
            pc_range=self.voxel_generator.point_cloud_range,
        )

        # Stage 2: middle feature extractor.
        self.middle_feature_extractor = middle.get_middle_class(middle_class_name)(
            output_shape,
            use_norm,
            num_input_features=middle_num_input_features,
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2)

        # Stage 3: region proposal network.
        # NOTE(review): ``use_norm`` is hard-coded to True here instead of
        # forwarding the constructor argument; left unchanged because
        # altering it would change the built network -- confirm intent.
        self._rpn_layer_nums = rpn_layer_nums
        self.rpn = rpn.get_rpn_class(rpn_class_name)(
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_features=rpn_num_input_features,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size,
            num_direction_bins=self._num_direction_bins)

        # Metric accumulators.
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()
        # One-element integer buffer initialised to zero.
        self.register_buffer("global_step", torch.LongTensor(1).zero_())

        # One empty list per named section.
        self._time_dict = {
            'PFE': [],
            #'PillarGen': [],
            #'PillarPrep': [],
            #'PillarFeatureNet': [],
            #'PillarScatter': [],
            'RPN-stage-1': [],
            'RPN-stage-2': [],
            'RPN-stage-3': [],
            'RPN-finalize': [],
            'Predict': [],
            'Pre-stage-1': [],
            'Post-stage-1': [],
            'End-to-end': [],
        }
        # Independent copy: same keys, separate (empty) lists.
        self._cuda_event_dict = copy.deepcopy(self._time_dict)

        self._det_num = 1
        # Score dumping is switched off by this literal; when enabled, one
        # "scores_<i>" directory per RPN stage is created in the home dir.
        self._dump_scores = False
        if self._dump_scores:
            # expanduser() still resolves when $HOME is unset, and
            # exist_ok=True avoids the exists()/makedirs() race.
            home_dir = os.path.expanduser("~")
            for stage in range(1, len(rpn_layer_nums) + 1):
                os.makedirs(
                    os.path.join(home_dir, "scores_" + str(stage)),
                    exist_ok=True)

        #self._cuda_streams = []
        #for i in range(4): # We are assuming 4 slices
        #    self._cuda_streams.append(torch.cuda.Stream())

        # Background gets its own class slot only when it is not encoded as
        # an all-zero target.
        if self._encode_background_as_zeros:
            self._num_class_with_bg = self._num_class
        else:
            self._num_class_with_bg = self._num_class + 1
Beispiel #7
0
    def __init__(self,
                 output_shape,
                 num_class=2,
                 num_input_features=4,
                 vfe_class_name="VoxelFeatureExtractor",
                 vfe_num_filters=[32, 128],
                 with_distance=False,
                 middle_class_name="MiddleExtractor",
                 middle_num_filters_d1=[64],
                 middle_num_filters_d2=[64, 64],
                 rpn_class_name="RPN",
                 rpn_layer_nums=[3, 5, 5],
                 rpn_layer_strides=[2, 2, 2],
                 rpn_num_filters=[128, 128, 256],
                 rpn_upsample_strides=[1, 2, 4],
                 rpn_num_upsample_filters=[256, 256, 256],
                 use_norm=True,
                 use_groupnorm=False,
                 num_groups=32,
                 use_sparse_rpn=False,
                 use_direction_classifier=True,
                 use_sigmoid_score=False,
                 encode_background_as_zeros=True,
                 use_rotate_nms=True,
                 multiclass_nms=False,
                 nms_score_threshold=0.5,
                 nms_pre_max_size=1000,
                 nms_post_max_size=20,
                 nms_iou_threshold=0.1,
                 target_assigner=None,
                 use_bev=False,
                 lidar_only=False,
                 cls_loss_weight=1.0,
                 loc_loss_weight=1.0,
                 pos_cls_weight=1.0,
                 neg_cls_weight=1.0,
                 direction_loss_weight=1.0,
                 loss_norm_type=LossNormType.NormByNumPositives,
                 encode_rad_error_by_sin=False,
                 loc_loss_ftor=None,
                 cls_loss_ftor=None,
                 voxel_size=(0.2, 0.2, 4),
                 pc_range=(0, -40, -3, 70.4, 40, 1),
                 name='voxelnet',
                 K=100,
                 hm_weight=1,
                 dim_weight=1,
                 rot_weight=1,
                 off_weight=1,
                 centernet_layers=50):
        """Store the configuration and build the extractors and detection head.

        Builds, in this order, the voxel feature extractor, the middle
        feature extractor and the ``centernet`` head returned by
        ``get_pose_net``, followed by the metric objects and the
        ``global_step`` buffer.

        Args:
            output_shape: Forwarded to the middle feature extractor.
            num_class: Stored as ``_num_class`` and used as the size of the
                ``'hm'`` head.
            num_input_features: Stored and forwarded to the voxel feature
                extractor.
            vfe_class_name: One of ``"VoxelFeatureExtractor"``,
                ``"VoxelFeatureExtractorV2"`` or ``"PillarFeatureNet"``.
            vfe_num_filters: Forwarded to the voxel feature extractor; its
                last entry is also passed to the middle extractor as
                ``num_input_features``.
            with_distance: Forwarded to the voxel feature extractor.
            middle_class_name: ``"PointPillarsScatter"`` or
                ``"MiddleExtractor"``.
            middle_num_filters_d1: Forwarded as ``num_filters_down1``; used
                only on the ``"MiddleExtractor"`` path.
            middle_num_filters_d2: Forwarded as ``num_filters_down2``; used
                only on the ``"MiddleExtractor"`` path.
            use_norm: Forwarded to the voxel feature extractor and to
                ``MiddleExtractor``.
            use_sparse_rpn: Stored as ``_use_sparse_rpn``.
            use_direction_classifier: Stored as
                ``_use_direction_classifier``.
            use_sigmoid_score: Stored and forwarded to the precision/recall
                metric.
            encode_background_as_zeros: Stored and forwarded to the metrics.
            use_rotate_nms: Stored as ``_use_rotate_nms``.
            multiclass_nms: Stored as ``_multiclass_nms``.
            nms_score_threshold: Stored as ``_nms_score_threshold``.
            nms_pre_max_size: Stored as ``_nms_pre_max_size``.
            nms_post_max_size: Stored as ``_nms_post_max_size``.
            nms_iou_threshold: Stored as ``_nms_iou_threshold``.
            target_assigner: Required despite the ``None`` default; its
                ``box_coder`` attribute is stored as ``_box_coder``.
            use_bev: Stored as ``_use_bev``.
            lidar_only: Stored as ``_lidar_only``.
            voxel_size: Forwarded to the voxel feature extractor only when
                ``vfe_class_name`` is ``"PillarFeatureNet"``.
            pc_range: Stored as ``pc_range``; also forwarded to the voxel
                feature extractor on the ``"PillarFeatureNet"`` path.
            name: Stored as ``name``.
            K: Stored as ``K``.
            hm_weight: Stored as ``hm_weight``.
            dim_weight: Stored as ``dim_weight``.
            rot_weight: Stored as ``rot_weight``.
            off_weight: Stored as ``off_weight``.
            centernet_layers: Stored and passed to ``get_pose_net`` as
                ``num_layers``.

        The following parameters are accepted for signature compatibility
        but are not read by this constructor: ``rpn_class_name``,
        ``rpn_layer_nums``, ``rpn_layer_strides``, ``rpn_num_filters``,
        ``rpn_upsample_strides``, ``rpn_num_upsample_filters``,
        ``use_groupnorm``, ``num_groups``, ``cls_loss_weight``,
        ``loc_loss_weight``, ``pos_cls_weight``, ``neg_cls_weight``,
        ``direction_loss_weight``, ``loss_norm_type``,
        ``encode_rad_error_by_sin``, ``loc_loss_ftor``, ``cls_loss_ftor``.

        Raises:
            ValueError: If ``target_assigner`` is ``None``.
            KeyError: If ``vfe_class_name`` is not a supported name, or if
                ``middle_class_name`` is neither ``"PointPillarsScatter"``
                nor ``"MiddleExtractor"``.
        """
        # ``target_assigner.box_coder`` is read unconditionally below, so
        # reject the ``None`` default with a message naming the argument.
        if target_assigner is None:
            raise ValueError(
                "target_assigner is required: its box_coder is stored on "
                "the network")

        super().__init__()
        self.name = name
        self._num_class = num_class
        self._use_rotate_nms = use_rotate_nms
        self._multiclass_nms = multiclass_nms
        self._nms_score_threshold = nms_score_threshold
        self._nms_pre_max_size = nms_pre_max_size
        self._nms_post_max_size = nms_post_max_size
        self._nms_iou_threshold = nms_iou_threshold
        self._use_sigmoid_score = use_sigmoid_score
        self._encode_background_as_zeros = encode_background_as_zeros
        self._use_sparse_rpn = use_sparse_rpn
        self._use_direction_classifier = use_direction_classifier
        self._use_bev = use_bev
        # Running totals, all starting at zero.
        self._total_forward_time = 0.0
        self._total_postprocess_time = 0.0
        self._total_inference_count = 0
        self._num_input_features = num_input_features
        self._box_coder = target_assigner.box_coder
        self._lidar_only = lidar_only

        self.K = K
        self.hm_weight = hm_weight
        self.dim_weight = dim_weight
        self.rot_weight = rot_weight
        self.off_weight = off_weight
        self.centernet_layers = centernet_layers
        self.pc_range = pc_range

        # Voxel feature extractor: a single constructor call; only
        # PillarFeatureNet additionally receives the voxel grid geometry.
        vfe_class_dict = {
            "VoxelFeatureExtractor": VoxelFeatureExtractor,
            "VoxelFeatureExtractorV2": VoxelFeatureExtractorV2,
            "PillarFeatureNet": PillarFeatureNet
        }
        vfe_class = vfe_class_dict[vfe_class_name]
        vfe_kwargs = {
            "num_filters": vfe_num_filters,
            "with_distance": with_distance,
        }
        if vfe_class_name == "PillarFeatureNet":
            vfe_kwargs["voxel_size"] = voxel_size
            vfe_kwargs["pc_range"] = pc_range
        self.voxel_feature_extractor = vfe_class(
            num_input_features, use_norm, **vfe_kwargs)

        # Middle feature extractor; its input width is the last VFE filter
        # count on both paths.
        print("middle_class_name", middle_class_name)
        if middle_class_name == "PointPillarsScatter":
            self.middle_feature_extractor = PointPillarsScatter(
                output_shape=output_shape,
                num_input_features=vfe_num_filters[-1])
        else:
            mid_class_dict = {
                "MiddleExtractor": MiddleExtractor
                # "SparseMiddleExtractor": SparseMiddleExtractor,
            }
            mid_class = mid_class_dict[middle_class_name]
            self.middle_feature_extractor = mid_class(
                output_shape,
                use_norm,
                num_input_features=vfe_num_filters[-1],
                num_filters_down1=middle_num_filters_d1,
                num_filters_down2=middle_num_filters_d2)

        #======================= New detection heads ==========================
        # Head name -> size handed to get_pose_net (presumably the number of
        # output channels per head -- confirm against get_pose_net).
        heads = {'hm': self._num_class, 'rot': 8, 'dim': 3, 'reg': 2}
        self.centernet = get_pose_net(num_layers=self.centernet_layers,
                                      heads=heads)
        #=======================================================================

        # Metric accumulators.
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.hm_loss = metrics.Scalar()
        self.dim_loss = metrics.Scalar()
        self.rot_loss = metrics.Scalar()
        self.total_loss = metrics.Scalar()

        # One-element integer buffer initialised to zero.
        self.register_buffer("global_step", torch.LongTensor(1).zero_())
    def __init__(self, target_assigner, nms_score_thresholds,
                 nms_iou_thresholds, nms_pre_max_sizes, nms_post_max_sizes,
                 loc_loss_ftor, cls_loss_ftor, loc_loss_weight,
                 cls_loss_weight):
        """Set up loss, post-processing and metric state.

        Most settings are hard-coded literals in the body; only the target
        assigner, the NMS settings and the loss functors/weights come from
        the caller.

        Args:
            target_assigner: Stored as ``target_assigner``; its ``box_coder``
                attribute is also stored as ``_box_coder``.
            nms_score_thresholds: Stored as ``_nms_score_thresholds``.
            nms_iou_thresholds: Stored as ``_nms_iou_thresholds``.
            nms_pre_max_sizes: Stored as ``_nms_pre_max_sizes``.
            nms_post_max_sizes: Stored as ``_nms_post_max_sizes``.
            loc_loss_ftor: Stored as ``_loc_loss_ftor``.
            cls_loss_ftor: Stored as ``_cls_loss_ftor``.
            loc_loss_weight: Stored as ``_loc_loss_weight``.
            cls_loss_weight: Stored as ``_cls_loss_weight``.
        """
        super().__init__()
        self.name = "pointpillars_loss"
        self.measure_time = False

        # Local constants reused below for the instance flags and metrics.
        encode_background_as_zeros = True
        use_sigmoid_score = True

        # Fixed model/loss configuration (not configurable by the caller).
        self._num_class = 4
        self._encode_rad_error_by_sin = True
        self._encode_background_as_zeros = encode_background_as_zeros
        self._sin_error_factor = 1.0
        self._num_direction_bins = 2
        self._dir_offset = 0
        self._dir_limit_offset = 1
        self._direction_loss_weight = 0.2
        self._use_direction_classifier = True

        self.target_assigner = target_assigner
        self._box_coder = target_assigner.box_coder

        self._pos_cls_weight = 1.0
        self._neg_cls_weight = 1.0
        self._loss_norm_type = LossNormType.NormByNumPositives

        # Caller-supplied loss functors and their weights.
        self._loc_loss_ftor = loc_loss_ftor
        self._cls_loss_ftor = cls_loss_ftor
        self._loc_loss_weight = loc_loss_weight
        self._cls_loss_weight = cls_loss_weight

        # Auxiliary loss functors are always created with default arguments.
        self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
        self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()

        # postprocess
        # NOTE(review): six literal bounds; the second entry (-80) is not the
        # mirror of the fifth (40), unlike the other pairs -- confirm this
        # asymmetry is intended.
        self._post_center_range = [-40, -80, -5, 40, 40, 5]
        self._use_sigmoid_score = True
        self._use_rotate_nms = False
        self._multiclass_nms = False
        self._nms_class_agnostic = False
        self._nms_score_thresholds = nms_score_thresholds
        self._nms_iou_thresholds = nms_iou_thresholds
        self._nms_pre_max_sizes = nms_pre_max_sizes
        self._nms_post_max_sizes = nms_post_max_sizes

        # Metric accumulators.
        self.rpn_acc = metrics.Accuracy(
            dim=-1, encode_background_as_zeros=encode_background_as_zeros)
        self.rpn_precision = metrics.Precision(dim=-1)
        self.rpn_recall = metrics.Recall(dim=-1)
        self.rpn_metrics = metrics.PrecisionRecall(
            dim=-1,
            thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
            use_sigmoid_score=use_sigmoid_score,
            encode_background_as_zeros=encode_background_as_zeros)

        self.rpn_cls_loss = metrics.Scalar()
        self.rpn_loc_loss = metrics.Scalar()
        self.rpn_total_loss = metrics.Scalar()

        # One-element integer buffer initialised to zero.
        self.register_buffer("global_step", torch.LongTensor(1).zero_())

        # Timing bookkeeping starts empty.
        self._time_dict = {}
        self._time_total_dict = {}
        self._time_count_dict = {}