def __init__(self,
             output_shape,
             num_class=2,
             num_input_features=4,
             vfe_class_name="VoxelFeatureExtractor",
             vfe_num_filters=[32, 128],
             with_distance=False,
             middle_class_name="SparseMiddleExtractor",
             middle_num_input_features=-1,
             middle_num_filters_d1=[64],
             middle_num_filters_d2=[64, 64],
             rpn_class_name="RPN",
             rpn_num_input_features=-1,
             rpn_layer_nums=[3, 5, 5],
             rpn_layer_strides=[2, 2, 2],
             rpn_num_filters=[128, 128, 256],
             rpn_upsample_strides=[1, 2, 4],
             rpn_num_upsample_filters=[256, 256, 256],
             use_norm=True,
             use_groupnorm=False,
             num_groups=32,
             use_direction_classifier=True,
             use_sigmoid_score=False,
             encode_background_as_zeros=True,
             use_rotate_nms=True,
             multiclass_nms=False,
             nms_score_thresholds=None,
             nms_pre_max_sizes=None,
             nms_post_max_sizes=None,
             nms_iou_thresholds=None,
             target_assigner=None,
             cls_loss_weight=1.0,
             loc_loss_weight=1.0,
             pos_cls_weight=1.0,
             neg_cls_weight=1.0,
             direction_loss_weight=1.0,
             loss_norm_type=LossNormType.NormByNumPositives,
             encode_rad_error_by_sin=False,
             loc_loss_ftor=None,
             cls_loss_ftor=None,
             measure_time=False,
             voxel_generator=None,
             post_center_range=None,
             dir_offset=0.0,
             sin_error_factor=1.0,
             nms_class_agnostic=False,
             num_direction_bins=2,
             direction_limit_offset=0,
             name='voxelnet'):
    """Record the configuration and assemble the three network stages.

    The arguments are copied onto the instance, then the voxel feature
    extractor, the middle feature extractor and the RPN are looked up by
    name through ``voxel_encoder.get_vfe_class``,
    ``middle.get_middle_class`` and ``rpn.get_rpn_class`` and instantiated.
    Finally the metric trackers, the ``global_step`` buffer and three empty
    timing dictionaries are created.

    Args:
        output_shape: First positional argument of the middle extractor.
        num_class: Stored as ``_num_class`` and forwarded to the RPN.
        num_input_features: Stored and passed to the voxel feature
            extractor.
        vfe_class_name, vfe_num_filters, with_distance: Select and
            configure the voxel feature extractor.
        middle_class_name, middle_num_input_features,
        middle_num_filters_d1, middle_num_filters_d2: Select and configure
            the middle feature extractor.
        rpn_class_name, rpn_num_input_features, rpn_layer_nums,
        rpn_layer_strides, rpn_num_filters, rpn_upsample_strides,
        rpn_num_upsample_filters, use_groupnorm, num_groups: Select and
            configure the RPN.
        use_norm: Passed to the voxel and middle extractors only.
        use_direction_classifier, use_sigmoid_score,
        encode_background_as_zeros, encode_rad_error_by_sin,
        sin_error_factor, dir_offset, direction_limit_offset,
        num_direction_bins: Stored on the instance; some are also
            forwarded to the RPN or the metrics.
        use_rotate_nms, multiclass_nms, nms_class_agnostic,
        nms_score_thresholds, nms_pre_max_sizes, nms_post_max_sizes,
        nms_iou_thresholds: Stored on the instance, not used here.
        target_assigner: Must expose ``box_coder`` (with ``code_size``)
            and ``num_anchors_per_location``.
        cls_loss_weight, loc_loss_weight, pos_cls_weight, neg_cls_weight,
        direction_loss_weight, loss_norm_type, loc_loss_ftor,
        cls_loss_ftor: Loss configuration, stored on the instance.
        measure_time: Stored as ``measure_time``.
        voxel_generator: Must expose ``voxel_size`` and
            ``point_cloud_range``.
        post_center_range: Stored as given, or ``[]`` when falsy.
        name: Stored as ``name``.

    NOTE(review): ``target_assigner`` and ``voxel_generator`` default to
    ``None`` but are dereferenced unconditionally, so both are effectively
    required. The list-valued defaults are shared between calls; this
    constructor only passes them along.
    """
    super().__init__()

    # General flags and sizes.
    self.name = name
    self.measure_time = measure_time
    self._num_class = num_class
    self._num_input_features = num_input_features
    self._num_direction_bins = num_direction_bins
    self._use_direction_classifier = use_direction_classifier
    self._use_sigmoid_score = use_sigmoid_score
    self._encode_background_as_zeros = encode_background_as_zeros

    # Angle / direction encoding.
    self._encode_rad_error_by_sin = encode_rad_error_by_sin
    self._sin_error_factor = sin_error_factor
    self._dir_offset = dir_offset
    self._dir_limit_offset = direction_limit_offset

    # NMS settings (only stored here).
    self._use_rotate_nms = use_rotate_nms
    self._multiclass_nms = multiclass_nms
    self._nms_class_agnostic = nms_class_agnostic
    self._nms_score_thresholds = nms_score_thresholds
    self._nms_pre_max_sizes = nms_pre_max_sizes
    self._nms_post_max_sizes = nms_post_max_sizes
    self._nms_iou_thresholds = nms_iou_thresholds

    # Loss weighting.
    self._loss_norm_type = loss_norm_type
    self._pos_cls_weight = pos_cls_weight
    self._neg_cls_weight = neg_cls_weight
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._direction_loss_weight = direction_loss_weight

    # Collaborating objects, kept in their original assignment order.
    self._box_coder = target_assigner.box_coder
    self.target_assigner = target_assigner
    self.voxel_generator = voxel_generator
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor
    self._post_center_range = post_center_range or []

    # Stage 1: voxel feature extractor.
    vfe_factory = voxel_encoder.get_vfe_class(vfe_class_name)
    self.voxel_feature_extractor = vfe_factory(
        num_input_features,
        use_norm,
        num_filters=vfe_num_filters,
        with_distance=with_distance,
        voxel_size=voxel_generator.voxel_size,
        pc_range=voxel_generator.point_cloud_range,
    )

    # Stage 2: middle feature extractor.
    middle_factory = middle.get_middle_class(middle_class_name)
    self.middle_feature_extractor = middle_factory(
        output_shape,
        use_norm,
        num_input_features=middle_num_input_features,
        num_filters_down1=middle_num_filters_d1,
        num_filters_down2=middle_num_filters_d2,
    )

    # Stage 3: region proposal network.
    # NOTE(review): use_norm is hard-coded to True for the RPN; the
    # `use_norm` argument does not reach it.
    rpn_factory = rpn.get_rpn_class(rpn_class_name)
    rpn_options = {
        "use_norm": True,
        "num_class": num_class,
        "layer_nums": rpn_layer_nums,
        "layer_strides": rpn_layer_strides,
        "num_filters": rpn_num_filters,
        "upsample_strides": rpn_upsample_strides,
        "num_upsample_filters": rpn_num_upsample_filters,
        "num_input_features": rpn_num_input_features,
        "num_anchor_per_loc": target_assigner.num_anchors_per_location,
        "encode_background_as_zeros": encode_background_as_zeros,
        "use_direction_classifier": use_direction_classifier,
        "use_groupnorm": use_groupnorm,
        "num_groups": num_groups,
        "box_code_size": target_assigner.box_coder.code_size,
        "num_direction_bins": num_direction_bins,
    }
    self.rpn = rpn_factory(**rpn_options)

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    pr_thresholds = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=pr_thresholds,
        use_sigmoid_score=use_sigmoid_score,
        encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()

    # One-element integer buffer initialised to zero.
    step_counter = torch.LongTensor(1).zero_()
    self.register_buffer("global_step", step_counter)

    # Timing bookkeeping, initially empty.
    self._time_dict = {}
    self._time_total_dict = {}
    self._time_count_dict = {}
def __init__(self,
             output_shape,
             num_class=2,
             num_input_features=4,
             vfe_class_name="VoxelFeatureExtractor",
             vfe_num_filters=[32, 128],
             with_distance=False,
             middle_class_name="SparseMiddleExtractor",
             middle_num_filters_d1=[64],
             middle_num_filters_d2=[64, 64],
             rpn_class_name="RPN",
             rpn_layer_nums=[3, 5, 5],
             rpn_layer_strides=[2, 2, 2],
             rpn_num_filters=[128, 128, 256],
             rpn_upsample_strides=[1, 2, 4],
             rpn_num_upsample_filters=[256, 256, 256],
             use_norm=True,
             use_groupnorm=False,
             num_groups=32,
             use_sparse_rpn=False,
             use_direction_classifier=True,
             use_sigmoid_score=False,
             encode_background_as_zeros=True,
             use_rotate_nms=True,
             multiclass_nms=False,
             nms_score_threshold=0.5,
             nms_pre_max_size=1000,
             nms_post_max_size=20,
             nms_iou_threshold=0.1,
             target_assigner=None,
             use_bev=False,
             lidar_only=False,
             cls_loss_weight=1.0,
             loc_loss_weight=1.0,
             pos_cls_weight=1.0,
             neg_cls_weight=1.0,
             direction_loss_weight=1.0,
             loss_norm_type=LossNormType.NormByNumPositives,
             encode_rad_error_by_sin=False,
             loc_loss_ftor=None,
             cls_loss_ftor=None,
             name='voxelnet'):
    """Record the configuration and assemble the three network stages.

    Sub-network classes are chosen by name from small local lookup tables.
    The middle extractor receives ``vfe_num_filters[-1]`` as its input
    feature count. The RPN receives twice the last filter count of the
    deepest non-empty stage (``middle_num_filters_d2``, then
    ``middle_num_filters_d1``, then ``vfe_num_filters``) as
    ``num_input_filters``.

    Args:
        output_shape: First positional argument of the middle extractor.
        num_class: Stored as ``_num_class`` and forwarded to the RPN.
        num_input_features: Stored and passed to the voxel feature
            extractor.
        vfe_class_name: Key into the voxel-feature-extractor table;
            unknown names raise ``KeyError``.
        vfe_num_filters, with_distance: Passed to the voxel feature
            extractor.
        middle_class_name: Key into the middle-extractor table.
        middle_num_filters_d1, middle_num_filters_d2: Passed to the middle
            extractor and used to derive the RPN input width.
        rpn_class_name: Key into the RPN table.
        rpn_layer_nums, rpn_layer_strides, rpn_num_filters,
        rpn_upsample_strides, rpn_num_upsample_filters, use_groupnorm,
        num_groups, use_bev: Forwarded to the RPN.
        use_norm: Passed to the voxel and middle extractors only.
        use_sparse_rpn, lidar_only, use_direction_classifier,
        use_sigmoid_score, encode_background_as_zeros,
        encode_rad_error_by_sin: Stored on the instance; some are also
            forwarded to the RPN or the metrics.
        use_rotate_nms, multiclass_nms, nms_score_threshold,
        nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored on
            the instance, not used here.
        target_assigner: Must expose ``box_coder`` (with ``code_size``)
            and ``num_anchors_per_location``.
        cls_loss_weight, loc_loss_weight, pos_cls_weight, neg_cls_weight,
        direction_loss_weight, loss_norm_type, loc_loss_ftor,
        cls_loss_ftor: Loss configuration, stored on the instance.
        name: Stored as ``name``.

    NOTE(review): ``target_assigner`` defaults to ``None`` but is
    dereferenced unconditionally, so it is effectively required. The
    list-valued defaults are shared between calls; this constructor only
    reads them.
    """
    super().__init__()

    # General flags and sizes.
    self.name = name
    self._num_class = num_class
    self._num_input_features = num_input_features
    self._use_sparse_rpn = use_sparse_rpn
    self._use_bev = use_bev
    self._lidar_only = lidar_only
    self._use_direction_classifier = use_direction_classifier
    self._use_sigmoid_score = use_sigmoid_score
    self._encode_background_as_zeros = encode_background_as_zeros
    self._encode_rad_error_by_sin = encode_rad_error_by_sin

    # NMS settings (only stored here).
    self._use_rotate_nms = use_rotate_nms
    self._multiclass_nms = multiclass_nms
    self._nms_score_threshold = nms_score_threshold
    self._nms_pre_max_size = nms_pre_max_size
    self._nms_post_max_size = nms_post_max_size
    self._nms_iou_threshold = nms_iou_threshold

    # Running totals, all starting at zero.
    self._total_forward_time = 0.0
    self._total_postprocess_time = 0.0
    self._total_inference_count = 0

    # Loss weighting.
    self._loss_norm_type = loss_norm_type
    self._pos_cls_weight = pos_cls_weight
    self._neg_cls_weight = neg_cls_weight
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._direction_loss_weight = direction_loss_weight

    # Collaborating objects, kept in their original assignment order.
    self._box_coder = target_assigner.box_coder
    self.target_assigner = target_assigner
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor

    # Stage 1: voxel feature extractor.
    vfe_registry = {
        "VoxelFeatureExtractor": VoxelFeatureExtractor,
        "VoxelFeatureExtractorV2": VoxelFeatureExtractorV2,
    }
    vfe_factory = vfe_registry[vfe_class_name]
    self.voxel_feature_extractor = vfe_factory(
        num_input_features,
        use_norm,
        num_filters=vfe_num_filters,
        with_distance=with_distance)

    # Stage 2: middle feature extractor, fed by the last VFE layer.
    middle_registry = {
        "MiddleExtractor": MiddleExtractor,
        "SparseMiddleExtractor": SparseMiddleExtractor,
    }
    middle_factory = middle_registry[middle_class_name]
    self.middle_feature_extractor = middle_factory(
        output_shape,
        use_norm,
        num_input_features=vfe_num_filters[-1],
        num_filters_down1=middle_num_filters_d1,
        num_filters_down2=middle_num_filters_d2)

    # RPN input width comes from the deepest stage that has filters.
    if len(middle_num_filters_d2) > 0:
        num_rpn_input_filters = middle_num_filters_d2[-1]
    elif len(middle_num_filters_d1) > 0:
        num_rpn_input_filters = middle_num_filters_d1[-1]
    else:
        num_rpn_input_filters = vfe_num_filters[-1]

    # Stage 3: region proposal network.
    # NOTE(review): use_norm is hard-coded to True for the RPN; the
    # `use_norm` argument does not reach it.
    rpn_registry = {
        "RPN": RPN,
    }
    rpn_factory = rpn_registry[rpn_class_name]
    rpn_options = {
        "use_norm": True,
        "num_class": num_class,
        "layer_nums": rpn_layer_nums,
        "layer_strides": rpn_layer_strides,
        "num_filters": rpn_num_filters,
        "upsample_strides": rpn_upsample_strides,
        "num_upsample_filters": rpn_num_upsample_filters,
        "num_input_filters": num_rpn_input_filters * 2,
        "num_anchor_per_loc": target_assigner.num_anchors_per_location,
        "encode_background_as_zeros": encode_background_as_zeros,
        "use_direction_classifier": use_direction_classifier,
        "use_bev": use_bev,
        "use_groupnorm": use_groupnorm,
        "num_groups": num_groups,
        "box_code_size": target_assigner.box_coder.code_size,
    }
    self.rpn = rpn_factory(**rpn_options)

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    pr_thresholds = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=pr_thresholds,
        use_sigmoid_score=use_sigmoid_score,
        encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()

    # One-element integer buffer initialised to zero.
    step_counter = torch.LongTensor(1).zero_()
    self.register_buffer("global_step", step_counter)
def __init__(self,
             output_shape,
             num_class=2,
             num_input_features=4,
             vfe_class_name="VoxelFeatureExtractor",
             vfe_num_filters=[32, 128],
             with_distance=False,
             middle_class_name="SparseMiddleExtractor",
             middle_num_input_features=-1,
             middle_num_filters_d1=[64],
             middle_num_filters_d2=[64, 64],
             rpn_class_name="RPN",
             rpn_num_input_features=-1,
             rpn_layer_nums=[3, 5, 5],
             rpn_layer_strides=[2, 2, 2],
             rpn_num_filters=[128, 128, 256],
             rpn_upsample_strides=[1, 2, 4],
             rpn_num_upsample_filters=[256, 256, 256],
             use_norm=True,
             use_groupnorm=False,
             num_groups=32,
             use_direction_classifier=True,
             use_sigmoid_score=False,
             encode_background_as_zeros=True,
             use_rotate_nms=True,
             multiclass_nms=False,
             nms_score_threshold=0.5,
             nms_pre_max_size=1000,
             nms_post_max_size=20,
             nms_iou_threshold=0.1,
             target_assigner=None,
             cls_loss_weight=1.0,
             loc_loss_weight=1.0,
             pos_cls_weight=1.0,
             neg_cls_weight=1.0,
             direction_loss_weight=1.0,
             loss_norm_type=LossNormType.NormByNumPositives,
             encode_rad_error_by_sin=False,
             loc_loss_ftor=None,
             cls_loss_ftor=None,
             measure_time=False,
             voxel_generator=None,
             post_center_range=None,
             name='voxelnet'):
    """Record the configuration and assemble the three network stages.

    Each sub-network class is selected by name from a lookup table defined
    inline; an unknown name raises ``KeyError``. After the networks are
    built, the metric trackers, the ``global_step`` buffer and three empty
    timing dictionaries are created.

    Args:
        output_shape: First positional argument of the middle extractor.
        num_class: Stored as ``_num_class`` and forwarded to the RPN.
        num_input_features: Stored and passed to the voxel feature
            extractor.
        vfe_class_name, vfe_num_filters, with_distance: Select and
            configure the voxel feature extractor.
        middle_class_name, middle_num_input_features,
        middle_num_filters_d1, middle_num_filters_d2: Select and configure
            the middle feature extractor.
        rpn_class_name, rpn_num_input_features, rpn_layer_nums,
        rpn_layer_strides, rpn_num_filters, rpn_upsample_strides,
        rpn_num_upsample_filters, use_groupnorm, num_groups: Select and
            configure the RPN.
        use_norm: Passed to the voxel and middle extractors only.
        use_direction_classifier, use_sigmoid_score,
        encode_background_as_zeros, encode_rad_error_by_sin: Stored on the
            instance; some are also forwarded to the RPN or the metrics.
        use_rotate_nms, multiclass_nms, nms_score_threshold,
        nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored on
            the instance, not used here.
        target_assigner: Must expose ``box_coder`` (with ``code_size``)
            and ``num_anchors_per_location``.
        cls_loss_weight, loc_loss_weight, pos_cls_weight, neg_cls_weight,
        direction_loss_weight, loss_norm_type, loc_loss_ftor,
        cls_loss_ftor: Loss configuration, stored on the instance.
        measure_time: Stored as ``measure_time``.
        voxel_generator: Must expose ``voxel_size`` and
            ``point_cloud_range``.
        post_center_range: Stored as given, or ``[]`` when falsy.
        name: Stored as ``name``.

    NOTE(review): ``target_assigner`` and ``voxel_generator`` default to
    ``None`` but are dereferenced unconditionally, so both are effectively
    required. The list-valued defaults are shared between calls; this
    constructor only passes them along.
    """
    super().__init__()

    # General flags and sizes.
    self.name = name
    self.measure_time = measure_time
    self._num_class = num_class
    self._num_input_features = num_input_features
    self._use_direction_classifier = use_direction_classifier
    self._use_sigmoid_score = use_sigmoid_score
    self._encode_background_as_zeros = encode_background_as_zeros
    self._encode_rad_error_by_sin = encode_rad_error_by_sin

    # NMS settings (only stored here).
    self._use_rotate_nms = use_rotate_nms
    self._multiclass_nms = multiclass_nms
    self._nms_score_threshold = nms_score_threshold
    self._nms_pre_max_size = nms_pre_max_size
    self._nms_post_max_size = nms_post_max_size
    self._nms_iou_threshold = nms_iou_threshold

    # Loss weighting.
    self._loss_norm_type = loss_norm_type
    self._pos_cls_weight = pos_cls_weight
    self._neg_cls_weight = neg_cls_weight
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._direction_loss_weight = direction_loss_weight

    # Collaborating objects, kept in their original assignment order.
    self._box_coder = target_assigner.box_coder
    self.target_assigner = target_assigner
    self.voxel_generator = voxel_generator
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor
    self._post_center_range = post_center_range or []

    # Stage 1: voxel feature extractor.
    vfe_registry = {
        "VoxelFeatureExtractor": voxel_encoder.VoxelFeatureExtractor,
        "VoxelFeatureExtractorV2": voxel_encoder.VoxelFeatureExtractorV2,
        "VoxelFeatureExtractorV3": voxel_encoder.VoxelFeatureExtractorV3,
        "SimpleVoxel": voxel_encoder.SimpleVoxel,
        "PillarFeatureNet": pointpillars.PillarFeatureNet,
    }
    vfe_factory = vfe_registry[vfe_class_name]
    self.voxel_feature_extractor = vfe_factory(
        num_input_features,
        use_norm,
        num_filters=vfe_num_filters,
        with_distance=with_distance,
        voxel_size=voxel_generator.voxel_size,
        pc_range=voxel_generator.point_cloud_range,
    )

    # Stage 2: middle feature extractor.
    middle_registry = {
        "SparseMiddleExtractor": middle.SparseMiddleExtractor,
        "SpMiddleD4HD": middle.SpMiddleD4HD,
        "SpMiddleD8HD": middle.SpMiddleD8HD,
        "SpMiddleFHD": middle.SpMiddleFHD,
        "SpMiddleFHDV2": middle.SpMiddleFHDV2,
        "SpMiddleFHDLarge": middle.SpMiddleFHDLarge,
        "SpMiddleResNetFHD": middle.SpMiddleResNetFHD,
        "SpMiddleD4HDLite": middle.SpMiddleD4HDLite,
        "SpMiddleFHDLite": middle.SpMiddleFHDLite,
        "SpMiddle2K": middle.SpMiddle2K,
        "SpMiddleFHDPeople": middle.SpMiddleFHDPeople,
        "SpMiddle2KPeople": middle.SpMiddle2KPeople,
        "SpMiddleHDLite": middle.SpMiddleHDLite,
        "PointPillarsScatter": pointpillars.PointPillarsScatter,
    }
    middle_factory = middle_registry[middle_class_name]
    self.middle_feature_extractor = middle_factory(
        output_shape,
        use_norm,
        num_input_features=middle_num_input_features,
        num_filters_down1=middle_num_filters_d1,
        num_filters_down2=middle_num_filters_d2)

    # Stage 3: region proposal network.
    # NOTE(review): use_norm is hard-coded to True for the RPN; the
    # `use_norm` argument does not reach it.
    rpn_registry = {
        "RPN": rpn.RPN,
        "RPNV2": rpn.RPNV2,
        "ResNetRPN": rpn.ResNetRPN,
    }
    rpn_factory = rpn_registry[rpn_class_name]
    rpn_options = {
        "use_norm": True,
        "num_class": num_class,
        "layer_nums": rpn_layer_nums,
        "layer_strides": rpn_layer_strides,
        "num_filters": rpn_num_filters,
        "upsample_strides": rpn_upsample_strides,
        "num_upsample_filters": rpn_num_upsample_filters,
        "num_input_features": rpn_num_input_features,
        "num_anchor_per_loc": target_assigner.num_anchors_per_location,
        "encode_background_as_zeros": encode_background_as_zeros,
        "use_direction_classifier": use_direction_classifier,
        "use_groupnorm": use_groupnorm,
        "num_groups": num_groups,
        "box_code_size": target_assigner.box_coder.code_size,
    }
    self.rpn = rpn_factory(**rpn_options)

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    pr_thresholds = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=pr_thresholds,
        use_sigmoid_score=use_sigmoid_score,
        encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()

    # One-element integer buffer initialised to zero.
    step_counter = torch.LongTensor(1).zero_()
    self.register_buffer("global_step", step_counter)

    # Timing bookkeeping, initially empty.
    self._time_dict = {}
    self._time_total_dict = {}
    self._time_count_dict = {}
def __init__(
        self,
        output_shape,
        num_class=2,
        num_input_features=4,
        vfe_class_name="VoxelFeatureExtractor",
        vfe_num_filters=[32, 128],
        with_distance=False,
        middle_class_name="SparseMiddleExtractor",
        middle_num_input_features=-1,
        middle_num_filters_d1=[64],
        middle_num_filters_d2=[64, 64],
        rpn_class_name="RPN",
        rpn_num_input_features=-1,
        rpn_layer_nums=[3, 5, 5],
        rpn_layer_strides=[2, 2, 2],
        rpn_num_filters=[128, 128, 256],
        rpn_upsample_strides=[1, 2, 4],
        rpn_num_upsample_filters=[256, 256, 256],
        use_norm=True,
        use_groupnorm=False,
        num_groups=32,
        use_sparse_rpn=False,
        use_voxel_classifier=False,
        use_direction_classifier=True,
        use_sigmoid_score=False,
        encode_background_as_zeros=True,
        use_rotate_nms=True,
        multiclass_nms=False,
        nms_score_threshold=0.5,
        nms_pre_max_size=1000,
        nms_post_max_size=20,
        nms_iou_threshold=0.1,
        target_assigner=None,
        use_bev=False,
        use_rc_net=False,
        lidar_only=False,
        cls_loss_weight=1.0,
        loc_loss_weight=1.0,
        pos_cls_weight=1.0,
        neg_cls_weight=1.0,
        direction_loss_weight=1.0,
        loss_norm_type=LossNormType.NormByNumPositives,
        encode_rad_error_by_sin=False,
        loc_loss_ftor=None,
        cls_loss_ftor=None,
        measure_time=False,
        name='voxelnet',
        use_iou_branch=False,
        iou_dict=None,
        iou_loss_weight=1.0,
        iou_loss_ftor=None,
        use_iou_param_partaa=False,
):
    """Record the configuration and build either a sparse RPN or the
    middle-extractor + RPN pair.

    The voxel feature extractor is always built. When ``use_sparse_rpn``
    is true a single ``rpn.SparseRPN`` is stored as ``sparse_rpn``;
    otherwise a middle feature extractor and an RPN are selected by name
    and stored as ``middle_feature_extractor`` and ``rpn`` (and
    ``rpn_class_name`` is recorded). Metric trackers, the ``global_step``
    buffer and three empty timing dictionaries are created in both cases.

    Args:
        output_shape: First positional argument of the sparse RPN or the
            middle extractor.
        num_class: Stored as ``_num_class`` and forwarded to the RPN.
        num_input_features: Stored and passed to the voxel feature
            extractor.
        vfe_class_name, vfe_num_filters, with_distance: Select and
            configure the voxel feature extractor.
        middle_class_name, middle_num_input_features: Select and configure
            the middle extractor (non-sparse path only).
        middle_num_filters_d1, middle_num_filters_d2: Passed to whichever
            of the sparse RPN or middle extractor is built; also used to
            derive the sparse RPN input width.
        rpn_class_name, rpn_num_input_features: Select and configure the
            RPN (non-sparse path only).
        rpn_layer_nums, rpn_layer_strides, rpn_num_filters,
        rpn_upsample_strides, rpn_num_upsample_filters, use_groupnorm,
        num_groups, use_bev: Forwarded to whichever RPN is built.
        use_norm: Passed to the voxel and middle extractors only.
        use_sparse_rpn: Chooses the branch; also stored.
        lidar_only, use_direction_classifier, use_sigmoid_score,
        encode_background_as_zeros, encode_rad_error_by_sin: Stored on the
            instance; some are also forwarded to the RPN or the metrics.
        use_rotate_nms, multiclass_nms, nms_score_threshold,
        nms_pre_max_size, nms_post_max_size, nms_iou_threshold: Stored on
            the instance, not used here.
        target_assigner: Must expose ``box_coder`` (with ``code_size``)
            and ``num_anchors_per_location``.
        cls_loss_weight, loc_loss_weight, pos_cls_weight, neg_cls_weight,
        direction_loss_weight, loss_norm_type, loc_loss_ftor,
        cls_loss_ftor: Loss configuration, stored on the instance.
        measure_time: Stored as ``measure_time``.
        name: Stored as ``name``.
        use_voxel_classifier, use_rc_net, use_iou_branch, iou_dict,
        iou_loss_weight, iou_loss_ftor, use_iou_param_partaa: Accepted but
            not referenced anywhere in this constructor.

    NOTE(review): ``target_assigner`` defaults to ``None`` but is
    dereferenced unconditionally, so it is effectively required. The
    list-valued defaults are shared between calls; this constructor only
    reads them.
    """
    super().__init__()

    # General flags and sizes.
    self.name = name
    self.measure_time = measure_time
    self._num_class = num_class
    self._num_input_features = num_input_features
    self._use_sparse_rpn = use_sparse_rpn
    self._use_bev = use_bev
    self._lidar_only = lidar_only
    self._use_direction_classifier = use_direction_classifier
    self._use_sigmoid_score = use_sigmoid_score
    self._encode_background_as_zeros = encode_background_as_zeros
    self._encode_rad_error_by_sin = encode_rad_error_by_sin

    # NMS settings (only stored here).
    self._use_rotate_nms = use_rotate_nms
    self._multiclass_nms = multiclass_nms
    self._nms_score_threshold = nms_score_threshold
    self._nms_pre_max_size = nms_pre_max_size
    self._nms_post_max_size = nms_post_max_size
    self._nms_iou_threshold = nms_iou_threshold

    # Loss weighting.
    self._loss_norm_type = loss_norm_type
    self._pos_cls_weight = pos_cls_weight
    self._neg_cls_weight = neg_cls_weight
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._direction_loss_weight = direction_loss_weight

    # Collaborating objects, kept in their original assignment order.
    self._box_coder = target_assigner.box_coder
    self.target_assigner = target_assigner
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor

    # Voxel feature extractor (built on both paths).
    vfe_registry = {
        "VoxelFeatureExtractor": voxel_encoder.VoxelFeatureExtractor,
        "VoxelFeatureExtractorV2": voxel_encoder.VoxelFeatureExtractorV2,
        "VoxelFeatureExtractorV3": voxel_encoder.VoxelFeatureExtractorV3,
        "SimpleVoxel": voxel_encoder.SimpleVoxel
    }
    vfe_factory = vfe_registry[vfe_class_name]
    self.voxel_feature_extractor = vfe_factory(
        num_input_features,
        use_norm,
        num_filters=vfe_num_filters,
        with_distance=with_distance)

    # Width of the deepest stage that has filters; only the sparse RPN
    # path consumes it, but it is always computed.
    if len(middle_num_filters_d2) > 0:
        num_rpn_input_filters = middle_num_filters_d2[-1]
    elif len(middle_num_filters_d1) > 0:
        num_rpn_input_filters = middle_num_filters_d1[-1]
    else:
        num_rpn_input_filters = vfe_num_filters[-1]

    # Options shared verbatim by both RPN flavours, in call order.
    # NOTE(review): use_norm is hard-coded to True for both RPNs; the
    # `use_norm` argument does not reach them.
    if use_sparse_rpn:
        # don't use this. just for fun.
        # (num_input_features=vfe_num_filters[-1] was disabled upstream.)
        self.sparse_rpn = rpn.SparseRPN(
            output_shape,
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2,
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_features=num_rpn_input_filters * 2,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_bev=use_bev,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size)
    else:
        # NOTE(review): the source formatting was collapsed; the middle
        # extractor and the RPN are both assumed to belong to this
        # branch - confirm against the original file.
        middle_registry = {
            "SparseMiddleExtractor": middle.SparseMiddleExtractor,
            "SpMiddleD4HD": middle.SpMiddleD4HD,
            "SpMiddleD8HD": middle.SpMiddleD8HD,
            "SpMiddleFHD": middle.SpMiddleFHD,
            "SpMiddleFHDV2": middle.SpMiddleFHDV2,
            "SpMiddleFHDLarge": middle.SpMiddleFHDLarge,
            "SpMiddleResNetFHD": middle.SpMiddleResNetFHD,
            "SpMiddleD4HDLite": middle.SpMiddleD4HDLite,
            "SpMiddleFHDLite": middle.SpMiddleFHDLite,
            "SpMiddle2K": middle.SpMiddle2K,
        }
        middle_factory = middle_registry[middle_class_name]
        self.middle_feature_extractor = middle_factory(
            output_shape,
            use_norm,
            num_input_features=middle_num_input_features,
            num_filters_down1=middle_num_filters_d1,
            num_filters_down2=middle_num_filters_d2)

        rpn_registry = {
            "RPN": rpn.RPN,
            "RPNV2": rpn.RPNV2,
            "RPN_FUSION": rpn.RPN_FUSION,
        }
        self.rpn_class_name = rpn_class_name
        rpn_factory = rpn_registry[self.rpn_class_name]
        self.rpn = rpn_factory(
            use_norm=True,
            num_class=num_class,
            layer_nums=rpn_layer_nums,
            layer_strides=rpn_layer_strides,
            num_filters=rpn_num_filters,
            upsample_strides=rpn_upsample_strides,
            num_upsample_filters=rpn_num_upsample_filters,
            num_input_features=rpn_num_input_features,
            num_anchor_per_loc=target_assigner.num_anchors_per_location,
            encode_background_as_zeros=encode_background_as_zeros,
            use_direction_classifier=use_direction_classifier,
            use_bev=use_bev,
            use_groupnorm=use_groupnorm,
            num_groups=num_groups,
            box_code_size=target_assigner.box_coder.code_size)

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    pr_thresholds = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=pr_thresholds,
        use_sigmoid_score=use_sigmoid_score,
        encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()

    # One-element integer buffer initialised to zero.
    step_counter = torch.LongTensor(1).zero_()
    self.register_buffer("global_step", step_counter)

    # Timing bookkeeping, initially empty.
    self._time_dict = {}
    self._time_total_dict = {}
    self._time_count_dict = {}
def __init__(self,
             output_shape,
             num_class=2,
             num_input_features=4,
             vfe_class_name="VoxelFeatureExtractor",
             vfe_num_filters=[32, 128],
             with_distance=False,
             middle_class_name="SparseMiddleExtractor",
             middle_num_input_features=-1,
             middle_num_filters_d1=[64],
             middle_num_filters_d2=[64, 64],
             rpn_class_name="RPN",
             rpn_num_input_features=-1,
             rpn_layer_nums=[3, 5, 5],
             rpn_layer_strides=[2, 2, 2],
             rpn_num_filters=[128, 128, 256],
             rpn_upsample_strides=[1, 2, 4],
             rpn_num_upsample_filters=[256, 256, 256],
             use_norm=True,
             use_groupnorm=False,
             num_groups=32,
             use_direction_classifier=True,
             use_sigmoid_score=False,
             encode_background_as_zeros=True,
             use_rotate_nms=True,
             multiclass_nms=False,
             nms_score_thresholds=None,
             nms_pre_max_sizes=None,
             nms_post_max_sizes=None,
             nms_iou_thresholds=None,
             target_assigner=None,
             cls_loss_weight=1.0,
             loc_loss_weight=1.0,
             pos_cls_weight=1.0,
             neg_cls_weight=1.0,
             direction_loss_weight=1.0,
             loss_norm_type=LossNormType.NormByNumPositives,
             encode_rad_error_by_sin=False,
             loc_loss_ftor=None,
             cls_loss_ftor=None,
             measure_time=False,
             voxel_generator=None,
             post_center_range=None,
             dir_offset=0.0,
             sin_error_factor=1.0,
             nms_class_agnostic=False,
             num_direction_bins=2,
             direction_limit_offset=0,
             name='voxelnet'):
    """Record the configuration, build the network stages and set up
    per-stage timing.

    The arguments are copied onto the instance, then the voxel feature
    extractor, the middle feature extractor and the RPN are looked up by
    name through ``voxel_encoder.get_vfe_class``,
    ``middle.get_middle_class`` and ``rpn.get_rpn_class`` and instantiated.
    Metric trackers and the ``global_step`` buffer follow. Finally the
    constructor prepares ``_time_dict`` (one empty list per timed stage),
    a deep copy of it as ``_cuda_event_dict``, the optional score-dump
    directories, and ``_num_class_with_bg``.

    Args:
        output_shape: First positional argument of the middle extractor.
        num_class: Stored as ``_num_class`` and forwarded to the RPN.
        num_input_features: Stored and passed to the voxel feature
            extractor.
        vfe_class_name, vfe_num_filters, with_distance: Select and
            configure the voxel feature extractor.
        middle_class_name, middle_num_input_features,
        middle_num_filters_d1, middle_num_filters_d2: Select and configure
            the middle feature extractor.
        rpn_class_name, rpn_num_input_features, rpn_layer_nums,
        rpn_layer_strides, rpn_num_filters, rpn_upsample_strides,
        rpn_num_upsample_filters, use_groupnorm, num_groups: Select and
            configure the RPN. ``rpn_layer_nums`` is also stored as
            ``_rpn_layer_nums``.
        use_norm: Passed to the voxel and middle extractors only.
        use_direction_classifier, use_sigmoid_score,
        encode_background_as_zeros, encode_rad_error_by_sin,
        sin_error_factor, dir_offset, direction_limit_offset,
        num_direction_bins: Stored on the instance; some are also
            forwarded to the RPN or the metrics.
        use_rotate_nms, multiclass_nms, nms_class_agnostic,
        nms_score_thresholds, nms_pre_max_sizes, nms_post_max_sizes,
        nms_iou_thresholds: Stored on the instance, not used here.
        target_assigner: Must expose ``box_coder`` (with ``code_size``)
            and ``num_anchors_per_location``.
        cls_loss_weight, loc_loss_weight, pos_cls_weight, neg_cls_weight,
        direction_loss_weight, loss_norm_type, loc_loss_ftor,
        cls_loss_ftor: Loss configuration, stored on the instance.
        measure_time: Stored as ``measure_time``.
        voxel_generator: Must expose ``voxel_size`` and
            ``point_cloud_range``.
        post_center_range: Stored as given, or ``[]`` when falsy.
        name: Stored as ``name``.

    NOTE(review): ``target_assigner`` and ``voxel_generator`` default to
    ``None`` but are dereferenced unconditionally, so both are effectively
    required. The list-valued defaults are shared between calls; this
    constructor only passes them along.
    """
    super().__init__()
    self.name = name
    self._sin_error_factor = sin_error_factor
    self._num_class = num_class
    self._use_rotate_nms = use_rotate_nms
    self._multiclass_nms = multiclass_nms
    self._nms_score_thresholds = nms_score_thresholds
    self._nms_pre_max_sizes = nms_pre_max_sizes
    self._nms_post_max_sizes = nms_post_max_sizes
    self._nms_iou_thresholds = nms_iou_thresholds
    self._use_sigmoid_score = use_sigmoid_score
    self._encode_background_as_zeros = encode_background_as_zeros
    self._use_direction_classifier = use_direction_classifier
    self._num_input_features = num_input_features
    self._box_coder = target_assigner.box_coder
    self.target_assigner = target_assigner
    self.voxel_generator = voxel_generator
    self._pos_cls_weight = pos_cls_weight
    self._neg_cls_weight = neg_cls_weight
    self._encode_rad_error_by_sin = encode_rad_error_by_sin
    self._loss_norm_type = loss_norm_type
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()
    self._dir_offset = dir_offset
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor
    self._direction_loss_weight = direction_loss_weight
    self._cls_loss_weight = cls_loss_weight
    self._loc_loss_weight = loc_loss_weight
    self._post_center_range = post_center_range or []
    self.measure_time = measure_time
    self._nms_class_agnostic = nms_class_agnostic
    self._num_direction_bins = num_direction_bins
    self._dir_limit_offset = direction_limit_offset

    # Stage 1: voxel feature extractor.
    self.voxel_feature_extractor = voxel_encoder.get_vfe_class(vfe_class_name)(
        num_input_features,
        use_norm,
        num_filters=vfe_num_filters,
        with_distance=with_distance,
        voxel_size=self.voxel_generator.voxel_size,
        pc_range=self.voxel_generator.point_cloud_range,
    )

    # Stage 2: middle feature extractor.
    self.middle_feature_extractor = middle.get_middle_class(middle_class_name)(
        output_shape,
        use_norm,
        num_input_features=middle_num_input_features,
        num_filters_down1=middle_num_filters_d1,
        num_filters_down2=middle_num_filters_d2)

    # Stage 3: region proposal network.
    # NOTE(review): use_norm is hard-coded to True for the RPN; the
    # `use_norm` argument does not reach it.
    self._rpn_layer_nums = rpn_layer_nums
    self.rpn = rpn.get_rpn_class(rpn_class_name)(
        use_norm=True,
        num_class=num_class,
        layer_nums=rpn_layer_nums,
        layer_strides=rpn_layer_strides,
        num_filters=rpn_num_filters,
        upsample_strides=rpn_upsample_strides,
        num_upsample_filters=rpn_num_upsample_filters,
        num_input_features=rpn_num_input_features,
        num_anchor_per_loc=target_assigner.num_anchors_per_location,
        encode_background_as_zeros=encode_background_as_zeros,
        use_direction_classifier=use_direction_classifier,
        use_groupnorm=use_groupnorm,
        num_groups=num_groups,
        box_code_size=target_assigner.box_coder.code_size,
        num_direction_bins=self._num_direction_bins)

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=[0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95],
        use_sigmoid_score=use_sigmoid_score,
        encode_background_as_zeros=encode_background_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()
    self.register_buffer("global_step", torch.LongTensor(1).zero_())

    # One list of samples per timed stage. The finer-grained pillar
    # timers are kept commented out, as in the original.
    self._time_dict = {
        'PFE': [],
        #'PillarGen': [],
        #'PillarPrep': [],
        #'PillarFeatureNet': [],
        #'PillarScatter': [],
        'RPN-stage-1': [],
        'RPN-stage-2': [],
        'RPN-stage-3': [],
        'RPN-finalize': [],
        'Predict': [],
        'Pre-stage-1': [],
        'Post-stage-1': [],
        'End-to-end': [],
    }
    # Independent copy with the same keys, so the two dicts never share
    # list objects.
    self._cuda_event_dict = copy.deepcopy(self._time_dict)

    self._det_num = 1

    # Score dumping is switched off here; when enabled, one directory per
    # RPN stage is created under the user's home directory.
    self._dump_scores = False
    if self._dump_scores:
        # expanduser() still resolves a home directory when $HOME is
        # unset, where os.getenv('HOME') + "..." would raise TypeError.
        home_dir = os.path.expanduser("~")
        for i in range(1, len(rpn_layer_nums) + 1):
            mypath = os.path.join(home_dir, "scores_" + str(i))
            # exist_ok avoids the check-then-create race.
            os.makedirs(mypath, exist_ok=True)

    #self._cuda_streams = []
    #for i in range(4): # We are assuming 4 slices
    #    self._cuda_streams.append(torch.cuda.Stream())

    # One extra class slot when background is not encoded as zeros.
    self._num_class_with_bg = self._num_class
    if not self._encode_background_as_zeros:
        self._num_class_with_bg = self._num_class + 1
def __init__(self,
             target_assigner,
             nms_score_thresholds,
             nms_iou_thresholds,
             nms_pre_max_sizes,
             nms_post_max_sizes,
             loc_loss_ftor,
             cls_loss_ftor,
             loc_loss_weight,
             cls_loss_weight):
    """Set up the loss and post-processing state with fixed settings.

    Unlike a fully configurable constructor, most options here are
    hard-coded (for example ``_num_class = 4``, ``_direction_loss_weight =
    0.2`` and a fixed ``_post_center_range``). Only the target assigner,
    the NMS thresholds/sizes, and the localisation/classification loss
    functors and weights come from the caller. No sub-networks are built;
    the metric trackers, the ``global_step`` buffer and three empty timing
    dictionaries are created.

    Args:
        target_assigner: Stored as ``target_assigner``; its ``box_coder``
            attribute is stored as ``_box_coder``.
        nms_score_thresholds, nms_iou_thresholds, nms_pre_max_sizes,
        nms_post_max_sizes: Stored on the instance, not used here.
        loc_loss_ftor, cls_loss_ftor: Stored as ``_loc_loss_ftor`` and
            ``_cls_loss_ftor``.
        loc_loss_weight, cls_loss_weight: Stored as ``_loc_loss_weight``
            and ``_cls_loss_weight``.
    """
    super().__init__()

    # Fixed choices shared by the attributes and the metric trackers.
    bg_as_zeros = True
    sigmoid_score = True

    # Identity and fixed model settings.
    self.name = "pointpillars_loss"
    self.measure_time = False
    self._num_class = 4
    self._encode_background_as_zeros = bg_as_zeros
    self._use_direction_classifier = True
    self._num_direction_bins = 2

    # Fixed angle / direction encoding.
    self._encode_rad_error_by_sin = True
    self._sin_error_factor = 1.0
    self._dir_offset = 0
    self._dir_limit_offset = 1

    # Loss weighting: direction and pos/neg weights are fixed, the
    # localisation and classification weights come from the caller.
    self._direction_loss_weight = 0.2
    self._pos_cls_weight = 1.0
    self._neg_cls_weight = 1.0
    self._loc_loss_weight = loc_loss_weight
    self._cls_loss_weight = cls_loss_weight

    # Collaborating objects, kept in their original assignment order.
    self.target_assigner = target_assigner
    self._box_coder = target_assigner.box_coder
    self._loss_norm_type = LossNormType.NormByNumPositives
    self._loc_loss_ftor = loc_loss_ftor
    self._cls_loss_ftor = cls_loss_ftor
    self._dir_loss_ftor = WeightedSoftmaxClassificationLoss()
    self._diff_loc_loss_ftor = WeightedSmoothL1LocalizationLoss()

    # postprocess
    self._post_center_range = [-40, -80, -5, 40, 40, 5]
    self._use_sigmoid_score = True
    self._use_rotate_nms = False
    self._multiclass_nms = False
    self._nms_class_agnostic = False
    self._nms_score_thresholds = nms_score_thresholds
    self._nms_iou_thresholds = nms_iou_thresholds
    self._nms_pre_max_sizes = nms_pre_max_sizes
    self._nms_post_max_sizes = nms_post_max_sizes

    # Metric trackers.
    self.rpn_acc = metrics.Accuracy(
        dim=-1, encode_background_as_zeros=bg_as_zeros)
    self.rpn_precision = metrics.Precision(dim=-1)
    self.rpn_recall = metrics.Recall(dim=-1)
    pr_thresholds = [0.1, 0.3, 0.5, 0.7, 0.8, 0.9, 0.95]
    self.rpn_metrics = metrics.PrecisionRecall(
        dim=-1,
        thresholds=pr_thresholds,
        use_sigmoid_score=sigmoid_score,
        encode_background_as_zeros=bg_as_zeros)
    self.rpn_cls_loss = metrics.Scalar()
    self.rpn_loc_loss = metrics.Scalar()
    self.rpn_total_loss = metrics.Scalar()

    # One-element integer buffer initialised to zero.
    step_counter = torch.LongTensor(1).zero_()
    self.register_buffer("global_step", step_counter)

    # Timing bookkeeping, initially empty.
    self._time_dict = {}
    self._time_total_dict = {}
    self._time_count_dict = {}