예제 #1
0
    def Task(self):
        """Returns the parent task params re-tuned for Pedestrian and Cyclist.

        Starting from the parent class's task params, this overrides the
        learning rate, the GIN cell featurizer, the per-class loss weights, the
        focal loss settings, the per-class NMS thresholds and the output
        decoder's filtering and AP metric weights.

        Returns:
          The modified task params object.
        """
        p = super(StarNetPedCycModel0704, self).Task()
        p.train.learning_rate = 7e-4

        # Point featurizer.
        builder = starnet.Builder()
        builder.linear_params_init = (
            py_utils.WeightInit.KaimingUniformFanInRelu())
        layer_dims = [32, 256, 512, 256, 256, 128]
        # One [input, hidden, output] spec per consecutive pair of sizes: the
        # input is twice the previous layer's output dims and the hidden layer
        # is assumed to be twice the layer's own output dims.
        mlp_specs = [[2 * d_in, 2 * d_out, d_out]
                     for d_in, d_out in zip(layer_dims, layer_dims[1:])]
        p.cell_feature_dims = sum(layer_dims)
        # fc_use_bn=False: the original note says this disables BN on the
        # first layer. The literal 1 is the number of laser features.
        p.cell_featurizer = builder.GINFeaturizerV2(
            'feat', layer_dims[0], mlp_specs, 1, fc_use_bn=False)
        p.anchor_projected_feature_dims = 512

        class_names = (
            kitti_input_generator.KITTILabelExtractor.KITTI_CLASS_NAMES)
        num_classes = len(class_names)

        def _per_class(default, overrides):
            # A list with one entry per class: `default` everywhere except
            # for the class names listed in `overrides`.
            values = [default] * num_classes
            for class_name, value in overrides.items():
                values[class_names.index(class_name)] = value
            return values

        # Only Pedestrian and Cyclist get a non-zero loss weight.
        p.per_class_loss_weight = _per_class(0., {
            'Pedestrian': 3.5,
            'Cyclist': 3.25,
        })

        p.focal_loss_alpha = 0.9
        p.focal_loss_gamma = 1.25

        # Decoding / NMS: classes without an override keep an IoU threshold
        # of 0.0 and a score threshold of 1.0.
        p.use_oriented_per_class_nms = True
        p.max_nms_boxes = 1024
        p.nms_iou_threshold = _per_class(0.0, {
            'Cyclist': 0.49,
            'Pedestrian': 0.32,
        })
        p.nms_score_threshold = _per_class(1.0, {
            'Cyclist': 0.11,
            'Pedestrian': 0.23,
        })

        decoder_p = p.output_decoder
        decoder_p.filter_predictions_outside_frustum = True
        decoder_p.truncation_threshold = 0.65
        # Equally weight pedestrian and cyclist moderate classes.
        decoder_p.ap_metric.metric_weights = {
            'easy': np.array([0.0, 0.0, 0.0]),
            'moderate': np.array([0.0, 1.0, 1.0]),
            'hard': np.array([0.0, 0.0, 0.0]),
        }

        return p
예제 #2
0
    def Task(self):
        """Returns the parent task params re-tuned for the Car class.

        Starting from the parent class's task params, this overrides the GIN
        cell featurizer, the learning rate, the focal loss settings, the
        per-class loss weights, the per-class NMS thresholds and the output
        decoder's filtering options.

        Returns:
          The modified task params object.
        """
        p = super(StarNetCarModel0701, self).Task()

        # Point featurizer.
        featurizer_builder = starnet.Builder()
        featurizer_builder.linear_params_init = (
            py_utils.WeightInit.KaimingUniformFanInRelu())
        layer_dims = [32, 256, 512, 256, 256, 128]
        # One [input, hidden, output] spec per consecutive pair of sizes: the
        # input is twice the previous layer's output dims and the hidden layer
        # is assumed to be twice the layer's own output dims.
        mlp_specs = [[2 * d_in, 2 * d_out, d_out]
                     for d_in, d_out in zip(layer_dims, layer_dims[1:])]
        p.cell_feature_dims = sum(layer_dims)
        p.cell_featurizer = featurizer_builder.GINFeaturizerV2(
            name='feat',
            fc_dims=layer_dims[0],
            mlp_dims=mlp_specs,
            num_laser_features=1,
            fc_use_bn=False)
        p.anchor_projected_feature_dims = 512

        # Training and loss.
        p.train.learning_rate = 0.001 / 2.  # Divide by batch size.
        p.focal_loss_alpha = 0.2
        p.focal_loss_gamma = 3.0

        class_names = (
            kitti_input_generator.KITTILabelExtractor.KITTI_CLASS_NAMES)
        num_classes = len(class_names)
        car_idx = class_names.index('Car')

        # Car is the only class with a non-zero loss weight.
        loss_weights = [0.] * num_classes
        loss_weights[car_idx] = 1.
        p.per_class_loss_weight = loss_weights

        # Decoding / NMS: every class other than Car keeps an IoU threshold
        # of 0.0 and a score threshold of 1.0.
        p.use_oriented_per_class_nms = True
        p.max_nms_boxes = 512
        iou_thresholds = [0.0] * num_classes
        iou_thresholds[car_idx] = 0.0831011
        p.nms_iou_threshold = iou_thresholds
        score_thresholds = [1.0] * num_classes
        score_thresholds[car_idx] = 0.321310
        p.nms_score_threshold = score_thresholds

        decoder_p = p.output_decoder
        decoder_p.truncation_threshold = 0.65
        decoder_p.filter_predictions_outside_frustum = True
        return p
예제 #3
0
    def Task(self):
        """Builds the StarNet `ModelV2` task params for the Waymo classes.

        Reads these attributes from `self`: NUM_ANCHOR_BBOX_OFFSETS,
        NUM_ANCHOR_BBOX_ROTATIONS, NUM_ANCHOR_BBOX_DIMENSIONS, GIN_HIDDEN_DIMS
        and NUM_GIN_LAYERS. Also calls `self.Train()` to obtain the train input
        params that the learning-rate schedule is derived from.

        Sub-classes need to set `nms_iou_threshold` and `nms_score_threshold`
        appropriately; the values set here are per-class placeholders.

        Returns:
          The configured task params object.
        """
        metadata = waymo_metadata.WaymoMetadata()
        num_classes = len(metadata.ClassNames())
        # Single source for the laser feature count, which is passed to both
        # the model params and the featurizer below.
        num_laser_features = 3
        p = starnet.ModelV2.Params(
            num_classes,
            num_anchor_bboxes_offsets=self.NUM_ANCHOR_BBOX_OFFSETS,
            num_anchor_bboxes_rotations=self.NUM_ANCHOR_BBOX_ROTATIONS,
            num_anchor_bboxes_dimensions=self.NUM_ANCHOR_BBOX_DIMENSIONS,
            num_laser_features=num_laser_features)

        # Update the Point Cloud Featurizer architecture
        starnet_builder = starnet.Builder()
        starnet_builder.linear_params_init = (
            py_utils.WeightInit.KaimingUniformFanInRelu())

        # Build a fresh inner list per layer. `[[...]] * n` would repeat one
        # list object n times, so mutating one layer's spec would silently
        # change all of them.
        hidden_dims = self.GIN_HIDDEN_DIMS
        gin_layers = [[hidden_dims * 2, hidden_dims * 4, hidden_dims]
                      for _ in range(self.NUM_GIN_LAYERS)]

        p.cell_featurizer = starnet_builder.GINFeaturizerV2(
            'feat',
            num_laser_features=num_laser_features,
            fc_dims=hidden_dims,
            mlp_dims=gin_layers,
            fc_use_bn=False)
        p.cell_feature_dims = hidden_dims * (self.NUM_GIN_LAYERS + 1)

        p.output_decoder = waymo_decoder.WaymoOpenDatasetDecoder.Params()
        p.max_nms_boxes = 512
        p.use_oriented_per_class_nms = True

        # Note: Sub-classes need to set nms_iou_threshold and nms_score_threshold
        # appropriately.
        p.nms_iou_threshold = [0.0] * num_classes

        # TODO(jngiam): 1.1 for untrained classes is needed to avoid an issue
        # with boxutils error.
        p.nms_score_threshold = [1.1] * num_classes

        p.name = 'starnet'
        tp = p.train
        tp.optimizer = optimizer.Adam.Params()
        tp.clip_gradient_norm_to_value = 5

        ep = p.eval

        # Train set uses a smaller decoding set, so we can
        # safely eval over the entire input.
        ep.samples_per_summary = 0

        # To be tuned.
        tp.l2_regularizer_weight = 1e-8

        # Build the train input params under a copy of the current cluster
        # params reconfigured as a sync 'trainer_client' job.
        cluster = cluster_factory.Current()
        train_cluster_p = cluster.params.Copy()
        train_cluster_p.job = 'trainer_client'
        train_cluster_p.mode = 'sync'

        # When running a decoding only job, there are no trainer workers, so we set
        # worker replicas to 1 as a dummy value.
        if train_cluster_p.worker.replicas <= 0:
            train_cluster_p.worker.replicas = 1

        with cluster_factory.Cluster(train_cluster_p):
            train_input_p = self.Train()

        # Set learning rate and schedule.
        # Adapted from V1 tuning.
        tp.ema_decay = 0.99
        # TODO(b/148537111): consider setting this to True.
        tp.ema_decay_moving_vars = False
        tp.learning_rate = 0.001
        lr_util.SetExponentialLR(train_p=tp,
                                 train_input_p=train_input_p,
                                 exp_start_epoch=5,
                                 total_epoch=75)

        # Loss weights.
        p.dimension_loss_weight = .3
        p.location_loss_weight = 3.
        p.loss_weight_classification = 1.
        p.loss_weight_localization = 3.
        p.rotation_loss_weight = 0.3

        return p