Example #1
    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth
        """

        # Sets model configs (_config)
        super(AvodModel, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        """
        self._bev_pixel_size = np.asarray([input_config.bev_dims_h,
                                           input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth
        """

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # AVOD config
        avod_config = self._config.avod_config
        self._proposal_roi_crop_size = \
            [avod_config.avod_proposal_roi_crop_size] * 2
        self._positive_selection = avod_config.avod_positive_selection
        self._nms_size = avod_config.avod_nms_size
        self._nms_iou_threshold = avod_config.avod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = avod_config.avod_box_representation

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value,'
                             ' should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}
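
A minimal instantiation sketch (assumed, not from the repository; the builder call and constructor arguments mirror the later examples, and pipeline_config_path is a placeholder):

dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                             use_defaults=False)
model = AvodModel(model_config, train_val_test='train', dataset=dataset)
prediction_dict = model.build()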
Example #2
    def test_path_drop_weights(self):
        # Tests the effect of path drop on the network's feature maps.
        # Sets up a minimal training run and checks the image feature
        # maps before and after running the 'train_op' while path drop
        # is in effect.

        train_val_test = 'train'
        # overwrite the training iterations
        self.train_config.max_iterations = 2
        self.train_config.overwrite_checkpoints = True

        # Overwrite path drop probabilities
        model_config = config_builder.proto_to_obj(self.model_config)
        model_config.path_drop_probabilities = [0.0, 0.8]

        with tf.Graph().as_default():
            # Set a graph-level seed
            tf.set_random_seed(1245)
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=self.dataset)
            prediction_dict = model.build()
            losses_dict, total_loss = model.loss(prediction_dict)

            global_summaries = set([])
            # Optimizer
            training_optimizer = optimizer_builder.build(
                self.train_config.optimizer,
                global_summaries)
            train_op = slim.learning.create_train_op(
                total_loss,
                training_optimizer)

            init_op = tf.global_variables_initializer()

            with tf.Session() as sess:
                sess.run(init_op)
                for step in range(1, self.train_config.max_iterations):
                    feed_dict = model.create_feed_dict()
                    if step == 1:
                        current_feature_maps = sess.run(model.img_feature_maps,
                                                        feed_dict=feed_dict)
                        exp_feature_maps = current_feature_maps
                    train_op_loss = sess.run(train_op, feed_dict=feed_dict)
                    print('Step {}, Total Loss {:0.3f} '.
                          format(step, train_op_loss))

                    updated_feature_maps = sess.run(model.img_feature_maps,
                                                    feed_dict=feed_dict)
            # The feature maps should have remained the same since
            # the image path was dropped
            np.testing.assert_array_almost_equal(
                updated_feature_maps, exp_feature_maps, decimal=4)
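
To make the setup concrete: path drop amounts to sampling Bernoulli keep/drop masks for the two input paths. A minimal standalone sketch (illustrative only; the actual mask logic lives inside RpnModel and the function name here is hypothetical):

import tensorflow as tf

def sample_path_drop_masks(p_img, p_bev):
    # Draw keep (1.0) / drop (0.0) masks for the image and BEV paths.
    img_mask = tf.cast(tf.random_uniform([]) < p_img, tf.float32)
    bev_mask = tf.cast(tf.random_uniform([]) < p_bev, tf.float32)
    # Keep at least one path alive: fall back to the image path if both
    # masks came out as drops.
    img_mask = tf.maximum(img_mask, 1.0 - tf.maximum(img_mask, bev_mask))
    return img_mask, bev_mask

With probabilities [0.0, 0.8] as in the test above, the image path is dropped, which is why the image feature maps are expected to stay constant across the training step.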
Example #3
def train(model_config, train_config, dataset_config):

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
        elif model_name == 'bev_only_rpn_model':
            model = BevOnlyRpnModel(model_config,
                                    train_val_test=train_val_test,
                                    dataset=dataset)
        elif model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
        elif model_name == 'bev_only_avod_model':
            model = BevOnlyAvodModel(model_config,
                                     train_val_test=train_val_test,
                                     dataset=dataset)
        else:
            raise ValueError('Invalid model_name')

        trainer.train(model, train_config)
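
A typical driver (a sketch; pipeline_config_path is a placeholder, and the parsing call mirrors Example #13) obtains the config objects from a pipeline file and forwards them:

model_config, train_config, _, dataset_config = \
    config_builder.get_configs_from_pipeline_file(
        pipeline_config_path, is_training=True)
train(model_config, train_config, dataset_config)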
Example #4
def set_up_model(pipeline_config, data_split):

    model_config, train_config, _, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            pipeline_config, is_training=False)

    dataset_config = config_builder.proto_to_obj(dataset_config)

    train_val_test = data_split
    # Always run in test mode
    dataset_config.data_split = 'test'
    dataset_config.data_split_dir = 'testing'
    dataset_config.has_labels = False
    dataset_config.aug_list = []

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    model_name = model_config.model_name
    if model_name == 'rpn_model':
        model = RpnModel(model_config,
                         train_val_test=train_val_test,
                         dataset=dataset)
    elif model_name == 'avod_model':
        model = AvodModel(model_config,
                          train_val_test=train_val_test,
                          dataset=dataset)
    elif model_name == 'avod_ssd_model':
        model = AvodSSDModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
    else:
        raise ValueError('Invalid model_name')

    return model
Example #5
def train(model_config, train_config, dataset_config):
    # Read the details from the config file, including:
    # model_config: model parameters
    # train_config: training parameters
    # dataset_config: dataset parameters
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    # Either avod_model or rpn_model
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
        elif model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
        else:
            raise ValueError('Invalid model_name')

        trainer.train(model, train_config)
Example #6
def train(model_config, train_config, dataset_config):

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
        elif model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
        elif model_name == 'retinanet_model':
            model = RetinanetModel(model_config,
                                   train_val_test=train_val_test,
                                   dataset=dataset)
        else:
            raise ValueError('Invalid model_name')

        trainer.train(model, train_config)
Example #7
    def test_rpn_loss(self):
        # Use "val" so that the first sample is loaded each time
        rpn_model = RpnModel(self.model_config,
                             train_val_test="val",
                             dataset=self.dataset)

        predictions = rpn_model.build()

        loss, total_loss = rpn_model.loss(predictions)

        feed_dict = rpn_model.create_feed_dict()

        with self.test_session() as sess:
            init = tf.global_variables_initializer()
            sess.run(init)
            loss_dict_out = sess.run(loss, feed_dict=feed_dict)
            print('Losses ', loss_dict_out)
Example #8
def inference(model_config, eval_config, dataset_config, data_split,
              ckpt_indices):

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False

    dataset_config.has_labels = False
    # Enable this to see the actual memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        elif model_name == 'bev_only_rpn_model':
            model = BevOnlyRpnModel(model_config,
                                    train_val_test=eval_config.eval_mode,
                                    dataset=dataset)
        elif model_name == 'bev_only_avod_model':
            model = BevOnlyAvodModel(model_config,
                                     train_val_test=eval_config.eval_mode,
                                     dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model_evaluator = Evaluator(model, dataset_config, eval_config)
        model_evaluator.run_latest_checkpoints()
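
Invoked from a script, this looks roughly like the following (argument values are placeholders):

inference(model_config, eval_config, dataset_config,
          data_split='val', ckpt_indices=[120])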
Example #9
    def test_disable_path_drop(self):
        # Test path drop is disabled when the probabilities
        # are set to 1.0.

        train_val_test = 'train'
        # Overwrite path drop probabilities
        model_config = config_builder.proto_to_obj(self.model_config)
        model_config.path_drop_probabilities = [1.0, 1.0]

        with tf.Graph().as_default():
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=self.dataset)
            model.build()
            # These variables are only created when path drop is enabled,
            # so with path drop disabled they should not exist
            self.assertFalse(hasattr(model, 'img_path_drop_mask'))
            self.assertFalse(hasattr(model, 'bev_path_drop_mask'))
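
The complementary case (a sketch; the mask attribute names come from the assertions above): with probabilities below 1.0 the masks should be created, so a companion check could assert their presence:

model_config.path_drop_probabilities = [0.5, 0.5]
with tf.Graph().as_default():
    model = RpnModel(model_config,
                     train_val_test='train',
                     dataset=self.dataset)
    model.build()
    self.assertTrue(hasattr(model, 'img_path_drop_mask'))
    self.assertTrue(hasattr(model, 'bev_path_drop_mask'))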
Example #10
def inferPerspective(model_config, eval_config, dataset_config,
                     additional_cls):
    model_name = model_config.model_name

    entity_perspect_dir = dataset_config.dataset_dir + dataset_config.data_split_dir + '/'

    logging.debug("Inferring perspective: %s\n %s\n %s",
                  dataset_config.data_split, entity_perspect_dir,
                  dataset_config.dataset_dir)

    files_in_range = create_split.create_split(dataset_config.dataset_dir,
                                               entity_perspect_dir,
                                               dataset_config.data_split)

    # If there are no files within the range cfg.MIN_IDX, cfg.MAX_IDX
    # then skip this perspective
    if not files_in_range:
        logging.debug(
            "No files within the range cfg.MIN_IDX, cfg.MAX_IDX, skipping perspective"
        )
        return

    if not additional_cls:
        estimate_ground_planes.estimate_ground_planes(entity_perspect_dir,
                                                      dataset_config, 0)

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Switch inference output directory
    model_config.paths_config.pred_dir = entity_perspect_dir + '/{}/'.format(
        cfg.AVOD_OUTPUT_DIR)
    logging.debug("Prediction directory: %s",
                  model_config.paths_config.pred_dir)

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model_evaluator = Evaluator(model, dataset_config, eval_config)
        model_evaluator.run_latest_checkpoints()

    save_kitti_predictions.convertPredictionsToKitti(
        dataset, model_config.paths_config.pred_dir, additional_cls)
Example #11
def train(model_config, train_config, dataset_config):

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
            trainer.train(model, train_config)
        elif model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
            trainer.train(model, train_config)
        elif model_name == 'avod_moe_model':
            model = AvodMoeModel(model_config,
                                 train_val_test=train_val_test,
                                 dataset=dataset)
            trainer_moe.train(model, train_config)
        elif model_name == 'epbrm':
            model = epBRM(model_config, dataset=dataset)
            epbrm_trainer.train(model, train_config)
        elif model_name == 'avod_model_new_bev':
            model = AvodModelBEV(model_config,
                                 train_val_test=train_val_test,
                                 dataset=dataset)
            trainer_new_bev.train(model, train_config)
        elif model_name == 'avod_model_double_fusion_new_bev':
            model = AvodModelDoubleFusionBEV(model_config,
                                             train_val_test=train_val_test,
                                             dataset=dataset)
            trainer_new_bev.train(model, train_config)
        else:
            raise ValueError('Invalid model_name')
Example #12
def set_up_model_test_mode(pipeline_config_path, data_split):
    """Returns the model and its config in test mode."""

    model_config, _, _, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            pipeline_config_path, is_training=False)

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    # Use the requested data split
    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    # Remove augmentation when in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    model_name = model_config.model_name
    if model_name == 'rpn_model':
        model = RpnModel(model_config, train_val_test='test', dataset=dataset)
    elif model_name == 'avod_model':
        model = AvodModel(model_config, train_val_test='test', dataset=dataset)
    elif model_name == 'avod_ssd_model':
        model = AvodSSDModel(model_config,
                             train_val_test='test',
                             dataset=dataset)
    else:
        raise ValueError('Invalid model_name')

    return model, model_config
Example #13
def set_up_model_train_mode(pipeline_config_path, data_split):
    """Returns the model and its train_op."""

    model_config, train_config, _, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            pipeline_config_path, is_training=True)

    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    model_name = model_config.model_name
    if model_name == 'rpn_model':
        model = RpnModel(model_config,
                         train_val_test=data_split,
                         dataset=dataset)
    elif model_name == 'avod_model':
        model = AvodModel(model_config,
                          train_val_test=data_split,
                          dataset=dataset)
    elif model_name == 'avod_ssd_model':
        model = AvodSSDModel(model_config,
                             train_val_test=data_split,
                             dataset=dataset)
    else:
        raise ValueError('Invalid model_name')

    prediction_dict = model.build()
    losses_dict, total_loss = model.loss(prediction_dict)

    # These parameters are required to set up the optimizer
    global_summaries = set([])
    global_step_tensor = tf.Variable(0, trainable=False)
    training_optimizer = optimizer_builder.build(train_config.optimizer,
                                                 global_summaries,
                                                 global_step_tensor)

    # Set up the train op
    train_op = slim.learning.create_train_op(total_loss, training_optimizer)

    return model, train_op
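
Putting the helper to use (a sketch assembled from the session handling in Example #2; pipeline_config_path is a placeholder):

model, train_op = set_up_model_train_mode(pipeline_config_path, 'train')
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    feed_dict = model.create_feed_dict()
    loss = sess.run(train_op, feed_dict=feed_dict)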
Example #14
class AvodModel(model.DetectionModel):
    TRAIN_REFINE = True

    ##############################
    # Keys for Predictions
    ##############################
    # Mini batch (mb) ground truth
    PRED_MB_CLASSIFICATIONS_GT = 'avod_mb_classifications_gt'
    PRED_MB_OFFSETS_GT = 'avod_mb_offsets_gt'
    PRED_MB_ORIENTATIONS_GT = 'avod_mb_orientations_gt'

    PRED_MB_POS_GT = 'avod_mb_pos_gt'
    PRED_MB_NEG_GT = 'avod_mb_neg_gt'

    # Mini batch (mb) predictions
    PRED_MB_CLASSIFICATION_LOGITS = 'avod_mb_classification_logits'
    PRED_MB_CLASSIFICATION_SOFTMAX = 'avod_mb_classification_softmax'
    PRED_MB_OFFSETS = 'avod_mb_offsets'
    PRED_MB_ANGLE_VECTORS = 'avod_mb_angle_vectors'

    # Top predictions after BEV NMS
    PRED_TOP_CLASSIFICATION_LOGITS = 'avod_top_classification_logits'
    PRED_TOP_CLASSIFICATION_SOFTMAX = 'avod_top_classification_softmax'

    PRED_TOP_PREDICTION_ANCHORS = 'avod_top_prediction_anchors'
    PRED_TOP_PREDICTION_BOXES_3D = 'avod_top_prediction_boxes_3d'
    PRED_TOP_ORIENTATIONS = 'avod_top_orientations'

    # Other box representations
    PRED_TOP_BOXES_8C = 'avod_top_regressed_boxes_8c'
    PRED_TOP_BOXES_4C = 'avod_top_prediction_boxes_4c'

    # Mini batch (mb) predictions (for debugging)
    PRED_MB_MASK = 'avod_mb_mask'
    PRED_MB_POS_MASK = 'avod_mb_pos_mask'
    PRED_MB_ANCHORS_GT = 'avod_mb_anchors_gt'
    PRED_MB_CLASS_INDICES_GT = 'avod_mb_gt_classes'

    # All predictions (for debugging)
    PRED_ALL_CLASSIFICATIONS = 'avod_classifications'
    PRED_ALL_OFFSETS = 'avod_offsets'
    PRED_ALL_ANGLE_VECTORS = 'avod_angle_vectors'

    PRED_MAX_IOUS = 'avod_max_ious'
    PRED_ALL_IOUS = 'avod_anchor_ious'

    ##############################
    # Keys for Loss
    ##############################
    LOSS_FINAL_CLASSIFICATION = 'avod_classification_loss'
    LOSS_FINAL_REGRESSION = 'avod_regression_loss'

    # (for debugging)
    LOSS_FINAL_ORIENTATION = 'avod_orientation_loss'
    LOSS_FINAL_LOCALIZATION = 'avod_localization_loss'

    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth
        """

        # Sets model configs (_config)
        super(AvodModel, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        """
        self._bev_pixel_size = np.asarray([input_config.bev_dims_h,
                                           input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth
        """

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # AVOD config
        avod_config = self._config.avod_config
        self._proposal_roi_crop_size = \
            [avod_config.avod_proposal_roi_crop_size] * 2
        self._positive_selection = avod_config.avod_positive_selection
        self._nms_size = avod_config.avod_nms_size
        self._nms_iou_threshold = avod_config.avod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = avod_config.avod_box_representation

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value,'
                             ' should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}

    def _norm(self, x):
        norm = tf.norm(x, axis=[1, 2], keep_dims=True)
        x = tf.divide(x, norm + 1e-7)
        return x

    def _sub_mean(self, x):
        mean = tf.reduce_mean(x, axis=[1, 2], keep_dims=True)
        x = x - mean
        return x

    def build(self):
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        with tf.variable_scope('avod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack([
                        top_anchors[:, 0], top_anchors[:, 1],
                        top_anchors[:, 2], expanded_dim_x,
                        top_anchors[:, 4], expanded_dim_z
                    ], axis=1)

                avod_projection_in = expanded_anchors

            else:
                avod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        avod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                image_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                        image_shape)
                # Only reorder the normalized img
                img_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_proposal_boxes_norm)

            with tf.variable_scope('img_r'):
                image_r_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_R_INPUT])[0:2],
                    tf.float32)
                img_r_proposal_boxes, img_r_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P3],
                        image_r_shape)

                # Only reorder the normalized img
                img_r_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_r_proposal_boxes_norm)

        #bev_feature_maps = rpn_model.bev_feature_maps
        img_feature_maps = rpn_model.img_feature_maps
        img_r_feature_maps = rpn_model.img_r_feature_maps
        """
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):

            with tf.variable_scope('avod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                #bev_mask = rpn_model.bev_path_drop_mask
                img_r_mask = rpn_model.img_r_path_drop_mask


                img_feature_maps = tf.multiply(img_feature_maps,
                                               img_mask)
                
                #bev_feature_maps = tf.multiply(bev_feature_maps,
                #                               bev_mask)
                img_r_feature_maps = tf.multiply(img_r_feature_maps,
                                               img_r_mask)

        else:
            #bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)
            img_r_mask = tf.constant(1.0)

        """
        img_mask = tf.constant(1.0)
        img_r_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('avod_roi_pooling'):

            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            """
            bev_boxes_norm_batches = tf.expand_dims(
                bev_proposal_boxes_norm, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            """

            img_boxes_norm_batches = tf.expand_dims(img_proposal_boxes_norm,
                                                    axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(img_boxes_norm_batches)

            # Do ROI Pooling on image
            img_rois = tf.image.crop_and_resize(
                img_feature_maps,
                img_proposal_boxes_norm_tf_order,
                tf_box_indices, (32, 32),
                name='img_rois')

            img_r_rois = tf.image.crop_and_resize(
                img_r_feature_maps,
                img_r_proposal_boxes_norm_tf_order,
                tf_box_indices, (32, 32),
                name='img_r_rois')

            img_rois = self._sub_mean(img_rois)
            img_r_rois = self._sub_mean(img_r_rois)

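            # Per-ROI cosine similarity between the mean-centered left and
            # right image crops; dissimilar ROI pairs are gated toward zero
            # (the ReLU below clips negative similarities).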
            cos_simi = tf.reduce_sum(img_rois * img_r_rois,
                                     axis=[1, 2], keep_dims=True)

            cos_simi = cos_simi / (
                tf.norm(img_rois + 1e-5, axis=[1, 2], keep_dims=True) *
                tf.norm(img_r_rois + 1e-5, axis=[1, 2], keep_dims=True))

            cos_simi = tf.nn.relu(cos_simi)

            img_rois = tf.image.resize_bilinear(
                img_rois, self._proposal_roi_crop_size) * cos_simi
            img_r_rois = tf.image.resize_bilinear(
                img_r_rois, self._proposal_roi_crop_size) * cos_simi

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[img_rois, img_r_rois],
                input_weights=[img_mask, img_r_mask],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=top_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)

        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # TODO: Don't create a mini batch in test mode
        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)
            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

        ######################################################
        # ROI summary images
        ######################################################
        avod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
        """
        with tf.variable_scope('bev_avod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed,
                mb_bev_anchors_norm,
                mb_bev_box_indices,
                (32, 32))

            bev_input_roi_summary_images = tf.split(
                bev_input_rois, self._bev_depth, axis=3)
            tf.summary.image('bev_avod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=avod_mini_batch_size)
        """

        with tf.variable_scope('img_avod_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(
                img_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_preprocessed, mb_img_anchors_norm,
                mb_img_box_indices, (32, 32))

            tf.summary.image('img_avod_rois',
                             img_input_rois,
                             max_outputs=avod_mini_batch_size)

        with tf.variable_scope('img_r_avod_rois'):
            # ROIs on image input
            mb_img_r_anchors_norm = tf.boolean_mask(
                img_r_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_r_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_r_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_r_preprocessed, mb_img_r_anchors_norm,
                mb_img_r_box_indices, (32, 32))

            tf.summary.image('img_r_avod_rois',
                             img_r_input_rois,
                             max_outputs=avod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits, nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors, nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                    nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)
                top_orientations = tf.gather(all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            #prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask
            prediction_dict['img_r_mask'] = img_r_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        # prediction_dict[self.PRED_MAX_IOUS] = max_ious
        # prediction_dict[self.PRED_ALL_IOUS] = all_ious

        return prediction_dict

    def sample_mini_batch(self, anchor_box_list_gt, anchor_box_list,
                          class_labels):

        with tf.variable_scope('avod_create_mb_mask'):
            # Get IoU for every anchor
            all_ious = box_list_ops.iou(anchor_box_list_gt, anchor_box_list)
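            # all_ious has shape (num_gt, num_anchors); the max over axis 0
            # gives each anchor its best-matching ground truth box.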
            max_ious = tf.reduce_max(all_ious, axis=0)
            max_iou_indices = tf.argmax(all_ious, axis=0)

            # Sample a pos/neg mini-batch from anchors with highest IoU match
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mb_mask, mb_pos_mask = mini_batch_utils.sample_avod_mini_batch(
                max_ious)
            mb_class_label_indices = mini_batch_utils.mask_class_label_indices(
                mb_pos_mask, mb_mask, max_iou_indices, class_labels)

            mb_gt_indices = tf.boolean_mask(max_iou_indices, mb_mask)

        return mb_mask, mb_class_label_indices, mb_gt_indices

    def create_feed_dict(self):
        feed_dict = self._rpn_model.create_feed_dict()
        self.sample_info = self._rpn_model.sample_info
        return feed_dict

    def loss(self, prediction_dict):
        # Note: The loss should be using mini-batch values only
        loss_dict, rpn_loss, rpn_acc_all, rpn_acc_pos, \
            rpn_acc_score_neg, rpn_acc_score_pos = \
            self._rpn_model.loss(prediction_dict)
        losses_output, avod_acc_all, avod_acc_pos = avod_loss_builder.build(
            self, prediction_dict)

        classification_loss = \
            losses_output[avod_loss_builder.KEY_CLASSIFICATION_LOSS]

        final_reg_loss = losses_output[avod_loss_builder.KEY_REGRESSION_LOSS]

        avod_loss = losses_output[avod_loss_builder.KEY_AVOD_LOSS]

        offset_loss_norm = \
            losses_output[avod_loss_builder.KEY_OFFSET_LOSS_NORM]

        loss_dict.update({self.LOSS_FINAL_CLASSIFICATION: classification_loss})
        loss_dict.update({self.LOSS_FINAL_REGRESSION: final_reg_loss})

        # Add localization and orientation losses to loss dict for plotting
        loss_dict.update({self.LOSS_FINAL_LOCALIZATION: offset_loss_norm})

        ang_loss_loss_norm = losses_output.get(
            avod_loss_builder.KEY_ANG_LOSS_NORM)
        if ang_loss_loss_norm is not None:
            loss_dict.update({self.LOSS_FINAL_ORIENTATION: ang_loss_loss_norm})

        with tf.variable_scope('model_total_loss'):
            if self.TRAIN_REFINE:
                total_loss = rpn_loss + avod_loss
            else:
                total_loss = rpn_loss

        rpn_score_2d_loss = loss_dict[self._rpn_model.LOSS_RPN_SCORE_2D]
        rpn_class_loss = loss_dict[self._rpn_model.LOSS_RPN_OBJECTNESS]
        rpn_reg_loss = loss_dict[self._rpn_model.LOSS_RPN_REGRESSION]
        refine_class_loss = classification_loss
        refine_reg_loss = final_reg_loss

        return (loss_dict, total_loss, rpn_score_2d_loss,
                rpn_acc_score_neg, rpn_acc_score_pos,
                rpn_class_loss, rpn_reg_loss,
                rpn_acc_all, rpn_acc_pos,
                refine_class_loss, refine_reg_loss,
                avod_acc_all, avod_acc_pos)
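
For reference (a sketch mirroring the return order above), a caller unpacks the loss as:

(loss_dict, total_loss, rpn_score_2d_loss,
 rpn_acc_score_neg, rpn_acc_score_pos,
 rpn_class_loss, rpn_reg_loss,
 rpn_acc_all, rpn_acc_pos,
 refine_class_loss, refine_reg_loss,
 avod_acc_all, avod_acc_pos) = model.loss(prediction_dict)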
Example #15
class AvodModelDoubleFusionBEV(model.DetectionModel):
    ##############################
    # Keys for Predictions
    ##############################
    # Mini batch (mb) ground truth
    PRED_MB_CLASSIFICATIONS_GT = 'avod_mb_classifications_gt'
    PRED_MB_OFFSETS_GT = 'avod_mb_offsets_gt'
    PRED_MB_ORIENTATIONS_GT = 'avod_mb_orientations_gt'

    # Mini batch (mb) predictions
    PRED_MB_CLASSIFICATION_LOGITS = 'avod_mb_classification_logits'
    PRED_MB_CLASSIFICATION_SOFTMAX = 'avod_mb_classification_softmax'
    PRED_MB_OFFSETS = 'avod_mb_offsets'
    PRED_MB_ANGLE_VECTORS = 'avod_mb_angle_vectors'

    # Top predictions after BEV NMS
    PRED_TOP_CLASSIFICATION_LOGITS = 'avod_top_classification_logits'
    PRED_TOP_CLASSIFICATION_SOFTMAX = 'avod_top_classification_softmax'

    PRED_TOP_PREDICTION_ANCHORS = 'avod_top_prediction_anchors'
    PRED_TOP_PREDICTION_BOXES_3D = 'avod_top_prediction_boxes_3d'
    PRED_TOP_ORIENTATIONS = 'avod_top_orientations'

    # Other box representations
    PRED_TOP_BOXES_8C = 'avod_top_regressed_boxes_8c'
    PRED_TOP_BOXES_4C = 'avod_top_prediction_boxes_4c'

    # Mini batch (mb) predictions (for debugging)
    PRED_MB_MASK = 'avod_mb_mask'
    PRED_MB_POS_MASK = 'avod_mb_pos_mask'
    PRED_MB_ANCHORS_GT = 'avod_mb_anchors_gt'
    PRED_MB_CLASS_INDICES_GT = 'avod_mb_gt_classes'

    # All predictions (for debugging)
    PRED_ALL_CLASSIFICATIONS = 'avod_classifications'
    PRED_ALL_OFFSETS = 'avod_offsets'
    PRED_ALL_ANGLE_VECTORS = 'avod_angle_vectors'

    PRED_MAX_IOUS = 'avod_max_ious'
    PRED_ALL_IOUS = 'avod_anchor_ious'

    ##############################
    # Keys for Loss
    ##############################
    LOSS_FINAL_CLASSIFICATION = 'avod_classification_loss'
    LOSS_FINAL_REGRESSION = 'avod_regression_loss'

    # (for debugging)
    LOSS_FINAL_ORIENTATION = 'avod_orientation_loss'
    LOSS_FINAL_LOCALIZATION = 'avod_localization_loss'

    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth
        """

        # Sets model configs (_config)
        super(AvodModelDoubleFusionBEV, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        self._bev_pixel_size = np.asarray(
            [input_config.bev_dims_h, input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # AVOD config
        avod_config = self._config.avod_config
        self._proposal_roi_crop_size = \
            [avod_config.avod_proposal_roi_crop_size] * 2
        self._positive_selection = avod_config.avod_positive_selection
        self._nms_size = avod_config.avod_nms_size
        self._nms_iou_threshold = avod_config.avod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = avod_config.avod_box_representation

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value,'
                             ' should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}

    ####################################################################################
    # TODO PROJECT: scale one modality's ROI features up to the modality
    # with the larger maximum feature value
    def scale_bev(self, bev_rois, img_rois):
        # Scale BEV ROIs by the ratio of the image max to the BEV max
        val_to_mul = tf.divide(self.max_img_feature_val,
                               self.max_bev_feature_val)
        return tf.multiply(bev_rois, val_to_mul)

    def scale_img(self, bev_rois, img_rois):
        # Scale image ROIs by the ratio of the BEV max to the image max
        val_to_mul = tf.divide(self.max_bev_feature_val,
                               self.max_img_feature_val)
        return tf.multiply(img_rois, val_to_mul)

    ####################################################################################

    def build(self):
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        with tf.variable_scope('avod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack(
                        [top_anchors[:, 0],
                         top_anchors[:, 1],
                         top_anchors[:, 2],
                         expanded_dim_x,
                         top_anchors[:, 4],
                         expanded_dim_z],
                        axis=1)

                avod_projection_in = expanded_anchors

            else:
                avod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                # bev_proposal_boxes are the boxes' x and z coordinates
                # relative to bev_extents; bev_proposal_boxes_norm are the
                # boxes normalized to the bev_extents range
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        avod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                image_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                        image_shape)
                # Only reorder the normalized img
                img_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_proposal_boxes_norm)

        bev_feature_maps = rpn_model.bev_feature_maps
        img_feature_maps = rpn_model.img_feature_maps

        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):

            with tf.variable_scope('avod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                bev_mask = rpn_model.bev_path_drop_mask

                img_feature_maps = tf.multiply(img_feature_maps, img_mask)

                bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask)
        else:
            bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('avod_roi_pooling'):

            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])
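            # For a single image (batch size 1) with N proposals, this returns
            # [0, 0, ..., 0] of length N, assigning every crop to image 0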

            bev_boxes_norm_batches = tf.expand_dims(bev_proposal_boxes_norm,
                                                    axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            # tf_box_indices is a 1-D tensor of size [num_boxes]; each element
            # gives the batch index of the image the box belongs to. Since the
            # batch size here is 1, every index is 0.
            # bev_rois is a 4-D tensor of shape
            # [num_boxes, crop_height, crop_width, depth]
            ####################################################################################
            # TODO PROJECT: set bev_feature_maps or img_feature_maps to zeros for testing
            # bev_feature_maps = tf.zeros_like(bev_feature_maps)
            # self.bev_feature_maps = tf.zeros_like(bev_feature_maps)
            # bev_feature_maps = self.bev_feature_maps
            ####################################################################################

            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            # Do ROI Pooling on image
            img_rois = tf.image.crop_and_resize(
                img_feature_maps,
                img_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='img_rois')
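
            # Shape sketch (assuming a 7x7 crop size, a common config value):
            #   bev_feature_maps [1, H_bev, W_bev, C]  -> bev_rois [num_boxes, 7, 7, C]
            #   img_feature_maps [1, H_img, W_img, C'] -> img_rois [num_boxes, 7, 7, C']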

            ####################################################################################
            # TODO PROJECT: crop 28x28 ROIs as input for the mixture of experts
            # bev_rois4moe = tf.image.crop_and_resize(
            #     bev_feature_maps,
            #     bev_proposal_boxes_norm_tf_order,
            #     tf_box_indices,
            #     [28,28],
            #     name='bev_rois4moe')
            # # Do ROI Pooling on image
            # img_rois4moe = tf.image.crop_and_resize(
            #     img_feature_maps,
            #     img_proposal_boxes_norm_tf_order,
            #     tf_box_indices,
            #     [28,28],
            #     name='img_rois4moe')
            ####################################################################################

            ####################################################################################
            # TODO PROJECT: create member variables for accessing
            # self.bev_rois = bev_rois
            # self.img_rois = img_rois
            self.bev_boxes = bev_proposal_boxes_tf_order
            self.bev_boxes_norm = bev_proposal_boxes_norm
            self.img_boxes = img_proposal_boxes
            self.img_boxes_norm = img_proposal_boxes_norm
            # self.bev_mask = rpn_model.bev_path_drop_mask
            # self.img_mask = rpn_model.img_path_drop_mask
            ####################################################################################

            ####################################################################################
            # TODO PROJECT: scale the features to features with larger maximum values
            # self.max_img_feature_val = tf.reduce_max(img_rois, axis=None)
            # self.max_bev_feature_val = tf.reduce_max(bev_rois, axis=None)
            # bev_rois_moe = tf.cond(
            #     tf.greater(self.max_img_feature_val, self.max_bev_feature_val),
            #     lambda: self.scale_bev(bev_rois, img_rois),
            #     lambda: bev_rois)
            # img_rois_moe = tf.cond(
            #     tf.greater(self.max_bev_feature_val, self.max_img_feature_val),
            #     lambda: self.scale_img(bev_rois, img_rois),
            #     lambda: img_rois)
            ####################################################################################

        ####################################################################################
        # TODO PROJECT: insert code here to add mixture of experts
        # self._moe_model = MoeModel(img_rois, bev_rois,
        #                            img_proposal_boxes, bev_proposal_boxes)
        # self._moe_model = MoeModel(img_feature_maps, bev_feature_maps,
        #                            img_proposal_boxes, bev_proposal_boxes)
        # self._moe_model._set_up_input_pls()
        # self.moe_prediction = self._moe_model.build()
        ####################################################################################

        ####################################################################################
        # TODO PROJECT: weight the features before averaging img and bev
        # img_weights = tf.reshape(self.moe_prediction['img_weight'], [-1, 1, 1, 1])
        # bev_weights = tf.reshape(self.moe_prediction['bev_weight'], [-1, 1, 1, 1])
        # img_weights = 0.5 * tf.ones([1024, 1, 1, 1], tf.float32)
        # bev_weights = 0.5 * tf.ones([1024, 1, 1, 1], tf.float32)
        # weighted_img_rois = tf.multiply(img_weights, img_rois)
        # weighted_bev_rois = tf.multiply(bev_weights, bev_rois)
        ####################################################################################

        ####################################################################################
        # TODO PROJECT: create fused bev
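        # Pipeline sketch (based on the calls below): (1) find occupied BEV
        # pixels, (2) lift them to 3D points at the slice mid-heights,
        # (3) project those points into the image with the P2 calibration,
        # (4) gather image features at the projected pixels, and (5) scatter
        # the gathered features back into a new BEV map.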
        _, bev_mar_boxes_norm = cf.add_margin_to_regions(
            bev_proposal_boxes, self.dataset.kitti_utils.bev_extents)

        bev_pixels_loc = cf.bev_pixel_eq_1_loc(
            self._rpn_model._bev_preprocessed)

        slices_config = \
            self.dataset.config.kitti_utils_config.bev_generator.slices
        max_height = slices_config.height_hi
        min_height = slices_config.height_lo
        num_slices = slices_config.num_slices

        height_list = [
            min_height + (2 * x + 1) * (max_height - min_height) /
            (2.0 * num_slices) for x in range(num_slices)
        ]
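        # Each entry is the mid-height of a BEV slice. For example, with
        # height_lo = -0.2, height_hi = 2.3 and num_slices = 5 (typical AVOD
        # config values), this gives [0.05, 0.55, 1.05, 1.55, 2.05].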
        print("bev_preprocess shape: ",
              (self._rpn_model._bev_preprocessed).shape)

        velo_pc = cf.bev_pixel_loc_to_3d_velo(
            bev_pixels_loc,
            tf.shape(self._rpn_model._bev_preprocessed)[1:3], height_list,
            self.dataset.kitti_utils.bev_extents)
        print("PL_CALIB_P2 shape: ",
              self._rpn_model.placeholders[RpnModel.PL_CALIB_P2].shape)

        p_2d = anchor_projector.project_to_image_tensor(
            tf.transpose(tf.cast(velo_pc, tf.float32)),
            self._rpn_model.placeholders[RpnModel.PL_CALIB_P2])

        print("image feature maps [0] shape: ", img_feature_maps[0].shape)
        features_at_p_2d = tf.gather_nd(
            img_feature_maps[0], tf.cast(tf.round(tf.transpose(p_2d)),
                                         tf.int32))

        print("features_at_p_2d shape: ", features_at_p_2d.shape)
        new_bev = cf.create_fused_bev(
            tf.shape(self._rpn_model._bev_preprocessed), bev_pixels_loc,
            features_at_p_2d)
        # raise Exception("finish fused_bev generation!")

        self._new_bev_feature_extractor = feature_extractor_builder.get_extractor(
            self.model_config.layers_config.bev_feature_extractor)
        self.new_bev_feature_maps, self.new_bev_end_points = \
            self._new_bev_feature_extractor.build(
                new_bev,
                self._bev_pixel_size,
                self._is_training,
                scope='new_bev_vgg'
            )

        new_bev_rois = tf.image.crop_and_resize(
            self.new_bev_feature_maps,
            bev_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='new_bev_rois')

        ####################################################################################

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        # fc_output_layers = \
        #     avod_fc_layers_builder.build(
        #         layers_config=avod_layers_config,
        #         input_rois=[bev_rois, img_rois],
        #         input_weights=[bev_mask, img_mask],
        #         num_final_classes=self._num_final_classes,
        #         box_rep=self._box_rep,
        #         top_anchors=top_anchors,
        #         ground_plane=ground_plane,
        #         is_training=self._is_training)
        ####################################################################################
        # TODO PROJECT: average the img and bev features first, then concatenate with the new bev
        rois_sum = tf.reduce_sum([bev_rois, img_rois], axis=0)
        rois_mean = tf.divide(rois_sum, tf.reduce_sum([bev_mask, img_mask]))
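        # The ROIs were already multiplied by the path drop masks, so dividing
        # the sum by the mask total (1 or 2) averages only the kept paths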
        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[rois_mean, new_bev_rois],
                input_weights=[1, img_mask],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=top_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)

        ####################################################################################

        ####################################################################################
        # TODO PROJECT: input weighted bev_rois and img_rois to output layer
        # fc_output_layers = \
        #     avod_fc_layers_builder.build(
        #         layers_config=avod_layers_config,
        #         input_rois=[weighted_bev_rois, weighted_img_rois],
        #         input_weights=[bev_mask * bev_weights, img_mask * img_weights],
        #         num_final_classes=self._num_final_classes,
        #         box_rep=self._box_rep,
        #         top_anchors=top_anchors,
        #         ground_plane=ground_plane,
        #         is_training=self._is_training)
        ####################################################################################


        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))
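            # For example, assuming label_smoothing_epsilon = 0.001 and a
            # single foreground class (num_final_classes = 2), a positive is
            # encoded as [0.001, 0.999] instead of a hard [0, 1]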

        # TODO: Don't create a mini batch in test mode
        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)
            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])
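                # Assuming a (3, 8) corner layout, the row-major flatten gives
                # [x1..x8, y1..y8, z1..z8] per box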

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

        ######################################################
        # ROI summary images
        ######################################################
        avod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
        with tf.variable_scope('bev_avod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed, mb_bev_anchors_norm,
                mb_bev_box_indices, (32, 32))

            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_avod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=avod_mini_batch_size)

        with tf.variable_scope('img_avod_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(
                img_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_preprocessed, mb_img_anchors_norm,
                mb_img_box_indices, (32, 32))

            tf.summary.image('img_avod_rois',
                             img_input_rois,
                             max_outputs=avod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)
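            # (column 0 is the background class, so it is excluded)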

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits, nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors, nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                    nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)
                top_orientations = tf.gather(all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        # 8c means 8 corners
        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        # 4c means 4 corners
        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        # prediction_dict[self.PRED_MAX_IOUS] = max_ious
        # prediction_dict[self.PRED_ALL_IOUS] = all_ious

        return prediction_dict

    def sample_mini_batch(self, anchor_box_list_gt, anchor_box_list,
                          class_labels):
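        """Samples a mini-batch of proposals for the second-stage loss.

        Args:
            anchor_box_list_gt: BoxList of ground truth boxes in BEV
            anchor_box_list: BoxList of proposal boxes in BEV
            class_labels: class indices of the ground truth labels

        Returns:
            mb_mask: boolean mask selecting the mini-batch proposals
            mb_class_label_indices: class label index for each mini-batch
                sample (0 for background)
            mb_gt_indices: index of the best-matching ground truth box for
                each mini-batch sample
        """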

        with tf.variable_scope('avod_create_mb_mask'):
            # Get IoU for every anchor
            all_ious = box_list_ops.iou(anchor_box_list_gt, anchor_box_list)
            max_ious = tf.reduce_max(all_ious, axis=0)
            max_iou_indices = tf.argmax(all_ious, axis=0)

            # Sample a pos/neg mini-batch from anchors with highest IoU match
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mb_mask, mb_pos_mask = mini_batch_utils.sample_avod_mini_batch(
                max_ious)
            mb_class_label_indices = mini_batch_utils.mask_class_label_indices(
                mb_pos_mask, mb_mask, max_iou_indices, class_labels)

            mb_gt_indices = tf.boolean_mask(max_iou_indices, mb_mask)

        return mb_mask, mb_class_label_indices, mb_gt_indices

    def create_feed_dict(self):
        feed_dict = self._rpn_model.create_feed_dict()
        self.sample_info = self._rpn_model.sample_info
        return feed_dict

    def loss(self, prediction_dict):
        # Note: The loss should use mini-batch values only
        loss_dict, rpn_loss = self._rpn_model.loss(prediction_dict)
        losses_output = avod_loss_builder.build(self, prediction_dict)

        classification_loss = \
            losses_output[avod_loss_builder.KEY_CLASSIFICATION_LOSS]

        final_reg_loss = losses_output[avod_loss_builder.KEY_REGRESSION_LOSS]

        avod_loss = losses_output[avod_loss_builder.KEY_AVOD_LOSS]

        offset_loss_norm = \
            losses_output[avod_loss_builder.KEY_OFFSET_LOSS_NORM]

        loss_dict.update({self.LOSS_FINAL_CLASSIFICATION: classification_loss})
        loss_dict.update({self.LOSS_FINAL_REGRESSION: final_reg_loss})

        # Add localization and orientation losses to loss dict for plotting
        loss_dict.update({self.LOSS_FINAL_LOCALIZATION: offset_loss_norm})

        ang_loss_loss_norm = losses_output.get(
            avod_loss_builder.KEY_ANG_LOSS_NORM)
        if ang_loss_loss_norm is not None:
            loss_dict.update({self.LOSS_FINAL_ORIENTATION: ang_loss_loss_norm})

        with tf.variable_scope('model_total_loss'):
            total_loss = rpn_loss + avod_loss
        ################################################################################
        # TODO PROJECT: plot weight
        ################################################################################

        return loss_dict, total_loss
Example No. 16
def test(model_config, eval_config, dataset_config, data_split, ckpt_indices):

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False

    dataset_config.has_labels = False
    # Enable this to see the actual memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]
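    # ([p_img, p_bev] = [1.0, 1.0] keeps both input paths every step)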

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        #model_evaluator = Evaluator(model, dataset_config, eval_config)
        #model_evaluator.run_latest_checkpoints()

        # Create a variable tensor to hold the global step
        global_step_tensor = tf.Variable(0, trainable=False, name='global_step')

        allow_gpu_mem_growth = eval_config.allow_gpu_mem_growth
        if allow_gpu_mem_growth:
            # GPU memory config
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = allow_gpu_mem_growth
            _sess = tf.Session(config=config)
        else:
            _sess = tf.Session()

        _prediction_dict = model.build()
        _saver = tf.train.Saver()

        trainer_utils.load_checkpoints(model_config.paths_config.checkpoint_dir,
                                       _saver)
        num_checkpoints = len(_saver.last_checkpoints)
        print("test:",num_checkpoints)
        checkpoint_to_restore = _saver.last_checkpoints[num_checkpoints-1]

        _saver.restore(_sess, checkpoint_to_restore)

        num_samples = model.dataset.num_samples
        num_valid_samples = 0

        current_epoch = model.dataset.epochs_completed
        while current_epoch == model.dataset.epochs_completed:
            # Keep track of feed_dict speed
            start_time = time.time()
            feed_dict = model.create_feed_dict()
            feed_dict_time = time.time() - start_time

            # Get sample name from model
            sample_name = model.sample_info['sample_name']

            num_valid_samples += 1
            print("Step: {} / {}, Inference on sample {}".format(
                num_valid_samples, num_samples,
                sample_name))

            print("test mode")
            inference_start_time = time.time()
            # Don't calculate loss or run summaries for test
            predictions = _sess.run(_prediction_dict,
                                    feed_dict=feed_dict)
            inference_time = time.time() - inference_start_time

            print("inference time:", inference_time)

            predictions_and_scores = \
                get_avod_predicted_boxes_3d_and_scores(predictions)

            #print(predictions_and_scores)
            #im_path = os.path.join(dataset_dir, 'training/image_2/{:06d}.png'.format(img_idx))
            #im = cv2.imread(im_path)
            #cv2.imshow('result',im)
            #cv2.waitKey(30)

            prediction_boxes_3d = predictions_and_scores[:, 0:7]
            prediction_scores = predictions_and_scores[:, 7]
            prediction_class_indices = predictions_and_scores[:, 8]
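            # Each row of predictions_and_scores is
            # [x, y, z, l, w, h, ry, score, class_index]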
            gt_classes = ['Car']
            fig_size = (10, 6.1)

            avod_score_threshold = 0.1
            if len(prediction_boxes_3d) > 0:

                # Apply score mask
                avod_score_mask = prediction_scores >= avod_score_threshold
                prediction_boxes_3d = prediction_boxes_3d[avod_score_mask]
                prediction_scores = prediction_scores[avod_score_mask]
                prediction_class_indices = \
                    prediction_class_indices[avod_score_mask]

            if len(prediction_boxes_3d) > 0:

                dataset_dir = model.dataset.dataset_dir
                sample_name = \
                    model.dataset.sample_names[model.dataset._index_in_epoch - 1]
                img_idx = int(sample_name)
                print("frame_index", img_idx)
                image_path = model.dataset.get_rgb_image_path(sample_name)
                image = Image.open(image_path)
                image_size = image.size

                if model.dataset.has_labels:
                    gt_objects = obj_utils.read_labels(dataset.label_dir, img_idx)
                else:
                    gt_objects = []
                filtered_gt_objs = model.dataset.kitti_utils.filter_labels(
                    gt_objects, classes=gt_classes)

                stereo_calib = calib_utils.read_calibration(dataset.calib_dir,
                                                            img_idx)
                calib_p2 = stereo_calib.p2
                # Project the 3D box predictions to image space
                image_filter = []
                final_boxes_2d = []
                for i in range(len(prediction_boxes_3d)):
                    box_3d = prediction_boxes_3d[i, 0:7]
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    if img_box is not None:
                        image_filter.append(True)
                        final_boxes_2d.append(img_box)
                    else:
                        image_filter.append(False)
                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = \
                    [box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                        for prediction in final_prediction_boxes_3d]
                for (obj, score) in zip(final_prediction_objs, final_scores):
                    obj.score = score

                pred_fig, pred_2d_axes, pred_3d_axes = \
                    vis_utils.visualization(dataset.rgb_image_dir,
                                            img_idx,
                                            display=False,
                                            fig_size=fig_size)

                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 pred_2d_axes,
                                 pred_3d_axes,
                                 True,
                                 True,
                                 gt_classes,
                                 False)

                #cv2.imshow('result',pred_fig)
                print(type(pred_fig))
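                # Render the matplotlib figure and convert its RGB buffer into
                # a numpy image that OpenCV can display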
                pred_fig.canvas.draw()
                img = np.frombuffer(pred_fig.canvas.tostring_rgb(),
                                    dtype=np.uint8)
                img = img.reshape(pred_fig.canvas.get_width_height()[::-1] + (3,))
                cv2.imshow('result', img)

                # Draw the bird's eye view
                kitti_utils = model.dataset.kitti_utils
                print(img.shape[0:2])
                point_cloud = kitti_utils.get_point_cloud(
                    'lidar', img_idx, (370, 1242))
                ground_plane = kitti_utils.get_ground_plane(sample_name)
                bev_images = kitti_utils.create_bev_maps(point_cloud, ground_plane)

                density_map = np.array(bev_images.get("density_map"))
                _, box_points_norm = box_3d_projector.project_to_bev(
                    final_prediction_boxes_3d, [[-40, 40], [0, 70]])
                density_map = draw_boxes(density_map, box_points_norm)
                cv2.imshow('lidar', density_map)
                cv2.waitKey(-1)
Example No. 17
    def test_create_path_drop_masks(self):
        # Tests creating path drop choices
        # based on the given probabilities

        rpn_model = RpnModel(self.model_config,
                             train_val_test="val",
                             dataset=self.dataset)
        rpn_model.build()
        ##################################
        # Test-Case 1 : Keep img, Keep bev
        ##################################
        p_img = tf.constant(0.6)
        p_bev = tf.constant(0.85)

        # Set the random numbers for testing purposes
        rand_choice = [0.53, 0.83, 0.05]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)
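        # The first two values are compared against p_img and p_bev to decide
        # whether each path is kept; the third breaks the tie when both paths
        # would be dropped (judging from cases 4 and 5 below: < 0.5 keeps bev,
        # >= 0.5 keeps img)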

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##################################
        # Test-Case 2 : Kill img, Keep bev
        ##################################
        p_img = tf.constant(0.2)
        p_bev = tf.constant(0.85)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 0.0)
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##################################
        # Test-Case 3 : Keep img, Kill bev
        ##################################
        p_img = tf.constant(0.9)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 0.0)

        ##############################################
        # Test-Case 4 : Kill img, Kill bev, third flip
        ##############################################
        p_img = tf.constant(0.0)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            np.testing.assert_array_equal(img_mask_out, 0.0)
            # Because of the third condition, we expect to be keeping bev
            np.testing.assert_array_equal(bev_mask_out, 1.0)

        ##############################################
        # Test-Case 5 : Kill img, Kill bev, third flip
        ##############################################
        # Let's flip the third chance and keep img instead
        rand_choice = [0.53, 0.83, 0.61]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)
        p_img = tf.constant(0.0)
        p_bev = tf.constant(0.1)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        with self.test_session():
            img_mask_out = img_mask.eval()
            bev_mask_out = bev_mask.eval()
            # Because of the third condition, we expect to be keeping img
            np.testing.assert_array_equal(img_mask_out, 1.0)
            np.testing.assert_array_equal(bev_mask_out, 0.0)
Example No. 18
    def test_load_model_weights(self):
        # Tests loading weights

        train_val_test = 'train'

        # Overwrite the training iterations
        self.train_config.max_iterations = 1
        self.train_config.overwrite_checkpoints = True

        with tf.Graph().as_default():
            model = RpnModel(self.model_config,
                             train_val_test=train_val_test,
                             dataset=self.dataset)
            trainer.train(model, self.train_config)

            paths_config = self.model_config.paths_config
            rpn_checkpoint_dir = paths_config.checkpoint_dir

            # load the weights back in
            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()
            with tf.Session() as sess:
                sess.run(init_op)

                trainer_utils.load_checkpoints(rpn_checkpoint_dir, saver)
                checkpoint_to_restore = saver.last_checkpoints[-1]
                trainer_utils.load_model_weights(sess, checkpoint_to_restore)

                rpn_vars = slim.get_model_variables()
                rpn_weights = sess.run(rpn_vars)
                self.assertGreater(len(rpn_weights), 0,
                                   msg='Loaded RPN weights are empty')

        with tf.Graph().as_default():
            model = AvodModel(self.model_config,
                              train_val_test=train_val_test,
                              dataset=self.dataset)
            model.build()

            # load the weights back in
            init_op = tf.global_variables_initializer()

            saver = tf.train.Saver()
            with tf.Session() as sess:
                sess.run(init_op)

                trainer_utils.load_checkpoints(rpn_checkpoint_dir, saver)
                checkpoint_to_restore = saver.last_checkpoints[-1]
                trainer_utils.load_model_weights(sess, checkpoint_to_restore)

                avod_vars = slim.get_model_variables()
                avod_weights = sess.run(avod_vars)

                # AVOD weights should include both RPN + AVOD weights
                self.assertGreater(len(avod_weights),
                                   len(rpn_weights),
                                   msg='Expected more weights for AVOD')

                # grab weights corresponding to RPN by index
                # since the model variables are ordered
                rpn_len = len(rpn_weights)
                loaded_rpn_vars = avod_vars[0:rpn_len]
                rpn_weights_reload = sess.run(loaded_rpn_vars)

                # Make sure the reloaded weights match the originally
                # loaded weights
                for i in range(rpn_len):
                    np.testing.assert_array_equal(rpn_weights_reload[i],
                                                  rpn_weights[i])
Example No. 19
def evaluate(model_config, eval_config, dataset_config):

    # Parse eval config
    eval_mode = eval_config.eval_mode
    if eval_mode not in ['val', 'test']:
        raise ValueError('Evaluation mode can only be set to `val` or `test`')
    evaluate_repeatedly = eval_config.evaluate_repeatedly

    # Parse dataset config
    data_split = dataset_config.data_split
    if data_split == 'train':
        dataset_config.data_split_dir = 'training'
        dataset_config.has_labels = True

    elif data_split.startswith('val'):
        dataset_config.data_split_dir = 'training'

        # Don't load labels for val split when running in test mode
        if eval_mode == 'val':
            dataset_config.has_labels = True
        elif eval_mode == 'test':
            dataset_config.has_labels = False

    elif data_split == 'test':
        dataset_config.data_split_dir = 'testing'
        dataset_config.has_labels = False

    else:
        raise ValueError('Invalid data split', data_split)

    # Convert to object to overwrite repeated fields
    dataset_config = config_builder.proto_to_obj(dataset_config)

    # Remove augmentation during evaluation
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name

    # Convert to object to overwrite repeated fields
    model_config = config_builder.proto_to_obj(model_config)

    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model_evaluator = Evaluator(model, dataset_config, eval_config)

        if evaluate_repeatedly:
            model_evaluator.repeated_checkpoint_run()
        else:
            model_evaluator.run_latest_checkpoints()
Example No. 20
    def test_path_drop_input_multiplication(self):
        # Tests the final image/bev inputs that result
        # from the path drop decisions

        rpn_model = RpnModel(self.model_config,
                             train_val_test="val",
                             dataset=self.dataset)
        rpn_model.build()
        # Shape of input feature map
        dummy_img_feature_shape = [1, 30, 50, 2]
        random_values = np.random.randint(low=1,
                                          high=256,
                                          size=2).astype(np.float32)

        dummy_img_feature_map = tf.fill(dummy_img_feature_shape,
                                        random_values[0])
        # Assume both feature maps are the same size; this is not
        # the case inside the network
        dummy_bev_feature_map = tf.fill(dummy_img_feature_shape,
                                        random_values[1])

        ##################################
        # Test-Case 1 : Keep img, Kill bev
        ##################################
        exp_img_input = np.full(dummy_img_feature_shape, random_values[0])
        exp_bev_input = np.full(dummy_img_feature_shape, 0.0)

        p_img = tf.constant(0.6)
        p_bev = tf.constant(0.4)

        # Set the random numbers for testing purposes
        rand_choice = [0.53, 0.83, 0.05]
        rand_choice_tensor = tf.convert_to_tensor(rand_choice)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        final_img_input = tf.multiply(dummy_img_feature_map,
                                      img_mask)

        final_bev_input = tf.multiply(dummy_bev_feature_map,
                                      bev_mask)

        with self.test_session():
            final_img_input_out = final_img_input.eval()
            final_bev_input_out = final_bev_input.eval()
            np.testing.assert_array_equal(final_img_input_out,
                                          exp_img_input)
            np.testing.assert_array_equal(final_bev_input_out,
                                          exp_bev_input)

        ##################################
        # Test-Case 2 : Kill img, Keep bev
        ##################################
        exp_img_input = np.full(dummy_img_feature_shape, 0.0)
        exp_bev_input = np.full(dummy_img_feature_shape, random_values[1])

        p_img = tf.constant(0.4)
        p_bev = tf.constant(0.9)

        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, rand_choice_tensor)

        final_img_input = tf.multiply(dummy_img_feature_map,
                                      img_mask)

        final_bev_input = tf.multiply(dummy_bev_feature_map,
                                      bev_mask)

        with self.test_session():
            final_img_input_out = final_img_input.eval()
            final_bev_input_out = final_bev_input.eval()
            np.testing.assert_array_equal(final_img_input_out,
                                          exp_img_input)
            np.testing.assert_array_equal(final_bev_input_out,
                                          exp_bev_input)