예제 #1
0
    def test_offset_tensor_to_anchor(self):

        # anchor format is [x, y, z, dim_x, dim_y, dim_z]
        anchors = np.asarray([[1, 2, 3, 4, 6, 5], [0, 0, 0, 2, 3, 1]],
                             dtype=np.float32)

        anchor_tensor = \
            tf.convert_to_tensor(anchors, dtype=tf.float32)

        # anchor offset prediction is [tx, ty, tz, tdim_x, tdim_y, tdim_z]
        anchor_offsets = np.array([[0.5, 0.02, 0.01, 0.1, 0.4, 0.03],
                                   [0.04, 0.1, 0.03, 0.001, 0.3, 0.03]],
                                  dtype=np.float32)

        anchor_offset_tensor = \
            tf.convert_to_tensor(anchor_offsets, dtype=tf.float32)

        expected_anchors = np.array([[3.0, 2.12, 3.05, 4.420, 8.9509, 5.152],
                                     [0.08, 0.3, 0.03, 2.002, 4.05, 1.03]],
                                    dtype=np.float32)

        anchors_tensor = anchor_encoder.offset_to_anchor(
            anchor_tensor, anchor_offset_tensor)

        sess = tf.Session()
        with sess.as_default():
            anchors = anchors_tensor.eval()

            np.testing.assert_almost_equal(anchors,
                                           expected_anchors,
                                           decimal=3)
예제 #2
0
    def build(self):

        # Setup input placeholders
        self._set_up_input_pls()

        # Setup feature extractors
        self._set_up_feature_extractors()

        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck

        fusion_mean_div_factor = 2.0

        # If both img and bev probabilites are set to 1.0, don't do
        # path drop.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):

                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1], random_values)

                img_proposal_input = tf.multiply(img_proposal_input, img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input, bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor
                fusion_mean_div_factor = img_mask + bev_mask

        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):

                def get_box_indices(boxes):
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input, self._bev_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)

            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input, self._img_anchors_norm_pl, tf_box_indices,
                self._proposal_roi_crop_size)

        with tf.variable_scope('proposal_roi_fusion'):
            rpn_fusion_out = None
            if self._fusion_method == 'mean':
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                rpn_fusion_out = tf.divide(tf_features_sum,
                                           fusion_mean_div_factor)
            elif self._fusion_method == 'concat':
                rpn_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)
            ###############################
            ###############################
            ##   Chenye  Netgate begin   ##
            ###############################
            elif self._fusion_method == 'netgate':
                print("!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!")
                print("bev_proposal_rois is:", bev_proposal_rois)
                rpn_fusion_out = chenye_netgate.netgate(
                    bev_proposal_rois, img_proposal_rois, self._is_training)
            ################################
            ##   Chenye  Netgate end      ##
            ################################

            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        # TODO: move this section into an separate AnchorPredictor class
        with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
            tensor_in = rpn_fusion_out

            # Parse rpn layers config
            layers_config = self._config.layers_config.rpn_config
            l2_weight_decay = layers_config.l2_weight_decay

            if l2_weight_decay > 0:
                weights_regularizer = slim.l2_regularizer(l2_weight_decay)
            else:
                weights_regularizer = None

            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                # Use conv2d instead of fully_connected layers.

                cls_fc6 = slim.conv2d(tensor_in,
                                      layers_config.cls_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='cls_fc6')

                cls_fc6_drop = slim.dropout(cls_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc6_drop')

                cls_fc7 = slim.conv2d(cls_fc6_drop,
                                      layers_config.cls_fc7, [1, 1],
                                      scope='cls_fc7')

                cls_fc7_drop = slim.dropout(cls_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc7_drop')

                cls_fc8 = slim.conv2d(cls_fc7_drop,
                                      2, [1, 1],
                                      activation_fn=None,
                                      scope='cls_fc8')

                objectness = tf.squeeze(cls_fc8, [1, 2],
                                        name='cls_fc8/squeezed')

                # Use conv2d instead of fully_connected layers.
                reg_fc6 = slim.conv2d(tensor_in,
                                      layers_config.reg_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='reg_fc6')

                reg_fc6_drop = slim.dropout(reg_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc6_drop')

                reg_fc7 = slim.conv2d(reg_fc6_drop,
                                      layers_config.reg_fc7, [1, 1],
                                      scope='reg_fc7')

                reg_fc7_drop = slim.dropout(reg_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc7_drop')

                reg_fc8 = slim.conv2d(reg_fc7_drop,
                                      6, [1, 1],
                                      activation_fn=None,
                                      scope='reg_fc8')

                offsets = tf.squeeze(reg_fc8, [1, 2], name='reg_fc8/squeezed')

        # Histogram summaries
        with tf.variable_scope('histograms_feature_extractor'):
            with tf.variable_scope('bev_vgg'):
                for end_point in self.bev_end_points:
                    tf.summary.histogram(end_point,
                                         self.bev_end_points[end_point])

            with tf.variable_scope('img_vgg'):
                for end_point in self.img_end_points:
                    tf.summary.histogram(end_point,
                                         self.img_end_points[end_point])

        with tf.variable_scope('histograms_rpn'):
            with tf.variable_scope('anchor_predictor'):
                fc_layers = [
                    cls_fc6, cls_fc7, cls_fc8, objectness, reg_fc6, reg_fc7,
                    reg_fc8, offsets
                ]
                for fc_layer in fc_layers:
                    # fix the name to avoid tf warnings
                    tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                         fc_layer)

        # Return the proposals
        with tf.variable_scope('proposals'):
            anchors = self.placeholders[self.PL_ANCHORS]

            # Decode anchor regression offsets
            with tf.variable_scope('decoding'):
                regressed_anchors = anchor_encoder.offset_to_anchor(
                    anchors, offsets)

            with tf.variable_scope('bev_projection'):
                _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                    regressed_anchors, self._bev_extents)

            with tf.variable_scope('softmax'):
                objectness_softmax = tf.nn.softmax(objectness)

            with tf.variable_scope('nms'):
                objectness_scores = objectness_softmax[:, 1]

                # Do NMS on regressed anchors
                top_indices = tf.image.non_max_suppression(
                    bev_proposal_boxes_norm,
                    objectness_scores,
                    max_output_size=self._nms_size,
                    iou_threshold=self._nms_iou_thresh)

                top_anchors = tf.gather(regressed_anchors, top_indices)
                top_objectness_softmax = tf.gather(objectness_scores,
                                                   top_indices)
                # top_offsets = tf.gather(offsets, top_indices)
                # top_objectness = tf.gather(objectness, top_indices)

        # Get mini batch
        all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
        all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
        all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

        with tf.variable_scope('mini_batch'):
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mini_batch_mask, _ = \
                mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

        # ROI summary images
        rpn_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
        with tf.variable_scope('bev_rpn_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_bev_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(self._bev_preprocessed,
                                                      mb_bev_anchors_norm,
                                                      mb_bev_box_indices,
                                                      (32, 32))

            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_rpn_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=rpn_mini_batch_size)

        with tf.variable_scope('img_rpn_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_img_box_indices = tf.zeros_like(tf.boolean_mask(
                all_classes_gt, mini_batch_mask),
                                               dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(self._img_preprocessed,
                                                      mb_img_anchors_norm,
                                                      mb_img_box_indices,
                                                      (32, 32))

            tf.summary.image('img_rpn_rois',
                             img_input_rois,
                             max_outputs=rpn_mini_batch_size)

        # Ground Truth Tensors
        with tf.variable_scope('one_hot_classes'):

            # Anchor classification ground truth
            # Object / Not Object
            min_pos_iou = \
                self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

            objectness_classes_gt = tf.cast(tf.greater_equal(
                all_ious_gt, min_pos_iou),
                                            dtype=tf.int32)
            objectness_gt = tf.one_hot(
                objectness_classes_gt,
                depth=2,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=self._config.label_smoothing_epsilon)

        # Mask predictions for mini batch
        with tf.variable_scope('prediction_mini_batch'):
            objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
            offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

        with tf.variable_scope('ground_truth_mini_batch'):
            objectness_gt_masked = tf.boolean_mask(objectness_gt,
                                                   mini_batch_mask)
            offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                                mini_batch_mask)

        # Specify the tensors to evaluate
        predictions = dict()

        # Temporary predictions for debugging
        # predictions['anchor_ious'] = anchor_ious
        # predictions['anchor_offsets'] = all_offsets_gt

        if self._train_val_test in ['train', 'val']:
            # All anchors
            predictions[self.PRED_ANCHORS] = anchors

            # Mini-batch masks
            predictions[self.PRED_MB_MASK] = mini_batch_mask
            # Mini-batch predictions
            predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
            predictions[self.PRED_MB_OFFSETS] = offsets_masked

            # Mini batch ground truth
            predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
            predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

            # Proposals after nms
            predictions[self.PRED_TOP_INDICES] = top_indices
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        else:
            # self._train_val_test == 'test'
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        return predictions, bev_proposal_rois, bev_proposal_rois
예제 #3
0
    def build(self):
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        with tf.variable_scope('avod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack([
                        top_anchors[:, 0], top_anchors[:, 1], top_anchors[:,
                                                                          2],
                        expanded_dim_x, top_anchors[:, 4], expanded_dim_z
                    ],
                                                axis=1)

                avod_projection_in = expanded_anchors

            else:
                avod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        avod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                image_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                        image_shape)
                # Only reorder the normalized img
                img_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_proposal_boxes_norm)

            with tf.variable_scope('img_r'):
                image_r_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_R_INPUT])[0:2],
                    tf.float32)
                img_r_proposal_boxes, img_r_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P3],
                        image_r_shape)

                # Only reorder the normalized img
                img_r_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_r_proposal_boxes_norm)

        #bev_feature_maps = rpn_model.bev_feature_maps
        img_feature_maps = rpn_model.img_feature_maps
        img_r_feature_maps = rpn_model.img_r_feature_maps
        """
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):

            with tf.variable_scope('avod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                #bev_mask = rpn_model.bev_path_drop_mask
                img_r_mask = rpn_model.img_r_path_drop_mask


                img_feature_maps = tf.multiply(img_feature_maps,
                                               img_mask)
                
                #bev_feature_maps = tf.multiply(bev_feature_maps,
                #                               bev_mask)
                img_r_feature_maps = tf.multiply(img_r_feature_maps,
                                               img_r_mask)

        else:
            #bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)
            img_r_mask = tf.constant(1.0)

        """
        img_mask = tf.constant(1.0)
        img_r_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('avod_roi_pooling'):

            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            """
            bev_boxes_norm_batches = tf.expand_dims(
                bev_proposal_boxes_norm, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            """

            img_boxes_norm_batches = tf.expand_dims(img_proposal_boxes_norm,
                                                    axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(img_boxes_norm_batches)

            # Do ROI Pooling on image
            img_rois = tf.image.crop_and_resize(
                img_feature_maps,
                img_proposal_boxes_norm_tf_order,
                tf_box_indices, (32, 32),
                name='img_rois')

            img_r_rois = tf.image.crop_and_resize(
                img_r_feature_maps,
                img_r_proposal_boxes_norm_tf_order,
                tf_box_indices, (32, 32),
                name='img_r_rois')

            img_rois = self._sub_mean(img_rois)
            img_r_rois = self._sub_mean(img_r_rois)

            cos_simi = tf.reduce_sum(img_rois * img_r_rois, \
                                     axis=[1, 2], keep_dims=True)

            cos_simi = cos_simi / (tf.norm(img_rois + 1e-5, axis=[1, 2], keep_dims=True) * \
                                   tf.norm(img_r_rois + 1e-5, axis=[1, 2], keep_dims=True))

            cos_simi = tf.nn.relu(cos_simi)

            img_rois = tf.image.resize_bilinear(
                img_rois, self._proposal_roi_crop_size) * cos_simi
            img_r_rois = tf.image.resize_bilinear(
                img_r_rois, self._proposal_roi_crop_size) * cos_simi

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[img_rois, img_r_rois],
                input_weights=[img_mask, img_r_mask],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=top_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)

        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # TODO: Don't create a mini batch in test mode
        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)
            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

        ######################################################
        # ROI summary images
        ######################################################
        avod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
        """
        with tf.variable_scope('bev_avod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed,
                mb_bev_anchors_norm,
                mb_bev_box_indices,
                (32, 32))

            bev_input_roi_summary_images = tf.split(
                bev_input_rois, self._bev_depth, axis=3)
            tf.summary.image('bev_avod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=avod_mini_batch_size)
        """

        with tf.variable_scope('img_avod_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(
                img_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_preprocessed, mb_img_anchors_norm,
                mb_img_box_indices, (32, 32))

            tf.summary.image('img_avod_rois',
                             img_input_rois,
                             max_outputs=avod_mini_batch_size)

        with tf.variable_scope('img_r_avod_rois'):
            # ROIs on image input
            mb_img_r_anchors_norm = tf.boolean_mask(
                img_r_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_r_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_r_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_r_preprocessed, mb_img_r_anchors_norm,
                mb_img_r_box_indices, (32, 32))

            tf.summary.image('img_r_avod_rois',
                             img_r_input_rois,
                             max_outputs=avod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits, nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors, nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                    nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)
                top_orientations = tf.gather(all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            #prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask
            prediction_dict['img_r_mask'] = img_r_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        # prediction_dict[self.PRED_MAX_IOUS] = max_ious
        # prediction_dict[self.PRED_ALL_IOUS] = all_ious

        return prediction_dict
예제 #4
0
    def build(self):
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        with tf.variable_scope('avod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack([
                        top_anchors[:, 0], top_anchors[:, 1], top_anchors[:,
                                                                          2],
                        expanded_dim_x, top_anchors[:, 4], expanded_dim_z
                    ],
                                                axis=1)

                avod_projection_in = expanded_anchors

            else:
                avod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                # bev_proposal_boxes are boxes' x and z coordinate relative to bev_extents
                # bev_proposal_boxes_norm are normalized boxes in bev_extents' range
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        avod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                image_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                        image_shape)
                # Only reorder the normalized img
                img_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_proposal_boxes_norm)

        bev_feature_maps = rpn_model.bev_feature_maps
        img_feature_maps = rpn_model.img_feature_maps

        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):

            with tf.variable_scope('avod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                bev_mask = rpn_model.bev_path_drop_mask

                img_feature_maps = tf.multiply(img_feature_maps, img_mask)

                bev_feature_maps = tf.multiply(bev_feature_maps, bev_mask)
        else:
            bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('avod_roi_pooling'):

            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(bev_proposal_boxes_norm,
                                                    axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            # tf_box_indices contains 1D tensor with size [num_boxes], each element specifies
            # batch index to whom this box belongs. Because the batch size here is 1, so it
            # doesn't matter
            # bev_rois is a 4-D tensor of shape [num_boxes, crop_height, crop_width, depth]
            ####################################################################################
            # TODO PROJECT: set bev_feature_maps or img_feature_maps to zeros for testing
            # bev_feature_maps = tf.zeros_like(bev_feature_maps)
            # self.bev_feature_maps = tf.zeros_like(bev_feature_maps)
            # bev_feature_maps = self.bev_feature_maps
            ####################################################################################

            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            # Do ROI Pooling on image
            img_rois = tf.image.crop_and_resize(
                img_feature_maps,
                img_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='img_rois')

            ####################################################################################
            # TODO PROJECT: create member variables for accessing
            # bev_rois4moe = tf.image.crop_and_resize(
            #     bev_feature_maps,
            #     bev_proposal_boxes_norm_tf_order,
            #     tf_box_indices,
            #     [28,28],
            #     name='bev_rois4moe')
            # # Do ROI Pooling on image
            # img_rois4moe = tf.image.crop_and_resize(
            #     img_feature_maps,
            #     img_proposal_boxes_norm_tf_order,
            #     tf_box_indices,
            #     [28,28],
            #     name='img_rois4moe')
            ####################################################################################

            ####################################################################################
            # TODO PROJECT: create member variables for accessing
            # self.bev_rois = bev_rois
            # self.img_rois = img_rois
            self.bev_boxes = bev_proposal_boxes_tf_order
            self.bev_boxes_norm = bev_proposal_boxes_norm
            self.img_boxes = img_proposal_boxes
            self.img_boxes_norm = img_proposal_boxes_norm
            # self.bev_mask = rpn_model.bev_path_drop_mask
            # self.img_mask = rpn_model.img_path_drop_mask
            ####################################################################################

            ####################################################################################
            # TODO PROJECT: scale the features to features with larger maximum values
            # self.max_img_feature_val = tf.reduce_max(img_rois, axis=None)
            # self.max_bev_feature_val = tf.reduce_max(bev_rois, axis=None)
#
# bev_rois_moe = tf.cond(tf.greater(self.max_img_feature_val, self.max_bev_feature_val),
#    lambda: self.scale_bev(bev_rois, img_rois),
#    lambda: bev_rois)
# img_rois_moe = tf.cond(tf.greater(self.max_bev_feature_val, self.max_img_feature_val),
#    lambda: self.scale_img(bev_rois, img_rois),
#    lambda: img_rois)

####################################################################################

####################################################################################
# TODO PROJECT: insert code here to add mixture of experts

# self._moe_model = MoeModel(img_rois, bev_rois, img_proposal_boxes, bev_proposal_boxes)
# self._moe_model = MoeModel(img_feature_maps, bev_feature_maps, img_proposal_boxes, bev_proposal_boxes)
# self._moe_model._set_up_input_pls()
# self.moe_prediction = self._moe_model.build()

####################################################################################
####################################################################################
# TODO PROJECT: weight the feature before average img and bev
# img_weights = tf.reshape(self.moe_prediction['img_weight'],[-1,1,1,1])
# bev_weights = tf.reshape(self.moe_prediction['bev_weight'],[-1,1,1,1])
# img_weights = 0.5 * tf.ones([1024,1,1,1], tf.float32)
# bev_weights = 0.5 * tf.ones([1024,1,1,1], tf.float32)
# weighted_img_rois = tf.multiply(img_weights,img_rois)
# weighted_bev_rois = tf.multiply(bev_weights,bev_rois)

####################################################################################
####################################################################################
# TODO PROJECT: create fused bev
        _, bev_mar_boxes_norm = cf.add_margin_to_regions(
            bev_proposal_boxes, self.dataset.kitti_utils.bev_extents)

        bev_pixels_loc = cf.bev_pixel_eq_1_loc(
            self._rpn_model._bev_preprocessed)

        max_height = self.dataset.config.kitti_utils_config.bev_generator.slices.height_hi
        min_height = self.dataset.config.kitti_utils_config.bev_generator.slices.height_lo
        num_slices = self.dataset.config.kitti_utils_config.bev_generator.slices.num_slices

        height_list = [
            min_height + (2 * x + 1) * (max_height - min_height) /
            (2.0 * num_slices) for x in range(num_slices)
        ]
        print("bev_preprocess shape: ",
              (self._rpn_model._bev_preprocessed).shape)

        velo_pc = cf.bev_pixel_loc_to_3d_velo(
            bev_pixels_loc,
            tf.shape(self._rpn_model._bev_preprocessed)[1:3], height_list,
            self.dataset.kitti_utils.bev_extents)
        print("PL_CALIB_P2 shape: ",
              self._rpn_model.placeholders[RpnModel.PL_CALIB_P2].shape)

        p_2d = anchor_projector.project_to_image_tensor(
            tf.transpose(tf.cast(velo_pc, tf.float32)),
            self._rpn_model.placeholders[RpnModel.PL_CALIB_P2])

        print("image feature maps [0] shape: ", img_feature_maps[0].shape)
        features_at_p_2d = tf.gather_nd(
            img_feature_maps[0], tf.cast(tf.round(tf.transpose(p_2d)),
                                         tf.int32))

        print("features_at_p_2d shape: ", features_at_p_2d.shape)
        new_bev = cf.create_fused_bev(
            tf.shape(self._rpn_model._bev_preprocessed), bev_pixels_loc,
            features_at_p_2d)
        # raise Exception("finish fused_bev generation!")

        self._new_bev_feature_extractor = feature_extractor_builder.get_extractor(
            self.model_config.layers_config.bev_feature_extractor)
        self.new_bev_feature_maps, self.new_bev_end_points = \
            self._new_bev_feature_extractor.build(
                new_bev,
                self._bev_pixel_size,
                self._is_training,
                scope='new_bev_vgg'
            )

        new_bev_rois = tf.image.crop_and_resize(
            self.new_bev_feature_maps,
            bev_proposal_boxes_norm_tf_order,
            tf_box_indices,
            self._proposal_roi_crop_size,
            name='new_bev_rois')

        ####################################################################################

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        # fc_output_layers = \
        # avod_fc_layers_builder.build(
        # layers_config=avod_layers_config,
        # input_rois=[bev_rois, img_rois],
        # input_weights=[bev_mask, img_mask],
        # num_final_classes=self._num_final_classes,
        # box_rep=self._box_rep,
        # top_anchors=top_anchors,
        # ground_plane=ground_plane,
        # is_training=self._is_training)
        ####################################################################################
        # TODO PROJECT: average img and bev features first and then concat with new bev
        rois_sum = tf.reduce_sum([bev_rois, img_rois], axis=0)
        rois_mean = tf.divide(rois_sum, tf.reduce_sum([bev_mask, img_mask]))
        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[rois_mean, new_bev_rois],
                input_weights=[1, img_mask],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=top_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)

        ####################################################################################

        ####################################################################################
        # TODO PROJECT: input weighted bev_rois and img_rois to output layer
        # fc_output_layers = \
        #     avod_fc_layers_builder.build(
        #         layers_config=avod_layers_config,
        #         input_rois=[weighted_bev_rois, weighted_img_rois],
        #         input_weights=[bev_mask * bev_weights, img_mask * img_weights],
        #         num_final_classes=self._num_final_classes,
        #         box_rep=self._box_rep,
        #         top_anchors=top_anchors,
        #         ground_plane=ground_plane,
        #         is_training=self._is_training)
        ####################################################################################


        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # TODO: Don't create a mini batch in test mode
        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices)
            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

        ######################################################
        # ROI summary images
        ######################################################
        avod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
        with tf.variable_scope('bev_avod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed, mb_bev_anchors_norm,
                mb_bev_box_indices, (32, 32))

            bev_input_roi_summary_images = tf.split(bev_input_rois,
                                                    self._bev_depth,
                                                    axis=3)
            tf.summary.image('bev_avod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=avod_mini_batch_size)

        with tf.variable_scope('img_avod_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(
                img_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_preprocessed, mb_img_anchors_norm,
                mb_img_box_indices, (32, 32))

            tf.summary.image('img_avod_rois',
                             img_input_rois,
                             max_outputs=avod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits, nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors, nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_8c = tf.gather(prediction_boxes_8c,
                                                    nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(prediction_boxes_3d,
                                                    nms_indices)
                top_prediction_boxes_4c = tf.gather(prediction_boxes_4c,
                                                    nms_indices)
                top_orientations = tf.gather(all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        # 8c means 8 corners
        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        # 4c means 4 corners
        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        # prediction_dict[self.PRED_MAX_IOUS] = max_ious
        # prediction_dict[self.PRED_ALL_IOUS] = all_ious

        return prediction_dict
예제 #5
0
    def build(self):

        # Setup input placeholders
        self._set_up_input_pls()

        # Setup feature extractors
        self._set_up_feature_extractors()

        bev_proposal_input = self.bev_feature_maps
        img_proposal_input = self.img_feature_maps

        fusion_mean_div_factor = 2.0

        # If both img and bev probabilites are set to 1.0, don't do
        # path drop.
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):

                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1],
                    random_values)

                img_proposal_input = tf.multiply(img_proposal_input,
                                                 img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input,
                                                 bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor
                fusion_mean_div_factor = img_mask + bev_mask

        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):
                def get_box_indices(boxes):
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input,
                self._bev_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)
            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input,
                self._img_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        with tf.variable_scope('proposal_roi_fusion'):
            feat_fusion_out = None
            fc_layers_type = avod_layers_config.WhichOneof('fc_layers')
            if fc_layers_type == 'basic_fc_layers':
                fusion_method = \
                    avod_layers_config.basic_fc_layers.fusion_method
            elif fc_layers_type == 'fusion_fc_layers':
                fusion_method = \
                    avod_layers_config.fusion_fc_layers.fusion_method

            if fusion_method == 'mean':
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                feat_fusion_out = tf.divide(tf_features_sum,
                                            fusion_mean_div_factor)
            elif fusion_method == 'concat':
                feat_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)
            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        all_anchors = self.placeholders[self.PL_ANCHORS]
        ground_plane = self.placeholders[self.PL_GROUND_PLANE]

        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[feat_fusion_out],
                input_weights=[1.0],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=all_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)

        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(
                all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = self.placeholders[self.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = self.placeholders[self.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = self.placeholders[self.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        if self._train_val_test in ['train', 'val']:
            with tf.variable_scope('bev'):
                # Project all anchors into bev and image spaces
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        all_anchors,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)

            with tf.variable_scope('img'):
                image_shape = tf.cast(tf.shape(
                    self.placeholders[self.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        all_anchors,
                        self.placeholders[self.PL_CALIB_P2],
                        image_shape)

            # Project anchor_gts to 2D bev
            with tf.variable_scope('avod_gt_projection'):
                bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                    anchors_gt, self.dataset.kitti_utils.bev_extents)

                bev_anchor_boxes_gt_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_anchor_boxes_gt)

            with tf.variable_scope('avod_box_list'):
                # Convert to box_list format
                anchor_box_list_gt = \
                    box_list.BoxList(bev_anchor_boxes_gt_tf_order)
                anchor_box_list = \
                    box_list.BoxList(bev_proposal_boxes_tf_order)

            class_labels = self.placeholders[self.PL_LABEL_CLASSES]
            mb_mask, mb_class_label_indices, mb_gt_indices = \
                self.sample_mini_batch(
                    anchor_box_list_gt=anchor_box_list_gt,
                    anchor_box_list=anchor_box_list,
                    class_labels=class_labels)

            # Create classification one_hot vector
            with tf.variable_scope('avod_one_hot_classes'):
                mb_classification_gt = tf.one_hot(
                    mb_class_label_indices,
                    depth=self._num_final_classes,
                    on_value=1.0 - self._config.label_smoothing_epsilon,
                    off_value=(self._config.label_smoothing_epsilon /
                               self.dataset.num_classes))

            # Mask predictions
            with tf.variable_scope('avod_apply_mb_mask'):
                # Classification
                mb_classifications_logits = tf.boolean_mask(
                    all_cls_logits, mb_mask)
                mb_classifications_softmax = tf.boolean_mask(
                    all_cls_softmax, mb_mask)

                # Offsets
                mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

                # Angle Vectors
                if all_angle_vectors is not None:
                    mb_angle_vectors = tf.boolean_mask(
                        all_angle_vectors, mb_mask)
                else:
                    mb_angle_vectors = None

            # Encode anchor offsets
            with tf.variable_scope('avod_encode_mb_anchors'):
                mb_anchors = tf.boolean_mask(all_anchors, mb_mask)

                if self._box_rep == 'box_3d':
                    # Gather corresponding ground truth anchors for each mb
                    # sample
                    mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                    mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                        mb_anchors, mb_anchors_gt)

                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)
                elif self._box_rep in ['box_8c', 'box_8co']:

                    # Get boxes_3d ground truth mini-batch and convert to box_8c
                    mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                    if self._box_rep == 'box_8c':
                        mb_boxes_8c_gt = \
                            box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                    elif self._box_rep == 'box_8co':
                        mb_boxes_8c_gt = \
                            box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                    # Convert proposals: anchors -> box_3d -> box8c
                    proposal_boxes_3d = \
                        box_3d_encoder.anchors_to_box_3d(all_anchors,
                                                         fix_lw=True)
                    proposal_boxes_8c = \
                        box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                    # Get mini batch offsets
                    mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                    mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                        mb_boxes_8c, mb_boxes_8c_gt)

                    # Flatten the offsets to a (N x 24) vector
                    mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

                elif self._box_rep in ['box_4c', 'box_4ca']:

                    # Get ground plane for box_4c conversion
                    ground_plane = self.placeholders[
                        self.PL_GROUND_PLANE]

                    # Convert gt boxes_3d -> box_4c
                    mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                    mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                        mb_boxes_3d_gt, ground_plane)

                    # Convert proposals: anchors -> box_3d -> box_4c
                    proposal_boxes_3d = \
                        box_3d_encoder.anchors_to_box_3d(all_anchors,
                                                         fix_lw=True)
                    proposal_boxes_4c = \
                        box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                           ground_plane)

                    # Get mini batch
                    mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                    mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                        mb_boxes_4c, mb_boxes_4c_gt)

                    if self._box_rep == 'box_4ca':
                        # Gather corresponding ground truth orientation for each
                        # mb sample
                        mb_orientations_gt = tf.gather(orientations_gt,
                                                       mb_gt_indices)

                else:
                    raise NotImplementedError(
                        'Anchor encoding not implemented for', self._box_rep)

        elif self._train_val_test in ['test']:
            # In test-mode, skip mini-batch processing and just calculate
            # box conversions.
            if self._box_rep in ['box_4c', 'box_4ca']:
                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(all_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(all_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(all_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets,
                                              [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits,
                                                  nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors,
                                               nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(
                    all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_8c = tf.gather(
                    prediction_boxes_8c, nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_4c = tf.gather(
                    prediction_boxes_4c, nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_4c = tf.gather(
                    prediction_boxes_4c, nms_indices)
                top_orientations = tf.gather(
                    all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        prediction_dict = dict()

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        return prediction_dict
예제 #6
0
    def build(self):

        # Setup input placeholders
        # 信息输入
        self._set_up_input_pls()

        # Setup feature extractors
        # 特征提取
        self._set_up_feature_extractors()

        # 1*1卷积后的bev_fasturemap和img_featuremap
        bev_proposal_input = self.bev_bottleneck
        img_proposal_input = self.img_bottleneck

        # 融合参数
        fusion_mean_div_factor = 2.0

        # If both img and bev probabilites are set to 1.0, don't do path drop
        # 如果image和bev概率都设置为1.0,则不要执行路径丢弃。
        # train=0.9,test/val=1.0
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):
            with tf.variable_scope('rpn_path_drop'):
                # 从均匀分布中输出随机值。.随机输出3个0-1之间的数
                random_values = tf.random_uniform(shape=[3],
                                                  minval=0.0,
                                                  maxval=1.0)

                # 0.9,0.9.
                # 不是零就是1
                img_mask, bev_mask = self.create_path_drop_masks(
                    self._path_drop_probabilities[0],
                    self._path_drop_probabilities[1],
                    random_values)
                
                # 选择是否输入.如果mask为1 ,则输入,否则不输入
                img_proposal_input = tf.multiply(img_proposal_input,
                                                 img_mask)

                bev_proposal_input = tf.multiply(bev_proposal_input,
                                                 bev_mask)

                self.img_path_drop_mask = img_mask
                self.bev_path_drop_mask = bev_mask

                # Overwrite the division factor
                # 在训练时的融合参数
                fusion_mean_div_factor = img_mask + bev_mask
        # bev和 iname 的 featuremap 的裁剪
        with tf.variable_scope('proposal_roi_pooling'):

            with tf.variable_scope('box_indices'):
                def get_box_indices(boxes):
                    proposals_shape = boxes.get_shape().as_list()
                    if any(dim is None for dim in proposals_shape):
                        proposals_shape = tf.shape(boxes)
                    ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                    multiplier = tf.expand_dims(
                        tf.range(start=0, limit=proposals_shape[0]), 1)
                    return tf.reshape(ones_mat * multiplier, [-1])

                bev_boxes_norm_batches = tf.expand_dims(
                    self._bev_anchors_norm_pl, axis=0)

                # These should be all 0's since there is only 1 image
                # 这些应该全是0,因为只有1个图像
                tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            # 主要目的是让两种数据的输入能够统一一下,便于后续做数据融合.resize为6*6的输出
            bev_proposal_rois = tf.image.crop_and_resize(
                bev_proposal_input,
                self._bev_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)
            # Do ROI Pooling on image
            img_proposal_rois = tf.image.crop_and_resize(
                img_proposal_input,
                self._img_anchors_norm_pl,
                tf_box_indices,
                self._proposal_roi_crop_size)

        # bev和image的融合
        with tf.variable_scope('proposal_roi_fusion'):
            rpn_fusion_out = None
            # mean
            if self._fusion_method == 'mean':
                tf_features_sum = tf.add(bev_proposal_rois, img_proposal_rois)
                # /2平均融合方式
                rpn_fusion_out = tf.divide(tf_features_sum,
                                           fusion_mean_div_factor)
            elif self._fusion_method == 'concat':
                rpn_fusion_out = tf.concat(
                    [bev_proposal_rois, img_proposal_rois], axis=3)
            else:
                raise ValueError('Invalid fusion method', self._fusion_method)

        # TODO: move this section into an separate AnchorPredictor class
        with tf.variable_scope('anchor_predictor', 'ap', [rpn_fusion_out]):
            # 融合后的作为输入
            tensor_in = rpn_fusion_out

            # Parse rpn layers config
            layers_config = self._config.layers_config.rpn_config
            l2_weight_decay = layers_config.l2_weight_decay

            if l2_weight_decay > 0:
                # 正则化
                weights_regularizer = slim.l2_regularizer(l2_weight_decay)
            else:
                weights_regularizer = None

            with slim.arg_scope([slim.conv2d],
                                weights_regularizer=weights_regularizer):
                # Use conv2d instead of fully_connected layers.
                # 256,6上一层的输出实际上就是6*6的所以将全连接化为卷积操作,使用6*6的卷积核
                cls_fc6 = slim.conv2d(tensor_in,
                                      layers_config.cls_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='cls_fc6')

                cls_fc6_drop = slim.dropout(cls_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc6_drop')

                cls_fc7 = slim.conv2d(cls_fc6_drop,
                                      layers_config.cls_fc7,
                                      [1, 1],
                                      scope='cls_fc7')

                cls_fc7_drop = slim.dropout(cls_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='cls_fc7_drop')

                # 2,分类
                cls_fc8 = slim.conv2d(cls_fc7_drop,
                                      2,
                                      [1, 1],
                                      activation_fn=None,
                                      scope='cls_fc8')

                # 删除指定尺寸为1 的
                objectness = tf.squeeze(
                    cls_fc8, [1, 2],
                    name='cls_fc8/squeezed')

                # Use conv2d instead of fully_connected layers.
                reg_fc6 = slim.conv2d(tensor_in,
                                      layers_config.reg_fc6,
                                      self._proposal_roi_crop_size,
                                      padding='VALID',
                                      scope='reg_fc6')

                reg_fc6_drop = slim.dropout(reg_fc6,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc6_drop')

                reg_fc7 = slim.conv2d(reg_fc6_drop,
                                      layers_config.reg_fc7,
                                      [1, 1],
                                      scope='reg_fc7')

                reg_fc7_drop = slim.dropout(reg_fc7,
                                            layers_config.keep_prob,
                                            is_training=self._is_training,
                                            scope='reg_fc7_drop')

                # ∆t x , ∆t y , ∆t z , ∆d x , ∆d y , ∆d z
                # 256,6个回归值包括中心点差值,以及长宽高的差值
                reg_fc8 = slim.conv2d(reg_fc7_drop,
                                      6,
                                      [1, 1],
                                      activation_fn=None,
                                      scope='reg_fc8')

                offsets = tf.squeeze(
                    reg_fc8, [1, 2],
                    name='reg_fc8/squeezed')

        # Histogram summaries
        # 并没有用.就是一个可视化、可以自己选择是否可视化
        with tf.variable_scope('histograms_feature_extractor'):
            with tf.variable_scope('bev_vgg'):
                for end_point in self.bev_end_points:
                    tf.summary.histogram(
                        end_point, self.bev_end_points[end_point])

            with tf.variable_scope('img_vgg'):
                for end_point in self.img_end_points:
                    tf.summary.histogram(
                        end_point, self.img_end_points[end_point])

        with tf.variable_scope('histograms_rpn'):
            with tf.variable_scope('anchor_predictor'):
                fc_layers = [cls_fc6, cls_fc7, cls_fc8, objectness,
                             reg_fc6, reg_fc7, reg_fc8, offsets]
                for fc_layer in fc_layers:
                    # fix the name to avoid tf warnings
                    tf.summary.histogram(fc_layer.name.replace(':', '_'),
                                         fc_layer)

        # Return the proposals
        with tf.variable_scope('proposals'):
            anchors = self.placeholders[self.PL_ANCHORS]

            # Decode anchor regression offsets
            with tf.variable_scope('decoding'):
                # 得到回归后的(x,y,z,dx,dy,dz).由最初的输入变为回归的值
                regressed_anchors = anchor_encoder.offset_to_anchor(
                        anchors, offsets)

            with tf.variable_scope('bev_projection'):
                # [[-40,40],[0,70]]
                # 返回bev_box_corner,bev_box_corners_norm
                _, bev_proposal_boxes_norm = anchor_projector.project_to_bev(
                    regressed_anchors, self._bev_extents)

            with tf.variable_scope('softmax'):
                objectness_softmax = tf.nn.softmax(objectness)

            with tf.variable_scope('nms'):
                objectness_scores = objectness_softmax[:, 1]

                # Do NMS on regressed anchors
                # 实现极大值抑制non max suppression,
                # 其中boxes是不同boxes的坐标,scores是不同boxes预测的分数,max_boxes是保留的最大box的个数。
                # iou_threshold是一个阈值,去掉大于这个阈值的所有boxes?。
                # _nms_size=1024,0.8
                # 筛选出来的序数
                top_indices = tf.image.non_max_suppression(
                    bev_proposal_boxes_norm, objectness_scores,
                    max_output_size=self._nms_size,
                    iou_threshold=self._nms_iou_thresh)
                # 选择筛选后的anchors和objectness
                top_anchors = tf.gather(regressed_anchors, top_indices)
                top_objectness_softmax = tf.gather(objectness_scores,
                                                   top_indices)
                # top_offsets = tf.gather(offsets, top_indices)
                # top_objectness = tf.gather(objectness, top_indices)

        # Get mini batch
        all_ious_gt = self.placeholders[self.PL_ANCHOR_IOUS]
        all_offsets_gt = self.placeholders[self.PL_ANCHOR_OFFSETS]
        all_classes_gt = self.placeholders[self.PL_ANCHOR_CLASSES]

        with tf.variable_scope('mini_batch'):
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mini_batch_mask, _ = \
                mini_batch_utils.sample_rpn_mini_batch(all_ious_gt)

        # ROI summary images
        rpn_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.rpn_mini_batch_size
        with tf.variable_scope('bev_rpn_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(self._bev_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_bev_box_indices = tf.zeros_like(
                tf.boolean_mask(all_classes_gt, mini_batch_mask),
                dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._bev_preprocessed,
                mb_bev_anchors_norm,
                mb_bev_box_indices,
                (32, 32))

            bev_input_roi_summary_images = tf.split(
                bev_input_rois, self._bev_depth, axis=3)
            tf.summary.image('bev_rpn_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=rpn_mini_batch_size)

        with tf.variable_scope('img_rpn_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(self._img_anchors_norm_pl,
                                                  mini_batch_mask)
            mb_img_box_indices = tf.zeros_like(
                tf.boolean_mask(all_classes_gt, mini_batch_mask),
                dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._img_preprocessed,
                mb_img_anchors_norm,
                mb_img_box_indices,
                (32, 32))

            tf.summary.image('img_rpn_rois',
                             img_input_rois,
                             max_outputs=rpn_mini_batch_size)

        # Ground Truth Tensors
        with tf.variable_scope('one_hot_classes'):

            # Anchor classification ground truth
            # Object / Not Object
            min_pos_iou = \
                self.dataset.kitti_utils.mini_batch_utils.rpn_pos_iou_range[0]

            objectness_classes_gt = tf.cast(
                tf.greater_equal(all_ious_gt, min_pos_iou),
                dtype=tf.int32)
            objectness_gt = tf.one_hot(
                objectness_classes_gt, depth=2,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=self._config.label_smoothing_epsilon)

        # Mask predictions for mini batch
        with tf.variable_scope('prediction_mini_batch'):
            objectness_masked = tf.boolean_mask(objectness, mini_batch_mask)
            offsets_masked = tf.boolean_mask(offsets, mini_batch_mask)

        with tf.variable_scope('ground_truth_mini_batch'):
            objectness_gt_masked = tf.boolean_mask(
                objectness_gt, mini_batch_mask)
            offsets_gt_masked = tf.boolean_mask(all_offsets_gt,
                                                mini_batch_mask)

        # Specify the tensors to evaluate
        predictions = dict()

        # Temporary predictions for debugging
        # predictions['anchor_ious'] = anchor_ious
        # predictions['anchor_offsets'] = all_offsets_gt

        if self._train_val_test in ['train', 'val']:
            # All anchors
            predictions[self.PRED_ANCHORS] = anchors

            # Mini-batch masks
            predictions[self.PRED_MB_MASK] = mini_batch_mask
            # Mini-batch predictions
            predictions[self.PRED_MB_OBJECTNESS] = objectness_masked
            predictions[self.PRED_MB_OFFSETS] = offsets_masked

            # Mini batch ground truth
            predictions[self.PRED_MB_OFFSETS_GT] = offsets_gt_masked
            predictions[self.PRED_MB_OBJECTNESS_GT] = objectness_gt_masked

            # Proposals after nms
            predictions[self.PRED_TOP_INDICES] = top_indices
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        else:
            # self._train_val_test == 'test'
            predictions[self.PRED_TOP_ANCHORS] = top_anchors
            predictions[
                self.PRED_TOP_OBJECTNESS_SOFTMAX] = top_objectness_softmax

        return predictions