def __init__(self, model_config, train_val_test, dataset):
    """Configure the AVOD model and create the RpnModel it wraps.

    Args:
        model_config: configuration for the model
        train_val_test: "train", "val", or "test"
        dataset: the dataset that will provide samples and ground truth

    Raises:
        ValueError: if the configured box representation is not
            supported, or if train_val_test is not one of
            "train", "val", "test".
    """
    # Sets model configs (_config)
    super(AvodModel, self).__init__(model_config)

    self.dataset = dataset

    # Dataset config: one more output class than the dataset defines
    self._num_final_classes = self.dataset.num_classes + 1

    # Input config (only the image inputs are read here)
    input_config = self._config.input_config
    self._img_pixel_size = np.asarray(
        [input_config.img_dims_h, input_config.img_dims_w])
    self._img_depth = [input_config.img_depth]

    # AVOD config
    avod_config = self._config.avod_config
    # The same crop size is used for both ROI dimensions
    self._proposal_roi_crop_size = \
        [avod_config.avod_proposal_roi_crop_size] * 2
    self._positive_selection = avod_config.avod_positive_selection
    self._nms_size = avod_config.avod_nms_size
    self._nms_iou_threshold = avod_config.avod_nms_iou_thresh

    self._path_drop_probabilities = self._config.path_drop_probabilities
    self._box_rep = avod_config.avod_box_representation

    if self._box_rep not in ['box_3d', 'box_8c', 'box_8co',
                             'box_4c', 'box_4ca']:
        raise ValueError('Invalid box representation', self._box_rep)

    # Validate the mode up front so an invalid value fails fast,
    # before the RPN sub-model is constructed.
    if train_val_test not in ["train", "val", "test"]:
        raise ValueError('Invalid train_val_test value, '
                         'should be one of ["train", "val", "test"]')

    # Create the RpnModel
    self._rpn_model = RpnModel(model_config, train_val_test, dataset)

    self._train_val_test = train_val_test
    self._is_training = (self._train_val_test == 'train')

    self.sample_info = {}
def test_path_drop_weights(self):
    """Check that image feature maps are unchanged by a train step
    while path drop is in effect.

    A minimal training process is set up and the image feature maps
    are compared before and after running the train op.
    """
    mode = 'train'

    # Keep the run short and allow existing checkpoints to be replaced
    self.train_config.max_iterations = 2
    self.train_config.overwrite_checkpoints = True

    # Overwrite path drop probabilities
    cfg_obj = config_builder.proto_to_obj(self.model_config)
    cfg_obj.path_drop_probabilities = [0.0, 0.8]

    with tf.Graph().as_default():
        # Graph-level seed
        tf.set_random_seed(1245)

        model = RpnModel(cfg_obj,
                         train_val_test=mode,
                         dataset=self.dataset)
        predictions = model.build()
        _, total_loss = model.loss(predictions)

        # Optimizer and train op
        summaries = set()
        optimizer = optimizer_builder.build(
            self.train_config.optimizer, summaries)
        train_op = slim.learning.create_train_op(total_loss, optimizer)

        init_op = tf.global_variables_initializer()

        with tf.Session() as sess:
            sess.run(init_op)

            for step in range(1, self.train_config.max_iterations):
                feed_dict = model.create_feed_dict()

                if step == 1:
                    # Snapshot of the feature maps before any update
                    exp_feature_maps = sess.run(model.img_feature_maps,
                                                feed_dict=feed_dict)

                train_op_loss = sess.run(train_op, feed_dict=feed_dict)
                print('Step {}, Total Loss {:0.3f} '.format(
                    step, train_op_loss))

                updated_feature_maps = sess.run(model.img_feature_maps,
                                                feed_dict=feed_dict)

            # The feature maps should have remained the same since
            # the image path was dropped
            np.testing.assert_array_almost_equal(
                updated_feature_maps, exp_feature_maps, decimal=4)
def train(model_config, train_config, dataset_config):
    """Build the KITTI dataset and the configured model, then train it.

    Args:
        model_config: model configuration; its model_name selects the
            model class
        train_config: training configuration handed to the trainer
        dataset_config: configuration used to build the KITTI dataset

    Raises:
        ValueError: if model_config.model_name is not a supported model
    """
    kitti_dataset = DatasetBuilder.build_kitti_dataset(
        dataset_config, use_defaults=False)

    name = model_config.model_name

    with tf.Graph().as_default():
        # Pick the class first, then construct it in one place
        if name == 'rpn_model':
            model_cls = RpnModel
        elif name == 'bev_only_rpn_model':
            model_cls = BevOnlyRpnModel
        elif name == 'avod_model':
            model_cls = AvodModel
        elif name == 'bev_only_avod_model':
            model_cls = BevOnlyAvodModel
        else:
            raise ValueError('Invalid model_name')

        model = model_cls(model_config,
                          train_val_test='train',
                          dataset=kitti_dataset)

        trainer.train(model, train_config)
def set_up_model(pipeline_config, data_split):
    """Build a model on top of the KITTI test-split dataset.

    The dataset is always configured for the 'test' split (no labels,
    no augmentation); data_split is passed to the model as its
    train_val_test mode.

    Args:
        pipeline_config: pipeline config file to read the configs from
        data_split: value forwarded to the model as train_val_test

    Returns:
        The constructed model.

    Raises:
        ValueError: if the configured model_name is not supported
    """
    configs = config_builder.get_configs_from_pipeline_file(
        pipeline_config, is_training=False)
    model_config, _, _, dataset_config = configs

    dataset_config = config_builder.proto_to_obj(dataset_config)

    # Always run in test mode
    dataset_config.data_split = 'test'
    dataset_config.data_split_dir = 'testing'
    dataset_config.has_labels = False
    dataset_config.aug_list = []

    kitti_dataset = DatasetBuilder.build_kitti_dataset(
        dataset_config, use_defaults=False)

    name = model_config.model_name
    if name == 'rpn_model':
        model_cls = RpnModel
    elif name == 'avod_model':
        model_cls = AvodModel
    elif name == 'avod_ssd_model':
        model_cls = AvodSSDModel
    else:
        raise ValueError('Invalid model_name')

    return model_cls(model_config,
                     train_val_test=data_split,
                     dataset=kitti_dataset)
def train(model_config, train_config, dataset_config):
    """Build the dataset and the selected model, then run training.

    The three arguments carry the contents of the config file:
        model_config: model parameters
        train_config: training parameters
        dataset_config: dataset parameters

    Raises:
        ValueError: if model_config.model_name is neither 'rpn_model'
            nor 'avod_model'
    """
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    mode = 'train'

    # Supported names are avod_model and rpn_model
    selected = model_config.model_name

    with tf.Graph().as_default():
        if selected not in ('rpn_model', 'avod_model'):
            raise ValueError('Invalid model_name')

        if selected == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=mode,
                             dataset=dataset)
        else:
            model = AvodModel(model_config,
                              train_val_test=mode,
                              dataset=dataset)

        trainer.train(model, train_config)
def train(model_config, train_config, dataset_config):
    """Train the model named by model_config.model_name on KITTI data.

    Args:
        model_config: model configuration ('rpn_model', 'avod_model'
            or 'retinanet_model' are accepted as model_name)
        train_config: training configuration handed to the trainer
        dataset_config: configuration used to build the KITTI dataset

    Raises:
        ValueError: if the model name is not one of the accepted values
    """
    data = DatasetBuilder.build_kitti_dataset(dataset_config,
                                              use_defaults=False)
    which = model_config.model_name

    with tf.Graph().as_default():
        # All models share the same constructor arguments
        ctor_kwargs = dict(train_val_test='train', dataset=data)

        if which == 'rpn_model':
            model = RpnModel(model_config, **ctor_kwargs)
        elif which == 'avod_model':
            model = AvodModel(model_config, **ctor_kwargs)
        elif which == 'retinanet_model':
            model = RetinanetModel(model_config, **ctor_kwargs)
        else:
            raise ValueError('Invalid model_name')

        trainer.train(model, train_config)
def test_rpn_loss(self):
    """Build an RpnModel, evaluate its loss dict once and print it."""
    # Use "val" so that the first sample is loaded each time
    model = RpnModel(self.model_config,
                     train_val_test="val",
                     dataset=self.dataset)

    prediction_dict = model.build()
    losses, _ = model.loss(prediction_dict)

    feeds = model.create_feed_dict()

    with self.test_session() as sess:
        sess.run(tf.global_variables_initializer())
        evaluated_losses = sess.run(losses, feed_dict=feeds)
        print('Losses ', evaluated_losses)
def inference(model_config, eval_config, dataset_config, data_split,
              ckpt_indices):
    """Run evaluation in test mode for the given checkpoints.

    Args:
        model_config: model configuration; model_name selects the class
        eval_config: evaluation configuration (overridden for test mode)
        dataset_config: dataset configuration (split, labels and
            augmentation are overridden here)
        data_split: dataset split to run on; 'test' switches the split
            directory to 'testing'
        ckpt_indices: checkpoint indices stored on the eval config

    Raises:
        ValueError: if the model name is not supported
    """
    # Overwrite the dataset defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)
    dataset_config.data_split = data_split
    dataset_config.data_split_dir = (
        'testing' if data_split == 'test' else 'training')

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False
    dataset_config.has_labels = False

    # Enable this to see the actual memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # The name is read before the config is converted below
    model_name = model_config.model_name

    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model_cls = AvodModel
        elif model_name == 'rpn_model':
            model_cls = RpnModel
        elif model_name == 'bev_only_rpn_model':
            model_cls = BevOnlyRpnModel
        elif model_name == 'bev_only_avod_model':
            model_cls = BevOnlyAvodModel
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model = model_cls(model_config,
                          train_val_test=eval_config.eval_mode,
                          dataset=dataset)

        evaluator = Evaluator(model, dataset_config, eval_config)
        evaluator.run_latest_checkpoints()
def test_disable_path_drop(self):
    """Path drop is disabled when both probabilities are set to 1.0."""
    # Overwrite path drop probabilities
    cfg_obj = config_builder.proto_to_obj(self.model_config)
    cfg_obj.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        model = RpnModel(cfg_obj,
                         train_val_test='train',
                         dataset=self.dataset)
        model.build()

        # The mask attributes are set during path drop only, so with
        # path drop disabled neither should exist on the model
        for mask_attr in ('img_path_drop_mask', 'bev_path_drop_mask'):
            self.assertFalse(hasattr(model, mask_attr))
def inferPerspective(model_config, eval_config, dataset_config,
                     additional_cls):
    """Run inference for one perspective and save KITTI predictions.

    Args:
        model_config: model configuration; its prediction directory is
            redirected into the perspective's directory
        eval_config: evaluation configuration; eval_mode is passed to
            the model as train_val_test
        dataset_config: dataset configuration for this perspective
        additional_cls: when falsy, ground planes are estimated first;
            also forwarded to the prediction conversion

    Returns:
        None. Returns early when the split contains no files.

    Raises:
        ValueError: if the model name is not supported
    """
    model_name = model_config.model_name

    perspective_dir = \
        dataset_config.dataset_dir + dataset_config.data_split_dir + '/'

    logging.debug("Inferring perspective: %s\n %s\n %s",
                  dataset_config.data_split, perspective_dir,
                  dataset_config.dataset_dir)

    has_files = create_split.create_split(dataset_config.dataset_dir,
                                          perspective_dir,
                                          dataset_config.data_split)

    # If there are no files within the range cfg.MIN_IDX, cfg.MAX_IDX
    # then skip this perspective
    if not has_files:
        logging.debug(
            "No files within the range cfg.MIN_IDX, cfg.MAX_IDX, skipping perspective"
        )
        return

    if not additional_cls:
        estimate_ground_planes.estimate_ground_planes(
            perspective_dir, dataset_config, 0)

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Switch inference output directory
    model_config.paths_config.pred_dir = \
        perspective_dir + '/{}/'.format(cfg.AVOD_OUTPUT_DIR)
    logging.debug("Prediction directory: %s",
                  model_config.paths_config.pred_dir)

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model_cls = AvodModel
        elif model_name == 'rpn_model':
            model_cls = RpnModel
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model = model_cls(model_config,
                          train_val_test=eval_config.eval_mode,
                          dataset=dataset)

        evaluator = Evaluator(model, dataset_config, eval_config)
        evaluator.run_latest_checkpoints()

        save_kitti_predictions.convertPredictionsToKitti(
            dataset, model_config.paths_config.pred_dir, additional_cls)
def train(model_config, train_config, dataset_config):
    """Build the dataset and the selected model, then train it once.

    Each model name is paired with exactly one trainer module, and
    training is started a single time after the model is built.
    (Previously the first four models were trained inside their branch
    and then a second time by the trailing generic trainer call.)

    Args:
        model_config: model configuration; model_name selects the model
            class and its trainer
        train_config: training configuration handed to the trainer
        dataset_config: configuration used to build the KITTI dataset

    Raises:
        ValueError: if model_config.model_name is not supported
    """
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    train_val_test = 'train'
    model_name = model_config.model_name

    with tf.Graph().as_default():
        if model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=train_val_test,
                             dataset=dataset)
            model_trainer = trainer
        elif model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=train_val_test,
                              dataset=dataset)
            model_trainer = trainer
        elif model_name == 'avod_moe_model':
            model = AvodMoeModel(model_config,
                                 train_val_test=train_val_test,
                                 dataset=dataset)
            model_trainer = trainer_moe
        elif model_name == 'epbrm':
            # epBRM does not take a train_val_test argument
            model = epBRM(model_config, dataset=dataset)
            model_trainer = epbrm_trainer
        elif model_name == 'avod_model_new_bev':
            model = AvodModelBEV(model_config,
                                 train_val_test=train_val_test,
                                 dataset=dataset)
            model_trainer = trainer_new_bev
        elif model_name == 'avod_model_double_fusion_new_bev':
            model = AvodModelDoubleFusionBEV(model_config,
                                             train_val_test=train_val_test,
                                             dataset=dataset)
            model_trainer = trainer_new_bev
        else:
            raise ValueError('Invalid model_name')

        # Single training run with the trainer matched to the model
        model_trainer.train(model, train_config)
def set_up_model_test_mode(pipeline_config_path, data_split):
    """Returns the model and its config in test mode.

    The dataset is built once, after the split and augmentation
    overrides have been applied. (An earlier build from the unmodified
    config was discarded immediately and has been removed.)

    Args:
        pipeline_config_path: pipeline config file to read configs from
        data_split: dataset split to use; 'test' selects the 'testing'
            split directory, anything else uses 'training'

    Returns:
        A (model, model_config) tuple; the model is always constructed
        with train_val_test='test'.

    Raises:
        ValueError: if the configured model_name is not supported
    """
    model_config, _, _, dataset_config = \
        config_builder.get_configs_from_pipeline_file(
            pipeline_config_path, is_training=False)

    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    # Use the requested split
    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    # Remove augmentation when in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    model_name = model_config.model_name
    if model_name == 'rpn_model':
        model = RpnModel(model_config,
                         train_val_test='test',
                         dataset=dataset)
    elif model_name == 'avod_model':
        model = AvodModel(model_config,
                          train_val_test='test',
                          dataset=dataset)
    elif model_name == 'avod_ssd_model':
        model = AvodSSDModel(model_config,
                             train_val_test='test',
                             dataset=dataset)
    else:
        raise ValueError('Invalid model_name')

    return model, model_config
def set_up_model_train_mode(pipeline_config_path, data_split):
    """Returns the model and its train_op.

    Args:
        pipeline_config_path: pipeline config file to read configs from
        data_split: value passed to the model as train_val_test

    Returns:
        A (model, train_op) tuple.

    Raises:
        ValueError: if the configured model_name is not supported
    """
    configs = config_builder.get_configs_from_pipeline_file(
        pipeline_config_path, is_training=True)
    model_config, train_config, _, dataset_config = configs

    kitti_dataset = DatasetBuilder.build_kitti_dataset(
        dataset_config, use_defaults=False)

    name = model_config.model_name
    if name == 'rpn_model':
        model_cls = RpnModel
    elif name == 'avod_model':
        model_cls = AvodModel
    elif name == 'avod_ssd_model':
        model_cls = AvodSSDModel
    else:
        raise ValueError('Invalid model_name')

    model = model_cls(model_config,
                      train_val_test=data_split,
                      dataset=kitti_dataset)

    predictions = model.build()
    _, total_loss = model.loss(predictions)

    # These parameters are required to set up the optimizer
    summaries = set()
    global_step = tf.Variable(0, trainable=False)
    optimizer = optimizer_builder.build(train_config.optimizer,
                                        summaries,
                                        global_step)

    # Set up the train op
    train_op = slim.learning.create_train_op(total_loss, optimizer)

    return model, train_op
class AvodModel(model.DetectionModel): TRAIN_REFINE = True ############################## # Keys for Predictions ############################## # Mini batch (mb) ground truth PRED_MB_CLASSIFICATIONS_GT = 'avod_mb_classifications_gt' PRED_MB_OFFSETS_GT = 'avod_mb_offsets_gt' PRED_MB_ORIENTATIONS_GT = 'avod_mb_orientations_gt' PRED_MB_POS_GT = 'avod_mb_pos_gt' PRED_MB_NEG_GT = 'avod_mb_neg_gt' # Mini batch (mb) predictions PRED_MB_CLASSIFICATION_LOGITS = 'avod_mb_classification_logits' PRED_MB_CLASSIFICATION_SOFTMAX = 'avod_mb_classification_softmax' PRED_MB_OFFSETS = 'avod_mb_offsets' PRED_MB_ANGLE_VECTORS = 'avod_mb_angle_vectors' # Top predictions after BEV NMS PRED_TOP_CLASSIFICATION_LOGITS = 'avod_top_classification_logits' PRED_TOP_CLASSIFICATION_SOFTMAX = 'avod_top_classification_softmax' PRED_TOP_PREDICTION_ANCHORS = 'avod_top_prediction_anchors' PRED_TOP_PREDICTION_BOXES_3D = 'avod_top_prediction_boxes_3d' PRED_TOP_ORIENTATIONS = 'avod_top_orientations' # Other box representations PRED_TOP_BOXES_8C = 'avod_top_regressed_boxes_8c' PRED_TOP_BOXES_4C = 'avod_top_prediction_boxes_4c' # Mini batch (mb) predictions (for debugging) PRED_MB_MASK = 'avod_mb_mask' PRED_MB_POS_MASK = 'avod_mb_pos_mask' PRED_MB_ANCHORS_GT = 'avod_mb_anchors_gt' PRED_MB_CLASS_INDICES_GT = 'avod_mb_gt_classes' # All predictions (for debugging) PRED_ALL_CLASSIFICATIONS = 'avod_classifications' PRED_ALL_OFFSETS = 'avod_offsets' PRED_ALL_ANGLE_VECTORS = 'avod_angle_vectors' PRED_MAX_IOUS = 'avod_max_ious' PRED_ALL_IOUS = 'avod_anchor_ious' ############################## # Keys for Loss ############################## LOSS_FINAL_CLASSIFICATION = 'avod_classification_loss' LOSS_FINAL_REGRESSION = 'avod_regression_loss' # (for debugging) LOSS_FINAL_ORIENTATION = 'avod_orientation_loss' LOSS_FINAL_LOCALIZATION = 'avod_localization_loss' def __init__(self, model_config, train_val_test, dataset): """ Args: model_config: configuration for the model train_val_test: "train", "val", or "test" dataset: 
the dataset that will provide samples and ground truth """ # Sets model configs (_config) super(AvodModel, self).__init__(model_config) #pdb.set_trace() self.dataset = dataset # Dataset config self._num_final_classes = self.dataset.num_classes + 1 # Input config input_config = self._config.input_config """ self._bev_pixel_size = np.asarray([input_config.bev_dims_h, input_config.bev_dims_w]) self._bev_depth = input_config.bev_depth """ self._img_pixel_size = np.asarray( [input_config.img_dims_h, input_config.img_dims_w]) self._img_depth = [input_config.img_depth] # AVOD config avod_config = self._config.avod_config self._proposal_roi_crop_size = \ [avod_config.avod_proposal_roi_crop_size] * 2 self._positive_selection = avod_config.avod_positive_selection self._nms_size = avod_config.avod_nms_size self._nms_iou_threshold = avod_config.avod_nms_iou_thresh self._path_drop_probabilities = self._config.path_drop_probabilities self._box_rep = avod_config.avod_box_representation if self._box_rep not in [ 'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca' ]: raise ValueError('Invalid box representation', self._box_rep) # Create the RpnModel self._rpn_model = RpnModel(model_config, train_val_test, dataset) if train_val_test not in ["train", "val", "test"]: raise ValueError('Invalid train_val_test value,' 'should be one of ["train", "val", "test"]') self._train_val_test = train_val_test self._is_training = (self._train_val_test == 'train') self.sample_info = {} def _norm(self, x): norm = tf.norm(x, axis=[1, 2], keep_dims=True) x = tf.divide(x, norm + 1e-7) return x def _sub_mean(self, x): mean = tf.reduce_mean(x, axis=[1, 2], keep_dims=True) x = x - mean return x def build(self): rpn_model = self._rpn_model # Share the same prediction dict as RPN prediction_dict = rpn_model.build() top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS] ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE] class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES] with 
tf.variable_scope('avod_projection'): if self._config.expand_proposals_xz > 0.0: expand_length = self._config.expand_proposals_xz # Expand anchors along x and z with tf.variable_scope('expand_xz'): expanded_dim_x = top_anchors[:, 3] + expand_length expanded_dim_z = top_anchors[:, 5] + expand_length expanded_anchors = tf.stack([ top_anchors[:, 0], top_anchors[:, 1], top_anchors[:, 2], expanded_dim_x, top_anchors[:, 4], expanded_dim_z ], axis=1) avod_projection_in = expanded_anchors else: avod_projection_in = top_anchors with tf.variable_scope('bev'): # Project top anchors into bev and image spaces bev_proposal_boxes, bev_proposal_boxes_norm = \ anchor_projector.project_to_bev( avod_projection_in, self.dataset.kitti_utils.bev_extents) # Reorder projected boxes into [y1, x1, y2, x2] bev_proposal_boxes_tf_order = \ anchor_projector.reorder_projected_boxes( bev_proposal_boxes) bev_proposal_boxes_norm_tf_order = \ anchor_projector.reorder_projected_boxes( bev_proposal_boxes_norm) with tf.variable_scope('img'): image_shape = tf.cast( tf.shape( rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2], tf.float32) img_proposal_boxes, img_proposal_boxes_norm = \ anchor_projector.tf_project_to_image_space( avod_projection_in, rpn_model.placeholders[RpnModel.PL_CALIB_P2], image_shape) # Only reorder the normalized img img_proposal_boxes_norm_tf_order = \ anchor_projector.reorder_projected_boxes( img_proposal_boxes_norm) with tf.variable_scope('img_r'): image_r_shape = tf.cast( tf.shape( rpn_model.placeholders[RpnModel.PL_IMG_R_INPUT])[0:2], tf.float32) img_r_proposal_boxes, img_r_proposal_boxes_norm = \ anchor_projector.tf_project_to_image_space( avod_projection_in, rpn_model.placeholders[RpnModel.PL_CALIB_P3], image_r_shape) # Only reorder the normalized img img_r_proposal_boxes_norm_tf_order = \ anchor_projector.reorder_projected_boxes( img_r_proposal_boxes_norm) #bev_feature_maps = rpn_model.bev_feature_maps img_feature_maps = rpn_model.img_feature_maps img_r_feature_maps = 
rpn_model.img_r_feature_maps """ if not (self._path_drop_probabilities[0] == self._path_drop_probabilities[1] == 1.0): with tf.variable_scope('avod_path_drop'): img_mask = rpn_model.img_path_drop_mask #bev_mask = rpn_model.bev_path_drop_mask img_r_mask = rpn_model.img_r_path_drop_mask img_feature_maps = tf.multiply(img_feature_maps, img_mask) #bev_feature_maps = tf.multiply(bev_feature_maps, # bev_mask) img_r_feature_maps = tf.multiply(img_r_feature_maps, img_r_mask) else: #bev_mask = tf.constant(1.0) img_mask = tf.constant(1.0) img_r_mask = tf.constant(1.0) """ img_mask = tf.constant(1.0) img_r_mask = tf.constant(1.0) # ROI Pooling with tf.variable_scope('avod_roi_pooling'): def get_box_indices(boxes): proposals_shape = boxes.get_shape().as_list() if any(dim is None for dim in proposals_shape): proposals_shape = tf.shape(boxes) ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32) multiplier = tf.expand_dims( tf.range(start=0, limit=proposals_shape[0]), 1) return tf.reshape(ones_mat * multiplier, [-1]) """ bev_boxes_norm_batches = tf.expand_dims( bev_proposal_boxes_norm, axis=0) # These should be all 0's since there is only 1 image tf_box_indices = get_box_indices(bev_boxes_norm_batches) # Do ROI Pooling on BEV bev_rois = tf.image.crop_and_resize( bev_feature_maps, bev_proposal_boxes_norm_tf_order, tf_box_indices, self._proposal_roi_crop_size, name='bev_rois') """ img_boxes_norm_batches = tf.expand_dims(img_proposal_boxes_norm, axis=0) # These should be all 0's since there is only 1 image tf_box_indices = get_box_indices(img_boxes_norm_batches) # Do ROI Pooling on image img_rois = tf.image.crop_and_resize( img_feature_maps, img_proposal_boxes_norm_tf_order, tf_box_indices, (32, 32), name='img_rois') img_r_rois = tf.image.crop_and_resize( img_r_feature_maps, img_r_proposal_boxes_norm_tf_order, tf_box_indices, (32, 32), name='img_r_rois') img_rois = self._sub_mean(img_rois) img_r_rois = self._sub_mean(img_r_rois) cos_simi = tf.reduce_sum(img_rois * img_r_rois, \ 
axis=[1, 2], keep_dims=True) cos_simi = cos_simi / (tf.norm(img_rois + 1e-5, axis=[1, 2], keep_dims=True) * \ tf.norm(img_r_rois + 1e-5, axis=[1, 2], keep_dims=True)) cos_simi = tf.nn.relu(cos_simi) img_rois = tf.image.resize_bilinear( img_rois, self._proposal_roi_crop_size) * cos_simi img_r_rois = tf.image.resize_bilinear( img_r_rois, self._proposal_roi_crop_size) * cos_simi # Fully connected layers (Box Predictor) avod_layers_config = self.model_config.layers_config.avod_config fc_output_layers = \ avod_fc_layers_builder.build( layers_config=avod_layers_config, input_rois=[img_rois, img_r_rois], input_weights=[img_mask, img_r_mask], num_final_classes=self._num_final_classes, box_rep=self._box_rep, top_anchors=top_anchors, ground_plane=ground_plane, is_training=self._is_training) all_cls_logits = \ fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS] all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS] # This may be None all_angle_vectors = \ fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS) with tf.variable_scope('softmax'): all_cls_softmax = tf.nn.softmax(all_cls_logits) ###################################################### # Subsample mini_batch for the loss function ###################################################### # Get the ground truth tensors anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS] if self._box_rep in ['box_3d', 'box_4ca']: boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D] orientations_gt = boxes_3d_gt[:, 6] elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']: boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D] else: raise NotImplementedError('Ground truth tensors not implemented') # Project anchor_gts to 2D bev with tf.variable_scope('avod_gt_projection'): bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev( anchors_gt, self.dataset.kitti_utils.bev_extents) bev_anchor_boxes_gt_tf_order = \ anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt) with 
tf.variable_scope('avod_box_list'): # Convert to box_list format anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order) anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order) mb_mask, mb_class_label_indices, mb_gt_indices = \ self.sample_mini_batch( anchor_box_list_gt=anchor_box_list_gt, anchor_box_list=anchor_box_list, class_labels=class_labels) # Create classification one_hot vector with tf.variable_scope('avod_one_hot_classes'): mb_classification_gt = tf.one_hot( mb_class_label_indices, depth=self._num_final_classes, on_value=1.0 - self._config.label_smoothing_epsilon, off_value=(self._config.label_smoothing_epsilon / self.dataset.num_classes)) # TODO: Don't create a mini batch in test mode # Mask predictions with tf.variable_scope('avod_apply_mb_mask'): # Classification mb_classifications_logits = tf.boolean_mask( all_cls_logits, mb_mask) mb_classifications_softmax = tf.boolean_mask( all_cls_softmax, mb_mask) # Offsets mb_offsets = tf.boolean_mask(all_offsets, mb_mask) # Angle Vectors if all_angle_vectors is not None: mb_angle_vectors = tf.boolean_mask(all_angle_vectors, mb_mask) else: mb_angle_vectors = None # Encode anchor offsets with tf.variable_scope('avod_encode_mb_anchors'): mb_anchors = tf.boolean_mask(top_anchors, mb_mask) if self._box_rep == 'box_3d': # Gather corresponding ground truth anchors for each mb sample mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices) mb_offsets_gt = anchor_encoder.tf_anchor_to_offset( mb_anchors, mb_anchors_gt) # Gather corresponding ground truth orientation for each # mb sample mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices) elif self._box_rep in ['box_8c', 'box_8co']: # Get boxes_3d ground truth mini-batch and convert to box_8c mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices) if self._box_rep == 'box_8c': mb_boxes_8c_gt = \ box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt) elif self._box_rep == 'box_8co': mb_boxes_8c_gt = \ box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt) 
# Convert proposals: anchors -> box_3d -> box8c proposal_boxes_3d = \ box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True) proposal_boxes_8c = \ box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d) # Get mini batch offsets mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask) mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets( mb_boxes_8c, mb_boxes_8c_gt) # Flatten the offsets to a (N x 24) vector mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24]) elif self._box_rep in ['box_4c', 'box_4ca']: # Get ground plane for box_4c conversion ground_plane = self._rpn_model.placeholders[ self._rpn_model.PL_GROUND_PLANE] # Convert gt boxes_3d -> box_4c mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices) mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c( mb_boxes_3d_gt, ground_plane) # Convert proposals: anchors -> box_3d -> box_4c proposal_boxes_3d = \ box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True) proposal_boxes_4c = \ box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d, ground_plane) # Get mini batch mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask) mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets( mb_boxes_4c, mb_boxes_4c_gt) if self._box_rep == 'box_4ca': # Gather corresponding ground truth orientation for each # mb sample mb_orientations_gt = tf.gather(orientations_gt, mb_gt_indices) else: raise NotImplementedError( 'Anchor encoding not implemented for', self._box_rep) ###################################################### # ROI summary images ###################################################### avod_mini_batch_size = \ self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size """ with tf.variable_scope('bev_avod_rois'): mb_bev_anchors_norm = tf.boolean_mask( bev_proposal_boxes_norm_tf_order, mb_mask) mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Show the ROIs of the BEV input density map # for the mini batch anchors bev_input_rois = tf.image.crop_and_resize( self._rpn_model._bev_preprocessed, 
mb_bev_anchors_norm, mb_bev_box_indices, (32, 32)) bev_input_roi_summary_images = tf.split( bev_input_rois, self._bev_depth, axis=3) tf.summary.image('bev_avod_rois', bev_input_roi_summary_images[-1], max_outputs=avod_mini_batch_size) """ with tf.variable_scope('img_avod_rois'): # ROIs on image input mb_img_anchors_norm = tf.boolean_mask( img_proposal_boxes_norm_tf_order, mb_mask) mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Do test ROI pooling on mini batch img_input_rois = tf.image.crop_and_resize( self._rpn_model._img_preprocessed, mb_img_anchors_norm, mb_img_box_indices, (32, 32)) tf.summary.image('img_avod_rois', img_input_rois, max_outputs=avod_mini_batch_size) with tf.variable_scope('img_r_avod_rois'): # ROIs on image input mb_img_r_anchors_norm = tf.boolean_mask( img_r_proposal_boxes_norm_tf_order, mb_mask) mb_img_r_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32) # Do test ROI pooling on mini batch img_r_input_rois = tf.image.crop_and_resize( self._rpn_model._img_r_preprocessed, mb_img_r_anchors_norm, mb_img_r_box_indices, (32, 32)) tf.summary.image('img_r_avod_rois', img_r_input_rois, max_outputs=avod_mini_batch_size) ###################################################### # Final Predictions ###################################################### # Get orientations from angle vectors if all_angle_vectors is not None: with tf.variable_scope('avod_orientation'): all_orientations = \ orientation_encoder.tf_angle_vector_to_orientation( all_angle_vectors) # Apply offsets to regress proposals with tf.variable_scope('avod_regression'): if self._box_rep == 'box_3d': prediction_anchors = \ anchor_encoder.offset_to_anchor(top_anchors, all_offsets) elif self._box_rep in ['box_8c', 'box_8co']: # Reshape the 24-dim regressed offsets to (N x 3 x 8) reshaped_offsets = tf.reshape(all_offsets, [-1, 3, 8]) # Given the offsets, get the boxes_8c prediction_boxes_8c = \ box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c, reshaped_offsets) # 
Convert corners back to box3D prediction_boxes_3d = \ box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c) # Convert the box_3d to anchor format for nms prediction_anchors = \ box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d) elif self._box_rep in ['box_4c', 'box_4ca']: # Convert predictions box_4c -> box_3d prediction_boxes_4c = \ box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c, all_offsets) prediction_boxes_3d = \ box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c, ground_plane) # Convert to anchor format for nms prediction_anchors = \ box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d) else: raise NotImplementedError('Regression not implemented for', self._box_rep) # Apply Non-oriented NMS in BEV with tf.variable_scope('avod_nms'): bev_extents = self.dataset.kitti_utils.bev_extents with tf.variable_scope('bev_projection'): # Project predictions into BEV avod_bev_boxes, _ = anchor_projector.project_to_bev( prediction_anchors, bev_extents) avod_bev_boxes_tf_order = \ anchor_projector.reorder_projected_boxes( avod_bev_boxes) # Get top score from second column onward all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1) # Apply NMS in BEV nms_indices = tf.image.non_max_suppression( avod_bev_boxes_tf_order, all_top_scores, max_output_size=self._nms_size, iou_threshold=self._nms_iou_threshold) # Gather predictions from NMS indices top_classification_logits = tf.gather(all_cls_logits, nms_indices) top_classification_softmax = tf.gather(all_cls_softmax, nms_indices) top_prediction_anchors = tf.gather(prediction_anchors, nms_indices) if self._box_rep == 'box_3d': top_orientations = tf.gather(all_orientations, nms_indices) elif self._box_rep in ['box_8c', 'box_8co']: top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_8c = tf.gather(prediction_boxes_8c, nms_indices) elif self._box_rep == 'box_4c': top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_4c = 
tf.gather(prediction_boxes_4c, nms_indices) elif self._box_rep == 'box_4ca': top_prediction_boxes_3d = tf.gather(prediction_boxes_3d, nms_indices) top_prediction_boxes_4c = tf.gather(prediction_boxes_4c, nms_indices) top_orientations = tf.gather(all_orientations, nms_indices) else: raise NotImplementedError('NMS gather not implemented for', self._box_rep) if self._train_val_test in ['train', 'val']: # Additional entries are added to the shared prediction_dict # Mini batch predictions prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \ mb_classifications_logits prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \ mb_classifications_softmax prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets # Mini batch ground truth prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \ mb_classification_gt prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt # Top NMS predictions prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \ top_classification_logits prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \ top_classification_softmax prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \ top_prediction_anchors # Mini batch predictions (for debugging) prediction_dict[self.PRED_MB_MASK] = mb_mask # prediction_dict[self.PRED_MB_POS_MASK] = mb_pos_mask prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \ mb_class_label_indices # All predictions (for debugging) prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \ all_cls_logits prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets # Path drop masks (for debugging) #prediction_dict['bev_mask'] = bev_mask prediction_dict['img_mask'] = img_mask prediction_dict['img_r_mask'] = img_r_mask else: # self._train_val_test == 'test' prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \ top_classification_softmax prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \ top_prediction_anchors if self._box_rep == 'box_3d': prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = 
mb_orientations_gt prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations # For debugging prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors elif self._box_rep in ['box_8c', 'box_8co']: prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d # Store the corners before converting for visualization purposes prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c elif self._box_rep == 'box_4c': prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c elif self._box_rep == 'box_4ca': if self._train_val_test in ['train', 'val']: prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \ mb_orientations_gt prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \ top_prediction_boxes_3d prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations else: raise NotImplementedError('Prediction dict not implemented for', self._box_rep) # prediction_dict[self.PRED_MAX_IOUS] = max_ious # prediction_dict[self.PRED_ALL_IOUS] = all_ious return prediction_dict def sample_mini_batch(self, anchor_box_list_gt, anchor_box_list, class_labels): with tf.variable_scope('avod_create_mb_mask'): # Get IoU for every anchor all_ious = box_list_ops.iou(anchor_box_list_gt, anchor_box_list) max_ious = tf.reduce_max(all_ious, axis=0) max_iou_indices = tf.argmax(all_ious, axis=0) # Sample a pos/neg mini-batch from anchors with highest IoU match mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils mb_mask, mb_pos_mask = mini_batch_utils.sample_avod_mini_batch( max_ious) mb_class_label_indices = mini_batch_utils.mask_class_label_indices( mb_pos_mask, mb_mask, max_iou_indices, class_labels) mb_gt_indices = tf.boolean_mask(max_iou_indices, mb_mask) return mb_mask, 
mb_class_label_indices, mb_gt_indices def create_feed_dict(self): feed_dict = self._rpn_model.create_feed_dict() self.sample_info = self._rpn_model.sample_info return feed_dict def loss(self, prediction_dict): # Note: The loss should be using mini-batch values only loss_dict, rpn_loss, rpn_acc_all, rpn_acc_pos, \ rpn_acc_score_neg, rpn_acc_score_pos = self._rpn_model.loss(prediction_dict) losses_output, avod_acc_all, avod_acc_pos = avod_loss_builder.build( self, prediction_dict) classification_loss = \ losses_output[avod_loss_builder.KEY_CLASSIFICATION_LOSS] final_reg_loss = losses_output[avod_loss_builder.KEY_REGRESSION_LOSS] avod_loss = losses_output[avod_loss_builder.KEY_AVOD_LOSS] offset_loss_norm = \ losses_output[avod_loss_builder.KEY_OFFSET_LOSS_NORM] loss_dict.update({self.LOSS_FINAL_CLASSIFICATION: classification_loss}) loss_dict.update({self.LOSS_FINAL_REGRESSION: final_reg_loss}) # Add localization and orientation losses to loss dict for plotting loss_dict.update({self.LOSS_FINAL_LOCALIZATION: offset_loss_norm}) ang_loss_loss_norm = losses_output.get( avod_loss_builder.KEY_ANG_LOSS_NORM) if ang_loss_loss_norm is not None: loss_dict.update({self.LOSS_FINAL_ORIENTATION: ang_loss_loss_norm}) with tf.variable_scope('model_total_loss'): if self.TRAIN_REFINE: total_loss = rpn_loss + avod_loss else: total_loss = rpn_loss rpn_score_2d_loss = loss_dict[self._rpn_model.LOSS_RPN_SCORE_2D] rpn_class_loss = loss_dict[self._rpn_model.LOSS_RPN_OBJECTNESS] rpn_reg_loss = loss_dict[self._rpn_model.LOSS_RPN_REGRESSION] refine_class_loss = classification_loss refine_reg_loss = final_reg_loss return loss_dict, \ total_loss, \ rpn_score_2d_loss, \ rpn_acc_score_neg, \ rpn_acc_score_pos, \ rpn_class_loss, \ rpn_reg_loss, \ rpn_acc_all, \ rpn_acc_pos, \ refine_class_loss, \ refine_reg_loss, \ avod_acc_all, \ avod_acc_pos
class AvodModelDoubleFusionBEV(model.DetectionModel):
    """Second stage AVOD model with an additional "fused BEV" input path.

    On top of the RPN (built through ``RpnModel``) this model crops ROIs
    from the BEV and image feature maps, averages them, and feeds them to
    the box predictor together with ROIs cropped from a second BEV feature
    extractor. The input of that extractor is built by the ``cf`` helpers
    from image features gathered at projected BEV pixel locations.
    """

    ##############################
    # Keys for Predictions
    ##############################
    # Mini batch (mb) ground truth
    PRED_MB_CLASSIFICATIONS_GT = 'avod_mb_classifications_gt'
    PRED_MB_OFFSETS_GT = 'avod_mb_offsets_gt'
    PRED_MB_ORIENTATIONS_GT = 'avod_mb_orientations_gt'

    # Mini batch (mb) predictions
    PRED_MB_CLASSIFICATION_LOGITS = 'avod_mb_classification_logits'
    PRED_MB_CLASSIFICATION_SOFTMAX = 'avod_mb_classification_softmax'
    PRED_MB_OFFSETS = 'avod_mb_offsets'
    PRED_MB_ANGLE_VECTORS = 'avod_mb_angle_vectors'

    # Top predictions after BEV NMS
    PRED_TOP_CLASSIFICATION_LOGITS = 'avod_top_classification_logits'
    PRED_TOP_CLASSIFICATION_SOFTMAX = 'avod_top_classification_softmax'

    PRED_TOP_PREDICTION_ANCHORS = 'avod_top_prediction_anchors'
    PRED_TOP_PREDICTION_BOXES_3D = 'avod_top_prediction_boxes_3d'
    PRED_TOP_ORIENTATIONS = 'avod_top_orientations'

    # Other box representations
    PRED_TOP_BOXES_8C = 'avod_top_regressed_boxes_8c'
    PRED_TOP_BOXES_4C = 'avod_top_prediction_boxes_4c'

    # Mini batch (mb) predictions (for debugging)
    PRED_MB_MASK = 'avod_mb_mask'
    PRED_MB_POS_MASK = 'avod_mb_pos_mask'
    PRED_MB_ANCHORS_GT = 'avod_mb_anchors_gt'
    PRED_MB_CLASS_INDICES_GT = 'avod_mb_gt_classes'

    # All predictions (for debugging)
    PRED_ALL_CLASSIFICATIONS = 'avod_classifications'
    PRED_ALL_OFFSETS = 'avod_offsets'
    PRED_ALL_ANGLE_VECTORS = 'avod_angle_vectors'

    PRED_MAX_IOUS = 'avod_max_ious'
    PRED_ALL_IOUS = 'avod_anchor_ious'

    ##############################
    # Keys for Loss
    ##############################
    LOSS_FINAL_CLASSIFICATION = 'avod_classification_loss'
    LOSS_FINAL_REGRESSION = 'avod_regression_loss'

    # (for debugging)
    LOSS_FINAL_ORIENTATION = 'avod_orientation_loss'
    LOSS_FINAL_LOCALIZATION = 'avod_localization_loss'

    def __init__(self, model_config, train_val_test, dataset):
        """
        Args:
            model_config: configuration for the model
            train_val_test: "train", "val", or "test"
            dataset: the dataset that will provide samples and ground truth

        Raises:
            ValueError: if the configured box representation or
                train_val_test is not one of the supported values.
        """

        # Sets model configs (_config)
        super(AvodModelDoubleFusionBEV, self).__init__(model_config)

        self.dataset = dataset

        # Dataset config (one extra class on top of the dataset classes)
        self._num_final_classes = self.dataset.num_classes + 1

        # Input config
        input_config = self._config.input_config
        self._bev_pixel_size = np.asarray(
            [input_config.bev_dims_h, input_config.bev_dims_w])
        self._bev_depth = input_config.bev_depth

        self._img_pixel_size = np.asarray(
            [input_config.img_dims_h, input_config.img_dims_w])
        self._img_depth = [input_config.img_depth]

        # AVOD config
        avod_config = self._config.avod_config
        self._proposal_roi_crop_size = \
            [avod_config.avod_proposal_roi_crop_size] * 2
        self._positive_selection = avod_config.avod_positive_selection
        self._nms_size = avod_config.avod_nms_size
        self._nms_iou_threshold = avod_config.avod_nms_iou_thresh
        self._path_drop_probabilities = self._config.path_drop_probabilities
        self._box_rep = avod_config.avod_box_representation

        if self._box_rep not in [
                'box_3d', 'box_8c', 'box_8co', 'box_4c', 'box_4ca'
        ]:
            raise ValueError('Invalid box representation', self._box_rep)

        # Create the RpnModel
        self._rpn_model = RpnModel(model_config, train_val_test, dataset)

        if train_val_test not in ["train", "val", "test"]:
            raise ValueError('Invalid train_val_test value,'
                             'should be one of ["train", "val", "test"]')
        self._train_val_test = train_val_test
        self._is_training = (self._train_val_test == 'train')

        self.sample_info = {}

    ##########################################################################
    # TODO PROJECT: scale the features to features with larger maximum values
    def _max_feature_vals(self, bev_rois, img_rois):
        """Returns the (max_bev, max_img) values used for feature scaling.

        ``self.max_bev_feature_val`` / ``self.max_img_feature_val`` are used
        when they have been set on the instance; otherwise the maximum is
        computed over all elements of the corresponding ROI tensor.
        """
        max_bev = getattr(self, 'max_bev_feature_val', None)
        if max_bev is None:
            max_bev = tf.reduce_max(bev_rois)

        max_img = getattr(self, 'max_img_feature_val', None)
        if max_img is None:
            max_img = tf.reduce_max(img_rois)

        return max_bev, max_img

    def scale_bev(self, bev_rois, img_rois):
        """Scales the BEV ROIs by max(img features) / max(bev features).

        Args:
            bev_rois: ROI features cropped from the BEV feature maps
            img_rois: ROI features cropped from the image feature maps

        Returns:
            ``bev_rois`` multiplied by the ratio of the two maxima.
        """
        max_bev, max_img = self._max_feature_vals(bev_rois, img_rois)
        val_to_mul = tf.divide(max_img, max_bev)
        return tf.multiply(bev_rois, val_to_mul)

    def scale_img(self, bev_rois, img_rois):
        """Scales the image ROIs by max(bev features) / max(img features).

        Args:
            bev_rois: ROI features cropped from the BEV feature maps
            img_rois: ROI features cropped from the image feature maps

        Returns:
            ``img_rois`` multiplied by the ratio of the two maxima.
        """
        max_bev, max_img = self._max_feature_vals(bev_rois, img_rois)
        val_to_mul = tf.divide(max_bev, max_img)
        # The image ROIs are the ones being scaled here (this used to
        # multiply bev_rois, a copy/paste slip from scale_bev)
        return tf.multiply(img_rois, val_to_mul)
    ##########################################################################

    def build(self):
        """Builds the RPN and the second stage on top of it.

        Returns:
            The prediction dict returned by the RPN model's ``build()``,
            extended with the second stage entries (mini batch predictions
            and ground truth in train/val mode, top NMS predictions, and
            box representation specific entries).

        Raises:
            NotImplementedError: if a branch has no implementation for the
                configured box representation.
        """
        rpn_model = self._rpn_model

        # Share the same prediction dict as RPN
        prediction_dict = rpn_model.build()

        top_anchors = prediction_dict[RpnModel.PRED_TOP_ANCHORS]
        ground_plane = rpn_model.placeholders[RpnModel.PL_GROUND_PLANE]

        class_labels = rpn_model.placeholders[RpnModel.PL_LABEL_CLASSES]

        with tf.variable_scope('avod_projection'):

            if self._config.expand_proposals_xz > 0.0:

                expand_length = self._config.expand_proposals_xz

                # Expand anchors along x and z
                with tf.variable_scope('expand_xz'):
                    expanded_dim_x = top_anchors[:, 3] + expand_length
                    expanded_dim_z = top_anchors[:, 5] + expand_length

                    expanded_anchors = tf.stack([
                        top_anchors[:, 0],
                        top_anchors[:, 1],
                        top_anchors[:, 2],
                        expanded_dim_x,
                        top_anchors[:, 4],
                        expanded_dim_z
                    ], axis=1)

                avod_projection_in = expanded_anchors

            else:
                avod_projection_in = top_anchors

            with tf.variable_scope('bev'):
                # Project top anchors into bev and image spaces
                # bev_proposal_boxes are boxes' x and z coordinate relative
                # to bev_extents
                # bev_proposal_boxes_norm are normalized boxes in
                # bev_extents' range
                bev_proposal_boxes, bev_proposal_boxes_norm = \
                    anchor_projector.project_to_bev(
                        avod_projection_in,
                        self.dataset.kitti_utils.bev_extents)

                # Reorder projected boxes into [y1, x1, y2, x2]
                bev_proposal_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes)
                bev_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        bev_proposal_boxes_norm)

            with tf.variable_scope('img'):
                image_shape = tf.cast(
                    tf.shape(
                        rpn_model.placeholders[RpnModel.PL_IMG_INPUT])[0:2],
                    tf.float32)
                img_proposal_boxes, img_proposal_boxes_norm = \
                    anchor_projector.tf_project_to_image_space(
                        avod_projection_in,
                        rpn_model.placeholders[RpnModel.PL_CALIB_P2],
                        image_shape)
                # Only reorder the normalized img
                img_proposal_boxes_norm_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        img_proposal_boxes_norm)

        bev_feature_maps = rpn_model.bev_feature_maps
        img_feature_maps = rpn_model.img_feature_maps

        # Path drop is skipped only when both keep probabilities are 1.0
        if not (self._path_drop_probabilities[0] ==
                self._path_drop_probabilities[1] == 1.0):

            with tf.variable_scope('avod_path_drop'):

                img_mask = rpn_model.img_path_drop_mask
                bev_mask = rpn_model.bev_path_drop_mask

                img_feature_maps = tf.multiply(img_feature_maps,
                                               img_mask)

                bev_feature_maps = tf.multiply(bev_feature_maps,
                                               bev_mask)
        else:
            bev_mask = tf.constant(1.0)
            img_mask = tf.constant(1.0)

        # ROI Pooling
        with tf.variable_scope('avod_roi_pooling'):
            def get_box_indices(boxes):
                proposals_shape = boxes.get_shape().as_list()
                # Fall back to the dynamic shape if any dim is unknown
                if any(dim is None for dim in proposals_shape):
                    proposals_shape = tf.shape(boxes)
                ones_mat = tf.ones(proposals_shape[:2], dtype=tf.int32)
                multiplier = tf.expand_dims(
                    tf.range(start=0, limit=proposals_shape[0]), 1)
                return tf.reshape(ones_mat * multiplier, [-1])

            bev_boxes_norm_batches = tf.expand_dims(
                bev_proposal_boxes_norm, axis=0)

            # These should be all 0's since there is only 1 image
            tf_box_indices = get_box_indices(bev_boxes_norm_batches)

            # Do ROI Pooling on BEV
            # tf_box_indices contains 1D tensor with size [num_boxes], each
            # element specifies batch index to whom this box belongs.
            # Because the batch size here is 1, so it doesn't matter
            # bev_rois is a 4-D tensor of shape
            # [num_boxes, crop_height, crop_width, depth]
            bev_rois = tf.image.crop_and_resize(
                bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='bev_rois')
            # Do ROI Pooling on image
            img_rois = tf.image.crop_and_resize(
                img_feature_maps,
                img_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='img_rois')

            ##################################################################
            # TODO PROJECT: create member variables for accessing
            self.bev_boxes = bev_proposal_boxes_tf_order
            self.bev_boxes_norm = bev_proposal_boxes_norm
            self.img_boxes = img_proposal_boxes
            self.img_boxes_norm = img_proposal_boxes_norm
            ##################################################################

            # NOTE: earlier experiments that are currently disabled:
            #   * rescaling bev_rois / img_rois with scale_bev / scale_img
            #   * weighting both ROI sets with a mixture of experts model

            ##################################################################
            # TODO PROJECT: create fused bev
            # NOTE(review): the original indentation of this section was
            # lost; it is assumed to live inside 'avod_roi_pooling'. This
            # determines the names of the 'new_bev_vgg' variables - confirm
            # against existing checkpoints.
            bev_preprocessed = rpn_model._bev_preprocessed
            calib_p2 = rpn_model.placeholders[RpnModel.PL_CALIB_P2]
            bev_extents = self.dataset.kitti_utils.bev_extents

            # The normalized margin boxes are currently not used below
            _, bev_mar_boxes_norm = cf.add_margin_to_regions(
                bev_proposal_boxes,
                bev_extents)

            bev_pixels_loc = cf.bev_pixel_eq_1_loc(bev_preprocessed)

            slices_config = \
                self.dataset.config.kitti_utils_config.bev_generator.slices
            max_height = slices_config.height_hi
            min_height = slices_config.height_lo
            num_slices = slices_config.num_slices

            # Center height of each of the num_slices equally sized slices
            # between min_height and max_height
            height_list = [
                min_height + (2 * x + 1) * (max_height - min_height) /
                (2.0 * num_slices)
                for x in range(num_slices)
            ]

            print("bev_preprocess shape: ", (bev_preprocessed).shape)
            velo_pc = cf.bev_pixel_loc_to_3d_velo(
                bev_pixels_loc,
                tf.shape(bev_preprocessed)[1:3],
                height_list,
                bev_extents)

            print("PL_CALIB_P2 shape: ", calib_p2.shape)
            p_2d = anchor_projector.project_to_image_tensor(
                tf.transpose(tf.cast(velo_pc, tf.float32)),
                calib_p2)

            print("image feature maps [0] shape: ",
                  img_feature_maps[0].shape)
            # NOTE(review): gather_nd indexes img_feature_maps[0] as
            # [dim0, dim1]; confirm that the columns of transposed p_2d are
            # in that order and that all points fall inside the feature map
            features_at_p_2d = tf.gather_nd(
                img_feature_maps[0],
                tf.cast(tf.round(tf.transpose(p_2d)), tf.int32))
            print("features_at_p_2d shape: ", features_at_p_2d.shape)

            new_bev = cf.create_fused_bev(
                tf.shape(bev_preprocessed),
                bev_pixels_loc,
                features_at_p_2d)

            # Second BEV feature extractor, built from the same config as
            # the regular BEV one but under its own scope
            self._new_bev_feature_extractor = \
                feature_extractor_builder.get_extractor(
                    self.model_config.layers_config.bev_feature_extractor)
            self.new_bev_feature_maps, self.new_bev_end_points = \
                self._new_bev_feature_extractor.build(
                    new_bev,
                    self._bev_pixel_size,
                    self._is_training,
                    scope='new_bev_vgg'
                )

            new_bev_rois = tf.image.crop_and_resize(
                self.new_bev_feature_maps,
                bev_proposal_boxes_norm_tf_order,
                tf_box_indices,
                self._proposal_roi_crop_size,
                name='new_bev_rois')
            ##################################################################

        # Fully connected layers (Box Predictor)
        avod_layers_config = self.model_config.layers_config.avod_config

        ######################################################################
        # TODO PROJECT: average img and bev features first and then concat
        # with new bev
        # NOTE: previous variants passed [bev_rois, img_rois] with
        # [bev_mask, img_mask] (optionally weighted) straight to the builder
        rois_sum = tf.reduce_sum([bev_rois, img_rois], axis=0)
        # Divide by the sum of the path drop masks
        rois_mean = tf.divide(rois_sum, tf.reduce_sum([bev_mask, img_mask]))

        # NOTE(review): the averaged ROIs get a constant weight of 1 while
        # the fused BEV ROIs reuse img_mask - confirm this is intended
        fc_output_layers = \
            avod_fc_layers_builder.build(
                layers_config=avod_layers_config,
                input_rois=[rois_mean, new_bev_rois],
                input_weights=[1, img_mask],
                num_final_classes=self._num_final_classes,
                box_rep=self._box_rep,
                top_anchors=top_anchors,
                ground_plane=ground_plane,
                is_training=self._is_training)
        ######################################################################

        all_cls_logits = \
            fc_output_layers[avod_fc_layers_builder.KEY_CLS_LOGITS]
        all_offsets = fc_output_layers[avod_fc_layers_builder.KEY_OFFSETS]

        # This may be None
        all_angle_vectors = \
            fc_output_layers.get(avod_fc_layers_builder.KEY_ANGLE_VECTORS)

        with tf.variable_scope('softmax'):
            all_cls_softmax = tf.nn.softmax(
                all_cls_logits)

        ######################################################
        # Subsample mini_batch for the loss function
        ######################################################
        # Get the ground truth tensors
        anchors_gt = rpn_model.placeholders[RpnModel.PL_LABEL_ANCHORS]
        if self._box_rep in ['box_3d', 'box_4ca']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
            orientations_gt = boxes_3d_gt[:, 6]
        elif self._box_rep in ['box_8c', 'box_8co', 'box_4c']:
            boxes_3d_gt = rpn_model.placeholders[RpnModel.PL_LABEL_BOXES_3D]
        else:
            raise NotImplementedError('Ground truth tensors not implemented')

        # Project anchor_gts to 2D bev
        with tf.variable_scope('avod_gt_projection'):
            bev_anchor_boxes_gt, _ = anchor_projector.project_to_bev(
                anchors_gt, self.dataset.kitti_utils.bev_extents)

            bev_anchor_boxes_gt_tf_order = \
                anchor_projector.reorder_projected_boxes(bev_anchor_boxes_gt)

        with tf.variable_scope('avod_box_list'):
            # Convert to box_list format
            anchor_box_list_gt = box_list.BoxList(bev_anchor_boxes_gt_tf_order)
            anchor_box_list = box_list.BoxList(bev_proposal_boxes_tf_order)

        mb_mask, mb_class_label_indices, mb_gt_indices = \
            self.sample_mini_batch(
                anchor_box_list_gt=anchor_box_list_gt,
                anchor_box_list=anchor_box_list,
                class_labels=class_labels)

        # Create classification one_hot vector
        with tf.variable_scope('avod_one_hot_classes'):
            mb_classification_gt = tf.one_hot(
                mb_class_label_indices,
                depth=self._num_final_classes,
                on_value=1.0 - self._config.label_smoothing_epsilon,
                off_value=(self._config.label_smoothing_epsilon /
                           self.dataset.num_classes))

        # TODO: Don't create a mini batch in test mode
        # Mask predictions
        with tf.variable_scope('avod_apply_mb_mask'):
            # Classification
            mb_classifications_logits = tf.boolean_mask(
                all_cls_logits, mb_mask)
            mb_classifications_softmax = tf.boolean_mask(
                all_cls_softmax, mb_mask)

            # Offsets
            mb_offsets = tf.boolean_mask(all_offsets, mb_mask)

            # Angle Vectors
            if all_angle_vectors is not None:
                mb_angle_vectors = tf.boolean_mask(
                    all_angle_vectors, mb_mask)
            else:
                mb_angle_vectors = None

        # Encode anchor offsets
        with tf.variable_scope('avod_encode_mb_anchors'):
            mb_anchors = tf.boolean_mask(top_anchors, mb_mask)

            if self._box_rep == 'box_3d':
                # Gather corresponding ground truth anchors for each mb sample
                mb_anchors_gt = tf.gather(anchors_gt, mb_gt_indices)
                mb_offsets_gt = anchor_encoder.tf_anchor_to_offset(
                    mb_anchors, mb_anchors_gt)

                # Gather corresponding ground truth orientation for each
                # mb sample
                mb_orientations_gt = tf.gather(orientations_gt,
                                               mb_gt_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:

                # Get boxes_3d ground truth mini-batch and convert to box_8c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                if self._box_rep == 'box_8c':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8c(mb_boxes_3d_gt)
                elif self._box_rep == 'box_8co':
                    mb_boxes_8c_gt = \
                        box_8c_encoder.tf_box_3d_to_box_8co(mb_boxes_3d_gt)

                # Convert proposals: anchors -> box_3d -> box8c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_8c = \
                    box_8c_encoder.tf_box_3d_to_box_8c(proposal_boxes_3d)

                # Get mini batch offsets
                mb_boxes_8c = tf.boolean_mask(proposal_boxes_8c, mb_mask)
                mb_offsets_gt = box_8c_encoder.tf_box_8c_to_offsets(
                    mb_boxes_8c, mb_boxes_8c_gt)

                # Flatten the offsets to a (N x 24) vector
                mb_offsets_gt = tf.reshape(mb_offsets_gt, [-1, 24])

            elif self._box_rep in ['box_4c', 'box_4ca']:

                # Get ground plane for box_4c conversion
                ground_plane = self._rpn_model.placeholders[
                    self._rpn_model.PL_GROUND_PLANE]

                # Convert gt boxes_3d -> box_4c
                mb_boxes_3d_gt = tf.gather(boxes_3d_gt, mb_gt_indices)
                mb_boxes_4c_gt = box_4c_encoder.tf_box_3d_to_box_4c(
                    mb_boxes_3d_gt, ground_plane)

                # Convert proposals: anchors -> box_3d -> box_4c
                proposal_boxes_3d = \
                    box_3d_encoder.anchors_to_box_3d(top_anchors, fix_lw=True)
                proposal_boxes_4c = \
                    box_4c_encoder.tf_box_3d_to_box_4c(proposal_boxes_3d,
                                                       ground_plane)

                # Get mini batch
                mb_boxes_4c = tf.boolean_mask(proposal_boxes_4c, mb_mask)
                mb_offsets_gt = box_4c_encoder.tf_box_4c_to_offsets(
                    mb_boxes_4c, mb_boxes_4c_gt)

                if self._box_rep == 'box_4ca':
                    # Gather corresponding ground truth orientation for each
                    # mb sample
                    mb_orientations_gt = tf.gather(orientations_gt,
                                                   mb_gt_indices)

            else:
                raise NotImplementedError(
                    'Anchor encoding not implemented for', self._box_rep)

        ######################################################
        # ROI summary images
        ######################################################
        avod_mini_batch_size = \
            self.dataset.kitti_utils.mini_batch_utils.avod_mini_batch_size
        with tf.variable_scope('bev_avod_rois'):
            mb_bev_anchors_norm = tf.boolean_mask(
                bev_proposal_boxes_norm_tf_order, mb_mask)
            mb_bev_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Show the ROIs of the BEV input density map
            # for the mini batch anchors
            bev_input_rois = tf.image.crop_and_resize(
                self._rpn_model._bev_preprocessed,
                mb_bev_anchors_norm,
                mb_bev_box_indices,
                (32, 32))

            bev_input_roi_summary_images = tf.split(
                bev_input_rois, self._bev_depth, axis=3)
            # Only the last channel of the BEV input is summarized
            tf.summary.image('bev_avod_rois',
                             bev_input_roi_summary_images[-1],
                             max_outputs=avod_mini_batch_size)

        with tf.variable_scope('img_avod_rois'):
            # ROIs on image input
            mb_img_anchors_norm = tf.boolean_mask(
                img_proposal_boxes_norm_tf_order, mb_mask)
            mb_img_box_indices = tf.zeros_like(mb_gt_indices, dtype=tf.int32)

            # Do test ROI pooling on mini batch
            img_input_rois = tf.image.crop_and_resize(
                self._rpn_model._img_preprocessed,
                mb_img_anchors_norm,
                mb_img_box_indices,
                (32, 32))

            tf.summary.image('img_avod_rois',
                             img_input_rois,
                             max_outputs=avod_mini_batch_size)

        ######################################################
        # Final Predictions
        ######################################################
        # Get orientations from angle vectors
        if all_angle_vectors is not None:
            with tf.variable_scope('avod_orientation'):
                all_orientations = \
                    orientation_encoder.tf_angle_vector_to_orientation(
                        all_angle_vectors)

        # Apply offsets to regress proposals
        with tf.variable_scope('avod_regression'):
            if self._box_rep == 'box_3d':
                prediction_anchors = \
                    anchor_encoder.offset_to_anchor(top_anchors,
                                                    all_offsets)

            elif self._box_rep in ['box_8c', 'box_8co']:
                # Reshape the 24-dim regressed offsets to (N x 3 x 8)
                reshaped_offsets = tf.reshape(all_offsets,
                                              [-1, 3, 8])
                # Given the offsets, get the boxes_8c
                prediction_boxes_8c = \
                    box_8c_encoder.tf_offsets_to_box_8c(proposal_boxes_8c,
                                                        reshaped_offsets)
                # Convert corners back to box3D
                prediction_boxes_3d = \
                    box_8c_encoder.box_8c_to_box_3d(prediction_boxes_8c)

                # Convert the box_3d to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            elif self._box_rep in ['box_4c', 'box_4ca']:
                # Convert predictions box_4c -> box_3d
                prediction_boxes_4c = \
                    box_4c_encoder.tf_offsets_to_box_4c(proposal_boxes_4c,
                                                        all_offsets)

                prediction_boxes_3d = \
                    box_4c_encoder.tf_box_4c_to_box_3d(prediction_boxes_4c,
                                                       ground_plane)

                # Convert to anchor format for nms
                prediction_anchors = \
                    box_3d_encoder.tf_box_3d_to_anchor(prediction_boxes_3d)

            else:
                raise NotImplementedError('Regression not implemented for',
                                          self._box_rep)

        # Apply Non-oriented NMS in BEV
        with tf.variable_scope('avod_nms'):
            bev_extents = self.dataset.kitti_utils.bev_extents

            with tf.variable_scope('bev_projection'):
                # Project predictions into BEV
                avod_bev_boxes, _ = anchor_projector.project_to_bev(
                    prediction_anchors, bev_extents)
                avod_bev_boxes_tf_order = \
                    anchor_projector.reorder_projected_boxes(
                        avod_bev_boxes)

            # Get top score from second column onward
            all_top_scores = tf.reduce_max(all_cls_logits[:, 1:], axis=1)

            # Apply NMS in BEV
            nms_indices = tf.image.non_max_suppression(
                avod_bev_boxes_tf_order,
                all_top_scores,
                max_output_size=self._nms_size,
                iou_threshold=self._nms_iou_threshold)

            # Gather predictions from NMS indices
            top_classification_logits = tf.gather(all_cls_logits,
                                                  nms_indices)
            top_classification_softmax = tf.gather(all_cls_softmax,
                                                   nms_indices)
            top_prediction_anchors = tf.gather(prediction_anchors,
                                               nms_indices)

            if self._box_rep == 'box_3d':
                top_orientations = tf.gather(
                    all_orientations, nms_indices)

            elif self._box_rep in ['box_8c', 'box_8co']:
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_8c = tf.gather(
                    prediction_boxes_8c, nms_indices)

            elif self._box_rep == 'box_4c':
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_4c = tf.gather(
                    prediction_boxes_4c, nms_indices)

            elif self._box_rep == 'box_4ca':
                top_prediction_boxes_3d = tf.gather(
                    prediction_boxes_3d, nms_indices)
                top_prediction_boxes_4c = tf.gather(
                    prediction_boxes_4c, nms_indices)
                top_orientations = tf.gather(
                    all_orientations, nms_indices)

            else:
                raise NotImplementedError('NMS gather not implemented for',
                                          self._box_rep)

        if self._train_val_test in ['train', 'val']:
            # Additional entries are added to the shared prediction_dict
            # Mini batch predictions
            prediction_dict[self.PRED_MB_CLASSIFICATION_LOGITS] = \
                mb_classifications_logits
            prediction_dict[self.PRED_MB_CLASSIFICATION_SOFTMAX] = \
                mb_classifications_softmax
            prediction_dict[self.PRED_MB_OFFSETS] = mb_offsets

            # Mini batch ground truth
            prediction_dict[self.PRED_MB_CLASSIFICATIONS_GT] = \
                mb_classification_gt
            prediction_dict[self.PRED_MB_OFFSETS_GT] = mb_offsets_gt

            # Top NMS predictions
            prediction_dict[self.PRED_TOP_CLASSIFICATION_LOGITS] = \
                top_classification_logits
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax

            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

            # Mini batch predictions (for debugging)
            prediction_dict[self.PRED_MB_MASK] = mb_mask
            prediction_dict[self.PRED_MB_CLASS_INDICES_GT] = \
                mb_class_label_indices

            # All predictions (for debugging)
            prediction_dict[self.PRED_ALL_CLASSIFICATIONS] = \
                all_cls_logits
            prediction_dict[self.PRED_ALL_OFFSETS] = all_offsets

            # Path drop masks (for debugging)
            prediction_dict['bev_mask'] = bev_mask
            prediction_dict['img_mask'] = img_mask

        else:
            # self._train_val_test == 'test'
            prediction_dict[self.PRED_TOP_CLASSIFICATION_SOFTMAX] = \
                top_classification_softmax
            prediction_dict[self.PRED_TOP_PREDICTION_ANCHORS] = \
                top_prediction_anchors

        if self._box_rep == 'box_3d':
            prediction_dict[self.PRED_MB_ANCHORS_GT] = mb_anchors_gt
            prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = mb_orientations_gt
            prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

            # For debugging
            prediction_dict[self.PRED_ALL_ANGLE_VECTORS] = all_angle_vectors

        # 8c means 8 corners
        elif self._box_rep in ['box_8c', 'box_8co']:
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d

            # Store the corners before converting for visualization purposes
            prediction_dict[self.PRED_TOP_BOXES_8C] = top_prediction_boxes_8c

        # 4c means 4 corners
        elif self._box_rep == 'box_4c':
            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c

        elif self._box_rep == 'box_4ca':
            if self._train_val_test in ['train', 'val']:
                prediction_dict[self.PRED_MB_ORIENTATIONS_GT] = \
                    mb_orientations_gt
                prediction_dict[self.PRED_MB_ANGLE_VECTORS] = mb_angle_vectors

            prediction_dict[self.PRED_TOP_PREDICTION_BOXES_3D] = \
                top_prediction_boxes_3d
            prediction_dict[self.PRED_TOP_BOXES_4C] = top_prediction_boxes_4c
            prediction_dict[self.PRED_TOP_ORIENTATIONS] = top_orientations

        else:
            raise NotImplementedError('Prediction dict not implemented for',
                                      self._box_rep)

        return prediction_dict

    def sample_mini_batch(self, anchor_box_list_gt, anchor_box_list,
                          class_labels):
        """Samples the second stage mini batch from the proposals.

        Args:
            anchor_box_list_gt: BoxList of the ground truth boxes in BEV
            anchor_box_list: BoxList of the proposal boxes in BEV
            class_labels: class labels of the ground truth boxes

        Returns:
            mb_mask: mask selecting the proposals in the mini batch
            mb_class_label_indices: class label index for each mini batch
                sample, as returned by
                mini_batch_utils.mask_class_label_indices
            mb_gt_indices: index of the highest IoU ground truth box for
                each mini batch sample
        """
        with tf.variable_scope('avod_create_mb_mask'):
            # Get IoU for every anchor
            all_ious = box_list_ops.iou(anchor_box_list_gt, anchor_box_list)
            max_ious = tf.reduce_max(all_ious, axis=0)
            max_iou_indices = tf.argmax(all_ious, axis=0)

            # Sample a pos/neg mini-batch from anchors with highest IoU match
            mini_batch_utils = self.dataset.kitti_utils.mini_batch_utils
            mb_mask, mb_pos_mask = mini_batch_utils.sample_avod_mini_batch(
                max_ious)
            mb_class_label_indices = mini_batch_utils.mask_class_label_indices(
                mb_pos_mask, mb_mask, max_iou_indices, class_labels)

            mb_gt_indices = tf.boolean_mask(max_iou_indices, mb_mask)

        return mb_mask, mb_class_label_indices, mb_gt_indices

    def create_feed_dict(self):
        """Creates the feed dict through the RPN model.

        The RPN model's ``sample_info`` is copied onto this model.

        Returns:
            The feed dict returned by the RPN model.
        """
        feed_dict = self._rpn_model.create_feed_dict()
        self.sample_info = self._rpn_model.sample_info
        return feed_dict

    def loss(self, prediction_dict):
        """Adds the second stage losses to the RPN losses.

        Args:
            prediction_dict: prediction dictionary shared with the RPN

        Returns:
            loss_dict: the RPN loss dict extended with the final
                classification, regression, localization and (when
                available) orientation losses
            total_loss: rpn_loss + avod_loss
        """
        # Note: The loss should be using mini-batch values only
        loss_dict, rpn_loss = self._rpn_model.loss(prediction_dict)
        losses_output = avod_loss_builder.build(self, prediction_dict)

        classification_loss = \
            losses_output[avod_loss_builder.KEY_CLASSIFICATION_LOSS]

        final_reg_loss = losses_output[avod_loss_builder.KEY_REGRESSION_LOSS]

        avod_loss = losses_output[avod_loss_builder.KEY_AVOD_LOSS]

        offset_loss_norm = \
            losses_output[avod_loss_builder.KEY_OFFSET_LOSS_NORM]

        loss_dict.update({self.LOSS_FINAL_CLASSIFICATION: classification_loss})
        loss_dict.update({self.LOSS_FINAL_REGRESSION: final_reg_loss})

        # Add localization and orientation losses to loss dict for plotting
        loss_dict.update({self.LOSS_FINAL_LOCALIZATION: offset_loss_norm})

        ang_loss_loss_norm = losses_output.get(
            avod_loss_builder.KEY_ANG_LOSS_NORM)
        if ang_loss_loss_norm is not None:
            loss_dict.update({self.LOSS_FINAL_ORIENTATION: ang_loss_loss_norm})

        with tf.variable_scope('model_total_loss'):
            total_loss = rpn_loss + avod_loss

        # TODO PROJECT: plot weight

        return loss_dict, total_loss
def test(model_config, eval_config, dataset_config, data_split, ckpt_indices):
    """Run inference with the last checkpoint and display the detections.

    Builds the KITTI dataset and the requested model, restores the last
    checkpoint reported by the saver and then, for every sample of one
    epoch, shows the predictions drawn on the camera image ('result'
    window) and on the BEV density map ('lidar' window). The function
    blocks on a key press after each displayed sample.

    Args:
        model_config: model configuration; `model_name` selects between
            'avod_model' and 'rpn_model'.
        eval_config: evaluation configuration. It is modified in place
            before conversion: `eval_mode` is forced to 'test',
            `evaluate_repeatedly` to False and `allow_gpu_mem_growth`
            to True.
        dataset_config: dataset configuration; a converted copy is
            adjusted (split, split directory, labels, augmentation).
        data_split: name of the data split. 'test' reads from the
            'testing' directory, any other value from 'training'.
        ckpt_indices: checkpoint indices, stored on the converted eval
            config.

    Raises:
        ValueError: if `model_config.model_name` is not a known model.
    """
    # Overwrite the defaults
    dataset_config = config_builder.proto_to_obj(dataset_config)

    dataset_config.data_split = data_split
    dataset_config.data_split_dir = 'training'
    if data_split == 'test':
        dataset_config.data_split_dir = 'testing'

    eval_config.eval_mode = 'test'
    eval_config.evaluate_repeatedly = False
    dataset_config.has_labels = False
    # Enable this to see the actually memory being used
    eval_config.allow_gpu_mem_growth = True

    eval_config = config_builder.proto_to_obj(eval_config)
    # Grab the checkpoint indices to evaluate
    eval_config.ckpt_indices = ckpt_indices

    # Remove augmentation during evaluation in test mode
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Setup the model
    model_name = model_config.model_name
    # Overwrite repeated field
    model_config = config_builder.proto_to_obj(model_config)
    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    # Display settings, constant for the whole run
    gt_classes = ['Car']
    fig_size = (10, 6.1)
    avod_score_threshold = 0.1

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model = AvodModel(model_config,
                              train_val_test=eval_config.eval_mode,
                              dataset=dataset)
        elif model_name == 'rpn_model':
            model = RpnModel(model_config,
                             train_val_test=eval_config.eval_mode,
                             dataset=dataset)
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        # Create the global step variable before the Saver so that it is
        # part of the variables the Saver handles
        tf.Variable(0, trainable=False, name='global_step')

        if eval_config.allow_gpu_mem_growth:
            # GPU memory config
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = eval_config.allow_gpu_mem_growth
            _sess = tf.Session(config=config)
        else:
            _sess = tf.Session()

        # The session owns device resources; release them even when
        # inference or visualization fails part-way through.
        try:
            _prediction_dict = model.build()
            _saver = tf.train.Saver()

            trainer_utils.load_checkpoints(
                model_config.paths_config.checkpoint_dir, _saver)
            num_checkpoints = len(_saver.last_checkpoints)
            print("test:", num_checkpoints)
            checkpoint_to_restore = \
                _saver.last_checkpoints[num_checkpoints - 1]
            _saver.restore(_sess, checkpoint_to_restore)

            num_samples = model.dataset.num_samples
            num_valid_samples = 0

            # Run through exactly one epoch of the dataset
            current_epoch = model.dataset.epochs_completed
            while current_epoch == model.dataset.epochs_completed:
                feed_dict = model.create_feed_dict()

                # Get sample name from model
                sample_name = model.sample_info['sample_name']
                num_valid_samples += 1
                print("Step: {} / {}, Inference on sample {}".format(
                    num_valid_samples, num_samples, sample_name))
                print("test mode")

                inference_start_time = time.time()
                # Don't calculate loss or run summaries for test
                predictions = _sess.run(_prediction_dict,
                                        feed_dict=feed_dict)
                inference_time = time.time() - inference_start_time
                print("inference time:", inference_time)

                predictions_and_scores = \
                    get_avod_predicted_boxes_3d_and_scores(predictions)

                # Columns: 0-6 box_3d, 7 score, 8 class index
                prediction_boxes_3d = predictions_and_scores[:, 0:7]
                prediction_scores = predictions_and_scores[:, 7]
                prediction_class_indices = predictions_and_scores[:, 8]

                if len(prediction_boxes_3d) > 0:
                    # Apply score mask
                    avod_score_mask = \
                        prediction_scores >= avod_score_threshold
                    prediction_boxes_3d = \
                        prediction_boxes_3d[avod_score_mask]
                    prediction_scores = prediction_scores[avod_score_mask]
                    prediction_class_indices = \
                        prediction_class_indices[avod_score_mask]

                if len(prediction_boxes_3d) == 0:
                    # Nothing above the score threshold, nothing to show
                    continue

                sample_name = (model.dataset.sample_names[
                    model.dataset._index_in_epoch - 1])
                img_idx = int(sample_name)
                print("frame_index", img_idx)

                # Only the size is needed; close the file right away
                image_path = model.dataset.get_rgb_image_path(sample_name)
                with Image.open(image_path) as image:
                    image_size = image.size

                if model.dataset.has_labels:
                    gt_objects = obj_utils.read_labels(dataset.label_dir,
                                                       img_idx)
                else:
                    gt_objects = []

                filtered_gt_objs = model.dataset.kitti_utils.filter_labels(
                    gt_objects, classes=gt_classes)

                stereo_calib = calib_utils.read_calibration(
                    dataset.calib_dir, img_idx)
                calib_p2 = stereo_calib.p2

                # Project the 3D box predictions to image space and keep
                # only the boxes that have a valid projection
                image_filter = []
                final_boxes_2d = []
                for box_3d in prediction_boxes_3d:
                    img_box = box_3d_projector.project_to_image_space(
                        box_3d, calib_p2,
                        truncate=True, image_size=image_size,
                        discard_before_truncation=False)
                    image_filter.append(img_box is not None)
                    if img_box is not None:
                        final_boxes_2d.append(img_box)

                final_boxes_2d = np.asarray(final_boxes_2d)
                final_prediction_boxes_3d = prediction_boxes_3d[image_filter]
                final_scores = prediction_scores[image_filter]
                final_class_indices = prediction_class_indices[image_filter]

                num_of_predictions = final_boxes_2d.shape[0]

                # Convert to objs
                final_prediction_objs = [
                    box_3d_encoder.box_3d_to_object_label(
                        prediction, obj_type='Prediction')
                    for prediction in final_prediction_boxes_3d]
                for obj, score in zip(final_prediction_objs, final_scores):
                    obj.score = score

                pred_fig, pred_2d_axes, pred_3d_axes = \
                    vis_utils.visualization(dataset.rgb_image_dir,
                                            img_idx,
                                            display=False,
                                            fig_size=fig_size)

                draw_predictions(filtered_gt_objs,
                                 calib_p2,
                                 num_of_predictions,
                                 final_prediction_objs,
                                 final_class_indices,
                                 final_boxes_2d,
                                 pred_2d_axes,
                                 pred_3d_axes,
                                 True,
                                 True,
                                 gt_classes,
                                 False)

                print(type(pred_fig))
                # Render the figure and read its pixels back. frombuffer
                # replaces the deprecated binary mode of np.fromstring.
                pred_fig.canvas.draw()
                img = np.frombuffer(pred_fig.canvas.tostring_rgb(),
                                    dtype=np.uint8)
                img = img.reshape(
                    pred_fig.canvas.get_width_height()[::-1] + (3,))
                # The canvas buffer is RGB while imshow expects BGR
                cv2.imshow('result', cv2.cvtColor(img, cv2.COLOR_RGB2BGR))

                # draw bird view
                kitti_utils = model.dataset.kitti_utils
                print(img.shape[0:2])
                # PIL reports (width, height); pass the real image shape
                # in the same (height, width) order as the previously
                # hard-coded (370, 1242).
                # NOTE(review): confirm get_point_cloud expects (h, w).
                point_cloud = kitti_utils.get_point_cloud(
                    'lidar', img_idx, (image_size[1], image_size[0]))
                ground_plane = kitti_utils.get_ground_plane(sample_name)
                bev_images = kitti_utils.create_bev_maps(point_cloud,
                                                         ground_plane)

                density_map = np.array(bev_images.get("density_map"))
                _, box_points_norm = box_3d_projector.project_to_bev(
                    final_prediction_boxes_3d, [[-40, 40], [0, 70]])
                density_map = draw_boxes(density_map, box_points_norm)
                cv2.imshow('lidar', density_map)
                # Block until a key is pressed before the next sample
                cv2.waitKey(-1)
        finally:
            _sess.close()
def test_create_path_drop_masks(self):
    """Check the img/bev masks returned by create_path_drop_masks.

    Every case passes two probabilities and a fixed vector of three
    random draws to the model and compares the evaluated masks with the
    expected 0/1 values.
    """
    rpn_model = RpnModel(self.model_config,
                         train_val_test="val",
                         dataset=self.dataset)
    rpn_model.build()

    def check_masks(img_prob, bev_prob, draws, want_img, want_bev):
        # Build the masks for one scenario and compare them to the
        # expected values
        p_img = tf.constant(img_prob)
        p_bev = tf.constant(bev_prob)
        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            p_img, p_bev, draws)

        with self.test_session():
            got_img = img_mask.eval()
            got_bev = bev_mask.eval()

        np.testing.assert_array_equal(got_img, want_img)
        np.testing.assert_array_equal(got_bev, want_bev)

    # Fixed random numbers shared by the first four scenarios
    rand_choice_tensor = tf.convert_to_tensor([0.53, 0.83, 0.05])

    scenarios = (
        # (p_img, p_bev, expected img mask, expected bev mask)
        (0.6, 0.85, 1.0, 1.0),  # Case 1: keep img, keep bev
        (0.2, 0.85, 0.0, 1.0),  # Case 2: kill img, keep bev
        (0.9, 0.1, 1.0, 0.0),   # Case 3: keep img, kill bev
        # Case 4: both would be killed, the third draw keeps bev
        (0.0, 0.1, 0.0, 1.0),
    )
    for img_prob, bev_prob, want_img, want_bev in scenarios:
        check_masks(img_prob, bev_prob, rand_choice_tensor,
                    want_img, want_bev)

    # Case 5: same probabilities as case 4, but with the third draw
    # flipped we expect img to be kept instead of bev
    flipped_choice_tensor = tf.convert_to_tensor([0.53, 0.83, 0.61])
    check_masks(0.0, 0.1, flipped_choice_tensor, 1.0, 0.0)
def test_load_model_weights(self):
    """Check that weights saved by an RPN run reload into an AVOD graph.

    Trains an RpnModel for one iteration, reads its model variables back
    from the last checkpoint, then restores the same checkpoint into an
    AvodModel graph and compares the leading variables with the RPN ones.
    """
    train_val_test = 'train'

    # Train for a single iteration only
    self.train_config.max_iterations = 1
    self.train_config.overwrite_checkpoints = True

    def restore_latest(session, initializer, ckpt_saver, ckpt_dir):
        # Initialize, load the last checkpoint and return the model
        # variables together with their current values
        session.run(initializer)
        trainer_utils.load_checkpoints(ckpt_dir, ckpt_saver)
        latest_ckpt = ckpt_saver.last_checkpoints[-1]
        trainer_utils.load_model_weights(session, latest_ckpt)
        model_vars = slim.get_model_variables()
        return model_vars, session.run(model_vars)

    with tf.Graph().as_default():
        model = RpnModel(self.model_config,
                         train_val_test=train_val_test,
                         dataset=self.dataset)
        trainer.train(model, self.train_config)

        rpn_checkpoint_dir = self.model_config.paths_config.checkpoint_dir

        # load the weights back in
        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            _, rpn_weights = restore_latest(
                sess, init_op, saver, rpn_checkpoint_dir)

        self.assertGreater(len(rpn_weights), 0,
                           msg='Loaded RPN weights are empty')

    with tf.Graph().as_default():
        model = AvodModel(self.model_config,
                          train_val_test=train_val_test,
                          dataset=self.dataset)
        model.build()

        # load the weights back in
        init_op = tf.global_variables_initializer()
        saver = tf.train.Saver()
        with tf.Session() as sess:
            avod_vars, avod_weights = restore_latest(
                sess, init_op, saver, rpn_checkpoint_dir)

            # AVOD weights should include both RPN + AVOD weights
            self.assertGreater(len(avod_weights), len(rpn_weights),
                               msg='Expected more weights for AVOD')

            # The RPN variables are taken by position from the front of
            # the AVOD variable list
            rpn_weights_reload = sess.run(avod_vars[:len(rpn_weights)])

        # The reloaded values must equal the ones loaded for the RPN
        for reloaded, expected in zip(rpn_weights_reload, rpn_weights):
            np.testing.assert_array_equal(reloaded, expected)
def evaluate(model_config, eval_config, dataset_config):
    """Build the dataset and model, then run the Evaluator on checkpoints.

    Args:
        model_config: model configuration; `model_name` must be
            'avod_model' or 'rpn_model'.
        eval_config: evaluation configuration providing `eval_mode`
            ('val' or 'test') and `evaluate_repeatedly`.
        dataset_config: dataset configuration; `data_split_dir` and
            `has_labels` are set on it according to `data_split` before
            it is converted.

    Raises:
        ValueError: for an unsupported eval mode, data split or model
            name.
    """
    # Parse eval config
    eval_mode = eval_config.eval_mode
    if eval_mode not in ['val', 'test']:
        raise ValueError('Evaluation mode can only be set to `val` or `test`')
    evaluate_repeatedly = eval_config.evaluate_repeatedly

    # Work out the split directory and label availability for the split
    data_split = dataset_config.data_split
    if data_split == 'train':
        split_dir, has_labels = 'training', True
    elif data_split.startswith('val'):
        # Don't load labels for val split when running in test mode
        split_dir, has_labels = 'training', eval_mode == 'val'
    elif data_split == 'test':
        split_dir, has_labels = 'testing', False
    else:
        raise ValueError('Invalid data split', data_split)

    dataset_config.data_split_dir = split_dir
    dataset_config.has_labels = has_labels

    # Convert to object to overwrite repeated fields
    dataset_config = config_builder.proto_to_obj(dataset_config)

    # Remove augmentation during evaluation
    dataset_config.aug_list = []

    # Build the dataset object
    dataset = DatasetBuilder.build_kitti_dataset(dataset_config,
                                                 use_defaults=False)

    # Read the name before converting the model config
    model_name = model_config.model_name

    # Convert to object to overwrite repeated fields
    model_config = config_builder.proto_to_obj(model_config)

    # Switch path drop off during evaluation
    model_config.path_drop_probabilities = [1.0, 1.0]

    with tf.Graph().as_default():
        if model_name == 'avod_model':
            model_cls = AvodModel
        elif model_name == 'rpn_model':
            model_cls = RpnModel
        else:
            raise ValueError('Invalid model name {}'.format(model_name))

        model = model_cls(model_config,
                          train_val_test=eval_mode,
                          dataset=dataset)

        model_evaluator = Evaluator(model, dataset_config, eval_config)

        run_checkpoints = (model_evaluator.repeated_checkpoint_run
                           if evaluate_repeatedly
                           else model_evaluator.run_latest_checkpoints)
        run_checkpoints()
def test_path_drop_input_multiplication(self):
    """Check feature maps after multiplying them with the path drop masks.

    Two constant-filled feature maps are multiplied with the masks from
    create_path_drop_masks; a kept path must come out unchanged and a
    dropped path must come out as all zeros.
    """
    rpn_model = RpnModel(self.model_config,
                         train_val_test="val",
                         dataset=self.dataset)
    rpn_model.build()

    # Shape of input feature map
    feature_shape = [1, 30, 50, 2]
    random_values = np.random.randint(low=1.0, high=256.0,
                                      size=2).astype(np.float32)
    img_fill, bev_fill = random_values[0], random_values[1]

    dummy_img_feature_map = tf.fill(feature_shape, img_fill)
    # Assume both features map are the same size, this is not
    # the case inside the network
    dummy_bev_feature_map = tf.fill(feature_shape, bev_fill)

    # Set the random numbers for testing purposes
    rand_choice_tensor = tf.convert_to_tensor([0.53, 0.83, 0.05])

    def check_inputs(img_prob, bev_prob, exp_img_input, exp_bev_input):
        # Mask both feature maps for one scenario and compare the
        # results with the expected inputs
        img_mask, bev_mask = rpn_model.create_path_drop_masks(
            tf.constant(img_prob), tf.constant(bev_prob),
            rand_choice_tensor)

        final_img_input = tf.multiply(dummy_img_feature_map, img_mask)
        final_bev_input = tf.multiply(dummy_bev_feature_map, bev_mask)

        with self.test_session():
            final_img_input_out = final_img_input.eval()
            final_bev_input_out = final_bev_input.eval()

        np.testing.assert_array_equal(final_img_input_out, exp_img_input)
        np.testing.assert_array_equal(final_bev_input_out, exp_bev_input)

    # Test-Case 1 : Keep img, Kill bev
    check_inputs(0.6, 0.4,
                 np.full(feature_shape, img_fill),
                 np.full(feature_shape, 0.0))

    # Test-Case 2 : Kill img, Keep bev
    check_inputs(0.4, 0.9,
                 np.full(feature_shape, 0),
                 np.full(feature_shape, bev_fill))