def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
    """
    Produce image detection predictions.

    Parameters
    ----------
    inputs : D3M dataframe container of shape (n_images, dimension) containing
        the d3m index, image name, and bounding box for each image.

    Returns
    -------
    outputs : D3M dataframe container with the d3m index, bounding boxes as
        strings (8-coordinate format), and confidence scores.
    """
    iou_threshold = 0.5     # Bounding box overlap threshold for false positive or true positive
    score_threshold = 0.05  # Confidence threshold to use for detections
    max_detections = 100    # Maximum number of detections to use per image

    # Create object that stores backbone information
    backbone = models.backbone(self.hyperparams["backbone"])

    # Create the generator
    train_generator = CSVGenerator(
        self.annotations,
        self.classes,
        self.base_dir,
        self.hyperparams["batch_size"],
        backbone.preprocess_image,
        shuffle_groups=False,
    )

    # Instantiate the model
    model, training_model, prediction_model = self._create_models(
        backbone_retinanet=backbone.retinanet,
        num_classes=train_generator.num_classes(),
        lr=self.hyperparams["learning_rate"],
    )

    # Load the model weights saved during fit
    training_model.load_weights(
        self.hyperparams["weights_path"] + "model_weights.h5"
    )

    # Convert the training model to an inference model
    inference_model = models.convert_model(training_model)

    # Generate image paths
    image_cols = inputs.metadata.get_columns_with_semantic_type(
        "https://metadata.datadrivendiscovery.org/types/FileName"
    )
    self.base_dir = [
        inputs.metadata.query((metadata_base.ALL_ELEMENTS, col))[
            "location_base_uris"
        ][0].replace("file:///", "/")
        for col in image_cols
    ]
    self.image_paths = np.array(
        [
            [os.path.join(base_dir, filename) for filename in inputs.iloc[:, col]]
            for base_dir, col in zip(self.base_dir, image_cols)
        ]
    ).flatten()
    self.image_paths = pd.Series(self.image_paths)

    # Initialize output objects
    box_list = []
    score_list = []
    image_name_list = []

    # Deduplicate image paths while preserving order
    image_list = list(dict.fromkeys(self.image_paths.tolist()))

    # Predict bounding boxes and confidence scores for each image
    start_time = time.time()
    logger.info("Starting testing...")

    for image_path in image_list:
        image = read_image_bgr(image_path)

        # Preprocess the image for the network
        image = preprocess_image(image)
        image, scale = resize_image(image)

        boxes, scores, labels = inference_model.predict_on_batch(
            tf.constant(np.expand_dims(image, axis=0), dtype=tf.float32)
        )

        # Correct for the image scale
        boxes /= scale

        # Detections are sorted by descending score, so stop at the first one
        # below the 0.5 confidence cutoff; record one image name per detection
        for box, score in zip(boxes[0], scores[0]):
            if score < 0.5:
                break
            box_list.append(box.astype(int))
            score_list.append(score)
            image_name_list.append(image_path)

    logger.info(f"Testing complete. Testing took {time.time() - start_time} seconds.")

    # Convert predicted boxes from a list of arrays to a list of strings in the
    # 8-coordinate format D3M expects:
    # [x1, y1, x2, y2] -> "x1,y1,x1,y2,x2,y2,x2,y1"
    boxes = np.array(box_list).tolist()
    boxes = [
        [box[0], box[1], box[0], box[3], box[2], box[3], box[2], box[1]]
        for box in boxes
    ]
    boxes = [",".join(map(str, box)) for box in boxes]

    # Create a mapping between image names and the D3M index
    input_df = pd.DataFrame(
        {
            "d3mIndex": inputs.d3mIndex,
            "image": [os.path.basename(path) for path in self.image_paths],
        }
    )
    d3mIdx_image_mapping = input_df.set_index("image").T.to_dict("list")

    # Extract values for image name keys and find missing image predictions (if they exist)
    image_name_list = [os.path.basename(path) for path in image_name_list]
    d3mIdx = [d3mIdx_image_mapping.get(key) for key in image_name_list]
    empty_predictions_image_names = [
        k for k, v in d3mIdx_image_mapping.items() if v not in d3mIdx
    ]
    d3mIdx = [item for sublist in d3mIdx for item in sublist]  # Flatten list of lists

    # Assemble the predictions in a pandas DataFrame
    results = pd.DataFrame(
        {"d3mIndex": d3mIdx, "bounding_box": boxes, "confidence": score_list}
    )

    # The D3M metrics evaluator needs at least one prediction per image. If
    # RetinaNet does not return predictions for an image, create a dummy empty
    # prediction row to add to results_df for that missing image.
    if len(empty_predictions_image_names) != 0:
        # Create a data frame of empty predictions for each missing image,
        # concatenate it with the results, and sort results_df
        empty_predictions_df = self._fill_empty_predictions(
            empty_predictions_image_names, d3mIdx_image_mapping
        )
        results_df = pd.concat([results, empty_predictions_df]).sort_values("d3mIndex")
    else:
        results_df = results

    # Convert to a D3M DataFrame container
    results_df = d3m_DataFrame(results_df)

    # Assemble the first output column ('d3mIndex')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 0)))
    col_dict["structural_type"] = str
    col_dict["name"] = "d3mIndex"
    col_dict["semantic_types"] = (
        "http://schema.org/Integer",
        "https://metadata.datadrivendiscovery.org/types/PrimaryKey",
    )
    results_df.metadata = results_df.metadata.update(
        (metadata_base.ALL_ELEMENTS, 0), col_dict
    )

    # Assemble the second output column ('bounding_box')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 1)))
    col_dict["structural_type"] = str
    col_dict["name"] = "bounding_box"
    col_dict["semantic_types"] = (
        "http://schema.org/Text",
        "https://metadata.datadrivendiscovery.org/types/PredictedTarget",
        "https://metadata.datadrivendiscovery.org/types/BoundingPolygon",
    )
    results_df.metadata = results_df.metadata.update(
        (metadata_base.ALL_ELEMENTS, 1), col_dict
    )

    # Assemble the third output column ('confidence')
    col_dict = dict(results_df.metadata.query((metadata_base.ALL_ELEMENTS, 2)))
    col_dict["structural_type"] = str
    col_dict["name"] = "confidence"
    col_dict["semantic_types"] = (
        "http://schema.org/Integer",
        "https://metadata.datadrivendiscovery.org/types/Score",
    )
    results_df.metadata = results_df.metadata.update(
        (metadata_base.ALL_ELEMENTS, 2), col_dict
    )

    return CallResult(results_df)
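# `_fill_empty_predictions` is referenced in `produce` above but not shown in
# this excerpt. Below is a minimal sketch of what it might look like, assuming
# it emits one zero-confidence, all-zero-box row per image that received no
# detections; the placeholder row format is an assumption, not the confirmed
# implementation. It relies on the module's existing pandas import.
def _fill_empty_predictions(self, empty_predictions_image_names, d3mIdx_image_mapping):
    """
    Build one dummy prediction row per image without detections so the D3M
    metrics evaluator sees at least one row per image (sketch; assumed behavior).
    """
    d3m_indices = [
        d3mIdx_image_mapping[name][0] for name in empty_predictions_image_names
    ]
    return pd.DataFrame(
        {
            "d3mIndex": d3m_indices,
            "bounding_box": ["0,0,0,0,0,0,0,0"] * len(d3m_indices),
            "confidence": [0.0] * len(d3m_indices),
        }
    )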
def resize_image(self, image):
    """
    Resize an image using image_min_side and image_max_side.
    """
    return resize_image(
        image,
        min_side=self.image_min_side,
        max_side=self.image_max_side,
    )
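# Illustrative usage of the resize helper above (a sketch, not part of the
# primitive's API), pairing it with keras-retinanet's image utilities as done
# in `produce`; the file name is hypothetical:
#
#   image = read_image_bgr("example.jpg")    # load image as a BGR ndarray
#   image = preprocess_image(image)          # backbone-specific preprocessing
#   image, scale = self.resize_image(image)  # resize within min/max side bounds
#   boxes /= scale                           # map predicted boxes back to the
#                                            # original image's coordinate space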