def main(unused_argv):
    """Run a SavedModel detector over images and write an HTML report.

    Loads the `id:name` label map from `FLAGS.label_map_file`, loads the
    SavedModel in `FLAGS.saved_model_dir`, runs its `serving_default`
    signature on every file matching `FLAGS.image_file_pattern`, draws the
    detections on each image and writes all annotated images, inlined as
    base64 JPEGs, into `FLAGS.output_html`.

    The signature must expose `num_detections`, `detection_boxes`,
    `detection_classes`, `detection_attributes` and `detection_scores`;
    `detection_masks` is used when present.

    Args:
        unused_argv: Ignored command-line arguments.

    Raises:
        ValueError: If `FLAGS.label_map_format` is not `'csv'`, if a label
            map row is not in `id:name` form, or if
            `FLAGS.use_normalized_coordinates` is False while the SavedModel
            has no `image_info` output.
    """
    del unused_argv

    # Load the label map.
    print(' - Loading the label map...')
    label_map_dict = {}
    if FLAGS.label_map_format == 'csv':
        with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=':')
            for row in reader:
                if len(row) != 2:
                    raise ValueError(
                        'Each row of the csv label map file must be in '
                        '`id:name` format.')
                id_index = int(row[0])
                name = row[1]
                label_map_dict[id_index] = {
                    'id': id_index,
                    'name': name,
                }
    else:
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))

    image_with_detections_list = []
    with tf.Session(graph=tf.Graph()) as sess:
        print(' - Loading saved model...')
        meta_graph_def = tf.saved_model.load(
            sess, [tf.saved_model.tag_constants.SERVING],
            FLAGS.saved_model_dir)
        signature = meta_graph_def.signature_def['serving_default']
        inputs = dict(signature.inputs)
        outputs = dict(signature.outputs)

        image_node = inputs['input'].name

        # `detection_attributes` is still fetched so that a SavedModel
        # without that output keeps failing early, as it did before.
        output_nodes = {
            'num_detections': outputs['num_detections'].name,
            'detection_boxes': outputs['detection_boxes'].name,
            'detection_classes': outputs['detection_classes'].name,
            'detection_attributes': outputs['detection_attributes'].name,
            'detection_scores': outputs['detection_scores'].name,
        }
        if 'detection_masks' in outputs:
            output_nodes['detection_masks'] = outputs['detection_masks'].name

        if not FLAGS.use_normalized_coordinates:
            if 'image_info' not in outputs:
                raise ValueError(
                    'If `use_normalized_coordinates` = False, `image_info`'
                    ' node must be included in the SavedModel.')
            output_nodes['image_info'] = outputs['image_info'].name

        image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
        for i, image_file in enumerate(image_files):
            print(' - processing image %d...' % i)

            # Converting to RGB is needed for images with 4 channels.
            pil_image = Image.open(image_file).convert('RGB')
            width, height = pil_image.size
            # np.array() on an RGB PIL image yields a writable
            # (height, width, 3) uint8 copy without building one Python
            # tuple per pixel the way `getdata()` does.
            np_image = np.array(pil_image)

            np_image_input = input_utils.normalize_image_np(np_image)
            np_image_input = np.float32(
                np_image_input.reshape(1, height, width, 3))

            output_results = sess.run(
                output_nodes, feed_dict={image_node: np_image_input})

            num_detections = int(output_results['num_detections'][0])
            np_boxes = output_results['detection_boxes'][0, :num_detections]
            if not FLAGS.use_normalized_coordinates:
                # Divide by row 1 of `image_info`, tiled to match the four
                # yxyx columns, so the boxes become normalized.
                np_image_info = output_results['image_info'][0]
                np_boxes = np_boxes / np.tile(np_image_info[1:2, :], (1, 2))

            # Scale the normalized boxes to pixel coordinates of the
            # original image.
            ymin, xmin, ymax, xmax = np.split(np_boxes, 4, axis=-1)
            ymin = ymin * height
            ymax = ymax * height
            xmin = xmin * width
            xmax = xmax * width
            np_boxes = np.concatenate([ymin, xmin, ymax, xmax], axis=-1)

            np_scores = output_results['detection_scores'][0, :num_detections]
            np_classes = output_results['detection_classes'][
                0, :num_detections].astype(np.int32)

            np_masks = None
            if 'detection_masks' in output_results:
                np_masks = output_results['detection_masks'][
                    0, :num_detections]
                np_masks = mask_utils.paste_instance_masks(
                    np_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

            image_with_detections = (
                visualization_utils.visualize_boxes_and_labels_on_image_array(
                    np_image,
                    np_boxes,
                    np_classes,
                    np_scores,
                    label_map_dict,
                    instance_masks=np_masks,
                    use_normalized_coordinates=False,
                    max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                    min_score_thresh=FLAGS.min_score_threshold))
            image_with_detections_list.append(image_with_detections)

    print(' - Saving the outputs...')
    image_strs = []
    for annotated in image_with_detections_list:
        formatted_image = Image.fromarray(annotated.astype(np.uint8))
        with io.BytesIO() as stream:
            formatted_image.save(stream, format='JPEG')
            data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
        image_strs.append(
            '<img src="data:image/jpeg;base64,{}", height=800>'.format(
                data_uri))
    html_str = '<html>' + ' '.join(image_strs) + '</html>'
    with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
        f.write(html_str)
def main(unused_argv):
    """Run checkpoint-based detection inference and save the results.

    Builds the model selected by `FLAGS.model` (with overrides from
    `FLAGS.config_file` and `FLAGS.params_override`), restores
    `FLAGS.checkpoint_path`, and runs inference on every file matching
    `FLAGS.image_file_pattern`. Two outputs are written:

    * `FLAGS.output_html`: the images with detections drawn on them,
      inlined as base64 JPEGs.
    * `FLAGS.output_file`: a list with one dict per image (`image_file`,
      `boxes`, `classes`, `scores`, `attributes`, `masks`), saved with
      `np.save`. `masks` is `None` when the model emits no
      `detection_masks`.

    Args:
        unused_argv: Ignored command-line arguments.

    Raises:
        ValueError: If `FLAGS.label_map_format` is not `'csv'` or a label
            map row is not in `id:name` form.
    """
    del unused_argv

    # Load the label map.
    print(' - Loading the label map...')
    label_map_dict = {}
    if FLAGS.label_map_format == 'csv':
        with tf.gfile.Open(FLAGS.label_map_file, 'r') as csv_file:
            reader = csv.reader(csv_file, delimiter=':')
            for row in reader:
                if len(row) != 2:
                    raise ValueError(
                        'Each row of the csv label map file must be in '
                        '`id:name` format.')
                id_index = int(row[0])
                name = row[1]
                label_map_dict[id_index] = {
                    'id': id_index,
                    'name': name,
                }
    else:
        # The flag name was previously misspelled here, which masked the
        # intended ValueError with an attribute lookup failure.
        raise ValueError('Unsupported label map format: {}.'.format(
            FLAGS.label_map_format))

    params = config_factory.config_generator(FLAGS.model)
    if FLAGS.config_file:
        params = params_dict.override_params_dict(
            params, FLAGS.config_file, is_strict=True)
    params = params_dict.override_params_dict(
        params, FLAGS.params_override, is_strict=True)
    params.override(
        {
            'architecture': {
                'use_bfloat16': False,  # The inference runs on CPU/GPU.
            },
        },
        is_strict=True)
    params.validate()
    params.lock()

    model = model_factory.model_generator(params)

    image_with_detections_list = []
    res = []
    with tf.Graph().as_default():
        image_input = tf.placeholder(shape=(), dtype=tf.string)
        image = tf.io.decode_image(image_input, channels=3)
        image.set_shape([None, None, 3])

        image = input_utils.normalize_image(image)
        image_size = [FLAGS.image_size, FLAGS.image_size]
        image, image_info = input_utils.resize_and_crop_image(
            image,
            image_size,
            image_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image.set_shape([image_size[0], image_size[1], 3])

        # batching.
        images = tf.reshape(image, [1, image_size[0], image_size[1], 3])
        images_info = tf.expand_dims(image_info, axis=0)

        # model inference
        outputs = model.build_outputs(
            images, {'image_info': images_info}, mode=mode_keys.PREDICT)

        # Divide the boxes by row 2 of `image_info`, tiled to cover the four
        # yxyx columns (presumably the resize scale, mapping the boxes back
        # to the original image -- confirm against input_utils).
        outputs['detection_boxes'] = (
            outputs['detection_boxes'] /
            tf.tile(images_info[:, 2:3, :], [1, 1, 2]))

        predictions = outputs

        # Create a saver in order to load the pre-trained checkpoint.
        saver = tf.train.Saver()

        with tf.Session() as sess:
            print(' - Loading the checkpoint...')
            saver.restore(sess, FLAGS.checkpoint_path)

            image_files = tf.gfile.Glob(FLAGS.image_file_pattern)
            for i, image_file in enumerate(image_files):
                print(' - Processing image %d...' % i)

                with tf.gfile.GFile(image_file, 'rb') as f:
                    image_bytes = f.read()

                # Decode from the bytes already read through tf.gfile so
                # that any path tf.gfile can read also works for PIL.
                # Converting to RGB is needed for images with 4 channels.
                pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
                width, height = pil_image.size
                # Writable (height, width, 3) uint8 copy of the pixels.
                np_image = np.array(pil_image)

                predictions_np = sess.run(
                    predictions, feed_dict={image_input: image_bytes})

                num_detections = int(predictions_np['num_detections'][0])
                np_boxes = predictions_np['detection_boxes'][
                    0, :num_detections]
                np_scores = predictions_np['detection_scores'][
                    0, :num_detections]
                np_classes = predictions_np['detection_classes'][
                    0, :num_detections].astype(np.int32)
                np_attributes = predictions_np['detection_attributes'][
                    0, :num_detections, :]

                # Reset both per image so a model without masks neither
                # crashes nor reuses the previous image's masks.
                np_masks = None
                encoded_masks = None
                if 'detection_masks' in predictions_np:
                    instance_masks = predictions_np['detection_masks'][
                        0, :num_detections]
                    np_masks = mask_utils.paste_instance_masks(
                        instance_masks, box_utils.yxyx_to_xywh(np_boxes),
                        height, width)
                    encoded_masks = [
                        mask_api.encode(np.asfortranarray(np_mask))
                        for np_mask in np_masks
                    ]

                res.append({
                    'image_file': image_file,
                    'boxes': np_boxes,
                    'classes': np_classes,
                    'scores': np_scores,
                    'attributes': np_attributes,
                    'masks': encoded_masks,
                })

                image_with_detections = (
                    visualization_utils.
                    visualize_boxes_and_labels_on_image_array(
                        np_image,
                        np_boxes,
                        np_classes,
                        np_scores,
                        label_map_dict,
                        instance_masks=np_masks,
                        use_normalized_coordinates=False,
                        max_boxes_to_draw=FLAGS.max_boxes_to_draw,
                        min_score_thresh=FLAGS.min_score_threshold))
                image_with_detections_list.append(image_with_detections)

    print(' - Saving the outputs...')
    image_strs = []
    for annotated in image_with_detections_list:
        formatted_image = Image.fromarray(annotated.astype(np.uint8))
        with io.BytesIO() as stream:
            formatted_image.save(stream, format='JPEG')
            data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
        image_strs.append(
            '<img src="data:image/jpeg;base64,{}", height=800>'.format(
                data_uri))
    html_str = '<html>' + ' '.join(image_strs) + '</html>'
    with tf.gfile.GFile(FLAGS.output_html, 'w') as f:
        f.write(html_str)
    np.save(FLAGS.output_file, res)
def saveOutputs(sess,
                predictions,
                image_input,
                image_file,
                label_map_file='dataset/fashionpedia_label_map.csv',
                output_html='output.html',
                output_npy='output.npy',
                max_boxes_to_draw=20,
                min_score_thresh=0.05):
    """Run detection on one image and save an HTML view plus raw results.

    Args:
        sess: Session in which `predictions` can be evaluated.
        predictions: Dict of output tensors; must contain `num_detections`,
            `detection_boxes`, `detection_scores`, `detection_classes` and
            `detection_attributes`, and may contain `detection_masks`.
        image_input: Placeholder that is fed the encoded image bytes.
        image_file: Path of the image to process (read through `tf.gfile`).
        label_map_file: Path of the `id:name` label map (colon-delimited
            CSV).
        output_html: Where the annotated image is written as an HTML page.
        output_npy: Where the detection record is written with `np.save`.
            Note that `np.save` appends `.npy` when the name lacks it.
        max_boxes_to_draw: Maximum number of boxes drawn on the image.
        min_score_thresh: Minimum score for a box to be drawn.

    Returns:
        A dict `{"v": output_html, "d": output_npy}` naming the two files.

    Raises:
        ValueError: If a label map row is not in `id:name` form.
    """
    print(' - Loading the label map...')
    label_map_dict = {}
    with tf.gfile.Open(label_map_file, 'r') as csv_file:
        reader = csv.reader(csv_file, delimiter=':')
        for row in reader:
            if len(row) != 2:
                raise ValueError(
                    'Each row of the csv label map file must be in '
                    '`id:name` format.')
            id_index = int(row[0])
            name = row[1]
            label_map_dict[id_index] = {
                'id': id_index,
                'name': name,
            }

    print(' - Processing image ...')
    with tf.gfile.GFile(image_file, 'rb') as f:
        image_bytes = f.read()

    # Decode from the bytes already read through tf.gfile so that any path
    # tf.gfile can read also works for PIL. Converting to RGB is needed for
    # images with 4 channels.
    pil_image = Image.open(io.BytesIO(image_bytes)).convert('RGB')
    width, height = pil_image.size
    # Writable (height, width, 3) uint8 copy of the pixels.
    np_image = np.array(pil_image)

    predictions_np = sess.run(
        predictions, feed_dict={image_input: image_bytes})

    num_detections = int(predictions_np['num_detections'][0])
    np_boxes = predictions_np['detection_boxes'][0, :num_detections]
    np_scores = predictions_np['detection_scores'][0, :num_detections]
    np_classes = predictions_np['detection_classes'][
        0, :num_detections].astype(np.int32)
    np_attributes = predictions_np['detection_attributes'][
        0, :num_detections, :]

    # Masks are only used for drawing; they are not part of the saved
    # record, so no RLE encoding is performed.
    np_masks = None
    if 'detection_masks' in predictions_np:
        instance_masks = predictions_np['detection_masks'][0, :num_detections]
        np_masks = mask_utils.paste_instance_masks(
            instance_masks, box_utils.yxyx_to_xywh(np_boxes), height, width)

    res = [{
        'image_file': image_file,
        'boxes': np_boxes,
        'classes': np_classes,
        'scores': np_scores,
        'attributes': np_attributes
    }]
    print("Output generated")

    image_with_detections = (
        visualization_utils.visualize_boxes_and_labels_on_image_array(
            np_image,
            np_boxes,
            np_classes,
            np_scores,
            label_map_dict,
            instance_masks=np_masks,
            use_normalized_coordinates=False,
            max_boxes_to_draw=max_boxes_to_draw,
            min_score_thresh=min_score_thresh))

    print(' - Saving the outputs...')
    formatted_image = Image.fromarray(image_with_detections.astype(np.uint8))
    with io.BytesIO() as stream:
        formatted_image.save(stream, format='JPEG')
        data_uri = base64.b64encode(stream.getvalue()).decode('utf-8')
    html_str = (
        '<html>' +
        '<img src="data:image/jpeg;base64,{}", height=800>'.format(data_uri) +
        '</html>')
    with tf.gfile.GFile(output_html, 'w') as f:
        f.write(html_str)
    np.save(output_npy, res)
    return {"v": output_html, "d": output_npy}
def convert_predictions_to_coco_annotations(predictions):
    """Converts a batch of predictions to annotations in COCO format.

    One annotation is emitted for every one of the K detection slots of
    every image; `num_detections` is not consulted here.

    Note: the entries of `predictions['detection_boxes']` (and of
    `predictions['detection_outer_boxes']` when present) are overwritten in
    place with their `xywh` conversion.

    Args:
      predictions: a dictionary of lists of numpy arrays including the
        following fields. K below denotes the maximum number of instances per
        image. Batches may have different sizes (e.g. a smaller final batch).
        Required fields:
          - source_id: a list of numpy arrays of int or string of shape
              [batch_size].
          - num_detections: a list of numpy arrays of int of shape
              [batch_size].
          - detection_boxes: a list of numpy arrays of float of shape
              [batch_size, K, 4], where coordinates are in the original image
              space (not the scaled image space).
          - detection_classes: a list of numpy arrays of int of shape
              [batch_size, K].
          - detection_scores: a list of numpy arrays of float of shape
              [batch_size, K].
        Optional fields:
          - detection_masks: a list of numpy arrays of float of shape
              [batch_size, K, mask_height, mask_width]. When present,
              `image_info` is also required; its `[j, 0, 0]` and `[j, 0, 1]`
              entries are passed on as the output mask height and width.
          - detection_outer_boxes: when present, these boxes (instead of
              `detection_boxes`) are used to place the masks.

    Returns:
      coco_predictions: prediction in COCO annotation format, a list of dicts
        with `image_id`, `category_id`, `bbox`, `score`, optionally
        `segmentation`, and a 1-based `id`. Empty when there are no batches.
    """
    coco_predictions = []
    num_batches = len(predictions['source_id'])
    use_outer_box = 'detection_outer_boxes' in predictions
    has_masks = 'detection_masks' in predictions

    for i in range(num_batches):
        predictions['detection_boxes'][i] = box_utils.yxyx_to_xywh(
            predictions['detection_boxes'][i])

        if use_outer_box:
            predictions['detection_outer_boxes'][i] = box_utils.yxyx_to_xywh(
                predictions['detection_outer_boxes'][i])
            mask_boxes = predictions['detection_outer_boxes']
        else:
            mask_boxes = predictions['detection_boxes']

        # Read the sizes from this batch rather than from batch 0 so that a
        # smaller final batch does not cause out-of-range indexing.
        batch_size = predictions['source_id'][i].shape[0]
        max_num_detections = predictions['detection_classes'][i].shape[1]

        for j in range(batch_size):
            if has_masks:
                image_masks = generate_segmentation_from_masks(
                    predictions['detection_masks'][i][j],
                    mask_boxes[i][j],
                    int(predictions['image_info'][i][j, 0, 0]),
                    int(predictions['image_info'][i][j, 0, 1]),
                    is_image_mask=False)
                binary_masks = (image_masks > 0.0).astype(np.uint8)
                encoded_masks = [
                    mask_utils.encode(np.asfortranarray(binary_mask))
                    for binary_mask in binary_masks
                ]
            for k in range(max_num_detections):
                ann = {
                    'image_id': predictions['source_id'][i][j],
                    'category_id': predictions['detection_classes'][i][j, k],
                    'bbox': predictions['detection_boxes'][i][j, k],
                    'score': predictions['detection_scores'][i][j, k],
                }
                if has_masks:
                    ann['segmentation'] = encoded_masks[k]
                coco_predictions.append(ann)

    # Annotation ids are 1-based and follow emission order.
    for ann_id, ann in enumerate(coco_predictions, start=1):
        ann['id'] = ann_id

    return coco_predictions
def convert_predictions_to_coco_annotations(predictions,
                                            eval_image_sizes: dict = None,
                                            output_image_size: int = None,
                                            encode_mask_fn=None,
                                            score_threshold=0.05):
    """Converts batched predictions to score-filtered COCO annotations.

    Only detections whose score is at least `score_threshold` are emitted.
    Boxes and masks are rescaled from the original image size to an
    "evaluation" size, chosen per image as follows:

      1. `eval_image_sizes[image_id]` when `eval_image_sizes` is truthy;
      2. otherwise `get_new_image_size(orig_size, output_image_size)` when
         `output_image_size` is truthy;
      3. otherwise the original size (no rescaling).

    The scale factor is the ratio of element 0 of the original size to
    element 0 of the evaluation size.

    Note: the entries of `predictions['detection_boxes']` (and of
    `predictions['detection_outer_boxes']` when present) are overwritten in
    place with their `xywh` conversion.

    Args:
      predictions: a dictionary of lists of numpy arrays, one list entry per
        batch. K below denotes the maximum number of instances per image.
        Required fields:
          - source_id: arrays of shape [batch_size].
          - image_info: arrays indexed as `[j, 0]` to obtain the original
              image size of image j.
          - detection_boxes: float arrays of shape [batch_size, K, 4] in
              yxyx order.
          - detection_classes: arrays of shape [batch_size, K].
          - detection_scores: float arrays of shape [batch_size, K].
        Optional fields:
          - detection_masks: float arrays of shape
              [batch_size, K, mask_height, mask_width].
          - detection_outer_boxes: boxes used instead of `detection_boxes`
              when pasting masks.
          - detection_attributes: arrays indexed as `[j, k]`; copied to
              `attribute_probabilities`.
      eval_image_sizes: optional mapping from image id to evaluation size.
      output_image_size: optional value forwarded to `get_new_image_size`.
      encode_mask_fn: optional callable turning one binary uint8 mask into
        its encoded form; defaults to `mask_api.encode` applied to a
        Fortran-ordered copy.
      score_threshold: minimum detection score to keep.

    Returns:
      A list of annotation dicts with `image_id`, `category_id`, `bbox`,
      `score`, optionally `segmentation`, `mask_mean_score`,
      `mask_area_fraction` and `attribute_probabilities`, and a 1-based `id`.
      `mask_mean_score` is the mean mask value over pixels above 0.5; it is
      the result of a division by zero when no pixel exceeds 0.5.
    """
    if encode_mask_fn is None:
        # Bound lazily: `mask_api` is only touched if a mask gets encoded.
        def encode_mask_fn(x):
            return mask_api.encode(np.asfortranarray(x))

    has_outer_boxes = 'detection_outer_boxes' in predictions
    has_masks = 'detection_masks' in predictions
    has_attributes = 'detection_attributes' in predictions
    mask_box_key = (
        'detection_outer_boxes' if has_outer_boxes else 'detection_boxes')

    annotations = []
    batch_count = len(predictions['source_id'])

    for b in tqdm(range(batch_count), total=batch_count):
        # Convert this batch's boxes from yxyx to xywh, in place.
        predictions['detection_boxes'][b] = box_utils.yxyx_to_xywh(
            predictions['detection_boxes'][b])
        if has_outer_boxes:
            predictions['detection_outer_boxes'][b] = box_utils.yxyx_to_xywh(
                predictions['detection_outer_boxes'][b])
        mask_boxes = predictions[mask_box_key]

        images_in_batch = predictions['source_id'][b].shape[0]
        for j in range(images_in_batch):
            image_id = predictions['source_id'][b][j]
            orig_image_size = predictions['image_info'][b][j, 0]

            # Pick the size the annotations should be expressed in.
            if eval_image_sizes:
                eval_image_size = eval_image_sizes[image_id]
            elif output_image_size:
                eval_image_size = get_new_image_size(
                    orig_image_size, output_image_size)
            else:
                eval_image_size = orig_image_size
            eval_scale = orig_image_size[0] / eval_image_size[0]

            passing = predictions['detection_scores'][b][j] >= score_threshold
            bbox_indices = np.argwhere(passing).flatten()

            if has_masks:
                predicted_masks = predictions['detection_masks'][b][
                    j, bbox_indices]
                scaled_mask_boxes = (
                    mask_boxes[b][j, bbox_indices].astype(np.float32) /
                    eval_scale)
                image_masks = mask_utils.paste_instance_masks(
                    predicted_masks,
                    scaled_mask_boxes,
                    int(eval_image_size[0]),
                    int(eval_image_size[1]))
                binary_masks = (image_masks > 0.0).astype(np.uint8)
                encoded_masks = [encode_mask_fn(one) for one in binary_masks]

                # Per-mask statistics on the raw (un-pasted) predictions.
                confident = (predicted_masks > 0.5).astype(np.float32)
                mask_areas = confident.sum(axis=-1).sum(axis=-1)
                pixels_per_mask = np.prod(predicted_masks.shape[1:])
                mask_area_fractions = (mask_areas / pixels_per_mask).tolist()
                confident_sum = (
                    (predicted_masks * confident).sum(axis=-1).sum(axis=-1))
                mask_mean_scores = (confident_sum / mask_areas).tolist()

            for m, k in enumerate(bbox_indices):
                scaled_box = (
                    predictions['detection_boxes'][b][j, k].astype(np.float32)
                    / eval_scale)
                ann = {
                    'image_id': int(image_id),
                    'category_id': int(
                        predictions['detection_classes'][b][j, k]),
                    'bbox': scaled_box.tolist(),
                    'score': float(predictions['detection_scores'][b][j, k]),
                }
                if has_masks:
                    ann['segmentation'] = encoded_masks[m]
                    ann['mask_mean_score'] = mask_mean_scores[m]
                    ann['mask_area_fraction'] = mask_area_fractions[m]
                if has_attributes:
                    ann['attribute_probabilities'] = predictions[
                        'detection_attributes'][b][j, k].tolist()
                annotations.append(ann)

    # Ids are 1-based and follow emission order.
    for position, ann in enumerate(annotations):
        ann['id'] = position + 1

    return annotations
def convert_predictions_to_coco_annotations(
    prediction: Prediction,
    image_id: int,
    filename: str,
    score_threshold=0.05,
) -> list[COCOAnnotation]:
    """Convert the prediction for a single image into COCO annotations.

    This is made, modifying a function of the same name in
    /tf_tpu_models/official/detection/evaluation/coco_utils.py

    Detections scoring below ``score_threshold`` are dropped, and so are
    detections whose mask has no pixel above 0.5 (their mean mask score is
    NaN). Boxes are multiplied by the original-to-resized ratio taken along
    the longer side of the original image, and masks are pasted onto a
    canvas of the original image size.

    Note that ``prediction["pred_detection_boxes"]`` is overwritten in place
    with its ``xywh`` conversion.

    Parameters
    ----------
    prediction : Prediction
        Mapping with ``pred_detection_boxes`` (yxyx), ``pred_detection_scores``,
        ``pred_detection_classes``, ``pred_detection_masks`` and
        ``pred_image_info``, whose rows 0 and 1 are read as the original and
        the resized shape.
    image_id : int
        Stored on every annotation as ``image_id``.
    filename : str
        Stored on every annotation as ``filename``.
    score_threshold : float, optional
        Minimum detection score to keep, by default 0.05

    Returns
    -------
    list[COCOAnnotation]
        One annotation per kept detection, in detection order.
    """
    xywh_boxes = box_utils.yxyx_to_xywh(prediction["pred_detection_boxes"])
    prediction["pred_detection_boxes"] = xywh_boxes

    image_info = prediction["pred_image_info"]
    orig_shape = image_info[0]
    resize_shape = image_info[1]
    # Original-to-resized ratio, measured along the longer original side.
    longer_axis = 0 if orig_shape[0] > orig_shape[1] else 1
    o2r = orig_shape[longer_axis] / resize_shape[longer_axis]

    passing = prediction["pred_detection_scores"] >= score_threshold
    bbox_indices = np.argwhere(passing).flatten()

    predicted_masks = prediction["pred_detection_masks"][bbox_indices]
    image_masks = mask_utils.paste_instance_masks(
        predicted_masks,
        xywh_boxes[bbox_indices].astype(np.float32) * o2r,
        int(orig_shape[0]),
        int(orig_shape[1]),
    )
    binary_masks = (image_masks > 0.0).astype(np.uint8)
    encoded_masks = [_encode_mask_fn(one) for one in binary_masks]

    # Per-mask statistics on the raw (un-pasted) predictions.
    confident = (predicted_masks > 0.5).astype(np.float32)
    mask_areas = confident.sum(axis=-1).sum(axis=-1)
    pixels_per_mask = np.prod(predicted_masks.shape[1:])
    mask_area_fractions = (mask_areas / pixels_per_mask).tolist()
    confident_sum = (predicted_masks * confident).sum(axis=-1).sum(axis=-1)
    mask_mean_scores = (confident_sum / mask_areas).tolist()

    anns = []
    for m, k in enumerate(bbox_indices):
        mask_mean_score = mask_mean_scores[m]
        # mask_mean_score is float("nan") when mask_area is 0.
        if math.isnan(mask_mean_score):
            continue
        # Plain Python floats: `astype(np.float32)` values can't be
        # serialized as JSON.
        scaled_box = tuple(float(x) for x in xywh_boxes[k] * o2r)
        anns.append(
            COCOAnnotation(
                image_id=image_id,
                filename=filename,
                category_id=int(prediction["pred_detection_classes"][k]),
                bbox=scaled_box,
                mask_area_fraction=float(mask_area_fractions[m]),
                score=float(prediction["pred_detection_scores"][k]),
                segmentation=encoded_masks[m],
                mask_mean_score=mask_mean_score,
            )
        )
    return anns