def __init__(self):
    logger.info('Loading Tensorflow Detection API')
    weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                            cache_dir=os.path.abspath(config.WEIGHT_PATH),
                            cache_subdir='models')
    extract_path = weights_path.replace('.tar.gz', '')
    if not os.path.exists(extract_path):
        tar = tarfile.open(weights_path, "r:gz")
        tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
        tar.close()
    pb_path = os.path.join(extract_path, self.PB_NAME)

    self.graph = tf.Graph()
    with self.graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    self.categories = label_map_util.convert_label_map_to_categories(
        self.label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    self.category_index = label_map_util.create_category_index(self.categories)
def _build_metric_names(self):
    """Builds a list with metric names."""
    self._metric_names = [
        self._metric_prefix + 'Precision/mAP@{}IOU'.format(
            self._matching_iou_threshold)
    ]
    if self._evaluate_corlocs:
        self._metric_names.append(
            self._metric_prefix +
            'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold))

    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(self._num_classes):
        if idx + self._label_id_offset in category_index:
            category_name = category_index[idx + self._label_id_offset]['name']
            try:
                category_name = unicode(category_name, 'utf-8')
            except TypeError:
                pass
            category_name = unicodedata.normalize('NFKD', category_name).encode(
                'ascii', 'ignore')
            self._metric_names.append(
                self._metric_prefix +
                'PerformanceByCategory/AP@{}IOU/{}'.format(
                    self._matching_iou_threshold, category_name))
            if self._evaluate_corlocs:
                self._metric_names.append(
                    self._metric_prefix +
                    'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                        self._matching_iou_threshold, category_name))
def evaluate(self):
    """Compute evaluation result.

    Returns:
      A dictionary of metrics with the following fields -

      1. summary_metrics:
        '<prefix if not empty>_Precision/mAP@<matching_iou_threshold>IOU': mean
        average precision at the specified IOU threshold.

      2. per_category_ap: category specific results with keys of the form
        '<prefix if not empty>_PerformanceByCategory/
        mAP@<matching_iou_threshold>IOU/category'.
    """
    (per_class_ap, mean_ap, per_class_precision, per_class_recall,
     per_class_corloc, mean_corloc) = (
         self._evaluation.evaluate())
    pascal_metrics = {self._metric_names[0]: mean_ap}
    if self._evaluate_corlocs:
        pascal_metrics[self._metric_names[1]] = mean_corloc
    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(per_class_ap.size):
        if idx + self._label_id_offset in category_index:
            category_name = category_index[idx + self._label_id_offset]['name']
            try:
                category_name = unicode(category_name, 'utf-8')
            except TypeError:
                pass
            category_name = unicodedata.normalize(
                'NFKD', category_name).encode('ascii', 'ignore')
            display_name = (
                self._metric_prefix +
                'PerformanceByCategory/AP@{}IOU/{}'.format(
                    self._matching_iou_threshold, category_name))
            pascal_metrics[display_name] = per_class_ap[idx]

            # Optionally add precision and recall values.
            if self._evaluate_precision_recall:
                display_name = (
                    self._metric_prefix +
                    'PerformanceByCategory/Precision@{}IOU/{}'.format(
                        self._matching_iou_threshold, category_name))
                pascal_metrics[display_name] = per_class_precision[idx]
                display_name = (
                    self._metric_prefix +
                    'PerformanceByCategory/Recall@{}IOU/{}'.format(
                        self._matching_iou_threshold, category_name))
                pascal_metrics[display_name] = per_class_recall[idx]

            # Optionally add CorLoc metrics.
            if self._evaluate_corlocs:
                display_name = (
                    self._metric_prefix +
                    'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                        self._matching_iou_threshold, category_name))
                pascal_metrics[display_name] = per_class_corloc[idx]
    return pascal_metrics
def test_create_category_index(self):
    categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
    category_index = label_map_util.create_category_index(categories)
    self.assertDictEqual({
        1: {
            'name': u'1',
            'id': 1
        },
        2: {
            'name': u'2',
            'id': 2
        }
    }, category_index)
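# The test above pins down the contract of create_category_index: it keys the
# category list by integer 'id'. A minimal, hypothetical re-implementation for
# illustration only (the real utility lives in
# object_detection.utils.label_map_util):
def _create_category_index_sketch(categories):
    # Map each category dict to its own 'id' field.
    return {cat['id']: cat for cat in categories}

assert _create_category_index_sketch(
    [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]) == {
        1: {'name': u'1', 'id': 1},
        2: {'name': u'2', 'id': 2}
    }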
def _create_tf_record_from_coco_annotations(
        annotations_file, image_dir, output_path, include_masks, num_shards):
    """Loads COCO annotation json files and converts to tf.Record format.

    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path to output tf.Record file.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
      num_shards: number of output file shards.
    """
    with contextlib2.ExitStack() as tf_record_close_stack, \
            tf.gfile.GFile(annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            tf.logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)
        missing_annotation_count = 0
        for image in images:
            image_id = image['id']
            if image_id not in annotations_index:
                missing_annotation_count += 1
                annotations_index[image_id] = []
        tf.logging.info('%d images are missing annotations.',
                        missing_annotation_count)

        total_num_annotations_skipped = 0
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                tf.logging.info('On image %d of %d', idx, len(images))
            annotations_list = annotations_index[image['id']]
            _, tf_example, num_annotations_skipped = create_tf_example(
                image, annotations_list, image_dir, category_index,
                include_masks)
            total_num_annotations_skipped += num_annotations_skipped
            shard_idx = idx % num_shards
            output_tfrecords[shard_idx].write(tf_example.SerializeToString())
        tf.logging.info('Finished writing, skipped %d annotations.',
                        total_num_annotations_skipped)
def __init__(self):
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
        # Get handles to input and output tensors.
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
        image_tensor = tf.get_default_graph().get_tensor_by_name(
            'image_tensor:0')
        if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image.
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframing is required to translate masks from box coordinates to
            # image coordinates and fit the image size. Use the input
            # placeholder's dynamic height/width, since no decoded image is in
            # scope inside __init__.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes,
                tf.shape(image_tensor)[1], tf.shape(image_tensor)[2])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension.
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
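# A hedged sketch of how the handles prepared in __init__ above are typically
# consumed for one image. The method name `run_inference` and its argument
# `image_np` are assumptions (not part of the original class); numpy is assumed
# to be imported as np.
def run_inference(self, image_np):
    # The model expects a batch dimension: [1, height, width, 3].
    output = self.session.run(
        self.tensor_dict,
        feed_dict={self.image_tensor: np.expand_dims(image_np, axis=0)})
    # Unbatch and cast the outputs to convenient types.
    output['num_detections'] = int(output['num_detections'][0])
    output['detection_classes'] = output['detection_classes'][0].astype(np.int64)
    output['detection_boxes'] = output['detection_boxes'][0]
    output['detection_scores'] = output['detection_scores'][0]
    return output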
def evaluate(self):
    """Compute evaluation result.

    Returns:
      A dictionary of metrics with the following fields -

      1. summary_metrics:
        'Precision/mAP@<matching_iou_threshold>IOU': mean average precision at
        the specified IOU threshold.

      2. per_category_ap: category specific results with keys of the form
        'PerformanceByCategory/mAP@<matching_iou_threshold>IOU/category'.
    """
    (per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = (
        self._evaluation.evaluate())
    pascal_metrics = {
        self._metric_prefix +
        'Precision/mAP@{}IOU'.format(self._matching_iou_threshold):
            mean_ap
    }
    if self._evaluate_corlocs:
        pascal_metrics[self._metric_prefix +
                       'Precision/meanCorLoc@{}IOU'.format(
                           self._matching_iou_threshold)] = mean_corloc
    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(per_class_ap.size):
        if idx + self._label_id_offset in category_index:
            display_name = (
                self._metric_prefix +
                'PerformanceByCategory/AP@{}IOU/{}'.format(
                    self._matching_iou_threshold,
                    category_index[idx + self._label_id_offset]['name']))
            pascal_metrics[display_name] = per_class_ap[idx]

            # Optionally add CorLoc metrics.
            if self._evaluate_corlocs:
                display_name = (
                    self._metric_prefix +
                    'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                        self._matching_iou_threshold,
                        category_index[idx + self._label_id_offset]['name']))
                pascal_metrics[display_name] = per_class_corloc[idx]
    return pascal_metrics
CWD_PATH = os.getcwd()

# Path to the frozen detection graph. This is the actual model that is used
# for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME,
                            'frozen_inference_graph.pb')

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data',
                              'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Loading label map.
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape:
    # [1, None, None, 3].
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was
    # detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
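    # Sketch (assumption): the remainder of detect_objects as it is typically
    # written against the TF1 Object Detection API; the original snippet stops
    # after fetching the tensors above. vis_util is assumed to be
    # object_detection.utils.visualization_utils.
    (boxes, scores, classes, num) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np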
def evaluate_detection_results_pascal_voc(result_lists,
                                          categories,
                                          label_id_offset=0,
                                          iou_thres=0.5,
                                          corloc_summary=False):
    """Computes Pascal VOC detection metrics given groundtruth and detections.

    This function computes Pascal VOC metrics from the detections and
    groundtruth boxes encoded in result_lists and writes the evaluation results
    to tf summaries, which can be viewed on TensorBoard.

    Args:
      result_lists: a dictionary holding lists of groundtruth and detection
        data corresponding to each image being evaluated. The following keys
        are required:
          'image_id': a list of string ids
          'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
          'detection_scores': a list of float32 numpy arrays of shape [N]
          'detection_classes': a list of int32 numpy arrays of shape [N]
          'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
          'groundtruth_classes': a list of int32 numpy arrays of shape [M]
        and the remaining fields below are optional:
          'difficult': a list of boolean arrays of shape [M] indicating the
            difficulty of groundtruth boxes. Some datasets like PASCAL VOC
            provide this information, and it is used to remove difficult
            examples from eval in order to not penalize the models on them.
        Note that it is okay to have additional fields in result_lists; they
        are simply ignored.
      categories: a list of dictionaries representing all possible categories.
        Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
      label_id_offset: an integer offset for the label space.
      iou_thres: float determining the IoU threshold at which a box is
        considered correct. Defaults to the standard 0.5.
      corloc_summary: boolean. If True, also outputs CorLoc metrics.

    Returns:
      A dictionary of metric names to scalar values.

    Raises:
      ValueError: if the set of keys in result_lists is not a superset of the
        expected list of keys. Unexpected keys are ignored.
      ValueError: if the lists in result_lists have inconsistent sizes.
    """
    # Check for expected keys in result_lists.
    expected_keys = [
        'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
    ]
    expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
    if not set(expected_keys).issubset(set(result_lists.keys())):
        raise ValueError('result_lists does not have expected key set.')
    num_results = len(result_lists[expected_keys[0]])
    for key in expected_keys:
        if len(result_lists[key]) != num_results:
            raise ValueError('Inconsistent list sizes in result_lists')

    # Pascal VOC evaluator assumes the foreground index starts from zero.
    categories = copy.deepcopy(categories)
    for idx in range(len(categories)):
        categories[idx]['id'] -= label_id_offset

    # num_classes (maybe encoded in categories)
    num_classes = max([cat['id'] for cat in categories]) + 1
    logging.info('Computing Pascal VOC metrics on results.')
    if all(image_id.isdigit() for image_id in result_lists['image_id']):
        image_ids = [int(image_id) for image_id in result_lists['image_id']]
    else:
        image_ids = range(num_results)

    evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
        num_classes, matching_iou_threshold=iou_thres)

    difficult_lists = None
    if 'difficult' in result_lists and result_lists['difficult']:
        difficult_lists = result_lists['difficult']
    for idx, image_id in enumerate(image_ids):
        difficult = None
        if difficult_lists is not None and difficult_lists[idx].size:
            difficult = difficult_lists[idx].astype(np.bool)
        evaluator.add_single_ground_truth_image_info(
            image_id, result_lists['groundtruth_boxes'][idx],
            result_lists['groundtruth_classes'][idx] - label_id_offset,
            difficult)
        evaluator.add_single_detected_image_info(
            image_id, result_lists['detection_boxes'][idx],
            result_lists['detection_scores'][idx],
            result_lists['detection_classes'][idx] - label_id_offset)

    per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
        evaluator.evaluate())

    metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
    category_index = label_map_util.create_category_index(categories)
    for idx in range(per_class_ap.size):
        if idx in category_index:
            display_name = ('PerformanceByCategory/mAP@{}IOU/{}'.format(
                iou_thres, category_index[idx]['name']))
            metrics[display_name] = per_class_ap[idx]

    if corloc_summary:
        metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
        for idx in range(per_class_corloc.size):
            if idx in category_index:
                display_name = (
                    'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                        iou_thres, category_index[idx]['name']))
                metrics[display_name] = per_class_corloc[idx]
    return metrics
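# A hypothetical single-image call of the function above; all names and values
# are illustrative only, and numpy is assumed to be imported as np. With a
# perfect match the mAP comes out as 1.0.
result_lists = {
    'image_id': ['0'],
    'detection_boxes': [np.array([[0., 0., 1., 1.]], dtype=np.float32)],
    'detection_scores': [np.array([0.9], dtype=np.float32)],
    'detection_classes': [np.array([1], dtype=np.int32)],
    'groundtruth_boxes': [np.array([[0., 0., 1., 1.]], dtype=np.float32)],
    'groundtruth_classes': [np.array([1], dtype=np.int32)],
}
metrics = evaluate_detection_results_pascal_voc(
    result_lists, categories=[{'id': 1, 'name': 'cat'}], label_id_offset=1)
print(metrics['Precision/mAP@0.5IOU'])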
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
    """Visualizes detection results and writes visualizations to image summaries.

    This function visualizes an image with its detected bounding boxes and
    writes to image summaries which can be viewed on TensorBoard. It optionally
    also writes images to a directory. If an entry is missing from the label
    map, the class name shown in the visualization is "N/A".

    Args:
      result_dict: a dictionary holding groundtruth and detection data
        corresponding to each image being evaluated. The following keys are
        required:
          'original_image': a numpy array representing the image with shape
            [1, height, width, 3] or [1, height, width, 1]
          'detection_boxes': a numpy array of shape [N, 4]
          'detection_scores': a numpy array of shape [N]
          'detection_classes': a numpy array of shape [N]
        The following keys are optional:
          'groundtruth_boxes': a numpy array of shape [N, 4]
          'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
        Detections are assumed to be provided in decreasing order of score,
        and scores are assumed to be probabilities between 0 and 1.
      tag: tensorboard tag (string) to associate with image.
      global_step: global step at which the visualizations are generated.
      categories: a list of dictionaries representing all possible categories.
        Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
      summary_dir: the output directory to which the image summaries are
        written.
      export_dir: the output directory to which images are written. If this is
        empty (default), then images are not exported.
      agnostic_mode: boolean (default: False) controlling whether to evaluate
        in class-agnostic mode or not.
      show_groundtruth: boolean (default: False) controlling whether to show
        groundtruth boxes in addition to detected boxes.
      groundtruth_box_visualization_color: box color for visualizing
        groundtruth boxes.
      min_score_thresh: minimum score threshold for a box to be visualized.
      max_num_predictions: maximum number of detections to visualize.
      skip_scores: whether to skip the score when drawing a single detection.
      skip_labels: whether to skip the label when drawing a single detection.
      keep_image_id_for_visualization_export: whether to keep the image
        identifier in the filename when exported to export_dir.

    Raises:
      ValueError: if result_dict does not contain the expected keys (i.e.,
        'original_image', 'detection_boxes', 'detection_scores',
        'detection_classes')
    """
    detection_fields = fields.DetectionResultFields
    input_fields = fields.InputDataFields
    if not set([
            input_fields.original_image,
            detection_fields.detection_boxes,
            detection_fields.detection_scores,
            detection_fields.detection_classes,
    ]).issubset(set(result_dict.keys())):
        raise ValueError('result_dict does not contain all expected keys.')
    if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
        raise ValueError('If show_groundtruth is enabled, result_dict must '
                         'contain groundtruth_boxes.')
    logging.info('Creating detection visualizations.')
    category_index = label_map_util.create_category_index(categories)

    image = np.squeeze(result_dict[input_fields.original_image], axis=0)
    if image.shape[2] == 1:  # If one channel image, repeat in RGB.
        image = np.tile(image, [1, 1, 3])
    detection_boxes = result_dict[detection_fields.detection_boxes]
    detection_scores = result_dict[detection_fields.detection_scores]
    detection_classes = np.int32(
        (result_dict[detection_fields.detection_classes]))
    detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
    detection_masks = result_dict.get(detection_fields.detection_masks)
    detection_boundaries = result_dict.get(
        detection_fields.detection_boundaries)

    # Plot groundtruth underneath detections.
    if show_groundtruth:
        groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
        groundtruth_keypoints = result_dict.get(
            input_fields.groundtruth_keypoints)
        vis_utils.visualize_boxes_and_labels_on_image_array(
            image=image,
            boxes=groundtruth_boxes,
            classes=None,
            scores=None,
            category_index=category_index,
            keypoints=groundtruth_keypoints,
            use_normalized_coordinates=False,
            max_boxes_to_draw=None,
            groundtruth_box_visualization_color=
            groundtruth_box_visualization_color)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image,
        detection_boxes,
        detection_classes,
        detection_scores,
        category_index,
        instance_masks=detection_masks,
        instance_boundaries=detection_boundaries,
        keypoints=detection_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=max_num_predictions,
        min_score_thresh=min_score_thresh,
        agnostic_mode=agnostic_mode,
        skip_scores=skip_scores,
        skip_labels=skip_labels)

    if export_dir:
        if (keep_image_id_for_visualization_export and
                result_dict[fields.InputDataFields().key]):
            export_path = os.path.join(
                export_dir,
                'export-{}-{}.png'.format(
                    tag, result_dict[fields.InputDataFields().key]))
        else:
            export_path = os.path.join(export_dir,
                                       'export-{}.png'.format(tag))
        vis_utils.save_image_array_as_png(image, export_path)

    summary = tf.Summary(value=[
        tf.Summary.Value(
            tag=tag,
            image=tf.Summary.Image(
                encoded_image_string=vis_utils.encode_image_array_as_png_str(
                    image)))
    ])
    summary_writer = tf.summary.FileWriterCache.get(summary_dir)
    summary_writer.add_summary(summary, global_step)

    logging.info('Detection visualizations written to summary with tag %s.',
                 tag)
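# A hypothetical call of visualize_detection_results; `result_dict` is assumed
# to already hold the keys documented above for one evaluated image, and the
# paths are placeholders.
visualize_detection_results(
    result_dict,
    tag='image-0',
    global_step=1000,
    categories=[{'id': 1, 'name': 'person'}],
    summary_dir='/tmp/eval',
    export_dir='/tmp/eval/images',
    min_score_thresh=0.5)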
reference_pts = np.float32([
    list(reference_tags[0]['center']),
    list(reference_tags[1]['center']),
    list(reference_tags[2]['center']),
    list(reference_tags[3]['center'])
])

# cv2.namedWindow("Reference", cv2.WINDOW_KEEPRATIO)
cv2.namedWindow("Distorted", cv2.WINDOW_KEEPRATIO)
cv2.namedWindow("Reverse Warped", cv2.WINDOW_KEEPRATIO)

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

sess = tf.Session(graph=detection_graph)
image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
def get_num_classes(pbtxt_fname):
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index.keys())
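# Example use, assuming the stock COCO label map shipped with the API is
# available at this relative path:
print(get_num_classes('object_detection/data/mscoco_label_map.pbtxt'))  # 90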
def run(self):
    time1 = time.time()
    MIN_ratio = 0.8

    # MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
    GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
    LABEL_FILE = 'data/mscoco_label_map.pbtxt'
    NUM_CLASSES = 90
    # end define

    label_map = lmu.load_labelmap(LABEL_FILE)
    categories = lmu.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    categories_index = lmu.create_category_index(categories)
    print("call label_map & categories : %0.5f" % (time.time() - time1))

    graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME

    # thread function
    def find_detection_target(categories_index, classes, scores):
        time1_1 = time.time()  # thread function start time
        print("thread started")
        objects = []  # list of detections
        for index, value in enumerate(classes[0]):
            object_dict = {}  # one dict per detection
            if scores[0][index] > MIN_ratio:
                object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                    scores[0][index]
                objects.append(object_dict)  # append to the list
        print(objects)
        print("thread function processing time %0.5f" % (time.time() - time1_1))
    # end thread function

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(graph_file, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        sess = tf.Session(graph=detection_graph)
    print("store in memory time : %0.5f" % (time.time() - time1))

    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
    print("make tensor time : %0.5f" % (time.time() - time1))

    # capture = cv2.VideoCapture(0)
    capture = cv2.VideoCapture("20190916_162900.mp4")
    prevtime = 0
    # thread_1 = Process(target=find_detection_target,
    #                    args=(categories_index, classes, scores))  # create thread
    print("load video time : %0.5f" % (time.time() - time1))

    while True:
        ret, frame = capture.read()
        frame_expanded = np.expand_dims(frame, axis=0)
        height, width, channel = frame.shape

        # Display FPS on the frame.
        curtime = time.time()
        sec = curtime - prevtime
        prevtime = curtime
        fps = 1 / sec
        fps_text = "FPS : %0.1f" % fps
        cv2.putText(frame, fps_text, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                    (0, 255, 0))
        # end FPS

        (boxes, scores, classes, nums) = sess.run(  # np.ndarray
            [detection_boxes, detection_scores, detection_classes,
             num_detections],
            feed_dict={image_tensor: frame_expanded})
        # end sess.run()

        vis_util.visualize_boxes_and_labels_on_image_array(
            frame,
            np.squeeze(boxes),
            np.squeeze(classes).astype(np.int32),
            np.squeeze(scores),
            categories_index,
            use_normalized_coordinates=True,
            min_score_thresh=MIN_ratio,  # minimum detection score
            line_thickness=2)  # line thickness

        if ret:
            # https://stackoverflow.com/a/55468544/6622587
            rgbImage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            h, w, ch = rgbImage.shape
            bytesPerLine = ch * w
            convertToQtFormat = QtGui.QImage(rgbImage.data, w, h, bytesPerLine,
                                             QtGui.QImage.Format_RGB888)
            p = convertToQtFormat.scaled(640, 480, Qt.KeepAspectRatio)
            self.changePixmap.emit(p)

        # objects = []  # list of detections
        for index, value in enumerate(classes[0]):
            object_dict = {}  # one dict per detection
            if scores[0][index] > MIN_ratio:
                object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                    scores[0][index]
                # objects.append(object_dict)  # append to the list

                # visualize_boxes_and_labels_on_image_array box_size_info
                # for box, color in box_to_color_map.items():
                #     ymin, xmin, ymax, xmax = box  # [index][0] [1] [2] [3]
                ymin = int((boxes[0][index][0] * height))
                xmin = int((boxes[0][index][1] * width))
                ymax = int((boxes[0][index][2] * height))
                xmax = int((boxes[0][index][3] * width))

                Result = frame[ymin:ymax, xmin:xmax]
                cv2.imwrite('car.jpg', Result)
                try:
                    result_chars = NP.number_recognition('car.jpg')
                    ui.label_6.setText(result_chars)
                    # print(NP.check())
                except:
                    print("plate recognition failed")
                # cv2.imshow('re', Result)
        # print(objects)

        key = cv2.waitKey(1) & 0xFF
        if key == ord("q"):
            break
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_tfrecord_paths', 'output_tfrecord_path', 'inference_graph',
        'meta', 'label_map'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    # Load meta data for yolo.
    meta = detection_inference.build_meta(FLAGS.meta)

    # Load the category_index of COCO data from the Google detection API.
    NUM_CLASSES = 90
    label_map = label_map_util.load_labelmap(FLAGS.label_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # pdb.set_trace()

    with tf.Session() as sess:
        input_tfrecord_paths = [
            v for v in FLAGS.input_tfrecord_paths.split(',') if v
        ]
        tf.logging.info('Reading input from %d files',
                        len(input_tfrecord_paths))
        serialized_example_tensor, image_tensor = detection_inference.build_input(
            meta, input_tfrecord_paths)
        tf.logging.info('Reading graph and building model...')
        detected_boxes_tensor = detection_inference.build_inference_graph(
            image_tensor, FLAGS.inference_graph)
        tf.logging.info('Running inference and writing output to {}'.format(
            FLAGS.output_tfrecord_path))
        sess.run(tf.local_variables_initializer())
        tf.train.start_queue_runners()

        print("entering inference loop")
        starttime = datetime.datetime.now()
        with tf.python_io.TFRecordWriter(
                FLAGS.output_tfrecord_path) as tf_record_writer:
            try:
                for counter in itertools.count():
                    tf.logging.log_every_n(tf.logging.INFO,
                                           'Processed %d images...', 10,
                                           counter)
                    t0 = datetime.datetime.now()
                    tf_example = detection_inference.infer_detections_and_add_to_example(
                        meta, category_index, serialized_example_tensor,
                        detected_boxes_tensor, FLAGS.discard_image_pixels)
                    t1 = datetime.datetime.now()
                    tf.logging.info('processed an image in %d ms',
                                    (t1 - t0).microseconds / 1000)
                    tf_record_writer.write(tf_example.SerializeToString())
            except tf.errors.OutOfRangeError:
                tf.logging.info('Finished processing records')
        endtime = datetime.datetime.now()
        print("running time is")
        print((endtime - starttime).seconds)
def _create_tf_record_from_coco_annotations(annotations_file,
                                            image_dir,
                                            output_path,
                                            include_masks,
                                            num_shards,
                                            keypoint_annotations_file='',
                                            densepose_annotations_file='',
                                            remove_non_person_annotations=False,
                                            remove_non_person_images=False):
    """Loads COCO annotation json files and converts to tf.Record format.

    Args:
      annotations_file: JSON file containing bounding box annotations.
      image_dir: Directory containing the image files.
      output_path: Path to output tf.Record file.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
      num_shards: number of output file shards.
      keypoint_annotations_file: JSON file containing the person keypoint
        annotations. If empty, then no person keypoint annotations will be
        generated.
      densepose_annotations_file: JSON file containing the DensePose
        annotations. If empty, then no DensePose annotations will be
        generated.
      remove_non_person_annotations: Whether to remove any annotations that
        are not the "person" class.
      remove_non_person_images: Whether to remove any images that do not
        contain at least one "person" annotation.
    """
    with contextlib2.ExitStack() as tf_record_close_stack, \
            tf.gfile.GFile(annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)
        missing_annotation_count = 0
        for image in images:
            image_id = image['id']
            if image_id not in annotations_index:
                missing_annotation_count += 1
                annotations_index[image_id] = []
        logging.info('%d images are missing annotations.',
                     missing_annotation_count)

        keypoint_annotations_index = {}
        if keypoint_annotations_file:
            with tf.gfile.GFile(keypoint_annotations_file, 'r') as kid:
                keypoint_groundtruth_data = json.load(kid)
            if 'annotations' in keypoint_groundtruth_data:
                for annotation in keypoint_groundtruth_data['annotations']:
                    image_id = annotation['image_id']
                    if image_id not in keypoint_annotations_index:
                        keypoint_annotations_index[image_id] = {}
                    keypoint_annotations_index[image_id][
                        annotation['id']] = annotation

        densepose_annotations_index = {}
        if densepose_annotations_file:
            with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
                densepose_groundtruth_data = json.load(fid)
            if 'annotations' in densepose_groundtruth_data:
                for annotation in densepose_groundtruth_data['annotations']:
                    image_id = annotation['image_id']
                    if image_id not in densepose_annotations_index:
                        densepose_annotations_index[image_id] = {}
                    densepose_annotations_index[image_id][
                        annotation['id']] = annotation

        total_num_annotations_skipped = 0
        total_num_keypoint_annotations_skipped = 0
        total_num_densepose_annotations_skipped = 0
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(images))
            annotations_list = annotations_index[image['id']]
            keypoint_annotations_dict = None
            if keypoint_annotations_file:
                keypoint_annotations_dict = {}
                if image['id'] in keypoint_annotations_index:
                    keypoint_annotations_dict = keypoint_annotations_index[
                        image['id']]
            densepose_annotations_dict = None
            if densepose_annotations_file:
                densepose_annotations_dict = {}
                if image['id'] in densepose_annotations_index:
                    densepose_annotations_dict = densepose_annotations_index[
                        image['id']]
            (_, tf_example, num_annotations_skipped,
             num_keypoint_annotations_skipped,
             num_densepose_annotations_skipped) = create_tf_example(
                 image, annotations_list, image_dir, category_index,
                 include_masks, keypoint_annotations_dict,
                 densepose_annotations_dict, remove_non_person_annotations,
                 remove_non_person_images)
            total_num_annotations_skipped += num_annotations_skipped
            total_num_keypoint_annotations_skipped += (
                num_keypoint_annotations_skipped)
            total_num_densepose_annotations_skipped += (
                num_densepose_annotations_skipped)
            shard_idx = idx % num_shards
            if tf_example:
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
        logging.info('Finished writing, skipped %d annotations.',
                     total_num_annotations_skipped)
        if keypoint_annotations_file:
            logging.info('Finished writing, skipped %d keypoint annotations.',
                         total_num_keypoint_annotations_skipped)
        if densepose_annotations_file:
            logging.info('Finished writing, skipped %d DensePose annotations.',
                         total_num_densepose_annotations_skipped)
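# A hypothetical invocation of the converter above; all paths are placeholders
# for a local COCO download.
_create_tf_record_from_coco_annotations(
    annotations_file='annotations/instances_val2017.json',
    image_dir='val2017',
    output_path='coco_val.record',
    include_masks=False,
    num_shards=10)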
def object_detection(t, tensorflow_venv, object_detection_api, model_path,
                     detection_threshold, detection_graph, sess,
                     category_index, bridge, sign,
                     camera):  # , left_eye, right_eye):
    from PIL import Image, ImageDraw
    import numpy as np

    # Initialize the TensorFlow Object Detection session and store it as
    # needed.
    if detection_graph.value is None:
        # Import TensorFlow in the NRP; update this path for your local
        # installation.
        try:
            import site
            site.addsitedir(tensorflow_venv.value +
                            '/lib/python2.7/site-packages')
            import tensorflow as tf
        except:
            clientLogger.info(
                "Unable to import TensorFlow, did you change the path in the "
                "transfer function?")
            raise

        # Configure the Object Detection environment.
        import sys
        # Paths to saved model states; update these paths if different in your
        # local installation.
        MODEL_BASE = object_detection_api.value
        sys.path.append(MODEL_BASE)
        sys.path.append(MODEL_BASE + '/object_detection')
        sys.path.append(MODEL_BASE + '/slim')
        PATH_TO_CKPT = model_path.value + '/frozen_inference_graph.pb'
        PATH_TO_LABELS = model_path.value + '/label_map.pbtxt'

        # Initialize the detection graph.
        import object_detection.utils.label_map_util as label_map_util
        # from utils import label_map_util
        detection_graph.value = tf.Graph()
        with detection_graph.value.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            sess.value = tf.Session(graph=detection_graph.value)

        # Create internal label and category mappings.
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=4, use_display_name=True)
        category_index.value = label_map_util.create_category_index(categories)

        # OpenCV bridge for ROS <-> CV image conversion.
        from cv_bridge import CvBridge, CvBridgeError
        bridge.value = CvBridge()

        # Initialized; start searching.
        sign.value = ''

    # No image received yet, do nothing.
    if camera.value is None:
        return

    # Convert the ROS image to an OpenCV image and a numpy array.
    cv_image = bridge.value.imgmsg_to_cv2(camera.value, "rgb8")
    numpy_image = np.expand_dims(cv_image, axis=0)

    # Run the actual detection.
    image_tensor = detection_graph.value.get_tensor_by_name('image_tensor:0')
    boxes = detection_graph.value.get_tensor_by_name('detection_boxes:0')
    scores = detection_graph.value.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.value.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.value.get_tensor_by_name(
        'num_detections:0')
    (boxes, scores, classes, num_detections) = sess.value.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: numpy_image})
    boxes, scores, classes, num_detections = map(
        np.squeeze, [boxes, scores, classes, num_detections])

    # Annotate detections on the image.
    pil_image = Image.fromarray(cv_image)
    detections = []
    closest_sign = {'name': sign.value, 'square': -1}
    for i in range(int(num_detections)):
        # Only accept high enough detection scores.
        if scores[i] < detection_threshold.value:
            continue
        name = category_index.value[classes[i]]['name']
        # Log the detection at timestamp t.
        clientLogger.info(t, name, scores[i])
        detections.append(name)
        # Annotate the image with boxes.
        draw = ImageDraw.Draw(pil_image)
        im_width, im_height = pil_image.size
        ymin, xmin, ymax, xmax = boxes[i]
        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
        draw.line(
            [(left, top), (left, bottom), (right, bottom), (right, top),
             (left, top)],
            width=int(scores[i] * 10) - 4,
            fill='red')
        square = (xmax - xmin) * (ymax - ymin)
        if closest_sign['square'] < square:
            closest_sign['square'] = square
            closest_sign['name'] = name

    clientLogger.info("Current sign:", closest_sign['name'])
    sign.value = closest_sign['name']
    # Publish a ROS image with annotations.
    return bridge.value.cv2_to_imgmsg(np.array(pil_image), "rgb8")
def camera_connection(process):
    inside = 0
    print("Started process", process)
    # log.info("{}:Started process".format(process))

    # Taking the current directory.
    CWD_PATH = os.getcwd()
    # warning_enable = False
    try:
        # General name for the model.
        MODEL_NAME = 'inference_graph1'
        # Assigning the label txt file.
        PATH_TO_LABELS = os.path.join(CWD_PATH, 'training1', 'labelmap.pbtxt')
        # PATH_TO_LABELS = "/home/server3/tensorflow1/models/research/object_detection/shell2sideviewtraining/labelmap.pbtxt"
        NUM_CLASSES = 6

        # Path to the frozen detection graph .pb file, which contains the
        # model that is used for object detection.
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                    'frozen_inference_graph.pb')

        # Load the label map file.
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        # print("label_map={}".format(label_map))
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        # Taking categories.
        category_index = label_map_util.create_category_index(categories)

        # Load the Tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            # with tf.io.GFile(PATH_TO_CKPT, 'rb') as fid:
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        # This is TensorFlow 1.x code to run the above model.
        sess = tf.Session(graph=detection_graph)

        # Define input and output tensors (i.e. data) for the object detection
        # classifier. The input tensor is the image.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Output tensors are the detection boxes, scores, and classes. Each
        # box represents a part of the image where a particular object was
        # detected.
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')
        # Each score represents the level of confidence for each of the
        # objects. The score is shown on the result image, together with the
        # class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        # Class detection.
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')
        # Number of objects detected.
        num_detections = detection_graph.get_tensor_by_name(
            'num_detections:0')
        # print("Biswa", num_detections)

        # Reading a local video.
        cap = cv2.VideoCapture(
            '/home/vert/Foreign_Obj_Det_Project/Project_Demo_Video/Shell2FrontView_wop_20210209_140215007565.avi'
        )

        # Running a while loop over the video to test the code.
        while True:
            _, img = cap.read()
            img1 = img.copy()

            # Scaling down the image for display.
            scale_percent1 = 40  # percent of original size
            width1 = int(img1.shape[1] * scale_percent1 / 100)
            height1 = int(img1.shape[0] * scale_percent1 / 100)
            dim1 = (width1, height1)
            nf1 = cv2.resize(img1, dim1, interpolation=cv2.INTER_AREA)
            # vout1.write(nf1)
            frame_expanded = np.expand_dims(img, axis=0)
            # img = nf1.copy()

            # DRAWING LINES for shell2 side view.
            # Left LINE
            x1, y1 = 800, 950
            x1, y2 = 600, 1050
            # off1 = 2.0
            cv2.line(img, (x1, y1), (x1, y2), (0, 0, 255), 2)
            # Right LINE
            X1, Y1 = 1200, 950
            X1, Y2 = 1200, 1050
            # off2 = 2.0275
            cv2.line(img, (X1, Y1), (X1, Y2), (0, 0, 255), 2)
            # LINE - 3
            p1, q1 = 600, 750
            p2, q1 = 1150, 950
            # off3 = 10
            cv2.line(img, (p1, q1), (p2, q1), (0, 0, 255), 2)
            # LINE - 4
            P1, Q1 = 600, 850
            P2, Q1 = 1150, 1100
            # off3 = 10
            cv2.line(img, (P1, Q1), (P2, Q1), (0, 0, 255), 2)

            # DRAWING LINES for shell1 front view (alternative coordinates,
            # kept commented out):
            # x1, y1 = 700, 800
            # x1, y2 = 600, 950
            # cv2.line(img, (x1, y1), (x1, y2), (0, 0, 255), 2)
            # X1, Y1 = 1200, 800
            # X1, Y2 = 1200, 950
            # cv2.line(img, (X1, Y1), (X1, Y2), (0, 0, 255), 2)
            # p1, q1 = 600, 750
            # p2, q1 = 1200, 800
            # cv2.line(img, (p1, q1), (p2, q1), (0, 0, 255), 2)
            # P1, Q1 = 600, 850
            # P2, Q1 = 1200, 950
            # off3 = 10
            # cv2.line(img, (P1, Q1), (P2, Q1), (0, 0, 255), 2)
            # print("Empty class found end")
            # print("{} shape {}".format(process, frame_expanded.shape))
            # log.info("{} shape {}".format(process, frame_expanded.shape))

            # Assigning the boxes, scores, classes and num from the model.
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes,
                 num_detections],
                feed_dict={image_tensor: frame_expanded})

            # Converting the image to an array to pass into the model.
            img1 = np.array(img1)
            # Taking coordinates from the (locally modified) function in the
            # util folder.
            im, ymin, xmin, ymax, xmax = vis_util.visualize_boxes_and_labels_on_image_array(
                img1,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.40)

            # Taking box details.
            boxes1 = np.squeeze(boxes)  # get all boxes from an array
            max_boxes_to_draw = boxes1.shape[0]
            # Get scores to apply a threshold.
            scores1 = np.squeeze(scores)
            # print(scores1)
            # This is set as a default, but feel free to adjust it to your
            # needs.
            min_score_thresh = .4

            # Checking for the Person class in the video to take the time.
            for i in range(min(max_boxes_to_draw, boxes1.shape[0])):
                if scores1 is None or scores1[i] > min_score_thresh:
                    class_name = category_index[np.squeeze(classes).astype(
                        np.int32)[i]]['name']
                    # If the object is a person, process the lines below.
                    if class_name == "Person":
                        # print(class_name)
                        # Taking the shape of the image.
                        h, w = img.shape[:2]
                        # Taking l, r, t, b values from the box.
                        l, r, t, b = (int(xmin * w), int(xmax * w),
                                      int(ymin * h), int(ymax * h))
                        # Printing logger.
                        logger.info("print l,r,t,b-{}-{}-{}-{}".format(
                            l, r, t, b))
                        # print("Coordinates-{}-{}-{}-{}".format(l, r, t, b))
                        # Creating a rectangle around the object.
                        cv2.rectangle(img, (l, b), (r, t), (1, 190, 200), 2)
                        logger.info("print image shape-{}".format(img.shape))
                        # Creating a bounding box with a different color:
                        # cv2.rectangle(img, (l, b), (r, t), (0, 0, 255), 1)

                        # Centroid calculation.
                        c1 = int(l + ((r - l) / 2))
                        c2 = int(t + ((b - t) / 2))
                        logger.info("print c1 and c2 -{}-{}".format(c1, c2))
                        # Drawing a centroid inside the object.
                        cv2.circle(img, (c1, c2), 1, (255, 153, 255), 3)
                        cv2.putText(img, "Person", (l, t),
                                    cv2.FONT_HERSHEY_PLAIN, 1,
                                    (255, 127, 0), 1)

                        # Checking the distance to take the time:
                        # if q1 < c2 + offset and q1 > c2 - offset:
                        #     print("Person Crossed The Line")
                        #     # Writing the current timestamp to a txt file.
                        #     with open("/home/vert/Desktop/Time_Tracking/Log.txt", "a") as f:
                        #         f.write("Person Crossed Line-{}".format(datetime.now()))
                        #         f.write("\n")

                        # Checking the condition to take the time.
                        if q1 < c2 < Q1 and x1 < c1 < X1:
                            inside += 1
                            if inside == 1:
                                # The person is inside the warning zone.
                                print("INSIDE WARNING ZONE")
                                with open(
                                        "/home/vert/Desktop/Time_Tracking/Log.txt",
                                        "a") as f:
                                    f.write("Person Crossed Line-{}".format(
                                        datetime.now()))
                                    f.write("\n")
                                cv2.putText(img, "Inside Warning zone",
                                            (75, 75),
                                            cv2.FONT_HERSHEY_SIMPLEX, 2,
                                            (0, 0, 255), 2)
                        else:
                            # if (inside >= 1 and c2 < q1) or (inside >= 1 and c2 > Q1):
                            if inside > 0:
                                # The person is out.
                                print("OUTSIDE WARNING ZONE")
                                with open(
                                        "/home/vert/Desktop/Time_Tracking/Log.txt",
                                        "a") as f:
                                    f.write("Outside Warning Zone-{}".format(
                                        datetime.now()))
                                    f.write("\n")
                                cv2.putText(img, "Out of Warning Zone",
                                            (75, 75),
                                            cv2.FONT_HERSHEY_SIMPLEX, 2,
                                            (130, 255, 255), 2)
                                # Reset inside to 0.
                                inside = 0
                            else:
                                pass

            cv2.imshow("SSD Model Image", nf1)
            cv2.imshow("Normal Image", img)
            cv2.waitKey(1)
            # if cv2.waitKey(1) == 27:
            #     break
    except Exception as exc:
        print("Ending processing... there's an exception-{}".format(exc))
def __init__(self):
    # ROS initialize
    # rospy.init_node('ros_tensorflow_ObjectDetection')
    # rospy.on_shutdown(self.shutdown)

    # Set model path and image topic
    # model_path = rospy.get_param("~model_path", "")
    # image_topic = rospy.get_param("~image_topic", "")
    # self._cv_bridge = CvBridge()
    # rospy.loginfo("finding model path...")

    '''Select the model path, model label and model name, including
    'MODEL_NAME', 'PATH_TO_CKPT' and 'PATH_TO_LABELS'.'''
    # MODEL_NAME = '/outputing'
    # PATH_TO_CKPT = model_path + MODEL_NAME + '/frozen_inference_graph.pb'
    # PATH_TO_LABELS = os.path.join(model_path + '/data', 'frame_label_map.pbtxt')

    # What model to download.
    # MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    # MODEL_FILE = MODEL_NAME + '.tar.gz'
    # DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to the frozen detection graph. This is the actual model that is
    # used for the object detection.
    # PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that are used to add the correct label to each box.
    # PATH_TO_LABELS = os.path.join(model_path + '/data', 'mscoco_label_map.pbtxt')

    # NUM_CLASSES = 1
    NUM_CLASSES = 90

    # Download model.
    # rospy.loginfo("Downloading models...")  # send loginfo
    # opener = urllib.request.URLopener()
    # opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # tar_file = tarfile.open(MODEL_FILE)
    # for file in tar_file.getmembers():
    #     file_name = os.path.basename(file.name)  # use os.path.basename
    #     if 'frozen_inference_graph.pb' in file_name:
    #         tar_file.extract(file, os.getcwd())

    # Load a (frozen) Tensorflow model into memory.
    # self.detection_graph = tf.Graph()
    # with self.detection_graph.as_default():
    #     od_graph_def = tf.GraphDef()
    #     with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
    #         serialized_graph = fid.read()
    #         od_graph_def.ParseFromString(serialized_graph)
    #         tf.import_graph_def(od_graph_def, name='')
    # rospy.loginfo("loading models' label ......")
    # rospy.loginfo("please wait")

    # Loading label map.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    self.category_index = label_map_util.create_category_index(categories)

    # Initialize ROS Subscriber and Publisher
    # self._sub = rospy.Subscriber(image_topic, ROSImage, self.callback, queue_size=10)
    # self._pub = rospy.Publisher('object_detection', ROSImage, queue_size=1)
    # rospy.loginfo("Start object detector ...")

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
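    # Sketch (assumption): the growth-enabled config above is typically passed
    # into the session that executes the detection graph, assuming the
    # commented-out graph-loading block were enabled; the attribute name
    # `self._session` is hypothetical.
    # self._session = tf.Session(graph=self.detection_graph, config=config)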
def __init__(self, *args, **kwds):
    # Detection and timing metrics
    self.tmin = 100
    self.tmax = 0
    self.ttot = 0
    self.tcount = 0
    self.dmin = 100
    self.dmax = 0
    self.dboxtot = 0
    self.davgtot = 0
    self.dcount = 0

    # begin wxGlade: MyFrame.__init__
    kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE
    wx.Frame.__init__(self, *args, **kwds)
    self.SetSize((812, 522))
    self.Bind(wx.EVT_KEY_DOWN, self.KeyDown)

    # Menu Bar
    self.frame_menubar = wx.MenuBar()
    wxglade_tmp_menu = wx.Menu()
    item = wxglade_tmp_menu.Append(wx.ID_ANY, u"Configuración", "")
    self.Bind(wx.EVT_MENU, self.configuraciónClick, id=item.GetId())
    item = wxglade_tmp_menu.Append(wx.ID_ANY, "Acerca de...", "")
    self.Bind(wx.EVT_MENU, self.acercaDeClick, id=item.GetId())
    item = wxglade_tmp_menu.Append(wx.ID_ANY, "Salir", "")
    self.Bind(wx.EVT_MENU, self.salirClick, id=item.GetId())
    self.frame_menubar.Append(wxglade_tmp_menu, "Menu")
    wxglade_tmp_menu = wx.Menu()
    item = wxglade_tmp_menu.Append(wx.ID_ANY, "Start/Pause\tSPACE BAR", "")
    self.Bind(wx.EVT_MENU, self.cambiarEstadoCNN, id=item.GetId())
    '''accel_tbl = wx.AcceleratorTable([(wx.ACCEL_CTRL, ord('s'), item.GetId())])
    self.SetAcceleratorTable(accel_tbl)'''
    item = wxglade_tmp_menu.Append(wx.ID_ANY, "Person/Empty\tCTRL", "")
    self.Bind(wx.EVT_MENU, self.cambiarAnotation, id=item.GetId())
    '''accel_tbl = wx.AcceleratorTable([(wx.ACCEL_CTRL, ord('a'), item.GetId())])
    self.SetAcceleratorTable(accel_tbl)'''
    self.frame_menubar.Append(wxglade_tmp_menu, "Capturar Frames")
    self.SetMenuBar(self.frame_menubar)
    # Menu Bar end

    self.label_1 = wx.StaticText(self, wx.ID_ANY, "Ubicaciones:")
    self.cantUbicaciones = wx.StaticText(self, wx.ID_ANY, "0")
    self.label_2 = wx.StaticText(self, wx.ID_ANY, "Ocupadas: ")
    self.cantOcupadas = wx.StaticText(self, wx.ID_ANY, "0")
    self.label_3 = wx.StaticText(self, wx.ID_ANY, "Libres:")
    self.cantLibres = wx.StaticText(self, wx.ID_ANY, "0")
    self.__set_properties()
    self.__do_layout()
    # end wxGlade

    # Create objects
    self.THROBLESHOOT = 0.0001  # Default = 0.7
    self.RN = False  # Enables/disables the neural network
    self.CaptureWidth = 720
    self.CaptureHeight = 1280

    # For the live camera
    self.Screen1Width = 360
    self.Screen1Height = 640
    # Static bitmaps for OpenCV images
    self.Screen1 = wx.StaticBitmap(self, size=(self.Screen1Width, self.Screen1Height))
    img = wx.Image('imagenes/bancaLibre.png').Scale(
        self.Screen1Width, self.Screen1Height, wx.IMAGE_QUALITY_HIGH)
    self.wxbmp = img.ConvertToBitmap()
    self.num = -1
    self.boxes = 0
    self.scores = 0
    self.classes = 0
    self.sizer_2.Add(self.Screen1, 1, wx.FIXED_MINSIZE | wx.ALL, 5)
    self.Screen1.Bind(wx.EVT_ERASE_BACKGROUND, self.onEraseBackground)
    self.Screen1.Bind(wx.EVT_PAINT, self.onPaint)

    # Add objects to sizer
    # self.sizer_2.Add(self.Screen1, 0, wx.EXPAND | wx.ALL, 10)

    # For the analysis result
    self.Screen2Width = 550
    self.Screen2Height = 270

    # Maximize the window so it fills the whole desktop except the taskbar
    c_x, c_y, c_w, c_h = wx.ClientDisplayRect()
    self.SetSize((c_w, c_h))
    self.SetPosition((c_x, c_y))
    # Window on half of the desktop
    self.SetSize((c_w / 2, c_h))
    self.SetPosition((c_w / 2, c_y))

    # Get the position of each location within the full capture
    path_locations = 'configuracion'
    self.images_location = self.xml_to_locations(path_locations)
    self.cantUbicaciones.Label = str(len(self.images_location))
    self.cantLibres.Label = str(len(self.images_location))

    # List holding the state of each bench:
    #   [OK] = occupied, [ ] = free, [?] = undetermined
    self.locations_state = []
    self.imagenes_bancas_select = {
        "ocupada": 'imagenes/bancaOcupadaSelect.png',
        "libre": 'imagenes/bancaLibreSelect.png',
        "indeterminado": 'imagenes/bancaIndeterminadoSelect.png'
    }

    # Create one bench per position stored in the XML and keep them in a list.
    # The StaticBitmaps will hold the images for the bench states.
    self.screen_list = []
    for i in self.images_location:
        sb = wx.StaticBitmap(self, size=(self.Screen2Width, self.Screen2Height))
        # sb.SetPosition(wx.Point(0, 0))
        self.screen_list.append(banca.Banca(sb, i[0], i[1], i[2], i[3], i[4]))

    # Dictionary for looking up each bench's data when a bench is clicked
    self.dict_bancas = {}  # create an empty dictionary
    for i in range(len(self.screen_list)):
        self.dict_bancas[self.screen_list[i].staticBitmap] = self.screen_list[i]

    # Set state, position and event handlers of each StaticBitmap
    for i in self.screen_list:
        # Position proportional to the screen size and the capture size
        xmin, ymin = i.getPosicionXML()
        xpos = int((xmin / self.CaptureWidth) * self.Screen2Width)
        ypos = int((ymin / self.CaptureHeight) * self.Screen2Height)
        x, y = self.sizer_3.GetPosition()
        i.setPosicionVentana(x + xpos, y + ypos)
        # Set the events
        i.staticBitmap.Bind(wx.EVT_LEFT_UP, self.bancaClick)
        i.staticBitmap.Bind(wx.EVT_ENTER_WINDOW, self.onMouseOverBanca)
        i.staticBitmap.Bind(wx.EVT_LEAVE_WINDOW, self.onMouseOutBanca)
        # Set the cursor shown over the bench
        i.staticBitmap.SetCursor(wx.Cursor(wx.CURSOR_HAND))

    self.tiempo1 = time.now()
    ipcamUrl = 'http://*****:*****@192.168.43.1:8081'
    ipcamUrlCelRodri = 'http://*****:*****@192.168.43.25:8081'
    # ipcamUrl = 'http://*****:*****@192.168.43.93:8081'
    # ipcamUrl = 'toma_lateral.mov'
    ipcam = {}
    ipcamDesc = 'Celular'
    ipcam[ipcamDesc] = urlparse(ipcamUrl)
    print(time.now())

    # Test the connection to the target IP
    if len(ipcamUrl) > 5:
        err, errMsg = self.urlTest(ipcam[ipcamDesc].hostname, ipcam[ipcamDesc].port)
        if err > 0:
            print(time.now(), "Falló conexión. ", errMsg)
            exit(1)

    try:
        self.capture = cv2.VideoCapture(ipcamUrl)
        self.capture.set(3, self.CaptureWidth)   # 1024 640 1280 800 384
        self.capture.set(4, self.CaptureHeight)  # 600 480 960 600 288
        fourcc = cv2.VideoWriter_fourcc(*'MJPG')
        self.out = cv2.VideoWriter('output.avi', fourcc, 20.0,
                                   (int(self.capture.get(4)), int(self.capture.get(3))))
        sys.path.append("..")

        # Import of the object detection module
        from object_detection.utils import label_map_util
        from object_detection.utils import visualization_utils as vis_util
        PATH_TO_CKPT = 'modelo_congelado/frozen_inference_graph.pb'
        PATH_TO_LABELS = os.path.join('configuracion', 'label_map.pbtxt')
        NUM_CLASSES = 90
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)
    except IOError as e:
        print(time.now(), "Error abriendo socket: ", ipcamUrl)
    except KeyboardInterrupt as e:
        print(time.now(), "Detenido por teclado.")
    except BaseException as e:
        print(time.now(), "Error desconocido: ", e)
        # if e.number == -138:
        #     print("Compruebe la conexión con '" + ipcamUrl + "'")
        # else:
        #     print("Error: " + e.message)
    finally:
        # self.capture.release()
        cv2.destroyAllWindows()

    with self.detection_graph.as_default():
        # One long-lived session bound to the detection graph
        self.sess = tf.Session(graph=self.detection_graph)
        self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        self.detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        self.detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
        self.detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
        self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')

    # Create a timer to:
    #  1) refresh the information on screen
    #  2) trigger the CNN and collect the analysis data
    self.timer = wx.Timer(self)
    self.Bind(wx.EVT_TIMER, self.OnTimer)

    # The CNN starts out inactive
    self.analisis = 'PAUSADO'
    self.anotation = 'Person'
    self.Bind(wx.EVT_CLOSE, self.onClose)
    self.Bind(wx.EVT_LEFT_UP, self.VentanaClick)

    # Program state
    self.STATE_RUNNING = 1
    self.STATE_CLOSING = 2
    self.state = self.STATE_RUNNING

    # How often the timer fires
    self.fps = 60
    self.timer.Start(1000. / self.fps)  # timer interval

    # Number of timer cycles during which the CNN stays idle, to avoid lag
    self.FREC = 10
    self.FRECUENCIA_CNN = self.FREC
    self.imagenes = 4000
    self.VisualBoxes = 0
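# The constructor above calls self.urlTest(hostname, port) before opening the
# capture, but its implementation is not shown here. A plausible sketch,
# assuming it returns an (error_code, message) pair as the call site suggests;
# the real method may differ.
import socket

def urlTest(self, hostname, port, timeout=3.0):
    """Return (0, '') if a TCP connection to hostname:port succeeds,
    otherwise (1, error message)."""
    try:
        with socket.create_connection((hostname, port or 80), timeout=timeout):
            return 0, ''
    except OSError as e:
        return 1, str(e)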
def main(_):
    assert FLAGS.train_dir, '`train_dir` is missing.'
    if FLAGS.task == 0:
        tf.gfile.MakeDirs(FLAGS.train_dir)
    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        if FLAGS.task == 0:
            tf.gfile.Copy(FLAGS.pipeline_config_path,
                          os.path.join(FLAGS.train_dir, 'pipeline.config'),
                          overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            train_config_path=FLAGS.train_config_path,
            train_input_config_path=FLAGS.input_config_path)
        if FLAGS.task == 0:
            for name, config in [('model.config', FLAGS.model_config_path),
                                 ('train.config', FLAGS.train_config_path),
                                 ('input.config', FLAGS.input_config_path)]:
                tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name),
                              overwrite=True)

    model_config = configs['model']
    train_config = configs['train_config']
    input_config = configs['train_input_config']

    model_fn = functools.partial(
        model_builder.build, model_config=model_config, is_training=True)

    # iterator = dataset_util.make_initializable_iterator(dataset_builder.build(input_config))
    datasetmy = dataset_builder.build(input_config)
    iterator = datasetmy.make_initializable_iterator()

    def get_next(config):
        return iterator.get_next()

    create_input_dict_fn = functools.partial(get_next, input_config)

    data_augmentation_options = [
        preprocessor_builder.build(step)
        for step in train_config.data_augmentation_options]

    input_queue = trainer.create_input_queue(
        train_config.batch_size, create_input_dict_fn,
        train_config.batch_queue_capacity,
        train_config.num_batch_queue_threads,
        train_config.prefetch_queue_capacity, data_augmentation_options)

    tensors = input_queue.dequeue()
    # Print all tensors in the TFRecord
    print(tensors)
    groundtruth_difficult = tensors[0]['groundtruth_difficult']
    groundtruth_group_of = tensors[0]['groundtruth_group_of']
    groundtruth_weights = tensors[0]['groundtruth_weights']
    groundtruth_is_crowd = tensors[0]['groundtruth_is_crowd']
    key = tensors[0]['key']
    groundtruth_boxes = tensors[0]['groundtruth_boxes']
    image = tensors[0]['image']
    groundtruth_area = tensors[0]['groundtruth_area']
    groundtruth_classes = tensors[0]['groundtruth_classes']
    filename = tensors[0]['filename']
    num_groundtruth_boxes = tensors[0]['num_groundtruth_boxes']
    source_id = tensors[0]['source_id']

    init_op = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(iterator.initializer)
        sess.run(tf.tables_initializer())
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        sess.run(init_op)
        for i in range(10):
            (groundtruth_weights_val, groundtruth_difficult_val,
             groundtruth_group_of_val, groundtruth_is_crowd_val, key_val,
             groundtruth_boxes_val, image_val, groundtruth_area_val,
             groundtruth_classes_val, filename_val,
             num_groundtruth_boxes_val, source_id_val) = sess.run(
                 [groundtruth_weights, groundtruth_difficult,
                  groundtruth_group_of, groundtruth_is_crowd, key,
                  groundtruth_boxes, image, groundtruth_area,
                  groundtruth_classes, filename, num_groundtruth_boxes,
                  source_id])
            # print(groundtruth_weights_val)
            print(groundtruth_boxes_val)
            # print(groundtruth_difficult_val)
            # print(groundtruth_group_of_val)
            # print(groundtruth_is_crowd_val)
            # print(key_val)
            # print(image_val)
            # print(groundtruth_area_val)
            print(groundtruth_classes_val)
            print(filename_val)
            print(num_groundtruth_boxes_val)
            # print(source_id_val)
            image_val = image_val[0]
            image_val = image_val.astype(np.uint8)
            # cv2.imshow('image', image_val)
            # cv2.waitKey()
            # plt.imshow(image_val)
            # plt.show()
            print('finish')

            # Plot the ground-truth boxes on the image
            plt.switch_backend("TkAgg")
            classes_val = groundtruth_classes_val
            boxes_val = groundtruth_boxes_val
            scores_val = [1.0] * num_groundtruth_boxes_val
            image_np = image_val
            image_np_origin = image_val.copy()
            NUM_CLASSES = 90
            IMAGE_SIZE = (12, 8)
            PATH_TO_LABELS = '../../data/mscoco_label_map.pbtxt'
            label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
            categories = label_map_util.convert_label_map_to_categories(
                label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
            category_index = label_map_util.create_category_index(categories)
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np, boxes_val,
                np.squeeze(classes_val).astype(np.int32),
                np.squeeze(scores_val), category_index,
                use_normalized_coordinates=True, line_thickness=8)
            plt.figure(figsize=IMAGE_SIZE)
            plt.subplot(121)
            plt.imshow(image_np)
            plt.subplot(122)
            plt.imshow(image_np_origin)
            plt.show()
            print('finish')
        coord.request_stop()
        coord.join(threads)
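# The TF1 pattern used above (build a dataset, make an initializable iterator,
# run iterator.initializer before the first fetch) is easy to miss. A
# self-contained toy version with a trivial dataset instead of the detection
# input pipeline:
import tensorflow as tf  # TF 1.x

dataset = tf.data.Dataset.from_tensor_slices(tf.range(5))
iterator = dataset.make_initializable_iterator()
next_element = iterator.get_next()

with tf.Session() as sess:
    sess.run(iterator.initializer)  # must run before the first get_next()
    while True:
        try:
            print(sess.run(next_element))
        except tf.errors.OutOfRangeError:
            break  # dataset exhausted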
def eval_models_general(xml_path, MODEL_NAME, path, PATH_TO_LABELS,
                        threshold_scores, threshold_boxes, width, height):
    global xml_list
    xml_list = []
    for xml_file in glob.glob(xml_path):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            xmin_xml = int(member[4][0].text)
            ymin_xml = int(member[4][1].text)
            xmax_xml = int(member[4][2].text)
            ymax_xml = int(member[4][3].text)
            filename_xml = root.find('filename').text
            if xmin_xml != 0 and ymin_xml != 0 and xmax_xml != 0 and ymax_xml != 0:
                value = [filename_xml, xmin_xml, ymin_xml, xmax_xml, ymax_xml]
                xml_list.append(value)
            # value_num = [int(member[4][0].text), int(member[4][1].text),
            #              int(member[4][2].text), int(member[4][3].text)]
            # all_ground_truth = np.vstack([all_ground_truth, value_num])
            # print(value)
    # a = ['bresil_ad_blue1_100.png', 1000, 200, 300, 400]
    # xml_list.append(a)
    # b = ['bresil_ad_blue1_100.png', 1100, 500, 400, 900]
    # xml_list.append(b)
    xml_list.sort()
    xml_list_names = []
    for i in xml_list:
        xml_list_names.append(i[0])
    '''
    for i in xml_list:
        all_ground_truth = np.vstack([all_ground_truth, i[1:]])
    all_ground_truth = np.delete(all_ground_truth, 0, 0)
    print("\nall_ground_truth")
    print(all_ground_truth)
    '''

    # Name of the directory containing the object detection model we're using
    # MODEL_NAME = '../TensorFlow/trained-inference-graphs/output_inference_graph_v1.pb'
    # Name of the directory containing all images to be predicted
    # path = '/home/igor/Documentos/Developments/Experiment_Plan/Renault_Oficial_Images/ad_blue_all'
    PATH_TO_IMAGE = []
    global IMAGE
    IMAGE = []
    # r=root, d=directories, f=files
    for r, d, f in os.walk(path):
        for file in f:
            if '.jpg' in file:
                PATH_TO_IMAGE.append(os.path.join(r, file))
                IMAGE.append(file)
    IMAGE.sort()

    # Grab path to current working directory
    CWD_PATH = os.getcwd()
    # Path to frozen detection graph .pb file, which contains the model that
    # is used for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, 'frozen_inference_graph.pb')
    # Path to label map file
    # PATH_TO_LABELS = os.path.join(CWD_PATH, '../TensorFlow/annotations', 'label_map.pbtxt')
    # Number of classes the object detector can identify
    NUM_CLASSES = 1
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    sess = tf.Session(graph=detection_graph)

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    # Output tensors are the detection boxes, scores, and classes.
    # Each box represents a part of the image where a particular object was detected.
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # width = 705, height = 447
    # width = 2590, height = 1942
    # Score threshold for drawing boxes
    # threshold_scores = 0.5
    # IoU threshold
    # threshold_boxes = 0.6
    global images_list
    images_list = []
    global pred_labels_list
    pred_labels_list = []
    global result_list
    result_list = []
    global result_num_list
    result_num_list = []
    global all_predictions
    all_predictions = np.zeros([1, 4], dtype=int)
    all_predictions = np.delete(all_predictions, 0, 0)
    global all_ground_truth
    all_ground_truth = np.zeros([1, 4], dtype=int)
    all_ground_truth = np.delete(all_ground_truth, 0, 0)
    FP, TP, FN = 0, 0, 0
    lista_string = list(range(0, 274, 1))  # number of xml files + 1
    x = 0
    for f in range(len(IMAGE)):
        path_to_img = os.path.join(path, IMAGE[f])
        image = cv2.imread(path_to_img)
        image_expanded = np.expand_dims(image, axis=0)
        (boxes, scores, classes, num) = sess.run(
            [detection_boxes, detection_scores, detection_classes, num_detections],
            feed_dict={image_tensor: image_expanded})
        '''
        count_ocurr = xml_list_names.count(IMAGE[f])
        # print("count_ocurr: ", IMAGE[f], count_ocurr)
        for i in range(count_ocurr):
            all_ground_truth = np.vstack([all_ground_truth, xml_list[f+i+x][1:]])
            # print("all_ground_truth: \n", all_ground_truth)
        '''
        for i, j in zip(range(boxes.shape[1]), lista_string):
            if scores[0, i] > threshold_scores:
                # Prediction values
                ymin_pred = boxes[0, i, 0] * height
                xmin_pred = boxes[0, i, 1] * width
                ymax_pred = boxes[0, i, 2] * height
                xmax_pred = boxes[0, i, 3] * width
                scores_num = '{0:.10f}'.format(scores[0, i])
                scores_str = str(scores_num)
                name = IMAGE[f].split(".")
                name_1 = name[0]
                name_2 = name[0] + ".txt"
                # Score together with xmin, ymin, xmax, ymax
                value_pred = [name_2, "volkswagen_logo", scores_str,
                              str(int(xmin_pred)), str(int(ymin_pred)),
                              str(int(xmax_pred)), str(int(ymax_pred))]
                pred_labels_list.append(value_pred)
                print(value_pred)
                # value_pred_num = [int(xmin_pred), int(ymin_pred), int(xmax_pred), int(ymax_pred)]
                # all_predictions = np.vstack([all_predictions, value_pred_num])
    # out_file avoids shadowing the loop variable f above
    with open('/home/igor/Documentos/luigy/Test18/output_all_synthetic.txt', 'w') as out_file:
        for item in pred_labels_list:
            out_file.write("%s\n" % item)
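# threshold_boxes above is an IoU threshold, but the matching step itself is
# not shown in this function. A standard intersection-over-union helper of the
# kind such an evaluation needs (a generic sketch, not this project's exact
# matcher):
def iou(box_a, box_b):
    """IoU of two [xmin, ymin, xmax, ymax] boxes in pixel coordinates."""
    xa, ya = max(box_a[0], box_b[0]), max(box_a[1], box_b[1])
    xb, yb = min(box_a[2], box_b[2]), min(box_a[3], box_b[3])
    inter = max(0, xb - xa) * max(0, yb - ya)
    if inter == 0:
        return 0.0
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter)

print(iou([0, 0, 10, 10], [5, 5, 15, 15]))  # 25 / 175 ≈ 0.143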
def visualize(split): nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True) sensor = 'LIDAR_TOP' # pipeline_config = pipeline_pb2.TrainEvalPipelineConfig() # with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f: # text_format.Merge(f.read(), pipeline_config) # if not pipeline_config.model.HasField('ssd_augmentation'): # raise ValueError('Model with ssd_augmentation estimation is required.') detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(FLAGS.graph, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) for node in od_graph_def.node: if 'BatchMultiClassNonMaxSuppression' in node.name: node.device = '/device:CPU:0' tf.import_graph_def(od_graph_def, name='') with detection_graph.as_default(): with tf.Session(graph=detection_graph) as sess: image_tensor = detection_graph.get_tensor_by_name('image_tensor:0') detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0') detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0') detection_scores = detection_graph.get_tensor_by_name('detection_scores:0') detection_classes = detection_graph.get_tensor_by_name('detection_classes:0') num_detections = detection_graph.get_tensor_by_name('num_detections:0') belief_F_prediction = detection_graph.get_tensor_by_name('belief_F_prediction:0') belief_O_prediction = detection_graph.get_tensor_by_name('belief_O_prediction:0') z_max_detections_prediction = detection_graph.get_tensor_by_name('z_max_detections_prediction:0') detections_drivingCorridor_prediction = detection_graph.get_tensor_by_name('detections_drivingCorridor_prediction:0') scene_splits = create_splits_scenes() for scene in nusc.scene: if scene['name'] not in vis_set: continue scene_dir = os.path.join(FLAGS.output, scene['name']) os.system('mkdir {}'.format(scene_dir)) folder_inverse = os.path.join(scene_dir, 'inverse') folder_color = os.path.join(scene_dir, 'color') folder_color_inverse = os.path.join(scene_dir, 'color_inverse') os.system('mkdir {}'.format(folder_inverse)) os.system('mkdir {}'.format(folder_color)) os.system('mkdir {}'.format(folder_color_inverse)) folder_belF = os.path.join(scene_dir, 'belF') folder_belO = os.path.join(scene_dir, 'belO') folder_zMaxDet = os.path.join(scene_dir, 'zMaxDet') os.system('mkdir {}'.format(folder_belF)) os.system('mkdir {}'.format(folder_belO)) os.system('mkdir {}'.format(folder_zMaxDet)) folder_belF_clean = os.path.join(scene_dir, 'belF_clean') folder_belO_clean = os.path.join(scene_dir, 'belO_clean') folder_zMaxDet_clean = os.path.join(scene_dir, 'zMaxDet_clean') os.system('mkdir {}'.format(folder_belF_clean)) os.system('mkdir {}'.format(folder_belO_clean)) os.system('mkdir {}'.format(folder_zMaxDet_clean)) current_sample_token = scene['first_sample_token'] last_sample_token = scene['last_sample_token'] # first_sample = nusc.get('sample', scene['first_sample_token']) # current_token = first_sample['data'][sensor] sample_in_scene = True first_inference = True while sample_in_scene: # while current_token: if current_sample_token == last_sample_token: sample_in_scene = False sample = nusc.get('sample', current_sample_token) lidar_top_data = nusc.get('sample_data', sample['data'][sensor]) if first_inference: # current_token = lidar_top_data['next'] # if use_10hz_capture_frequency: # if current_token: # lidar_top_data_next = nusc.get('sample_data', current_token) # current_token = lidar_top_data_next['next'] current_sample_token = 
sample['next'] first_inference = False continue # Read input data filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0] image_stacked, det_mask, observation_mask, z_mask = read_images(FLAGS.data, FLAGS.data_beliefs, filename_prefix) # Inference start_time = time.time() (boxes_aligned, boxes_inclined, scores, classes, num, belief_F_pred, belief_O_pred, z_max_detections_pred, detections_drivingCorridor_pred) = sess.run( [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes, num_detections, belief_F_prediction, belief_O_prediction, z_max_detections_prediction, detections_drivingCorridor_prediction], feed_dict={image_tensor: image_stacked}) print('Inference time:', time.time() - start_time) # Visualize object detection and scene flow label_map = label_map_util.load_labelmap(FLAGS.label_map) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10, use_display_name=True) category_index = label_map_util.create_category_index(categories) # Create grid map to visualize image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8) image_vis_inv = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8) # Write augmentation maps # print("image_stacked--------------------------") # print(image_stacked.shape) # print("belief_F_prediction--------------------") # print(belief_F_pred) belief_F_prediction_np = resize_augm(belief_F_pred[0], image_stacked.shape[1], image_stacked.shape[2]) belief_O_prediction_np = resize_augm(belief_O_pred[0], image_stacked.shape[1], image_stacked.shape[2]) z_max_detections_prediction_np = resize_augm(z_max_detections_pred[0], image_stacked.shape[1], image_stacked.shape[2]) detections_drivingCorridor_prediction_np = resize_augm(detections_drivingCorridor_pred[0], image_stacked.shape[1], image_stacked.shape[2]) image_bel_F = augm_to_image_gray_8(belief_F_prediction_np, mode_norm255=True) # image_bel_F = cv2.bitwise_not(image_bel_F) image_bel_F_clean = image_bel_F.copy() image_bel_O = augm_to_image_gray_8(belief_O_prediction_np, mode_norm255=True) image_bel_O = cv2.bitwise_not(image_bel_O) image_bel_O_clean = image_bel_O.copy() image_z_max_detections = augm_to_image_gray_8(z_max_detections_prediction_np, mode_norm255=False) image_z_max_detections = cv2.bitwise_not(image_z_max_detections) image_z_max_detections_clean = image_z_max_detections.copy() # image_vis_color = augm_to_image_rgb(detections_drivingCorridor_prediction_np, belief_F_prediction_np, z_max_detections_prediction_np) image_vis_color = augm_to_image_rg(belief_F_prediction_np, z_max_detections_prediction_np) image_vis_color_inv = cv2.bitwise_not(image_vis_color) for (v, u), val in np.ndenumerate(det_mask): if val: image_vis[v, u] = 255 image_vis_inv[v, u] = 0 image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8) for (v, u), val in np.ndenumerate(observation_mask): if val: image_vis[v, u, :] = 50 image_vis_inv = cv2.bitwise_not(image_vis) for (v, u), val in np.ndenumerate(det_mask): if val: image_vis[v, u] = 255 image_vis_inv[v, u] = 0 # Draw inclined detection box vis_util.visualize_boxes_and_labels_on_image_array( image_vis_color, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) vis_util.visualize_boxes_and_labels_on_image_array( image_vis_color_inv, np.squeeze(boxes_aligned), 
np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) vis_util.visualize_boxes_and_labels_on_image_array( image_vis, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) print(image_vis.shape) vis_util.visualize_boxes_and_labels_on_image_array( image_vis_inv, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) print(image_bel_F.shape) vis_util.visualize_boxes_and_labels_on_image_array( image_bel_F, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) vis_util.visualize_boxes_and_labels_on_image_array( image_bel_O, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) vis_util.visualize_boxes_and_labels_on_image_array( image_z_max_detections, np.squeeze(boxes_aligned), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, boxes_3d=np.squeeze(boxes_inclined), min_score_thresh=0.23, use_normalized_coordinates=True, line_thickness=3) # Save image print(filename_prefix.split('/')[-1]) output_path = os.path.join(scene_dir, filename_prefix.split('/')[-1] + '.png') cv2.imwrite(output_path, image_vis) output_path_inv = os.path.join(folder_inverse, filename_prefix.split('/')[-1] + 'inv.png') output_color_path = os.path.join(folder_color, filename_prefix.split('/')[-1] + 'color.png') output_color_path_inv = os.path.join(folder_color_inverse, filename_prefix.split('/')[-1] + 'colorInv.png') output_path_belO = os.path.join(folder_belO, filename_prefix.split('/')[-1] + 'belo.png') output_path_belF = os.path.join(folder_belF, filename_prefix.split('/')[-1] + 'belf.png') output_path_zMaxDet = os.path.join(folder_zMaxDet, filename_prefix.split('/')[-1] + 'zmax.png') output_path_belO_clean = os.path.join(folder_belO_clean, filename_prefix.split('/')[-1] + 'belo_clean.png') output_path_belF_clean = os.path.join(folder_belF_clean, filename_prefix.split('/')[-1] + 'belf_clean.png') output_path_zMaxDet_clean = os.path.join(folder_zMaxDet_clean, filename_prefix.split('/')[-1] + 'zmax_clean.png') cv2.imwrite(output_path_inv, image_vis_inv) cv2.imwrite(output_color_path, image_vis_color) cv2.imwrite(output_color_path_inv, image_vis_color_inv) cv2.imwrite(output_path_belO, image_bel_O) cv2.imwrite(output_path_belF, image_bel_F) cv2.imwrite(output_path_zMaxDet, image_z_max_detections) cv2.imwrite(output_path_belO_clean, image_bel_O_clean) cv2.imwrite(output_path_belF_clean, image_bel_F_clean) cv2.imwrite(output_path_zMaxDet_clean, image_z_max_detections_clean) current_sample_token = sample['next']
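# resize_augm() is called in the function above but not defined in this
# section. Assuming it only resizes a single-channel prediction map to the
# grid-map resolution, a minimal stand-in could look like this (the real
# helper may also normalize or pad):
import cv2
import numpy as np

def resize_augm(pred_map, height, width):
    """Hypothetical sketch: squeeze a prediction map to 2-D and resize it
    to (height, width)."""
    pred = np.squeeze(np.asarray(pred_map, dtype=np.float32))
    return cv2.resize(pred, (width, height), interpolation=cv2.INTER_LINEAR)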
def setup_platform(hass, config, add_entities, discovery_info=None): """Set up the TensorFlow image processing platform.""" model_config = config.get(CONF_MODEL) model_dir = model_config.get(CONF_MODEL_DIR) \ or hass.config.path('tensorflow') labels = model_config.get(CONF_LABELS) \ or hass.config.path('tensorflow', 'object_detection', 'data', 'mscoco_label_map.pbtxt') # Make sure locations exist if not os.path.isdir(model_dir) or not os.path.exists(labels): _LOGGER.error("Unable to locate tensorflow models or label map") return # append custom model path to sys.path sys.path.append(model_dir) try: # Verify that the TensorFlow Object Detection API is pre-installed # pylint: disable=unused-import,unused-variable os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2' import tensorflow as tf # noqa from object_detection.utils import label_map_util # noqa except ImportError: # pylint: disable=line-too-long _LOGGER.error( "No TensorFlow Object Detection library found! Install or compile " "for your system following instructions here: " "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md" ) # noqa return try: # Display warning that PIL will be used if no OpenCV is found. # pylint: disable=unused-import,unused-variable import cv2 # noqa except ImportError: _LOGGER.warning( "No OpenCV library found. TensorFlow will process image with " "PIL at reduced resolution") # Set up Tensorflow graph, session, and label map to pass to processor # pylint: disable=no-member detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') session = tf.Session(graph=detection_graph) label_map = label_map_util.load_labelmap(labels) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=90, use_display_name=True) category_index = label_map_util.create_category_index(categories) entities = [] for camera in config[CONF_SOURCE]: entities.append( TensorFlowImageProcessor(hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME), session, detection_graph, category_index, config)) add_entities(entities)
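# The import guards in setup_platform() follow the standard pattern for
# optional dependencies: try the import, log, then degrade or abort. Reduced
# to its core as a sketch:
import importlib

def optional_import(name):
    """Return the module if importable, else None."""
    try:
        return importlib.import_module(name)
    except ImportError:
        return None

cv2 = optional_import('cv2')
if cv2 is None:
    print('No OpenCV library found; falling back to PIL')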
def object_detection_funct(image_path, api_call=True, video_file=False):
    # Initialize model graph path and label map path
    path_name = 'model'
    path_to_frozen_det_graph = path_name + '/frozen_inference_graph.pb'
    path_to_labels = os.path.join('model', 'mscoco_label_map.pbtxt')
    num_of_classes = 90

    # Load the frozen tensorflow model
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_frozen_det_graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Load the label map, e.g. 1=person, 2=cat, etc.
    label_map = label_map_util.load_labelmap(path_to_labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_of_classes, use_display_name=True)
    category_map = label_map_util.create_category_index(categories)

    # Actual prediction
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            if not video_file:
                # Single-image prediction
                image = Image.open(image_path)
                # Convert the image into a numpy array using the helper function
                image_np = load_image_into_numpy_array(image)
                # Expand dimensions since the model expects images of shape [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name('detection_scores:0')
                classes = detection_graph.get_tensor_by_name('detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Decode the predictions and convert them into a dictionary
                output_dict = {'class': classes[0], 'score': scores[0]}
                if api_call:
                    # Return the final dictionary
                    scores_predicted = output_dict['score'][output_dict['score'] > 0.4]
                    classes_predicted = output_dict['class'][
                        0:len(scores_predicted)].astype(np.int32)
                    results_dict = dict()
                    for i in range(0, len(classes_predicted)):
                        results_dict[category_map[classes_predicted[i]]['name']] = \
                            str(round(scores_predicted[i] * 100, 2)) + " %"
                    return results_dict
                else:
                    # Save the image with boxes and return the image path
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np, np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores), category_map,
                        use_normalized_coordinates=True, line_thickness=8)
                    DIR = 'static/img_results'
                    num_of_files_in_dir = len([
                        name for name in os.listdir(DIR)
                        if os.path.isfile(os.path.join(DIR, name))])
                    final_image_path = DIR + '/image' + str(num_of_files_in_dir) + '.png'
                    result_img = Image.fromarray(image_np, 'RGB')
                    result_img.save(final_image_path)
                    return final_image_path
            else:
                # VIDEO_PATH is expected to be defined at module level
                reader = imageio.get_reader(VIDEO_PATH)
                fps = reader.get_meta_data()['fps']
                VID_OP_DIR = 'static/vid_results'
                num_of_files_in_dir = len([
                    name for name in os.listdir(VID_OP_DIR)
                    if os.path.isfile(os.path.join(VID_OP_DIR, name))])
                # Write the annotated video into the video results directory
                final_video_path = VID_OP_DIR + '/output_video' + \
                    str(num_of_files_in_dir) + '.mp4'
                writer = imageio.get_writer(final_video_path, fps=fps)
                for i, frame in enumerate(reader):
                    image_np = frame
                    # Expand dimensions since the model expects images of shape [1, None, None, 3]
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
                    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                    scores = detection_graph.get_tensor_by_name('detection_scores:0')
                    classes = detection_graph.get_tensor_by_name('detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np, np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores), category_map,
                        use_normalized_coordinates=True, line_thickness=8)
                    writer.append_data(image_np)
                writer.close()
                return final_video_path
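# The video branch above follows imageio's read/annotate/write cycle. Stripped
# of the detection code, the skeleton is the following; annotate() is a
# hypothetical stand-in for the sess.run + drawing step:
import imageio

reader = imageio.get_reader('input.mp4')
fps = reader.get_meta_data()['fps']      # keep the source frame rate
writer = imageio.get_writer('annotated.mp4', fps=fps)
for frame in reader:
    writer.append_data(annotate(frame))  # annotate() is hypothetical
writer.close()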
def run(video_source, path_object_model, path_encoder_model, path_labels,
        min_score_thresh, nms_max_overlap, max_cosine_distance, nn_budget,
        display, time_profile):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    video_source : str
        Path to the video source to process.
    path_object_model : str
        Path to the object recognition model.
    path_encoder_model : str
        Path to the encoder model.
    path_labels : str
        Path to the object labels.
    min_score_thresh : float
        Detection confidence threshold. Disregard all detections that have
        a confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no
        budget is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    time_profile : bool
        If True, show timing information.
    """

    def timeit(method):
        def timed(*args, **kw):
            ts = timer()
            result = method(*args, **kw)
            te = timer()
            if time_profile:
                print('%r %2.3f sec' % (method.__name__, te - ts))
            return result
        return timed

    # Open video stream
    cap = cv2.VideoCapture(video_source)
    frame_count = 0
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Deep SORT setup
    metric = nn_matching.NearestNeighborDistanceMetric(
        "cosine", max_cosine_distance, nn_budget)
    tracker = Tracker(metric)
    if not os.path.exists(path_encoder_model):
        print("%s: No such file or directory" % path_encoder_model)
        sys.exit(1)
    encoder = generate_detections.create_box_encoder(path_encoder_model)

    # Object detection
    # ## Check if the model exists, otherwise download it
    OBJECT_MODEL_PATH = os.path.join(path_object_model, '')
    OBJECT_MODEL_FILE = os.path.join(OBJECT_MODEL_PATH, 'frozen_inference_graph.pb')
    if not os.path.exists(OBJECT_MODEL_PATH):
        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
        DOWNLOAD_FILE = str.split(OBJECT_MODEL_PATH, '/')[-2] + '.tar.gz'
        DOWNLOAD_TO = os.path.join(str.split(OBJECT_MODEL_PATH, '/')[0], '')
        print('Model \"%s\" not on disk' % OBJECT_MODEL_PATH)
        print('Download it from %s' % (DOWNLOAD_BASE + DOWNLOAD_FILE))
        opener = urllib.request.URLopener()
        opener.retrieve(os.path.join(DOWNLOAD_BASE, DOWNLOAD_FILE),
                        os.path.join(DOWNLOAD_TO, DOWNLOAD_FILE))
        # Extract the model from the tar file
        print('Extract frozen tensorflow model')
        tar_file = tarfile.open(os.path.join(DOWNLOAD_TO, DOWNLOAD_FILE))
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, DOWNLOAD_TO)

    # ## Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(OBJECT_MODEL_FILE, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        with tf.Session() as sess:
            # Get handles to input and output tensors.
            # Define input and output tensors for detection_graph.
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular
            # object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the
            # objects. The score is shown on the result image, together with
            # the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            tensor_list = [detection_boxes, detection_scores,
                           detection_classes, num_detections]

            # ## Loading label map
            # Label maps map indices to category names, so that when our
            # convolution network predicts `5`, we know that this corresponds
            # to `airplane`. Here we use internal utility functions, but
            # anything that returns a dictionary mapping integers to
            # appropriate string labels would be fine.
            if not os.path.exists(path_labels):
                print("%s: No such file or directory" % path_labels)
                sys.exit(1)
            label_map = label_map_util.load_labelmap(path_labels)
            categories = label_map_util.convert_label_map_to_categories(
                label_map, max_num_classes=90, use_display_name=True)
            category_index = label_map_util.create_category_index(categories)

            # ## Select some categories to display
            # 1: person, 2: bicycle, 3: car, 4: motorcycle, 6: bus, 8: truck
            # idx_to_keep = [1, 2, 3, 4, 6, 8]
            # category_index = {i: category_index[i] for i in idx_to_keep}
            # End of initialization

            # # Detection
            @timeit
            def object_detection(image, graph):
                (boxes, scores, classes, num) = sess.run(
                    tensor_list,
                    feed_dict={image_tensor: np.expand_dims(image, 0)})
                mask = scores > min_score_thresh
                classes = classes[mask]
                boxes = boxes[mask]
                scores = scores[mask]
                return (classes, boxes, scores)

            @timeit
            def extract_features(image, boxes):
                image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
                im_width, im_height = image_pil.size
                detections = []
                for box in boxes:
                    ymin, xmin, ymax, xmax = box
                    (left, right, bottom, top) = (xmin * im_width, xmax * im_width,
                                                  ymin * im_height, ymax * im_height)
                    detections.append(
                        np.array([left, bottom, right - left, top - bottom]))
                    # scores.append(score)
                detections = np.array(detections)
                features = encoder(image, detections)
                detections = [Detection(bbox, 1.0, feature)
                              for bbox, feature in zip(detections, features)]
                # Run non-maxima suppression.
                boxes = np.array([d.tlwh for d in detections])
                scores = np.array([d.confidence for d in detections])
                indices = preprocessing.non_max_suppression(
                    boxes, nms_max_overlap, scores)
                detections = [detections[i] for i in indices]
                return detections

            @timeit
            def tracking(detections):
                tracker.predict()
                tracker.update(detections)
                return tracker

            @timeit
            def frame_callback():
                ret, frame_np = cap.read()
                # Resize frame of video to 1/4 size for faster processing
                # frame_np = cv2.resize(frame_np, (0, 0), fx=0.25, fy=0.25)
                # Skip bad read frames
                if not ret:
                    return
                if time_profile:
                    t_obj_start = timer()
                # Actual detection.
                tf_classes, tf_boxes, tf_scores = object_detection(
                    frame_np, detection_graph)
                if time_profile:
                    t_obj_stop = timer()
                    t_feat_start = timer()
                detections = extract_features(frame_np, tf_boxes)
                # Update tracker.
                tracker = tracking(detections)
                for track, tf_class, tf_score in zip(tracker.tracks,
                                                     tf_classes, tf_scores):
                    bbox = track.to_tlbr()
                    if display:
                        h, w, _ = frame_np.shape
                        thick = int((h + w) / 300.)
                        cv2.rectangle(
                            frame_np, (int(bbox[0]), int(bbox[1])),
                            (int(bbox[2]), int(bbox[3])),
                            visualization.create_unique_color_uchar(
                                track.track_id, hue_step=0.41),
                            thick)  # (255, 255, 255), thick)
                        cv2.putText(
                            frame_np,
                            str('id: %i, class: %s, score: %.2f' %
                                (track.track_id,
                                 category_index[tf_class]['name'], tf_score)),
                            (int(bbox[0]), int(bbox[1]) - 12), 0, 1e-3 * h,
                            (255, 0, 0), int(thick / 3))
                cv2.imshow('object detection', cv2.resize(frame_np, (800, 450)))
                # cv2.imshow('object detection', frame_np)

            while True:
                print('Frame %i, %s' % (frame_count, datetime.now()))
                frame_callback()
                frame_count += 1
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break
            cap.release()
            cv2.destroyAllWindows()
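# extract_features() above converts the detector's normalized
# [ymin, xmin, ymax, xmax] boxes into the pixel [x, y, w, h] layout Deep SORT
# expects. The same arithmetic as a standalone helper (a sketch mirroring
# that loop; the original names the vertical coordinates bottom/top):
import numpy as np

def to_tlwh(box, im_width, im_height):
    """Normalized [ymin, xmin, ymax, xmax] -> pixel [left, top, width, height]."""
    ymin, xmin, ymax, xmax = box
    return np.array([xmin * im_width, ymin * im_height,
                     (xmax - xmin) * im_width, (ymax - ymin) * im_height])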
def get_label_map(PATH_TO_LABELS, NUM_CLASSES): print("Debug info: Inside get_label_map()") label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) return category_index
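# Example use of the helper above (path and class count are placeholders):
category_index = get_label_map('data/mscoco_label_map.pbtxt', 90)
print(category_index[1]['name'])  # e.g. 'person' for the COCO label map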
def __init__(self, model_name, label_file='data/mscoco_label_map.pbtxt'): # Initialize some variables print("ObjectDetector('%s', '%s')" % (model_name, label_file)) self.process_this_frame = True # download model self.graph_file = model_name + '/' + self.GRAPH_FILE_NAME if not os.path.isfile(self.graph_file): self.download_model(model_name) # Load a (frozen) Tensorflow model into memory. self.detection_graph = tf.Graph() with self.detection_graph.as_default(): od_graph_def = tf.GraphDef() with tf.gfile.GFile(self.graph_file, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') graph = self.detection_graph ops = graph.get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = graph.get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, 480, 640) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) self.tensor_dict = tensor_dict self.sess = tf.Session(graph=self.detection_graph) # Loading label map # Label maps map indices to category names, # so that when our convolution network predicts `5`, # we know that this corresponds to `airplane`. # Here we use internal utility functions, # but anything that returns a dictionary mapping integers to appropriate string labels would be fine label_map = label_map_util.load_labelmap(label_file) categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True) self.category_index = label_map_util.create_category_index(categories) self.output_dict = None self.last_inference_time = 0
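# The tensor probe in the constructor above generalizes: a frozen graph only
# exposes some of the standard output tensors (detection_masks, for instance,
# only appears in Mask R-CNN exports). The same check as a small sketch,
# assuming detection_graph is already loaded:
ops = detection_graph.get_operations()
tensor_names = {output.name for op in ops for output in op.outputs}
for key in ('num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'):
    present = key + ':0' in tensor_names
    print('%s: %s' % (key, 'available' if present else 'missing'))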
def image_input(): image_path = './static/input_images' try: test_record_fname = './static/annotations/test.record' train_record_fname = './static/annotations/train.record' label_map_pbtxt_fname = './static/annotations/label_map.pbtxt' pb_fname = './static/inference_graphs/engie.pb' IMAGE_SIZE = (12, 8) PATH_TO_CKPT = pb_fname PATH_TO_LABELS = label_map_pbtxt_fname num_classes = get_num_classes(label_map_pbtxt_fname) assert os.path.isfile(pb_fname) assert os.path.isfile(PATH_TO_LABELS) detection_graph = tf.Graph() with detection_graph.as_default(): od_graph_def = tf.compat.v1.GraphDef() with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid: serialized_graph = fid.read() od_graph_def.ParseFromString(serialized_graph) tf.import_graph_def(od_graph_def, name='') label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=num_classes, use_display_name=True) category_index = label_map_util.create_category_index(categories) for direct, subdirect, images in os.walk(image_path): for i, image_name in enumerate(images): image = Image.open(str(direct) + '/' + str(image_name)) image_np = load_image_into_numpy_array(image) image_np_expanded = np.expand_dims(image_np, axis=0) output_dict = run_inference_for_single_image( image_np, detection_graph) image_res, class_name = vis_util.visualize_boxes_and_labels_on_image_array( image_np, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, instance_masks=output_dict.get('detection_masks'), use_normalized_coordinates=True, line_thickness=10) image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB) if class_name == 'Critical': print('Critical', image_name) cv2.imwrite('./static/critical/image_' + str(i) + '.jpg', image_np) elif class_name == 'High': print('High', image_name) cv2.imwrite('./static/high/image_' + str(i) + '.jpg', image_np) elif class_name == 'Less': print('less', image_name) cv2.imwrite('./static/less/image_' + str(i) + '.jpg', image_np) else: pass return '', 204 except Exception as e: print( "Please select another image because the current image gives an irregular array shape", e) return '', 301
def evaluate_detection_results_pascal_voc(result_lists, categories, label_id_offset=0, iou_thres=0.5, corloc_summary=False): """Computes Pascal VOC detection metrics given groundtruth and detections. This function computes Pascal VOC metrics. This function by default takes detections and groundtruth boxes encoded in result_lists and writes evaluation results to tf summaries which can be viewed on tensorboard. Args: result_lists: a dictionary holding lists of groundtruth and detection data corresponding to each image being evaluated. The following keys are required: 'image_id': a list of string ids 'detection_boxes': a list of float32 numpy arrays of shape [N, 4] 'detection_scores': a list of float32 numpy arrays of shape [N] 'detection_classes': a list of int32 numpy arrays of shape [N] 'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4] 'groundtruth_classes': a list of int32 numpy arrays of shape [M] and the remaining fields below are optional: 'difficult': a list of boolean arrays of shape [M] indicating the difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide this information and it is used to remove difficult examples from eval in order to not penalize the models on them. Note that it is okay to have additional fields in result_lists --- they are simply ignored. categories: a list of dictionaries representing all possible categories. Each dict in this list has the following keys: 'id': (required) an integer id uniquely identifying this category 'name': (required) string representing category name e.g., 'cat', 'dog', 'pizza' label_id_offset: an integer offset for the label space. iou_thres: float determining the IoU threshold at which a box is considered correct. Defaults to the standard 0.5. corloc_summary: boolean. If True, also outputs CorLoc metrics. Returns: A dictionary of metric names to scalar values. Raises: ValueError: if the set of keys in result_lists is not a superset of the expected list of keys. Unexpected keys are ignored. ValueError: if the lists in result_lists have inconsistent sizes. """ # check for expected keys in result_lists expected_keys = [ 'detection_boxes', 'detection_scores', 'detection_classes', 'image_id' ] expected_keys += ['groundtruth_boxes', 'groundtruth_classes'] if not set(expected_keys).issubset(set(result_lists.keys())): raise ValueError('result_lists does not have expected key set.') num_results = len(result_lists[expected_keys[0]]) for key in expected_keys: if len(result_lists[key]) != num_results: raise ValueError('Inconsistent list sizes in result_lists') # Pascal VOC evaluator assumes foreground index starts from zero. 
categories = copy.deepcopy(categories) for idx in range(len(categories)): categories[idx]['id'] -= label_id_offset # num_classes (maybe encoded as categories) num_classes = max([cat['id'] for cat in categories]) + 1 logging.info('Computing Pascal VOC metrics on results.') if all(image_id.isdigit() for image_id in result_lists['image_id']): image_ids = [int(image_id) for image_id in result_lists['image_id']] else: image_ids = range(num_results) evaluator = object_detection_evaluation.ObjectDetectionEvaluation( num_classes, matching_iou_threshold=iou_thres) difficult_lists = None if 'difficult' in result_lists and result_lists['difficult']: difficult_lists = result_lists['difficult'] for idx, image_id in enumerate(image_ids): difficult = None if difficult_lists is not None and difficult_lists[idx].size: difficult = difficult_lists[idx].astype(np.bool) evaluator.add_single_ground_truth_image_info( image_id, result_lists['groundtruth_boxes'][idx], result_lists['groundtruth_classes'][idx] - label_id_offset, difficult) evaluator.add_single_detected_image_info( image_id, result_lists['detection_boxes'][idx], result_lists['detection_scores'][idx], result_lists['detection_classes'][idx] - label_id_offset) per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = ( evaluator.evaluate()) metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap} category_index = label_map_util.create_category_index(categories) for idx in range(per_class_ap.size): if idx in category_index: display_name = ('PerformanceByCategory/mAP@{}IOU/{}' .format(iou_thres, category_index[idx]['name'])) metrics[display_name] = per_class_ap[idx] if corloc_summary: metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc for idx in range(per_class_corloc.size): if idx in category_index: display_name = ( 'PerformanceByCategory/CorLoc@{}IOU/{}'.format( iou_thres, category_index[idx]['name'])) metrics[display_name] = per_class_corloc[idx] return metrics
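# The label_id_offset handling above is the subtle part: the evaluator assumes
# zero-based class ids, so the categories and every class array are shifted by
# the same offset. A worked mini-example with hypothetical categories:
import copy

categories = [{'id': 1, 'name': 'car'}, {'id': 2, 'name': 'pedestrian'}]
label_id_offset = 1

shifted = copy.deepcopy(categories)
for cat in shifted:
    cat['id'] -= label_id_offset
print(shifted)      # ids become 0 and 1
num_classes = max(cat['id'] for cat in shifted) + 1
print(num_classes)  # 2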
def main(): PATH_TO_LABELS = r'data/sim_udacity_label_map.pbtxt' NUM_CLASSES = 3 #frozen_model_file = "./models/freeze/frozen_inference_graph.pb" frozen_model_file = "./models/sim_freeze_tf1.3/frozen_inference_graph.pb" # test_img_dir = "/Users/donchan/Documents/UdaCity/MyProject/bstld/data/train/rgb/train/2015-10-05-11-26-32_bag/jpeg" #test_img_dir = "alex-lechner-udacity-traffic-light-dataset/udacity_testarea_rgb" test_img_dir = "dataset-sdcnd-capstone/data/sim_training_data/sim_data_capture" test_image = "left0546.jpg" image_path = os.path.join(test_img_dir, test_image) image = Image.open(image_path) image_np = load_image_into_numpy_array(image) label_map = label_map_util.load_labelmap(PATH_TO_LABELS) categories = label_map_util.convert_label_map_to_categories( label_map, max_num_classes=NUM_CLASSES, use_display_name=True) category_index = label_map_util.create_category_index(categories) tfc = TrafficLightClassifier(frozen_model_file) boxes, scores, classes, num = tfc.get_classification(image_np) print("length of boxes", len(boxes)) print(scores) print(classes) print("predicted numbers ", num) print("categories", categories) print("category index", category_index) IMAGE_SIZE = (8, 6) vis_util.visualize_boxes_and_labels_on_image_array( image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, max_boxes_to_draw=5, line_thickness=8) plt.figure(figsize=IMAGE_SIZE) plt.imshow(image_np) plt.show() test_image = "left0030.jpg" #RED image_path = os.path.join(test_img_dir, test_image) image = Image.open(image_path) image_np = load_image_into_numpy_array(image) boxes, scores, classes, num = tfc.get_classification(image_np) print("length of boxes", np.squeeze(boxes)) print(scores) print(classes) print("predicted numbers ", num) print("categories", categories) print("category index", category_index) vis_util.visualize_boxes_and_labels_on_image_array( image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, max_boxes_to_draw=5, line_thickness=8) plt.figure(figsize=IMAGE_SIZE) plt.imshow(image_np) plt.show() test_image = "left0021.jpg" #YELLOW image_path = os.path.join(test_img_dir, test_image) image = Image.open(image_path) image_np = load_image_into_numpy_array(image) boxes, scores, classes, num = tfc.get_classification(image_np) print("length of boxes", np.squeeze(boxes)) print(scores) print(classes) print("predicted numbers ", num) print("categories", categories) print("category index", category_index) vis_util.visualize_boxes_and_labels_on_image_array( image_np, np.squeeze(boxes), np.squeeze(classes).astype(np.int32), np.squeeze(scores), category_index, use_normalized_coordinates=True, max_boxes_to_draw=5, line_thickness=8) plt.figure(figsize=IMAGE_SIZE) plt.imshow(image_np) plt.show()
def create_kitti_labels(output_path, label_map_path, calib_dir, image_dir,
                        image_ground_dir, graph_dir, examples):
    grid_map_data_resolution = 0.15
    grid_map_data_origin_idx = np.array([60, 30])
    label_map = label_map_util.load_labelmap(label_map_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=6, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    path_to_graph = graph_dir
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            detection_boxes_3d = detection_graph.get_tensor_by_name(
                'detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            for idx, example in enumerate(examples):
                label_calib_name = '%06d' % (int(example)) + '.txt'
                label_name = '%06d' % (idx) + '.txt'
                output_label = os.path.join(output_path, 'label', label_name)
                path_calib = os.path.join(calib_dir, label_calib_name)
                velo_to_cam = read_calib(path_calib, 6)
                P2 = read_calib(path_calib, 3)
                R0_rect = read_calib(path_calib, 5)
                trans_image = P2.dot(R0_rect)
                # Homogeneous transform from grid-map coordinates (metres) to
                # velodyne coordinates.
                image_to_velo = np.array([[0, -1, grid_map_data_origin_idx[0]],
                                          [-1, 0, grid_map_data_origin_idx[1]],
                                          [0, 0, 1]])
                image_name_hits = example + '_detections_cartesian.png'
                image_path_hits = os.path.join(image_dir, image_name_hits)
                image_name_obs = example + '_observations_cartesian.png'
                image_path_obs = os.path.join(image_dir, image_name_obs)
                image_name_int = example + '_intensity_cartesian.png'
                image_path_int = os.path.join(image_dir, image_name_int)
                image_name_zmin = example + '_z_min_detections_cartesian.png'
                image_path_zmin = os.path.join(image_dir, image_name_zmin)
                image_name_zmax = example + '_z_max_detections_cartesian.png'
                image_path_zmax = os.path.join(image_dir, image_name_zmax)
                image_name_prob = example + '_decay_rate_cartesian.png'
                image_path_prob = os.path.join(image_dir, image_name_prob)
                image_name_occ = example + '_z_max_occlusions_cartesian.png'
                image_path_occ = os.path.join(image_dir, image_name_occ)
                image_name_ground = example + '_ground_surface_cartesian.png'
                #image_path_ground = os.path.join(image_ground_dir, image_name_ground)
                image_path_ground = os.path.join(image_dir, image_name_ground)
                image_hits = cv2.imread(image_path_hits, 0)
                image_obs = cv2.imread(image_path_obs, 0)
                image_int = cv2.imread(image_path_int, 0)
                image_zmin = cv2.imread(image_path_zmin, 0)
                image_zmax = cv2.imread(image_path_zmax, 0)
                image_prob = cv2.imread(image_path_prob, 0)
                image_occ = cv2.imread(image_path_occ, 0)
                inv_image_hits = cv2.bitwise_not(image_hits)
                inv_image_obs = cv2.bitwise_not(image_obs)
                image_vis = np.stack(
                    [inv_image_hits, inv_image_obs, inv_image_hits], axis=-1)
                #image_occlusion = cv2.imread(image_path_occlusion, 0)
                # Render the visualization: cells containing lidar hits become
                # black, observed-but-empty cells a uniform light grey.
                for x in range(0, image_vis.shape[0]):
                    for y in range(0, image_vis.shape[1]):
                        if image_vis[x, y, 0] < 255:
                            image_vis[x, y, 0] = 0
                            image_vis[x, y, 1] = 0
                            image_vis[x, y, 2] = 0
                        elif image_vis[x, y, 1] < 255:
                            value = 220
                            image_vis[x, y, 0] = value
                            image_vis[x, y, 1] = value
                            image_vis[x, y, 2] = value
                image_stacked = np.stack([
                    image_hits, image_occ, image_obs, image_int, image_zmin,
                    image_zmax
                ], axis=-1)
                #image_stacked = np.stack([image_prob, image_int, image_zmin, image_zmax,
                #                          image_rgb[:,:,0], image_rgb[:,:,1], image_rgb[:,:,2]], axis=-1)
                image_ground = cv2.imread(image_path_ground, 0)
                image_np_expanded = np.expand_dims(image_stacked, axis=0)
                mask_np_expanded = np.expand_dims(image_hits, axis=0)
                mask_np_expanded = np.expand_dims(mask_np_expanded, axis=3)
                start_time = time.time()
                (boxes, boxes_3d, scores, classes, num) = sess.run(
                    [detection_boxes, detection_boxes_3d, detection_scores,
                     detection_classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                print('Inference time:', time.time() - start_time)
                boxes_3d_np = np.squeeze(boxes_3d)
                boxes_np = np.squeeze(boxes)
                scores_np = np.squeeze(scores)
                classes_np = np.squeeze(classes)
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_vis,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    boxes_3d=np.squeeze(boxes_3d),
                    use_normalized_coordinates=True,
                    line_thickness=2,
                    max_boxes_to_draw=100,
                    skip_labels=True)
                test_image_name = 'image' + str(idx) + '.png'
                test_image_path = os.path.join(output_path, test_image_name)
                cv2.imwrite(test_image_path, image_vis)
                file_output = open(output_label, 'w')
                for i in range(scores_np.shape[0]):
                    if scores_np[i] > .3:
                        object_class = category_index[int(classes_np[i])]['name']
                        box = tuple(boxes_np[i])
                        y_min = box[0] * image_stacked.shape[0]
                        x_min = box[1] * image_stacked.shape[1]
                        y_max = box[2] * image_stacked.shape[0]
                        x_max = box[3] * image_stacked.shape[1]
                        box_rot = tuple(boxes_3d_np[i])
                        x_c = box_rot[0] * image_stacked.shape[1]
                        y_c = box_rot[1] * image_stacked.shape[0]
                        w_s = box_rot[2]
                        h_s = box_rot[3]
                        sin_angle = box_rot[4]
                        cos_angle = box_rot[5]
                        # The (sin, cos) pair encodes the doubled angle, hence
                        # the halving (see the note after this function).
                        angle_rad = math.atan2(sin_angle, cos_angle) / 2
                        vec_h_x = h_s * math.cos(angle_rad) / 2.0
                        vec_h_y = h_s * math.sin(angle_rad) / 2.0
                        vec_w_x = -w_s * math.sin(angle_rad) / 2.0
                        vec_w_y = w_s * math.cos(angle_rad) / 2.0
                        # Only the differences between these corner coordinates
                        # are used below, so the pixel-space x_c / y_c offsets
                        # cancel and the scaling converts the normalized corner
                        # vectors to pixels.
                        x1 = (x_c - vec_w_x - vec_h_x) * image_stacked.shape[1]
                        x2 = (x_c - vec_w_x + vec_h_x) * image_stacked.shape[1]
                        x3 = (x_c + vec_w_x + vec_h_x) * image_stacked.shape[1]
                        y1 = (y_c - vec_w_y - vec_h_y) * image_stacked.shape[0]
                        y2 = (y_c - vec_w_y + vec_h_y) * image_stacked.shape[0]
                        y3 = (y_c + vec_w_y + vec_h_y) * image_stacked.shape[0]
                        l = math.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) * (y2 - y1))
                        w = math.sqrt((x3 - x2) * (x3 - x2) + (y3 - y2) * (y3 - y2))
                        image_ground_box = image_ground[
                            int(round(y_min)):int(round(y_max)),
                            int(round(x_min)):int(round(x_max))]
                        mean_ground = image_ground_box.mean()
                        image_height_max_box = image_zmax[
                            int(round(y_min)):int(round(y_max)),
                            int(round(x_min)):int(round(x_max))]
                        height_max = image_height_max_box.max()
                        # Grey values encode heights in [-2.2 m, 0.4 m].
                        height_max_m = 2.6 * height_max / 255 - 2.2
                        mean_ground_m = 2.6 * mean_ground / 255 - 2.2
                        object_length_rot = l * grid_map_data_resolution
                        object_width_rot = w * grid_map_data_resolution
                        object_height = height_max_m - mean_ground_m
                        if object_class == 'Car':
                            # Clamp implausible car heights to a typical value.
                            if object_height < 1.3 or object_height > 1.9:
                                object_height = 1.56
                        object_t_rot = np.array([
                            x_c * grid_map_data_resolution,
                            y_c * grid_map_data_resolution
                        ])
                        object_t_velo_rot = image_to_velo.dot(
                            np.append(object_t_rot, 1))
                        object_t_velo_rot[2] = mean_ground_m
                        object_t_cam_rot = velo_to_cam.dot(
                            np.append(object_t_velo_rot, 1))
                        # Eight box corners in object coordinates, homogeneous:
                        # rows are x (length), y (up), z (width), 1.
                        object_corners_rot = np.array(
                            [[object_length_rot / 2, object_length_rot / 2,
                              -object_length_rot / 2, -object_length_rot / 2,
                              object_length_rot / 2, object_length_rot / 2,
                              -object_length_rot / 2, -object_length_rot / 2],
                             [0, 0, 0, 0, -object_height, -object_height,
                              -object_height, -object_height],
                             [object_width_rot / 2, -object_width_rot / 2,
                              -object_width_rot / 2, object_width_rot / 2,
                              object_width_rot / 2, -object_width_rot / 2,
                              -object_width_rot / 2, object_width_rot / 2],
                             [1, 1, 1, 1, 1, 1, 1, 1]])
                        # Rotation about the camera y-axis plus translation.
                        corners_to_cam = np.array(
                            [[math.cos(angle_rad), 0, math.sin(angle_rad),
                              object_t_cam_rot[0]],
                             [0, 1, 0, object_t_cam_rot[1]],
                             [-math.sin(angle_rad), 0, math.cos(angle_rad),
                              object_t_cam_rot[2]],
                             [0, 0, 0, 1]])
                        object_corners_cam = corners_to_cam.dot(object_corners_rot)
                        object_corners_image = trans_image.dot(object_corners_cam)
                        object_corners_image_2d = np.array([
                            object_corners_image[0] / object_corners_image[2],
                            object_corners_image[1] / object_corners_image[2]
                        ])
                        write_labels(file_output, object_class,
                                     object_corners_image_2d, object_height,
                                     object_width_rot, object_length_rot,
                                     object_t_cam_rot, angle_rad, scores_np[i])
                file_output.close()
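# A note on the angle decoding in create_kitti_labels (an addition, not
# original code): dividing atan2(sin_angle, cos_angle) by two only makes sense
# if the network regresses (sin 2*theta, cos 2*theta) rather than
# (sin theta, cos theta) -- a common choice, since a box is symmetric under
# rotation by pi, so the doubled angle makes the target unique. A minimal
# sanity check of the decoding, under that assumption:
import math

theta = 0.3  # arbitrary test yaw in (-pi/2, pi/2]
sin2, cos2 = math.sin(2 * theta), math.cos(2 * theta)
assert abs(math.atan2(sin2, cos2) / 2 - theta) < 1e-9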
def load_label_map(pbtxt_path):
    label_map = label_map_util.load_labelmap(pbtxt_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=6, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
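# Example usage of load_label_map (a sketch; the pbtxt path is hypothetical).
# create_category_index returns a dict keyed by class id, e.g.
# {1: {'id': 1, 'name': 'Car'}, ...}, so mapping a detected class id to a
# display name is a plain dict lookup:
category_index = load_label_map('data/kitti_label_map.pbtxt')
print(category_index[1]['name'])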
def main():
    parser = argparse.ArgumentParser(
        description="run inference by using specified model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('model_name', help="specify the model name")
    parser.add_argument('work_dir', help="specify the work space directory")
    parser.add_argument('--model_dir', default=None,
                        help="specify the dir storing models.")
    args = parser.parse_args()

    model_dir = args.model_dir
    if model_dir is None:
        assert os.getenv('MODEL_INPUT_DIR') is not None
        model_dir = os.path.join(os.getenv('MODEL_INPUT_DIR'),
                                 'object_detection')

    model_name = args.model_name
    model_file = model_name + '.tar.gz'
    tar_file = tarfile.open(os.path.join(model_dir, model_file))
    recorded_name = model_name
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            recorded_name = member.name
            tar_file.extract(member, args.work_dir)

    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    PATH_TO_CKPT = os.path.join(args.work_dir, recorded_name)
    NUM_CLASSES = 90

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name=model_name)

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    PATH_TO_TEST_IMAGES_DIR = 'test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(1, 2)
    ]

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name(
                '{}/image_tensor:0'.format(model_name))
            detection_boxes = detection_graph.get_tensor_by_name(
                '{}/detection_boxes:0'.format(model_name))
            detection_scores = detection_graph.get_tensor_by_name(
                '{}/detection_scores:0'.format(model_name))
            detection_classes = detection_graph.get_tensor_by_name(
                '{}/detection_classes:0'.format(model_name))
            num_detections = detection_graph.get_tensor_by_name(
                '{}/num_detections:0'.format(model_name))
            for image_path in TEST_IMAGE_PATHS:
                image = Image.open(image_path)
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()
                results = sess.run(
                    [detection_boxes, detection_scores, detection_classes,
                     num_detections],
                    feed_dict={image_tensor: image_np_expanded},
                    options=options,
                    run_metadata=run_metadata)
                cg = CompGraph(model_name, run_metadata, detection_graph,
                               keyword_filter="while")
                cg_tensor_dict = cg.get_tensors()
                cg_sorted_keys = sorted(cg_tensor_dict.keys())
                #cg_sorted_shape = []
                #for cg_key in cg_sorted_keys:
                #    print(cg_key)
                #    t = tf.shape(cg_tensor_dict[cg_key])
                #    cg_sorted_shape.append(t.eval(
                #        feed_dict={image_tensor: image_np_expanded},
                #        session=sess))
                # Evaluate all tensor shapes in a single sess.run instead of
                # one run per tensor.
                cg_sorted_items = []
                for cg_key in cg_sorted_keys:
                    cg_sorted_items.append(tf.shape(cg_tensor_dict[cg_key]))
                cg_sorted_shape = sess.run(
                    cg_sorted_items,
                    feed_dict={image_tensor: image_np_expanded})
                cg.op_analysis(dict(zip(cg_sorted_keys, cg_sorted_shape)),
                               '{}.pickle'.format(model_name))
                # num_detections is a length-1 batch array; report its value.
                print('Image: {}, number of detected: {}'.format(
                    image_path, int(results[3][0])))
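# Since run_metadata above is collected with FULL_TRACE, the same data can
# also be exported as a Chrome trace for op-level timing -- a sketch using
# TF 1.x's timeline helper (not part of the original script; the output file
# name is an assumption):
from tensorflow.python.client import timeline

tl = timeline.Timeline(run_metadata.step_stats)
with open('{}_timeline.json'.format(model_name), 'w') as f:
    f.write(tl.generate_chrome_trace_format())
# The resulting JSON can be inspected at chrome://tracing.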
def find_labels(image_path, image_name, stub, request, model, n):
    """
    Args:
        image_path: path of the input image
        image_name: image name derived from Python's time function
        stub: used for client-server communication
        request: request to send to the server
        model: name of the object detection model; either the pets model or
            the people model
        n: maximum number of labels to consider
    """
    labels = []  # labels from the dataset-specific label map
    bbx = []  # coordinates of the detected bounding boxes
    request.model_spec.name = model
    # Result of the prediction request, with a 10 second timeout.
    result = stub.Predict(request, 10.0)
    # Ids of the detected classes, ordered from the highest score down.
    classes = result.outputs['detection_classes'].float_val
    # Class scores, highest score first.
    scores = result.outputs['detection_scores'].float_val
    #print zip(classes, scores)
    # Bounding box positions.
    boxes = result.outputs['detection_boxes'].float_val
    # Reshape the flat vector so each element is the 4-tuple of one bounding box.
    boxes = np.reshape(boxes, [100, 4])
    # To save the image with its bounding boxes, open the image and use
    # TensorFlow's vis_util library.
    im = misc.imread(image_path)  # reads the image as a multidimensional array
    if model == "pets_model":
        label_map_path = "Label_maps/pets_label_map.pbtxt"  # label map
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=37)
    else:
        label_map_path = "Label_maps/people_label_map.pbtxt"
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=2)
    # Dictionary of key ("id") / value ("class name") pairs.
    category_index = label_map_util.create_category_index(categories)
    # Builds an (img_height, img_width, 3) array with the bounding boxes
    # drawn on top.
    image_vis = vis_util.visualize_boxes_and_labels_on_image_array(
        im,
        boxes,
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        max_boxes_to_draw=10,  # max number of bounding boxes to draw
        min_score_thresh=.6,  # minimum score for a box to be drawn
        use_normalized_coordinates=True,
        line_thickness=5)  # line width of the box outline
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port_result = sock.connect_ex(('localhost', 50070))
    client_hdfs = InsecureClient('http://localhost:50070')  # HDFS client
    if model == "pets_model":
        # Save the array locally as a JPEG image.
        misc.imsave("Images_bbx/{}_pets.jpg".format(image_name), image_vis)
        if port_result == 0:  # if HDFS is reachable, move the image there
            client_hdfs.upload(
                '/zora-object-detection/images/{}_pets.jpg'.format(image_name),
                'Images_bbx/{}_pets.jpg'.format(image_name))
            os.remove("Images_bbx/{}_pets.jpg".format(image_name))
    else:
        misc.imsave("Images_bbx/{}_people.jpg".format(image_name), image_vis)
        if port_result == 0:
            client_hdfs.upload(
                '/zora-object-detection/images/{}_people.jpg'.format(
                    image_name),
                'Images_bbx/{}_people.jpg'.format(image_name))
            os.remove("Images_bbx/{}_people.jpg".format(image_name))
    # Collect the labels found by the detection into a vector passed to the
    # obj_detection script, which builds the sentence the robot will speak.
    # The bounding box coordinates are written to the HDFS log file instead.
    boxes = boxes.tolist()  # turn the multidimensional array into a list
    for i in range(0, n):
        # Keep only labels with a score >= 0.6, skipping boxes already
        # recorded with a higher score.
        if scores[i] >= 0.6 and boxes[i] not in bbx:
            bbx.append(boxes[i])
            labels.append(str(category_index[int(classes[i])]['name']))
    return labels, bbx
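# How the (stub, request) pair passed to find_labels might be built -- a
# sketch based on the grpc.beta-era TensorFlow Serving client API, which
# matches the positional-timeout stub.Predict(request, 10.0) call above.
# Host, port, input key 'inputs', and the image path are assumptions, not
# taken from the original code.
from grpc.beta import implementations
from tensorflow_serving.apis import predict_pb2, prediction_service_pb2

channel = implementations.insecure_channel('localhost', 9000)
stub = prediction_service_pb2.beta_create_PredictionService_stub(channel)
request = predict_pb2.PredictRequest()
with open('input.jpg', 'rb') as f:
    # Encoded-image input, as used by Object Detection API serving exports.
    request.inputs['inputs'].CopyFrom(
        tf.contrib.util.make_tensor_proto(f.read(), shape=[1]))
labels, bbx = find_labels('input.jpg', 'img001', stub, request,
                          'pets_model', 10)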
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf  # noqa
        from object_detection.utils import label_map_util  # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md")  # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2  # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # setup tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []
    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))
    add_entities(entities)
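# A sketch (not part of the Home Assistant component) of how the (session,
# detection_graph) pair handed to each TensorFlowImageProcessor can be run on
# a single frame; detect_objects is a hypothetical helper name, and 'frame'
# is assumed to be an HxWx3 uint8 RGB numpy array.
import numpy as np

def detect_objects(session, detection_graph, frame):
    image_np_expanded = np.expand_dims(frame, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    return session.run([boxes, scores, classes],
                       feed_dict={image_tensor: image_np_expanded})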
class GymObjectDetector(object):
    videoPath = "/home/eamonn/FYP/Videos/videoplayback"
    rotater = IR(videoPath)
    print("Rotation: " + rotater.detectRotation())
    cap = cv2.VideoCapture(videoPath)
    gymObjects = {
        'Gym_Plate': {'Location': '', 'Frame': 0},
        'FootWear': {'Location': [], 'Frame': 0}
    }
    MODEL_PATH = '/home/eamonn/FYP/models/research/object_detection/'
    MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    MODEL_FILE = MODEL_PATH + MODEL_NAME + '.tar.gz'
    PATH_TO_CKPT = (MODEL_PATH + 'gym_plate_inference_graph'
                    + '/frozen_inference_graph.pb')
    PATH_TO_LABELS = os.path.join(MODEL_PATH, 'training/object-detection.pbtxt')
    NUM_CLASSES = 4

    tar_file = tarfile.open(MODEL_FILE)
    for member in tar_file.getmembers():
        file_name = os.path.basename(member.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(member, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # First pass: scan the video until a gym plate is detected.
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            plate_detected = False
            while True:
                ret, image_np = cap.read()
                # Expand dimensions since the model expects images to have
                # shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular
                # object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Each score represents the level of confidence for each of
                # the objects. The score is shown on the result image,
                # together with the class label.
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                if np.squeeze(scores)[0] > 0.5:
                    gymObjects['Gym_Plate']['Location'] = np.squeeze(boxes)[0]
                    gymObjects['Gym_Plate']['Frame'] = cap.get(
                        cv2.CAP_PROP_POS_FRAMES)
                    cv2.destroyAllWindows()
                    break

    # Second pass: switch to the footwear detection graph and keep scanning.
    PATH_TO_CKPT = (MODEL_PATH + 'FDG_inference_graph'
                    + '/frozen_inference_graph.pb')
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            plate_detected = False
            while True:
                ret, image_np = cap.read()
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                if (np.squeeze(scores)[0] > 0.5
                        and np.squeeze(classes).astype(np.int32)[0] == 4):
                    gymObjects['FootWear']['Location'] = np.squeeze(boxes)[0]
                    gymObjects['FootWear']['Frame'] = cap.get(
                        cv2.CAP_PROP_POS_FRAMES)
                    break

    # Convert the normalized [ymin, xmin, ymax, xmax] coordinates to pixels,
    # swapping width and height if the video needs rotation.
    for object_name in ['Gym_Plate', 'FootWear']:
        normalisedCoordinates = gymObjects[object_name]['Location']
        if not rotater.toBeRotated:
            width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
            height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        else:
            width = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
            height = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        normalisedCoordinates[0] = normalisedCoordinates[0] * height
        normalisedCoordinates[1] = normalisedCoordinates[1] * width
        normalisedCoordinates[2] = normalisedCoordinates[2] * height
        normalisedCoordinates[3] = normalisedCoordinates[3] * width
        gymObjects[object_name]['Location'] = normalisedCoordinates
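# The frozen-graph loading boilerplate above appears twice in this class (and
# in most of the other scripts here). A small helper like this sketch would
# de-duplicate it; load_frozen_graph is a hypothetical name, not part of the
# original code.
def load_frozen_graph(pb_path):
    graph = tf.Graph()
    with graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pb_path, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')
    return graph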
def _load_label_map(self):
    label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
def main():
    camera_left = cv2.VideoCapture(0)
    print(camera_left.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH))
    print(camera_left.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT))

    # Note: a string comparison; adequate for 1.4 vs. earlier 1.x releases.
    if tf.__version__ < '1.4.0':
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.4.* or later!')

    # This is needed since the notebook is stored in the object_detection folder.
    sys.path.append("..")

    DATASET_PATH = '/Users/marco/Documents/Datasets/drAIver/object_detector/'

    # What model to download.
    MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used
    # for the object detection.
    PATH_TO_CKPT = DATASET_PATH + MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that is used to add the correct label for each box.
    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

    NUM_CLASSES = 90

    # TODO fix download path
    # opener = urllib.request.URLopener()
    # opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # tar_file = tarfile.open(MODEL_FILE)
    # for file in tar_file.getmembers():
    #     file_name = os.path.basename(file.name)
    #     if 'frozen_inference_graph.pb' in file_name:
    #         tar_file.extract(file, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # For the sake of simplicity only two images are used: image1.jpg and
    # image2.jpg. To test the code with your own images, just add their paths
    # to TEST_IMAGE_PATHS.
    PATH_TO_TEST_IMAGES_DIR = DATASET_PATH + 'test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(1, 3)
    ]

    # Size, in inches, of the output images.
    IMAGE_SIZE = (12, 8)

    while True:
        if camera_left.isOpened():
            _, image_np = camera_left.read()
            # cv2.imshow("camera_left", frame_left)
            # print(frame_left.shape)
            # Expand dimensions since the model expects images to have
            # shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            output_dict = run_inference_for_single_image(
                image_np, detection_graph)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks'),
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow("obj_detection", image_np)
            cv2.waitKey(1)
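# One caveat for the webcam loop above (an addition, not original code):
# cv2.VideoCapture delivers BGR frames, while Object Detection API models are
# trained on RGB input, so converting each frame before inference is usually
# advisable. A minimal sketch:
def to_rgb(frame):
    # OpenCV captures in BGR channel order; the detector expects RGB.
    return cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)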
def main():
    print("Creating eval directory")
    os.makedirs(OUT_PATH_EVAL_IMAGES, exist_ok=True)

    # load frozen graph in memory
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # load label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    ordered_test_set = sorted(
        f for f in os.listdir(PATH_TO_TEST_IMAGES_DIR) if f.endswith(".jpeg"))
    print("We are going to run the inference for {} images".format(
        len(ordered_test_set)))

    for i in range(len(ordered_test_set)):
        print("running {}: {}".format(i, ordered_test_set[i]))
        im_current_path = os.path.join(PATH_TO_TEST_IMAGES_DIR,
                                       ordered_test_set[i])
        im_prev_path = im_current_path if i == 0 else os.path.join(
            PATH_TO_TEST_IMAGES_DIR, ordered_test_set[i - 1])
        current_frame = skimage.io.imread(im_current_path)
        prev_frame = skimage.io.imread(im_prev_path)
        # Difference image between consecutive frames, reduced to one channel.
        image_s = cv2.subtract(current_frame, prev_frame)
        image_s = cv2.cvtColor(image_s, cv2.COLOR_BGR2GRAY)
        image_s = np.expand_dims(image_s, axis=2)
        # Note: this concatenation yields 3 + 1 + 1 = 5 channels, despite the
        # variable name.
        four_channels_im = np.concatenate((current_frame, image_s, image_s),
                                          axis=2)
        # Image.fromarray(image_s).save("tmp.jpeg")
        out_debug_image_path = os.path.join(
            OUT_PATH_EVAL_IMAGES, os.path.basename(im_current_path))
        # if os.path.isfile(out_debug_image_path):
        #     continue
        # Actual detection; the network input is the frame plus the
        # difference channels.
        output_dict = run_inference_for_single_image(four_channels_im,
                                                     detection_graph)
        # Visualization of the results of a detection (draw only poles).
        vis_util.visualize_boxes_and_labels_on_image_array(
            current_frame,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=4)
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(current_frame)
        # draw_court_lines_from_detections(current_frame,
        #                                  output_dict['detection_boxes'],
        #                                  output_dict['detection_classes'],
        #                                  output_dict['detection_scores'])
        Image.fromarray(current_frame).save(out_debug_image_path)
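# If the exported network actually takes a four-channel input, as the
# variable name four_channels_im suggests, the intended stacking would
# presumably append the difference image once -- a sketch under that
# assumption (the original code feeds the five-channel array above):
four_channels_im = np.concatenate((current_frame, image_s), axis=2)  # H x W x 4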
def run_inference(image_path, output_filename):
    PATH_TO_CKPT = Config.PATH_FROZEN_INFERENCE_GRAPH
    PATH_TO_LABELS = 'label_map.pbtxt'
    NUM_CLASSES = 1

    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    # add path to the images to the TEST_IMAGE_PATHS.
    PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(10, 11)
    ]

    # Size, in inches, of the output images.
    IMAGE_SIZE = (10, 8)

    def run_inference_for_single_image(image, graph):
        with graph.as_default():
            with tf.Session() as sess:
                # Get handles to input and output tensors
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {
                    output.name for op in ops for output in op.outputs
                }
                tensor_dict = {}
                for key in [
                        'num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes',
                        'detection_masks'
                ]:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph(
                        ).get_tensor_by_name(tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box
                    # coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(detection_boxes, [0, 0],
                                               [real_num_detection, -1])
                    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                               [real_num_detection, -1, -1])
                    detection_masks_reframed = \
                        utils_ops.reframe_box_masks_to_image_masks(
                            detection_masks, detection_boxes,
                            image.shape[0], image.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)
                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    'image_tensor:0')
                # Run inference
                output_dict = sess.run(
                    tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(image, 0)})
                # all outputs are float32 numpy arrays, so convert types as
                # appropriate
                output_dict['num_detections'] = int(
                    output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict[
                    'detection_boxes'][0]
                output_dict['detection_scores'] = output_dict[
                    'detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]
        return output_dict

    image = Image.open(image_path)
    # The array based representation of the image will be used later in order
    # to prepare the result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have
    # shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)
    #print(output_dict['detection_scores'])
    final_score = np.squeeze(output_dict['detection_scores'])
    count = 0
    for i in range(100):
        if final_score[i] > 0.5:
            count = count + 1
    print("coconut count = %d" % count)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    #print(image.filename)
    plt.savefig('static/out/' + output_filename, bbox_inches='tight')
    output_dict = None
    return count
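# The fixed range(100) in run_inference assumes the model always returns
# exactly 100 detections. An equivalent, shape-safe count over the same
# final_score array (a sketch, not part of the original function):
count = int((final_score > 0.5).sum())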