    def __init__(self):
        logger.info('Loading Tensorflow Detection API')

        weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                                cache_dir=os.path.abspath(config.WEIGHT_PATH),
                                cache_subdir='models')

        extract_path = weights_path.replace('.tar.gz', '')
        if not os.path.exists(extract_path):
            tar = tarfile.open(weights_path, "r:gz")
            tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
            tar.close()
        pb_path = os.path.join(extract_path, self.PB_NAME)

        self.graph = tf.Graph()
        with self.graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(pb_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        self.categories = label_map_util.convert_label_map_to_categories(self.label_map,
                                                                         max_num_classes=self.NUM_CLASSES,
                                                                         use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)
  def _build_metric_names(self):
    """Builds a list with metric names."""

    self._metric_names = [
        self._metric_prefix + 'Precision/mAP@{}IOU'.format(
            self._matching_iou_threshold)
    ]
    if self._evaluate_corlocs:
      self._metric_names.append(
          self._metric_prefix +
          'Precision/meanCorLoc@{}IOU'.format(self._matching_iou_threshold))

    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(self._num_classes):
      if idx + self._label_id_offset in category_index:
        category_name = category_index[idx + self._label_id_offset]['name']
        try:
          category_name = unicode(category_name, 'utf-8')
        except TypeError:
          pass
        category_name = unicodedata.normalize('NFKD', category_name).encode(
            'ascii', 'ignore')
        self._metric_names.append(
            self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
                self._matching_iou_threshold, category_name))
        if self._evaluate_corlocs:
          self._metric_names.append(
              self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
              .format(self._matching_iou_threshold, category_name))
  def evaluate(self):
    """Compute evaluation result.

    Returns:
      A dictionary of metrics with the following fields -

      1. summary_metrics:
        '<prefix if not empty>_Precision/mAP@<matching_iou_threshold>IOU': mean
        average precision at the specified IOU threshold.

      2. per_category_ap: category specific results with keys of the form
        '<prefix if not empty>_PerformanceByCategory/
        mAP@<matching_iou_threshold>IOU/category'.
    """
    (per_class_ap, mean_ap, per_class_precision, per_class_recall,
     per_class_corloc, mean_corloc) = (
         self._evaluation.evaluate())
    pascal_metrics = {self._metric_names[0]: mean_ap}
    if self._evaluate_corlocs:
      pascal_metrics[self._metric_names[1]] = mean_corloc
    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(per_class_ap.size):
      if idx + self._label_id_offset in category_index:
        category_name = category_index[idx + self._label_id_offset]['name']
        try:
          category_name = unicode(category_name, 'utf-8')
        except TypeError:
          pass
        category_name = unicodedata.normalize(
            'NFKD', category_name).encode('ascii', 'ignore')
        display_name = (
            self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
                self._matching_iou_threshold, category_name))
        pascal_metrics[display_name] = per_class_ap[idx]

        # Optionally add precision and recall values
        if self._evaluate_precision_recall:
          display_name = (
              self._metric_prefix +
              'PerformanceByCategory/Precision@{}IOU/{}'.format(
                  self._matching_iou_threshold, category_name))
          pascal_metrics[display_name] = per_class_precision[idx]
          display_name = (
              self._metric_prefix +
              'PerformanceByCategory/Recall@{}IOU/{}'.format(
                  self._matching_iou_threshold, category_name))
          pascal_metrics[display_name] = per_class_recall[idx]

        # Optionally add CorLoc metrics.
        if self._evaluate_corlocs:
          display_name = (
              self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
              .format(self._matching_iou_threshold, category_name))
          pascal_metrics[display_name] = per_class_corloc[idx]

    return pascal_metrics
  def test_create_category_index(self):
    categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
    category_index = label_map_util.create_category_index(categories)
    self.assertDictEqual({
        1: {
            'name': u'1',
            'id': 1
        },
        2: {
            'name': u'2',
            'id': 2
        }
    }, category_index)
def _create_tf_record_from_coco_annotations(
    annotations_file, image_dir, output_path, include_masks, num_shards):
  """Loads COCO annotation json files and converts to tf.Record format.

  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentation masks
      (PNG encoded) in the result. Default: False.
    num_shards: number of output file shards.
  """
  with contextlib2.ExitStack() as tf_record_close_stack, \
      tf.gfile.GFile(annotations_file, 'r') as fid:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_path, num_shards)
    groundtruth_data = json.load(fid)
    images = groundtruth_data['images']
    category_index = label_map_util.create_category_index(
        groundtruth_data['categories'])

    annotations_index = {}
    if 'annotations' in groundtruth_data:
      tf.logging.info(
          'Found groundtruth annotations. Building annotations index.')
      for annotation in groundtruth_data['annotations']:
        image_id = annotation['image_id']
        if image_id not in annotations_index:
          annotations_index[image_id] = []
        annotations_index[image_id].append(annotation)
    missing_annotation_count = 0
    for image in images:
      image_id = image['id']
      if image_id not in annotations_index:
        missing_annotation_count += 1
        annotations_index[image_id] = []
    tf.logging.info('%d images are missing annotations.',
                    missing_annotation_count)

    total_num_annotations_skipped = 0
    for idx, image in enumerate(images):
      if idx % 100 == 0:
        tf.logging.info('On image %d of %d', idx, len(images))
      annotations_list = annotations_index[image['id']]
      _, tf_example, num_annotations_skipped = create_tf_example(
          image, annotations_list, image_dir, category_index, include_masks)
      total_num_annotations_skipped += num_annotations_skipped
      shard_idx = idx % num_shards
      output_tfrecords[shard_idx].write(tf_example.SerializeToString())
    tf.logging.info('Finished writing, skipped %d annotations.',
                    total_num_annotations_skipped)
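

# A minimal usage sketch for the converter above; the paths are hypothetical
# (in the upstream create_coco_tf_record script they typically come from
# command-line flags).
_create_tf_record_from_coco_annotations(
    annotations_file='annotations/instances_val2017.json',
    image_dir='val2017',
    output_path='coco_val.record',
    include_masks=False,
    num_shards=10)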
  def __init__(self):
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
      od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
      if 'detection_masks' in tensor_dict:
        # The following processing is only for a single image.
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframing is required to translate the masks from box coordinates to
        # image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        # Use the dynamic height and width of the input placeholder when
        # reframing the masks to image coordinates.
        image_shape = tf.shape(image_tensor)
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension.
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
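
  # Hedged sketch, not part of the original snippet: with the tensor handles
  # built above, single-image inference would typically look like this
  # (assumes numpy is imported as np and `image` is an HxWx3 uint8 array).
  def run_inference(self, image):
    output_dict = self.session.run(
        self.tensor_dict,
        feed_dict={self.image_tensor: np.expand_dims(image, 0)})
    # Strip the batch dimension and convert types for downstream use.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    output_dict['detection_classes'] = output_dict[
        'detection_classes'][0].astype(np.int64)
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
      output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict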
  def evaluate(self):
    """Compute evaluation result.

    Returns:
      A dictionary of metrics with the following fields -

      1. summary_metrics:
        'Precision/mAP@<matching_iou_threshold>IOU': mean average precision at
        the specified IOU threshold.

      2. per_category_ap: category specific results with keys of the form
        'PerformanceByCategory/mAP@<matching_iou_threshold>IOU/category'.
    """
    (per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc) = (
        self._evaluation.evaluate())
    pascal_metrics = {
        self._metric_prefix +
        'Precision/mAP@{}IOU'.format(self._matching_iou_threshold):
            mean_ap
    }
    if self._evaluate_corlocs:
      pascal_metrics[self._metric_prefix + 'Precision/meanCorLoc@{}IOU'.format(
          self._matching_iou_threshold)] = mean_corloc
    category_index = label_map_util.create_category_index(self._categories)
    for idx in range(per_class_ap.size):
      if idx + self._label_id_offset in category_index:
        display_name = (
            self._metric_prefix + 'PerformanceByCategory/AP@{}IOU/{}'.format(
                self._matching_iou_threshold,
                category_index[idx + self._label_id_offset]['name']))
        pascal_metrics[display_name] = per_class_ap[idx]

        # Optionally add CorLoc metrics.
        if self._evaluate_corlocs:
          display_name = (
              self._metric_prefix + 'PerformanceByCategory/CorLoc@{}IOU/{}'
              .format(self._matching_iou_threshold,
                      category_index[idx + self._label_id_offset]['name']))
          pascal_metrics[display_name] = per_class_corloc[idx]

    return pascal_metrics
CWD_PATH = os.getcwd()

# Path to the frozen detection graph. This is the actual model used for object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb')

# List of the strings that are used to add the correct label to each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    scores = detection_graph.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')
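
    # Hedged completion sketch (the original snippet stops after fetching the
    # tensors): run the session and draw the detections, assuming the API's
    # visualization_utils module is imported as vis_util elsewhere.
    (boxes, scores, classes, num_detections) = sess.run(
        [boxes, scores, classes, num_detections],
        feed_dict={image_tensor: image_np_expanded})
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        line_thickness=8)
    return image_np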
def evaluate_detection_results_pascal_voc(result_lists,
                                          categories,
                                          label_id_offset=0,
                                          iou_thres=0.5,
                                          corloc_summary=False):
    """Computes Pascal VOC detection metrics given groundtruth and detections.

  This function computes Pascal VOC metrics from the detections and groundtruth
  boxes encoded in result_lists and returns a dictionary mapping metric names
  to scalar values.

  Args:
    result_lists: a dictionary holding lists of groundtruth and detection
      data corresponding to each image being evaluated.  The following keys
      are required:
        'image_id': a list of string ids
        'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
        'detection_scores': a list of float32 numpy arrays of shape [N]
        'detection_classes': a list of int32 numpy arrays of shape [N]
        'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
        'groundtruth_classes': a list of int32 numpy arrays of shape [M]
      and the remaining fields below are optional:
        'difficult': a list of boolean arrays of shape [M] indicating the
          difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
          this information and it is used to remove difficult examples from eval
          in order to not penalize the models on them.
      Note that it is okay to have additional fields in result_lists --- they
      are simply ignored.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
    label_id_offset: an integer offset for the label space.
    iou_thres: float determining the IoU threshold at which a box is considered
        correct. Defaults to the standard 0.5.
    corloc_summary: boolean. If True, also outputs CorLoc metrics.

  Returns:
    A dictionary of metric names to scalar values.

  Raises:
    ValueError: if the set of keys in result_lists is not a superset of the
      expected list of keys.  Unexpected keys are ignored.
    ValueError: if the lists in result_lists have inconsistent sizes.
  """
    # check for expected keys in result_lists
    expected_keys = [
        'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
    ]
    expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
    if not set(expected_keys).issubset(set(result_lists.keys())):
        raise ValueError('result_lists does not have expected key set.')
    num_results = len(result_lists[expected_keys[0]])
    for key in expected_keys:
        if len(result_lists[key]) != num_results:
            raise ValueError('Inconsistent list sizes in result_lists')

    # Pascal VOC evaluator assumes foreground index starts from zero.
    categories = copy.deepcopy(categories)
    for idx in range(len(categories)):
        categories[idx]['id'] -= label_id_offset

    # Derive num_classes from the largest category id.
    num_classes = max([cat['id'] for cat in categories]) + 1
    logging.info('Computing Pascal VOC metrics on results.')
    if all(image_id.isdigit() for image_id in result_lists['image_id']):
        image_ids = [int(image_id) for image_id in result_lists['image_id']]
    else:
        image_ids = range(num_results)

    evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
        num_classes, matching_iou_threshold=iou_thres)

    difficult_lists = None
    if 'difficult' in result_lists and result_lists['difficult']:
        difficult_lists = result_lists['difficult']
    for idx, image_id in enumerate(image_ids):
        difficult = None
        if difficult_lists is not None and difficult_lists[idx].size:
            difficult = difficult_lists[idx].astype(np.bool)
        evaluator.add_single_ground_truth_image_info(
            image_id, result_lists['groundtruth_boxes'][idx],
            result_lists['groundtruth_classes'][idx] - label_id_offset,
            difficult)
        evaluator.add_single_detected_image_info(
            image_id, result_lists['detection_boxes'][idx],
            result_lists['detection_scores'][idx],
            result_lists['detection_classes'][idx] - label_id_offset)
    per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
        evaluator.evaluate())

    metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
    category_index = label_map_util.create_category_index(categories)
    for idx in range(per_class_ap.size):
        if idx in category_index:
            display_name = ('PerformanceByCategory/mAP@{}IOU/{}'.format(
                iou_thres, category_index[idx]['name']))
            metrics[display_name] = per_class_ap[idx]

    if corloc_summary:
        metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
        for idx in range(per_class_corloc.size):
            if idx in category_index:
                display_name = ('PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                    iou_thres, category_index[idx]['name']))
                metrics[display_name] = per_class_corloc[idx]
    return metrics
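

# A minimal usage sketch for the evaluator above, with hypothetical values:
# one image containing a single groundtruth box and one matching detection
# (numpy is assumed to be imported as np, as in the function body).
example_result_lists = {
    'image_id': ['image_0'],
    'groundtruth_boxes': [np.array([[10., 10., 50., 50.]], dtype=np.float32)],
    'groundtruth_classes': [np.array([1], dtype=np.int32)],
    'detection_boxes': [np.array([[10., 10., 50., 50.]], dtype=np.float32)],
    'detection_scores': [np.array([0.9], dtype=np.float32)],
    'detection_classes': [np.array([1], dtype=np.int32)],
}
example_categories = [{'id': 1, 'name': 'cat'}]
example_metrics = evaluate_detection_results_pascal_voc(
    example_result_lists, example_categories, label_id_offset=1)
# example_metrics maps 'Precision/mAP@0.5IOU' and the per-category key to
# scalar average-precision values.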
def visualize_detection_results(result_dict,
                                tag,
                                global_step,
                                categories,
                                summary_dir='',
                                export_dir='',
                                agnostic_mode=False,
                                show_groundtruth=False,
                                groundtruth_box_visualization_color='black',
                                min_score_thresh=.5,
                                max_num_predictions=20,
                                skip_scores=False,
                                skip_labels=False,
                                keep_image_id_for_visualization_export=False):
  """Visualizes detection results and writes visualizations to image summaries.

  This function visualizes an image with its detected bounding boxes and writes
  to image summaries which can be viewed on tensorboard.  It optionally also
  writes images to a directory. If an entry is missing from the label map, the
  class name shown in the visualization is "N/A".

  Args:
    result_dict: a dictionary holding groundtruth and detection
      data corresponding to each image being evaluated.  The following keys
      are required:
        'original_image': a numpy array representing the image with shape
          [1, height, width, 3] or [1, height, width, 1]
        'detection_boxes': a numpy array of shape [N, 4]
        'detection_scores': a numpy array of shape [N]
        'detection_classes': a numpy array of shape [N]
      The following keys are optional:
        'groundtruth_boxes': a numpy array of shape [N, 4]
        'groundtruth_keypoints': a numpy array of shape [N, num_keypoints, 2]
      Detections are assumed to be provided in decreasing order of score, and
      scores are assumed to be probabilities between 0 and 1.
    tag: tensorboard tag (string) to associate with image.
    global_step: global step at which the visualization are generated.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
          'supercategory': (optional) string representing the supercategory
            e.g., 'animal', 'vehicle', 'food', etc
    summary_dir: the output directory to which the image summaries are written.
    export_dir: the output directory to which images are written.  If this is
      empty (default), then images are not exported.
    agnostic_mode: boolean (default: False) controlling whether to evaluate in
      class-agnostic mode or not.
    show_groundtruth: boolean (default: False) controlling whether to show
      groundtruth boxes in addition to detected boxes
    groundtruth_box_visualization_color: box color for visualizing groundtruth
      boxes
    min_score_thresh: minimum score threshold for a box to be visualized
    max_num_predictions: maximum number of detections to visualize
    skip_scores: whether to skip score when drawing a single detection
    skip_labels: whether to skip label when drawing a single detection
    keep_image_id_for_visualization_export: whether to keep image identifier in
      filename when exported to export_dir
  Raises:
    ValueError: if result_dict does not contain the expected keys (i.e.,
      'original_image', 'detection_boxes', 'detection_scores',
      'detection_classes')
  """
  detection_fields = fields.DetectionResultFields
  input_fields = fields.InputDataFields
  if not set([
      input_fields.original_image,
      detection_fields.detection_boxes,
      detection_fields.detection_scores,
      detection_fields.detection_classes,
  ]).issubset(set(result_dict.keys())):
    raise ValueError('result_dict does not contain all expected keys.')
  if show_groundtruth and input_fields.groundtruth_boxes not in result_dict:
    raise ValueError('If show_groundtruth is enabled, result_dict must contain '
                     'groundtruth_boxes.')
  logging.info('Creating detection visualizations.')
  category_index = label_map_util.create_category_index(categories)

  image = np.squeeze(result_dict[input_fields.original_image], axis=0)
  if image.shape[2] == 1:  # If one channel image, repeat in RGB.
    image = np.tile(image, [1, 1, 3])
  detection_boxes = result_dict[detection_fields.detection_boxes]
  detection_scores = result_dict[detection_fields.detection_scores]
  detection_classes = np.int32((result_dict[
      detection_fields.detection_classes]))
  detection_keypoints = result_dict.get(detection_fields.detection_keypoints)
  detection_masks = result_dict.get(detection_fields.detection_masks)
  detection_boundaries = result_dict.get(detection_fields.detection_boundaries)

  # Plot groundtruth underneath detections
  if show_groundtruth:
    groundtruth_boxes = result_dict[input_fields.groundtruth_boxes]
    groundtruth_keypoints = result_dict.get(input_fields.groundtruth_keypoints)
    vis_utils.visualize_boxes_and_labels_on_image_array(
        image=image,
        boxes=groundtruth_boxes,
        classes=None,
        scores=None,
        category_index=category_index,
        keypoints=groundtruth_keypoints,
        use_normalized_coordinates=False,
        max_boxes_to_draw=None,
        groundtruth_box_visualization_color=groundtruth_box_visualization_color)
  vis_utils.visualize_boxes_and_labels_on_image_array(
      image,
      detection_boxes,
      detection_classes,
      detection_scores,
      category_index,
      instance_masks=detection_masks,
      instance_boundaries=detection_boundaries,
      keypoints=detection_keypoints,
      use_normalized_coordinates=False,
      max_boxes_to_draw=max_num_predictions,
      min_score_thresh=min_score_thresh,
      agnostic_mode=agnostic_mode,
      skip_scores=skip_scores,
      skip_labels=skip_labels)

  if export_dir:
    if keep_image_id_for_visualization_export and result_dict[fields.
                                                              InputDataFields()
                                                              .key]:
      export_path = os.path.join(export_dir, 'export-{}-{}.png'.format(
          tag, result_dict[fields.InputDataFields().key]))
    else:
      export_path = os.path.join(export_dir, 'export-{}.png'.format(tag))
    vis_utils.save_image_array_as_png(image, export_path)

  summary = tf.Summary(value=[
      tf.Summary.Value(
          tag=tag,
          image=tf.Summary.Image(
              encoded_image_string=vis_utils.encode_image_array_as_png_str(
                  image)))
  ])
  summary_writer = tf.summary.FileWriterCache.get(summary_dir)
  summary_writer.add_summary(summary, global_step)

  logging.info('Detection visualizations written to summary with tag %s.', tag)
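

# A minimal usage sketch for the function above, with hypothetical values.
# It assumes the Object Detection API's standard_fields module is imported as
# `fields` and numpy as `np`, as in the function body.
dummy_result_dict = {
    fields.InputDataFields.original_image:
        np.zeros((1, 480, 640, 3), dtype=np.uint8),
    fields.DetectionResultFields.detection_boxes:
        np.array([[100., 100., 300., 400.]], dtype=np.float32),
    fields.DetectionResultFields.detection_scores:
        np.array([0.9], dtype=np.float32),
    fields.DetectionResultFields.detection_classes:
        np.array([1], dtype=np.int32),
}
visualize_detection_results(
    dummy_result_dict,
    tag='image-0',
    global_step=0,
    categories=[{'id': 1, 'name': 'cat'}],
    summary_dir='/tmp/detection_summaries')  # hypothetical output directory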
reference_pts = np.float32([
    list(reference_tags[0]['center']),
    list(reference_tags[1]['center']),
    list(reference_tags[2]['center']),
    list(reference_tags[3]['center'])
])

# cv2.namedWindow("Reference", cv2.WINDOW_KEEPRATIO)
cv2.namedWindow("Distorced", cv2.WINDOW_KEEPRATIO)
cv2.namedWindow("Reverse Warped", cv2.WINDOW_KEEPRATIO)

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    sess = tf.Session(graph=detection_graph)

image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
def get_num_classes(pbtxt_fname):
    label_map = label_map_util.load_labelmap(pbtxt_fname)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return len(category_index.keys())
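

# Usage sketch with a hypothetical label map path: returns the number of
# categories actually listed in the file.
num_label_map_classes = get_num_classes(
    'object_detection/data/mscoco_label_map.pbtxt')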
    def run(self):

        time1 = time.time()
        MIN_ratio = 0.8

        #MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
        GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
        LABEL_FILE = 'data/mscoco_label_map.pbtxt'
        NUM_CLASSES = 90
        #end define

        label_map = lmu.load_labelmap(LABEL_FILE)
        categories = lmu.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        categories_index = lmu.create_category_index(categories)

        print("call label_map & categories : %0.5f" % (time.time() - time1))

        graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME

        #thread function
        def find_detection_target(categories_index, classes, scores):
            time1_1 = time.time()  # thread function start time
            print("thread started")

            objects = []  # create list
            for index, value in enumerate(classes[0]):
                object_dict = {}  # dictionary
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                            scores[0][index]
                    objects.append(object_dict)  # append to list
            print(objects)

            print("thread function processing time %0.5f" % (time.time() - time1_1))

        #end thread function

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(graph_file, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            sess = tf.Session(graph=detection_graph)

        print("store in memoey time : %0.5f" % (time.time() - time1))

        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')

        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        print("make tensor time : %0.5f" % (time.time() - time1))

        #capture = cv2.VideoCapture(0)
        capture = cv2.VideoCapture("20190916_162900.mp4")
        prevtime = 0

        #thread_1 = Process(target = find_detection_target, args = (categories_index, classes, scores))  # create thread
        print("load video time : %0.5f" % (time.time() - time1))

        while True:
            ret, frame = capture.read()
            frame_expanded = np.expand_dims(frame, axis=0)
            height, width, channel = frame.shape

            # Display the frame rate.
            curtime = time.time()
            sec = curtime - prevtime
            prevtime = curtime
            fps = 1 / sec
            fps_text = "FPS : %0.1f" % fps
            cv2.putText(frame, fps_text, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0))
            # end frame rate

            (boxes, scores, classes, nums) = sess.run(  # np.ndarray
                [
                    detection_boxes, detection_scores, detection_classes,
                    num_detections
                ],
                feed_dict={image_tensor: frame_expanded})  # end sess.run()

            vis_util.visualize_boxes_and_labels_on_image_array(
                frame,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                categories_index,
                use_normalized_coordinates=True,
                min_score_thresh=MIN_ratio,  # minimum detection confidence
                line_thickness=2)  # box line thickness

            if ret:
                # https://stackoverflow.com/a/55468544/6622587
                rgbImage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                h, w, ch = rgbImage.shape
                bytesPerLine = ch * w
                convertToQtFormat = QtGui.QImage(rgbImage.data, w, h,
                                                 bytesPerLine,
                                                 QtGui.QImage.Format_RGB888)
                p = convertToQtFormat.scaled(640, 480, Qt.KeepAspectRatio)
                self.changePixmap.emit(p)

            # objects = []  # create list
            for index, value in enumerate(classes[0]):
                object_dict = {}  # dictionary
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                        scores[0][index]
                    # objects.append(object_dict)  # append to list

                    # Box coordinate layout (image info), as used in
                    # visualize_boxes_and_labels_on_image_array:
                    #     for box, color in box_to_color_map.items():
                    #         ymin, xmin, ymax, xmax = box
                    #     [index][0]  [1]   [2]   [3]

                    ymin = int((boxes[0][index][0] * height))
                    xmin = int((boxes[0][index][1] * width))
                    ymax = int((boxes[0][index][2] * height))
                    xmax = int((boxes[0][index][3] * width))

                    Result = frame[ymin:ymax, xmin:xmax]
                    cv2.imwrite('car.jpg', Result)
                    try:
                        result_chars = NP.number_recognition('car.jpg')
                        ui.label_6.setText(result_chars)
                        # print(NP.check())

                    except:
                        print("응안돼")
                    #cv2.imshow('re', Result)
            # print(objects)

            key = cv2.waitKey(1) & 0xFF

            if key == ord("q"):
                break
def main(_):
    tf.logging.set_verbosity(tf.logging.INFO)

    required_flags = [
        'input_tfrecord_paths', 'output_tfrecord_path', 'inference_graph',
        'meta', 'label_map'
    ]
    for flag_name in required_flags:
        if not getattr(FLAGS, flag_name):
            raise ValueError('Flag --{} is required'.format(flag_name))

    ## Load metadata for YOLO
    meta = detection_inference.build_meta(FLAGS.meta)

    ## Load the category_index for COCO data from the detection label map
    NUM_CLASSES = 90
    label_map = label_map_util.load_labelmap(FLAGS.label_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    #pdb.set_trace()

    with tf.Session() as sess:
        input_tfrecord_paths = [
            v for v in FLAGS.input_tfrecord_paths.split(',') if v
        ]
        tf.logging.info('Reading input from %d files',
                        len(input_tfrecord_paths))
        serialized_example_tensor, image_tensor = detection_inference.build_input(
            meta, input_tfrecord_paths)
        tf.logging.info('Reading graph and building model...')
        detected_boxes_tensor = detection_inference.build_inference_graph(
            image_tensor, FLAGS.inference_graph)

        tf.logging.info('Running inference and writing output to {}'.format(
            FLAGS.output_tfrecord_path))
        sess.run(tf.local_variables_initializer())
        tf.train.start_queue_runners()

        print("entering into loop ")
        starttime = datetime.datetime.now()
        with tf.python_io.TFRecordWriter(
                FLAGS.output_tfrecord_path) as tf_record_writer:
            try:
                for counter in itertools.count():
                    tf.logging.log_every_n(tf.logging.INFO,
                                           'Processed %d images...', 10,
                                           counter)
                    t0 = datetime.datetime.now()
                    tf_example = detection_inference.infer_detections_and_add_to_example(
                        meta, category_index, serialized_example_tensor,
                        detected_boxes_tensor, FLAGS.discard_image_pixels)
                    t1 = datetime.datetime.now()
                    tf.logging.info('processed an image in %d ms',
                                    (t1 - t0).microseconds / 1000)
                    tf_record_writer.write(tf_example.SerializeToString())
            except tf.errors.OutOfRangeError:
                tf.logging.info('Finished processing records')
            endtime = datetime.datetime.now()
            print("running time is ")
            print((endtime - starttime).seconds)
def _create_tf_record_from_coco_annotations(
        annotations_file,
        image_dir,
        output_path,
        include_masks,
        num_shards,
        keypoint_annotations_file='',
        densepose_annotations_file='',
        remove_non_person_annotations=False,
        remove_non_person_images=False):
    """Loads COCO annotation json files and converts to tf.Record format.
  Args:
    annotations_file: JSON file containing bounding box annotations.
    image_dir: Directory containing the image files.
    output_path: Path to output tf.Record file.
    include_masks: Whether to include instance segmentations masks
      (PNG encoded) in the result. default: False.
    num_shards: number of output file shards.
    keypoint_annotations_file: JSON file containing the person keypoint
      annotations. If empty, then no person keypoint annotations will be
      generated.
    densepose_annotations_file: JSON file containing the DensePose annotations.
      If empty, then no DensePose annotations will be generated.
    remove_non_person_annotations: Whether to remove any annotations that are
      not the "person" class.
    remove_non_person_images: Whether to remove any images that do not contain
      at least one "person" annotation.
  """
    with contextlib2.ExitStack() as tf_record_close_stack, \
        tf.gfile.GFile(annotations_file, 'r') as fid:
        output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
            tf_record_close_stack, output_path, num_shards)
        groundtruth_data = json.load(fid)
        images = groundtruth_data['images']
        category_index = label_map_util.create_category_index(
            groundtruth_data['categories'])

        annotations_index = {}
        if 'annotations' in groundtruth_data:
            logging.info(
                'Found groundtruth annotations. Building annotations index.')
            for annotation in groundtruth_data['annotations']:
                image_id = annotation['image_id']
                if image_id not in annotations_index:
                    annotations_index[image_id] = []
                annotations_index[image_id].append(annotation)
        missing_annotation_count = 0
        for image in images:
            image_id = image['id']
            if image_id not in annotations_index:
                missing_annotation_count += 1
                annotations_index[image_id] = []
        logging.info('%d images are missing annotations.',
                     missing_annotation_count)

        keypoint_annotations_index = {}
        if keypoint_annotations_file:
            with tf.gfile.GFile(keypoint_annotations_file, 'r') as kid:
                keypoint_groundtruth_data = json.load(kid)
            if 'annotations' in keypoint_groundtruth_data:
                for annotation in keypoint_groundtruth_data['annotations']:
                    image_id = annotation['image_id']
                    if image_id not in keypoint_annotations_index:
                        keypoint_annotations_index[image_id] = {}
                    keypoint_annotations_index[image_id][
                        annotation['id']] = annotation

        densepose_annotations_index = {}
        if densepose_annotations_file:
            with tf.gfile.GFile(densepose_annotations_file, 'r') as fid:
                densepose_groundtruth_data = json.load(fid)
            if 'annotations' in densepose_groundtruth_data:
                for annotation in densepose_groundtruth_data['annotations']:
                    image_id = annotation['image_id']
                    if image_id not in densepose_annotations_index:
                        densepose_annotations_index[image_id] = {}
                    densepose_annotations_index[image_id][
                        annotation['id']] = annotation

        total_num_annotations_skipped = 0
        total_num_keypoint_annotations_skipped = 0
        total_num_densepose_annotations_skipped = 0
        for idx, image in enumerate(images):
            if idx % 100 == 0:
                logging.info('On image %d of %d', idx, len(images))
            annotations_list = annotations_index[image['id']]
            keypoint_annotations_dict = None
            if keypoint_annotations_file:
                keypoint_annotations_dict = {}
                if image['id'] in keypoint_annotations_index:
                    keypoint_annotations_dict = keypoint_annotations_index[
                        image['id']]
            densepose_annotations_dict = None
            if densepose_annotations_file:
                densepose_annotations_dict = {}
                if image['id'] in densepose_annotations_index:
                    densepose_annotations_dict = densepose_annotations_index[
                        image['id']]
            (_, tf_example, num_annotations_skipped,
             num_keypoint_annotations_skipped,
             num_densepose_annotations_skipped) = create_tf_example(
                 image, annotations_list, image_dir, category_index,
                 include_masks, keypoint_annotations_dict,
                 densepose_annotations_dict, remove_non_person_annotations,
                 remove_non_person_images)
            total_num_annotations_skipped += num_annotations_skipped
            total_num_keypoint_annotations_skipped += num_keypoint_annotations_skipped
            total_num_densepose_annotations_skipped += (
                num_densepose_annotations_skipped)
            shard_idx = idx % num_shards
            if tf_example:
                output_tfrecords[shard_idx].write(
                    tf_example.SerializeToString())
        logging.info('Finished writing, skipped %d annotations.',
                     total_num_annotations_skipped)
        if keypoint_annotations_file:
            logging.info('Finished writing, skipped %d keypoint annotations.',
                         total_num_keypoint_annotations_skipped)
        if densepose_annotations_file:
            logging.info('Finished writing, skipped %d DensePose annotations.',
                         total_num_densepose_annotations_skipped)
def object_detection(t, tensorflow_venv, object_detection_api, model_path,
                     detection_threshold, detection_graph, sess,
                     category_index, bridge, sign,
                     camera):  #, left_eye, right_eye):
    from PIL import Image, ImageDraw
    import numpy
    # initialize the TensorFlow Object Detection session and store it as needed
    if detection_graph.value is None:

        # import TensorFlow in the NRP, update this path for your local installation
        try:
            import site
            site.addsitedir(tensorflow_venv.value +
                            '/lib/python2.7/site-packages')
            import tensorflow as tf
        except:
            clientLogger.info(
                "Unable to import TensorFlow, did you change the path in the transfer function?"
            )
            raise

        # configure Object Detection environment
        import sys

        # paths to saved model states, update these paths if different in your local installation
        MODEL_BASE = object_detection_api.value
        sys.path.append(MODEL_BASE)
        sys.path.append(MODEL_BASE + '/object_detection')
        sys.path.append(MODEL_BASE + '/slim')

        PATH_TO_CKPT = model_path.value + '/frozen_inference_graph.pb'
        PATH_TO_LABELS = model_path.value + '/label_map.pbtxt'

        # initialize the detection graph
        import object_detection.utils.label_map_util as label_map_util
        #from utils import label_map_util
        detection_graph.value = tf.Graph()
        with detection_graph.value.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
                sess.value = tf.Session(graph=detection_graph.value)

        # create internal label and category mappings
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=4, use_display_name=True)
        category_index.value = label_map_util.create_category_index(categories)

        # OpenCV bridge for ROS <-> CV image conversion
        from cv_bridge import CvBridge, CvBridgeError
        bridge.value = CvBridge()

        # initialized, start searching
        sign.value = ''

    # no image received yet, do nothing
    if camera.value is None:
        return

    # convert the ROS image to an OpenCV image and Numpy array
    cv_image = bridge.value.imgmsg_to_cv2(camera.value, "rgb8")
    numpy_image = np.expand_dims(cv_image, axis=0)

    # run the actual detection
    image_tensor = detection_graph.value.get_tensor_by_name('image_tensor:0')
    boxes = detection_graph.value.get_tensor_by_name('detection_boxes:0')
    scores = detection_graph.value.get_tensor_by_name('detection_scores:0')
    classes = detection_graph.value.get_tensor_by_name('detection_classes:0')
    num_detections = detection_graph.value.get_tensor_by_name(
        'num_detections:0')

    (boxes, scores, classes,
     num_detections) = sess.value.run([boxes, scores, classes, num_detections],
                                      feed_dict={image_tensor: numpy_image})

    boxes, scores, classes, num_detections = map(
        np.squeeze, [boxes, scores, classes, num_detections])

    # annotate detections on the image
    pil_image = Image.fromarray(cv_image)
    detections = []
    closest_sign = {'name': sign.value, 'square': -1}

    for i in range(num_detections):

        # only accept high enough detection scores
        if scores[i] < detection_threshold.value: continue

        name = category_index.value[classes[i]]['name']
        # log the detection at timestamp
        clientLogger.info(t, name, scores[i])
        detections.append(name)

        # annotate the image with boxes
        draw = ImageDraw.Draw(pil_image)
        im_width, im_height = pil_image.size
        ymin, xmin, ymax, xmax = boxes[i]

        (left, right, top, bottom) = (xmin * im_width, xmax * im_width,
                                      ymin * im_height, ymax * im_height)
        draw.line([(left, top), (left, bottom), (right, bottom), (right, top),
                   (left, top)],
                  width=int(scores[i] * 10) - 4,
                  fill='red')

        square = (xmax - xmin) * (ymax - ymin)
        if closest_sign['square'] < square:
            closest_sign['square'] = square
            closest_sign['name'] = name

    clientLogger.info("Current sign:", closest_sign['name'])
    sign.value = closest_sign['name']

    # publish a ROS image with annotations
    return bridge.value.cv2_to_imgmsg(numpy.array(pil_image), "rgb8")
def camera_connection(process):

    inside = 0
    print("Started process", process)
    # log.info("{}:Started process".format(process))

    # Taking current directory
    CWD_PATH = os.getcwd()
    # warning_enable = False
    try:
        # General name for the model
        MODEL_NAME = 'inference_graph1'

        # Assigning label txt file
        PATH_TO_LABELS = os.path.join(CWD_PATH, 'training1', 'labelmap.pbtxt')

        # PATH_TO_LABELS="/home/server3/tensorflow1/models/research/object_detection/shell2sideviewtraining/labelmap.pbtxt"
        NUM_CLASSES = 6

        # Path to frozen detection graph .pb file, which contains the model that is used for object detection.
        PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                    'frozen_inference_graph.pb')

        # Path to label map file
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)

        # print("label_map={}".format(label_map))
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        # Taking Categories
        category_index = label_map_util.create_category_index(categories)

        # Load the Tensorflow model into memory.
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            # with tf.io.GFile(PATH_TO_CKPT, 'rb') as fid:
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            # This is TensorFlow 1.x code to run the above model
            sess = tf.Session(graph=detection_graph)

        # Define input and output tensors (i.e. data) for the object detection classifier. The input tensor is the image.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

        # Output tensors are the detection boxes, scores, and classes. Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')

        # Each score represents the level of confidence for each of the objects. The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')

        # Class detection
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        # Number of objects detected
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        # print("Biswa", num_detections)

        # Reading local video
        cap = cv2.VideoCapture(
            '/home/vert/Foreign_Obj_Det_Project/Project_Demo_Video/Shell2FrontView_wop_20210209_140215007565.avi'
        )

        # Run a while loop over the video to test the code
        while True:

            _, img = cap.read()
            img1 = img.copy()

            # Scaling down the image to display
            scale_percent1 = 40  # percent of original size
            width1 = int(img1.shape[1] * scale_percent1 / 100)
            height1 = int(img1.shape[0] * scale_percent1 / 100)
            dim1 = (width1, height1)
            nf1 = cv2.resize(img1, dim1, interpolation=cv2.INTER_AREA)
            # vout1.write(nf1)
            frame_expanded = np.expand_dims(img, axis=0)

            # Copy of the image
            # img=nf1.copy()

            # DRAWING LINE for shell2 side view
            # Left LINE
            x1, y1 = 800, 950
            x1, y2 = 600, 1050
            # off1=2.0
            cv2.line(img, (x1, y1), (x1, y2), (0, 0, 255), 2)

            # Right LINE
            X1, Y1 = 1200, 950
            X1, Y2 = 1200, 1050
            # off2=2.0275
            cv2.line(img, (X1, Y1), (X1, Y2), (0, 0, 255), 2)

            # # LINE - 3
            p1, q1 = 600, 750
            p2, q1 = 1150, 950
            # off3 = 10
            cv2.line(img, (p1, q1), (p2, q1), (0, 0, 255), 2)
            #
            # LINE - 4
            P1, Q1 = 600, 850
            P2, Q1 = 1150, 1100
            # off3 = 10
            cv2.line(img, (P1, Q1), (P2, Q1), (0, 0, 255), 2)
            # #
            #
            # # DRAWING LINE for shell1 front view
            #
            # # Right LINE-1
            # x1, y1 = 700, 800
            # x1, y2 = 600, 950
            # # off1=2.0
            # # cv2.line(img, (x1, y1), (x1, y2), (0, 0, 255), 2)
            #
            # # Left LINE-2
            # X1, Y1 = 1200, 800
            # X1, Y2 = 1200, 950
            # # off2=2.0
            # # cv2.line(img, (X1, Y1), (X1, Y2), (0, 0, 255), 2)
            #
            # # LINE - 3
            # p1, q1 = 600, 750
            # p2, q1 = 1200, 800
            # # off3 = 10
            # # cv2.line(img, (p1, q1), (p2, q1), (0, 0, 255), 2)
            #
            # # LINE - 4
            # P1, Q1 = 600, 850
            # P2, Q1 = 1200, 950
            # off3 = 10
            # cv2.line(img, (P1, Q1), (P2, Q1), (0, 0, 255), 2)
            # print("Empty class found end")

            # print("{} shape {}".format(process,frame_expanded.shape))
            #log.info("{} shape {}".format(process, frame_expanded.shape))

            # Assigning the boxes, scores, classes and num from the model
            (boxes, scores, classes,
             num) = sess.run([
                 detection_boxes, detection_scores, detection_classes,
                 num_detections
             ],
                             feed_dict={image_tensor: frame_expanded})

            # Converting image to array to pass inside the model
            img1 = np.array(img1)

            # Taking coordinates from function which is available in util folder
            im, ymin, xmin, ymax, xmax = vis_util.visualize_boxes_and_labels_on_image_array(
                img1,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8,
                min_score_thresh=0.40)

            # Taking boxes details
            boxes1 = np.squeeze(boxes)

            # get all boxes from an array
            max_boxes_to_draw = boxes1.shape[0]

            # get scores to get a threshold
            scores1 = np.squeeze(scores)
            # print(scores1)

            # this is set as a default but feel free to adjust it to your needs
            min_score_thresh = .4

            # Checking person class from the video to take the time
            for i in range(min(max_boxes_to_draw, boxes1.shape[0])):
                if scores1 is None or scores1[i] > min_score_thresh:
                    class_name = category_index[np.squeeze(classes).astype(
                        np.int32)[i]]['name']

                    # If the object is a person then we need to process the lines below
                    if class_name == "Person":
                        # print(class_name)

                        # Taking the shape of the image
                        h, w = img.shape[:2]

                        # Taking l,r,t,b value from the box
                        l, r, t, b = int(xmin * w), int(xmax * w), int(
                            ymin * h), int(ymax * h)

                        # Printing logger
                        logger.info("print l,r,t,b-{}-{}-{}-{}".format(
                            l, r, t, b))
                        # print("Coordinates-{}-{}-{}-{}".format(l,r,t,b))

                        # Creating a rectangle box around the detected object
                        cv2.rectangle(img, (l, b), (r, t), (1, 190, 200), 2)
                        logger.info("print image shape-{}".format(img.shape))

                        # Creating bounding box with different color
                        # cv2.rectangle(img, (l, b), (r, t),(0,0,255), 1)

                        # centroid calculation
                        c1 = int(l + ((r - l) / 2))
                        c2 = int(t + ((b - t) / 2))
                        logger.info("print ci and c2 -{}-{}".format(c1, c2))

                        # Applying the cv2 circle method to draw the centroid inside the object
                        cv2.circle(img, (c1, c2), 1, (255, 153, 255), 3)
                        cv2.putText(img, "Person", (l, t),
                                    cv2.FONT_HERSHEY_PLAIN, 1, (255, 127, 0),
                                    1)

                        # Checking the distance to take the time
                        # if q1<c2+offset and q1>c2-offset:
                        #     print("Person Crossed The Line")
                        #     # Writing the current timestamp inside the txt file in our folder
                        #     with open("/home/vert/Desktop/Time_Tracking/Log.txt","a") as f:
                        #         f.write("Person Crossed Line-{}".format(datetime.now()))
                        #         f.write("\n")

                        # Checking the condition to take the time
                        if q1 < c2 < Q1 and x1 < c1 < X1:
                            inside += 1
                            if inside == 1:

                                # The person is inside the warning zone
                                print("INSIDE WARNING ZONE")
                                with open(
                                        "/home/vert/Desktop/Time_Tracking/Log.txt",
                                        "a") as f:
                                    f.write("Person Crossed Line-{}".format(
                                        datetime.now()))
                                    f.write("\n")
                                cv2.putText(img, "Inside Warning zone",
                                            (75, 75), cv2.FONT_HERSHEY_SIMPLEX,
                                            2, (0, 0, 255), 2)
                        else:
                            # if (inside>=1 and c2<q1) or (inside>=1 and c2>Q1):
                            if inside > 0:
                                # The person is out of the warning zone
                                print("OUTSIDE WARNING ZONE")
                                with open(
                                        "/home/vert/Desktop/Time_Tracking/Log.txt",
                                        "a") as f:
                                    f.write("Outside Warning Zone-{}".format(
                                        datetime.now()))
                                    f.write("\n")
                                cv2.putText(img, "Out of Warning Zone",
                                            (75, 75), cv2.FONT_HERSHEY_SIMPLEX,
                                            2, (130, 255, 255), 2)
                                # RESET inside to "0"
                                inside = 0
                else:
                    pass
            cv2.imshow("SSD Model Image", nf1)
            cv2.imshow("Normal Image", img)
            cv2.waitKey(1)
            # if cv2.waitKey(1) == 27:
            #   break

    except Exception as exc:
        print("Ending processing... there is an exception-{}".format(exc))
    def __init__(self):

        # ROS initialize
        #        rospy.init_node('ros_tensorflow_ObjectDetection')
        #        rospy.on_shutdown(self.shutdown)

        # Set model path and image topic
        #        model_path = rospy.get_param("~model_path", "")
        #        image_topic = rospy.get_param("~image_topic", "")

        #        self._cv_bridge = CvBridge()

        #        rospy.loginfo("finding model path...")
        '''Select the model path, model label and model name, including 'MODEL_NAME', 'PATH_TO_CKPT' and 'PATH_TO_LABELS'.'''

        #        MODEL_NAME = '/outputing'
        #        PATH_TO_CKPT = model_path + MODEL_NAME +'/frozen_inference_graph.pb'
        #
        #        PATH_TO_LABELS = os.path.join(model_path + '/data', 'frame_label_map.pbtxt')

        # What model to download.
        #        MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
        #        MODEL_FILE = MODEL_NAME + '.tar.gz'
        #        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

        # Path to frozen detection graph. This is the actual model that is used for the object detection.
        #        PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

        # List of the strings that is used to add correct label for each box.
        #        PATH_TO_LABELS = os.path.join(model_path+'/data', 'mscoco_label_map.pbtxt')

        #        NUM_CLASSES = 1
        NUM_CLASSES = 90

        # Download Model
        #        rospy.loginfo("Downloading models...")            #send loginfo
        #        opener = urllib.request.URLopener()
        #        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
        #        tar_file = tarfile.open(MODEL_FILE)
        #        for file in tar_file.getmembers():
        #            file_name = os.path.basename(file.name)       #use os.path.basename for
        #            if 'frozen_inference_graph.pb' in file_name:
        #                    tar_file.extract(file, os.getcwd())   #os.getcwd()

        #Load a (frozen) Tensorflow model into memory.
        #        self.detection_graph = tf.Graph()
        #
        #
        #        with self.detection_graph.as_default():
        #            od_graph_def = tf.GraphDef()
        #            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        #                serialized_graph = fid.read()
        #                od_graph_def.ParseFromString(serialized_graph)
        #                tf.import_graph_def(od_graph_def, name='')

        #        rospy.loginfo("loading models' label ......")
        #        rospy.loginfo("please wait")

        # Loading label map
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)

        #        #Initialize ROS Subscriber and Publisher
        #        self._sub = rospy.Subscriber(image_topic, ROSImage, self.callback, queue_size=10)
        #        self._pub = rospy.Publisher('object_detection', ROSImage, queue_size=1)
        #        rospy.loginfo("Start object dectecter ...")

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True  #
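
The snippet ends right after building the ConfigProto. As a sketch of how such a config is normally attached to the TF1 session (the graph and session attributes here are assumptions, since the graph-loading block above is commented out):

# allow_growth keeps TensorFlow from reserving all GPU memory up front.
config = tf.ConfigProto()
config.gpu_options.allow_growth = True
# Hypothetical continuation: pass the config when creating the session.
# self.sess = tf.Session(graph=self.detection_graph, config=config)
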
Example #20
    def __init__(self, *args, **kwds):
          
        # Detection and timing metrics
        self.tmin = 100
        self.tmax = 0
        self.ttot = 0
        self.tcount = 0
        self.dmin = 100
        self.dmax = 0
        self.dboxtot = 0
        self.davgtot = 0
        self.dcount = 0
        
        
        # begin wxGlade: MyFrame.__init__
        kwds["style"] = kwds.get("style", 0) | wx.DEFAULT_FRAME_STYLE
        wx.Frame.__init__(self, *args, **kwds)
        self.SetSize((812, 522))

        self.Bind(wx.EVT_KEY_DOWN, self.KeyDown)
        # Menu Bar
        self.frame_menubar = wx.MenuBar()
        wxglade_tmp_menu = wx.Menu()
        item = wxglade_tmp_menu.Append(wx.ID_ANY, u"Configuración", "")
        self.Bind(wx.EVT_MENU, self.configuraciónClick, id=item.GetId())
        item = wxglade_tmp_menu.Append(wx.ID_ANY, "Acerca de...", "")
        self.Bind(wx.EVT_MENU, self.acercaDeClick, id=item.GetId())
        item = wxglade_tmp_menu.Append(wx.ID_ANY, "Salir", "")
        self.Bind(wx.EVT_MENU, self.salirClick, id=item.GetId())
        self.frame_menubar.Append(wxglade_tmp_menu, "Menu")
        wxglade_tmp_menu = wx.Menu()
        item = wxglade_tmp_menu.Append(wx.ID_ANY, "Start/Pause\tSPACE BAR", "")
        self.Bind(wx.EVT_MENU, self.cambiarEstadoCNN, id=item.GetId())
        '''accel_tbl = wx.AcceleratorTable([(wx.ACCEL_CTRL,  ord('s'), item.GetId() )])
        self.SetAcceleratorTable(accel_tbl)'''
        item = wxglade_tmp_menu.Append(wx.ID_ANY, "Person/Empty\tCTRL", "")
        self.Bind(wx.EVT_MENU, self.cambiarAnotation, id=item.GetId())
        '''accel_tbl = wx.AcceleratorTable([(wx.ACCEL_CTRL,  ord('a'), item.GetId() )])
        self.SetAcceleratorTable(accel_tbl)'''
        self.frame_menubar.Append(wxglade_tmp_menu, "Capturar Frames")
        self.SetMenuBar(self.frame_menubar)
        # Menu Bar end
        self.label_1 = wx.StaticText(self, wx.ID_ANY, "Ubicaciones:")
        self.cantUbicaciones = wx.StaticText(self, wx.ID_ANY, "0")
        self.label_2 = wx.StaticText(self, wx.ID_ANY, "Ocupadas: ")
        self.cantOcupadas = wx.StaticText(self, wx.ID_ANY, "0")
        self.label_3 = wx.StaticText(self, wx.ID_ANY, "Libres:")
        self.cantLibres = wx.StaticText(self, wx.ID_ANY, "0")

        self.__set_properties()
        self.__do_layout()

        # end wxGlade
             

        #Create objects
        self.THROBLESHOOT = 0.0001 #Default = 0.7
        self.RN=False  # Enables/disables the neural network (RN)
        
        self.CaptureWidth = 720
        self.CaptureHeight = 1280

        # For the live camera
        self.Screen1Width = 360
        self.Screen1Height = 640
        self.Screen1 = wx.StaticBitmap(self, size = (self.Screen1Width, self.Screen1Height)) # Static bitmaps for OpenCV images

        img = wx.Image('imagenes/bancaLibre.png').Scale(self.Screen1Width, self.Screen1Height, wx.IMAGE_QUALITY_HIGH)
        self.wxbmp = img.ConvertToBitmap()
        self.num=-1
        self.boxes=0
        self.scores=0
        self.classes=0
		
        self.sizer_2.Add( self.Screen1, 1, wx.FIXED_MINSIZE |wx.ALL, 5 )
                     
        self.Screen1.Bind(wx.EVT_ERASE_BACKGROUND, self.onEraseBackground)              
        self.Screen1.Bind(wx.EVT_PAINT, self.onPaint)

        # Add objects to sizer
        #self.sizer_2.Add(self.Screen1, 0, wx.EXPAND | wx.ALL, 10)

        # For the analysis result
        self.Screen2Width = 550
        self.Screen2Height = 270
        
        # Maximize the window so it fills the whole desktop except the taskbar
        c_x, c_y, c_w, c_h = wx.ClientDisplayRect()
        self.SetSize((c_w, c_h))
        self.SetPosition((c_x, c_y))
        
        # Window takes half of the desktop
        self.SetSize((c_w/2, c_h))
        self.SetPosition((c_w/2, c_y))

  
        # Get the position of each location within the full capture
        path_locations='configuracion'
        self.images_location=self.xml_to_locations(path_locations)
        self.cantUbicaciones.Label=str(len(self.images_location))
        self.cantLibres.Label=str(len(self.images_location))
        # List to store the state of each bench:
        # [OK] = occupied
        # [ ] = free
        # [?] = undetermined
        self.locations_state=[]
         
        self.imagenes_bancas_select = { "ocupada": 'imagenes/bancaOcupadaSelect.png', "libre": 'imagenes/bancaLibreSelect.png', "indeterminado": 'imagenes/bancaIndeterminadoSelect.png' }

        # Create as many benches as positions stored in the XML and keep them in a list
        # The StaticBitmaps will hold the images for each bench state
        self.screen_list=[]
        for i in self.images_location:
           sb=wx.StaticBitmap(self, size = (self.Screen2Width, self.Screen2Height))
           #sb.SetPosition(wx.Point(0,0))  
           self.screen_list.append(banca.Banca(sb,i[0],i[1],i[2],i[3],i[4]))

        # Create a dictionary to look up each bench's data when a bench is clicked
        self.dict_bancas= {} # create an empty dictionary
        for i in range(len(self.screen_list)):
          self.dict_bancas[self.screen_list[i].staticBitmap]=self.screen_list[i]     
           
        

        # Set the state, position and events of each StaticBitmap
        for i in self.screen_list:

           # Set a position proportional to the screen size and the capture size
           xmin,ymin=i.getPosicionXML()
           xpos=int((xmin/self.CaptureWidth)*self.Screen2Width)
           ypos=int((ymin/self.CaptureHeight)*self.Screen2Height)
           x, y = self.sizer_3.GetPosition()
           i.setPosicionVentana(x+xpos,y+ypos)  

           # Set the events
           i.staticBitmap.Bind(wx.EVT_LEFT_UP, self.bancaClick)
           i.staticBitmap.Bind(wx.EVT_ENTER_WINDOW, self.onMouseOverBanca)
           i.staticBitmap.Bind( wx.EVT_LEAVE_WINDOW, self.onMouseOutBanca)

           # Set the cursor over the bench
           i.staticBitmap.SetCursor(wx.Cursor(wx.CURSOR_HAND))

        self.tiempo1=time.now()
        ipcamUrl = 'http://*****:*****@192.168.43.1:8081'

        ipcamUrlCelRodri = 'http://*****:*****@192.168.43.25:8081'
        #ipcamUrl = 'http://*****:*****@192.168.43.93:8081'
        
        #ipcamUrl = 'toma_lateral.mov'
        ipcam = {}
        ipcamDesc = 'Celular'
        ipcam[ipcamDesc] = urlparse(ipcamUrl)
        print(time.now())
        
        # Test the connection to the IP destination
        if len(ipcamUrl) > 5:
          err,errMsg = self.urlTest(ipcam[ipcamDesc].hostname,ipcam[ipcamDesc].port)
          if err > 0:
              print(time.now(),"Falló conexión. ",errMsg)
              exit(1)
        
        try:
          self.capture = cv2.VideoCapture(ipcamUrl)
          
          self.capture.set(3,self.CaptureWidth) #1024 640 1280 800 384
          self.capture.set(4,self.CaptureHeight) #600 480 960 600 288
          
          fourcc = cv2.VideoWriter_fourcc(*'MJPG')
          self.out = cv2.VideoWriter('output.avi', fourcc, 20.0, (int(self.capture.get(4)),int(self.capture.get(3))))
          
        
          sys.path.append("..")
        
          # Import the object detection module.
          from object_detection.utils import label_map_util
          from object_detection.utils import visualization_utils as vis_util
        
          PATH_TO_CKPT = 'modelo_congelado/frozen_inference_graph.pb'
        
          PATH_TO_LABELS = os.path.join('configuracion', 'label_map.pbtxt')
          
          NUM_CLASSES = 90
                  
          self.detection_graph = tf.Graph()
          with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
              serialized_graph = fid.read()
              od_graph_def.ParseFromString(serialized_graph)
              tf.import_graph_def(od_graph_def, name='')
        
        
          label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
          categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
          self.category_index = label_map_util.create_category_index(categories)
                          
        except IOError as e:
            print(time.now(), "Error abriendo socket: ", ipcamUrl)
        except KeyboardInterrupt as e:
            print(time.now(), "Detenido por teclado.")
        except BaseException as e:
            print(time.now(), "Error desconocido: ", e)
        #    if e.number == -138:
        #        print("Compruebe la conexión con '" + ipcamUrl + "'")
        #    else:
        #        print("Error: " + e.message)
        finally:
            #self.capture.release()
            cv2.destroyAllWindows()
        
        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
              self.sess = tf.Session()
              self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
              # Each box represents a part of the image where a particular object was detected.
              self.detection_boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
              # Each score represent how level of confidence for each of the objects.
              # Score is shown on the result image, together with the class label.
              self.detection_scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
              self.detection_classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
              self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')

              # Create a timer to:
              # 1) Refresh the information on screen
              # 2) Trigger the CNN and collect analysis data
              self.timer = wx.Timer(self)
              self.Bind(wx.EVT_TIMER, self.OnTimer)

              # The CNN starts out inactive
              self.analisis='PAUSADO'

              self.anotation='Person'
              
              self.Bind(wx.EVT_CLOSE, self.onClose)
              self.Bind(wx.EVT_LEFT_UP, self.VentanaClick)

              # Program state
              self.STATE_RUNNING = 1
              self.STATE_CLOSING = 2
              self.state = self.STATE_RUNNING
              

                
              # Set how often the timer fires
              self.fps=60
              self.timer.Start(1000./self.fps)    # timer interval

              # Number of timer ticks during which the CNN does not run
              # This is to avoid lag
              self.FREC= 10
              self.FRECUENCIA_CNN=self.FREC
              self.imagenes=4000
              self.VisualBoxes=0
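
FREC and FRECUENCIA_CNN above are meant to throttle how often the CNN runs relative to the wx.Timer ticks. The OnTimer handler itself is not included in this snippet; the sketch below is only an assumption of what such a throttled handler could look like:

def OnTimer(self, event):
    # Hypothetical throttling: run the detector only once every FREC timer ticks.
    self.FRECUENCIA_CNN -= 1
    if self.FRECUENCIA_CNN > 0:
        return  # skip this tick to avoid lag
    self.FRECUENCIA_CNN = self.FREC
    # ...grab a frame from self.capture and run self.sess on it here...
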
def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  if FLAGS.task == 0: tf.gfile.MakeDirs(FLAGS.train_dir)
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    if FLAGS.task == 0:
      tf.gfile.Copy(FLAGS.pipeline_config_path,
                    os.path.join(FLAGS.train_dir, 'pipeline.config'),
                    overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        train_config_path=FLAGS.train_config_path,
        train_input_config_path=FLAGS.input_config_path)
    if FLAGS.task == 0:
      for name, config in [('model.config', FLAGS.model_config_path),
                           ('train.config', FLAGS.train_config_path),
                           ('input.config', FLAGS.input_config_path)]:
        tf.gfile.Copy(config, os.path.join(FLAGS.train_dir, name),
                      overwrite=True)

  model_config = configs['model']
  train_config = configs['train_config']
  input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=True)
  
  #iterator = dataset_util.make_initializable_iterator(dataset_builder.build(input_config))
  datasetmy = dataset_builder.build(input_config)
  iterator = datasetmy.make_initializable_iterator()
  
  def get_next(config):
    return iterator.get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  
  data_augmentation_options = [
      preprocessor_builder.build(step)
      for step in train_config.data_augmentation_options]
  
  input_queue = trainer.create_input_queue(
      train_config.batch_size, create_input_dict_fn,
      train_config.batch_queue_capacity,
      train_config.num_batch_queue_threads,
      train_config.prefetch_queue_capacity, data_augmentation_options)
  
  tensors = input_queue.dequeue()

  #print all tensors in tfrecord
  print(tensors)
  
  groundtruth_difficult = tensors[0]['groundtruth_difficult']
  groundtruth_group_of = tensors[0]['groundtruth_group_of']
  groundtruth_weights = tensors[0]['groundtruth_weights']
  groundtruth_is_crowd = tensors[0]['groundtruth_is_crowd']
  key = tensors[0]['key']
  groundtruth_boxes = tensors[0]['groundtruth_boxes']
  image = tensors[0]['image']
  groundtruth_area = tensors[0]['groundtruth_area']
  groundtruth_classes = tensors[0]['groundtruth_classes']
  filename = tensors[0]['filename']
  num_groundtruth_boxes = tensors[0]['num_groundtruth_boxes']
  source_id = tensors[0]['source_id']
  
  
  
   
  init_op=tf.initialize_all_variables()
  with tf.Session() as sess:
    sess.run(iterator.initializer)
    sess.run(tf.tables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    sess.run(init_op)
    for i in range(10):
      groundtruth_weights_val,groundtruth_difficult_val,groundtruth_group_of_val,groundtruth_is_crowd_val,key_val,groundtruth_boxes_val,image_val,groundtruth_area_val,groundtruth_classes_val,filename_val,num_groundtruth_boxes_val,source_id_val = \
      sess.run([groundtruth_weights,groundtruth_difficult,groundtruth_group_of,groundtruth_is_crowd,key,groundtruth_boxes,image,groundtruth_area,groundtruth_classes,filename,num_groundtruth_boxes,source_id])
#       print(groundtruth_weights_val)
      print(groundtruth_boxes_val)
#       print(groundtruth_difficult_val)
#       print(groundtruth_group_of_val)
#       print(groundtruth_is_crowd_val)
#       print(key_val)
#       print(image_val)
#       print(groundtruth_area_val)
      print(groundtruth_classes_val)
      print(filename_val)
      print(num_groundtruth_boxes_val)
#       print(source_id_val)
      image_val = image_val[0]
      image_val = image_val.astype(np.uint8)
#       cv2.imshow('image', image_val)
#       cv2.waitKey()
#       plt.imshow(image_val)
#       plt.show()  
      print('finish')
      
      #plot bbox on image
      plt.switch_backend("TkAgg")
      classes_val = groundtruth_classes_val
      boxes_val = groundtruth_boxes_val
      scores_val = [1.0]*num_groundtruth_boxes_val
      image_np = image_val
      image_np_origin = image_val.copy()
      NUM_CLASSES = 90
      IMAGE_SIZE = (12, 8)
      PATH_TO_LABELS = '../../data/mscoco_label_map.pbtxt'
      label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
      categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                                  use_display_name=True)
      category_index = label_map_util.create_category_index(categories)
      vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                boxes_val,
                np.squeeze(classes_val).astype(np.int32),
                np.squeeze(scores_val),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
      plt.figure(figsize=IMAGE_SIZE)
      plt.subplot(121)
      plt.imshow(image_np)
      plt.subplot(122)
      plt.imshow(image_np_origin)
      plt.show()  
      print('finish')
           
           
      pass
  coord.request_stop()
  coord.join(threads)
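
main(_) above follows the TF1 object_detection train.py pattern and relies on FLAGS such as train_dir, pipeline_config_path and task being defined at module level. A minimal sketch of the usual flag definitions and entry point (assumed, not taken from the original script):

flags = tf.app.flags
flags.DEFINE_string('train_dir', '', 'Directory for checkpoints and event files.')
flags.DEFINE_string('pipeline_config_path', '', 'Path to a pipeline.config file.')
flags.DEFINE_integer('task', 0, 'Task id of the replica running this job.')
FLAGS = flags.FLAGS

if __name__ == '__main__':
    tf.app.run()  # parses the flags and calls main(_)
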
Example #22
def eval_models_general(xml_path, MODEL_NAME, path, PATH_TO_LABELS,
                        threshold_scores, threshold_boxes, width, height):

    global xml_list
    xml_list = []
    for xml_file in glob.glob(xml_path):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            xmin_xml = int(member[4][0].text)
            ymin_xml = int(member[4][1].text)
            xmax_xml = int(member[4][2].text)
            ymax_xml = int(member[4][3].text)
            filename_xml = root.find('filename').text
            if xmin_xml != 0 and ymin_xml != 0 and xmax_xml != 0 and ymax_xml != 0:
                value = [filename_xml, xmin_xml, ymin_xml, xmax_xml, ymax_xml]
                xml_list.append(value)
            #value_num = [int(member[4][0].text), int(member[4][1].text), int(member[4][2].text), int(member[4][3].text)]
            #all_ground_truth = np.vstack([all_ground_truth, value_num])
            #print(value)

    #a = ['bresil_ad_blue1_100.png', 1000, 200, 300, 400]
    #xml_list.append(a)
    #b = ['bresil_ad_blue1_100.png', 1100, 500, 400, 900]
    #xml_list.append(b)
    xml_list.sort()

    xml_list_names = []
    for i in xml_list:
        xml_list_names.append(i[0])
    '''
    for i in xml_list:
        all_ground_truth = np.vstack([ all_ground_truth, i[1:] ])

    all_ground_truth = np.delete(all_ground_truth, 0, 0)
    print("\nall_ground_truth")
    print(all_ground_truth)
    '''
    # Name of the directory containing the object detection module we're using
    #MODEL_NAME = '../TensorFlow/trained-inference-graphs/output_inference_graph_v1.pb'

    # Name of the directory containing all images to be predicted
    #path = '/home/igor/Documentos/Developments/Experiment_Plan/Renault_Oficial_Images/ad_blue_all'

    PATH_TO_IMAGE = []
    global IMAGE
    IMAGE = []
    # r=root, d=directories, f = files
    for r, d, f in os.walk(path):
        for file in f:
            if '.jpg' in file:
                PATH_TO_IMAGE.append(os.path.join(r, file))
                IMAGE.append(file)

    IMAGE.sort()

    # Grab path to current working directory
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph .pb file, which contains the model that is used for object detection.
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME,
                                'frozen_inference_graph.pb')

    # Path to label map file
    #PATH_TO_LABELS = os.path.join(CWD_PATH,'../TensorFlow/annotations','label_map.pbtxt')

    # Number of classes the object detector can identify
    NUM_CLASSES = 1

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Load the Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        sess = tf.Session(graph=detection_graph)

    # Input tensor is the image
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Output tensors are the detection boxes, scores, and classes
    # Each box represents a part of the image where a particular object was detected
    detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # The score is shown on the result image, together with the class label.
    detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
    detection_classes = detection_graph.get_tensor_by_name(
        'detection_classes:0')

    # Number of objects detected
    num_detections = detection_graph.get_tensor_by_name('num_detections:0')

    # width = 705, height = 447
    # width = 2590, height = 1942

    #score draw boxes
    #threshold_scores = 0.5
    #score IoU
    #threshold_boxes = 0.6

    global images_list
    images_list = []

    global pred_labels_list
    pred_labels_list = []
    global result_list
    result_list = []
    global result_num_list
    result_num_list = []

    global all_predictions
    all_predictions = np.zeros([1, 4], dtype=int)
    all_predictions = np.delete(all_predictions, 0, 0)

    global all_ground_truth
    all_ground_truth = np.zeros([1, 4], dtype=int)
    all_ground_truth = np.delete(all_ground_truth, 0, 0)

    FP, TP, FN = 0, 0, 0

    lista_string = list(range(0, 274, 1))  #amount of xml_files + 1
    x = 0

    for f in range(len(IMAGE)):

        path_to_img = os.path.join(path, IMAGE[f])
        image = cv2.imread(path_to_img)
        image_expanded = np.expand_dims(image, axis=0)

        (boxes, scores, classes,
         num) = sess.run([
             detection_boxes, detection_scores, detection_classes,
             num_detections
         ],
                         feed_dict={image_tensor: image_expanded})
        '''
        count_ocurr = xml_list_names.count(IMAGE[f])
        #print("count_ocurr: ", IMAGE[f], count_ocurr)
        for i in range(count_ocurr):
            #print("i: ", i)
            #print("f: ", f)
            #print("xml_list[f+i]: ", xml_list[f+i+x])
            all_ground_truth = np.vstack([all_ground_truth, xml_list[f+i+x][1:]])
        #print("all_ground_truth: \n", all_ground_truth)
        '''
        for i, j in zip(range(boxes.shape[1]), lista_string):

            if scores[0, i] > threshold_scores:

                # prediction values
                ymin_pred = boxes[0, i, 0] * height
                xmin_pred = boxes[0, i, 1] * width
                ymax_pred = boxes[0, i, 2] * height
                xmax_pred = boxes[0, i, 3] * width

                #str(int(ymin_pred))
                #str(int(xmin_pred))
                #str(int(ymax_pred))
                #str(int(xmax_pred))
                scores_num = '{0:.10f}'.format(scores[0, i])
                scores_str = str(scores_num)

                name = IMAGE[f].split(".")
                name_1 = name[0]
                name_2 = name[0] + ".txt"

                value_pred = [
                    name_2, "volkswagen_logo", scores_str,
                    str(int(xmin_pred)),
                    str(int(ymin_pred)),
                    str(int(xmax_pred)),
                    str(int(ymax_pred))
                ]  # add score corresponding to xmin, ymin, xmax, ymax

                pred_labels_list.append(value_pred)
                print(value_pred)

                #value_pred_num = [int(xmin_pred), int(ymin_pred), int(xmax_pred), int(ymax_pred)]
                #all_predictions = np.vstack([all_predictions, value_pred_num])

    with open('/home/igor/Documentos/luigy/Test18/output_all_synthetic.txt',
              'w') as f:
        for item in pred_labels_list:
            f.write("%s\n" % item)
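
eval_models_general takes a threshold_boxes argument intended as an IoU cut-off between predictions and the XML ground truth, but the portion shown never computes it. A minimal IoU helper, written only as an assumption of how that comparison would look (the function name is hypothetical):

def iou(box_a, box_b):
    # Intersection-over-union of two boxes given as (xmin, ymin, xmax, ymax) in pixels.
    ixmin = max(box_a[0], box_b[0])
    iymin = max(box_a[1], box_b[1])
    ixmax = min(box_a[2], box_b[2])
    iymax = min(box_a[3], box_b[3])
    inter = max(0, ixmax - ixmin) * max(0, iymax - iymin)
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union if union > 0 else 0.0

# A prediction would then count as a true positive when iou(pred_box, gt_box) >= threshold_boxes.
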
def visualize(split):
    nusc = NuScenes(version='v1.0-trainval', dataroot=FLAGS.nuscenes, verbose=True)
    sensor = 'LIDAR_TOP'

    # pipeline_config = pipeline_pb2.TrainEvalPipelineConfig()
    # with tf.gfile.GFile(FLAGS.pipeline_config_path, 'r') as f:
    #     text_format.Merge(f.read(), pipeline_config)
    # if not pipeline_config.model.HasField('ssd_augmentation'):
    #     raise ValueError('Model with ssd_augmentation estimation is required.')



    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(FLAGS.graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            for node in od_graph_def.node:
                if 'BatchMultiClassNonMaxSuppression' in node.name:
                    node.device = '/device:CPU:0'
            tf.import_graph_def(od_graph_def, name='')
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            detection_boxes_inclined = detection_graph.get_tensor_by_name('detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            belief_F_prediction = detection_graph.get_tensor_by_name('belief_F_prediction:0')
            belief_O_prediction = detection_graph.get_tensor_by_name('belief_O_prediction:0')
            z_max_detections_prediction = detection_graph.get_tensor_by_name('z_max_detections_prediction:0')
            detections_drivingCorridor_prediction = detection_graph.get_tensor_by_name('detections_drivingCorridor_prediction:0')
            scene_splits = create_splits_scenes()
            for scene in nusc.scene:
                if scene['name'] not in vis_set:
                    continue
                scene_dir = os.path.join(FLAGS.output, scene['name'])
                os.system('mkdir {}'.format(scene_dir))
                folder_inverse = os.path.join(scene_dir, 'inverse')
                folder_color = os.path.join(scene_dir, 'color')
                folder_color_inverse = os.path.join(scene_dir, 'color_inverse')
                os.system('mkdir {}'.format(folder_inverse))
                os.system('mkdir {}'.format(folder_color))
                os.system('mkdir {}'.format(folder_color_inverse))
                folder_belF = os.path.join(scene_dir, 'belF')
                folder_belO = os.path.join(scene_dir, 'belO')
                folder_zMaxDet = os.path.join(scene_dir, 'zMaxDet')
                os.system('mkdir {}'.format(folder_belF))
                os.system('mkdir {}'.format(folder_belO))
                os.system('mkdir {}'.format(folder_zMaxDet))

                folder_belF_clean = os.path.join(scene_dir, 'belF_clean')
                folder_belO_clean = os.path.join(scene_dir, 'belO_clean')
                folder_zMaxDet_clean = os.path.join(scene_dir, 'zMaxDet_clean')
                os.system('mkdir {}'.format(folder_belF_clean))
                os.system('mkdir {}'.format(folder_belO_clean))
                os.system('mkdir {}'.format(folder_zMaxDet_clean))


                current_sample_token = scene['first_sample_token']
                last_sample_token = scene['last_sample_token']
                # first_sample = nusc.get('sample', scene['first_sample_token'])
                # current_token = first_sample['data'][sensor]
                sample_in_scene = True
                first_inference = True
                while sample_in_scene:
                    # while current_token:
                    if current_sample_token == last_sample_token:
                        sample_in_scene = False
                    sample = nusc.get('sample', current_sample_token)
                    lidar_top_data = nusc.get('sample_data', sample['data'][sensor])
                    if first_inference:
                        # current_token = lidar_top_data['next']
                        # if use_10hz_capture_frequency:
                        #    if current_token:
                        #        lidar_top_data_next = nusc.get('sample_data', current_token)
                        #        current_token = lidar_top_data_next['next']
                        current_sample_token = sample['next']
                        first_inference = False
                        continue

                    # Read input data
                    filename_prefix = os.path.splitext(os.path.splitext(lidar_top_data['filename'])[0])[0]
                    image_stacked, det_mask, observation_mask, z_mask = read_images(FLAGS.data, FLAGS.data_beliefs,
                                                                                    filename_prefix)
                    # Inference
                    start_time = time.time()

                    (boxes_aligned, boxes_inclined, scores, classes, num, belief_F_pred, belief_O_pred,
                     z_max_detections_pred,  detections_drivingCorridor_pred) = sess.run(
                        [detection_boxes, detection_boxes_inclined, detection_scores, detection_classes,
                         num_detections, belief_F_prediction, belief_O_prediction, z_max_detections_prediction, detections_drivingCorridor_prediction],
                        feed_dict={image_tensor: image_stacked})
                    print('Inference time:', time.time() - start_time)

                    # Visualize object detection and scene flow
                    label_map = label_map_util.load_labelmap(FLAGS.label_map)
                    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=10,
                                                                                use_display_name=True)
                    category_index = label_map_util.create_category_index(categories)

                    # Create grid map to visualize
                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                         dtype=np.uint8)
                    image_vis_inv = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3),
                                             dtype=np.uint8)

                    # Write augmentation maps
                    # print("image_stacked--------------------------")
                    # print(image_stacked.shape)
                    # print("belief_F_prediction--------------------")
                    # print(belief_F_pred)
                    belief_F_prediction_np = resize_augm(belief_F_pred[0], image_stacked.shape[1], image_stacked.shape[2])
                    belief_O_prediction_np = resize_augm(belief_O_pred[0], image_stacked.shape[1], image_stacked.shape[2])
                    z_max_detections_prediction_np = resize_augm(z_max_detections_pred[0], image_stacked.shape[1], image_stacked.shape[2])
                    detections_drivingCorridor_prediction_np = resize_augm(detections_drivingCorridor_pred[0], image_stacked.shape[1], image_stacked.shape[2])

                    image_bel_F = augm_to_image_gray_8(belief_F_prediction_np, mode_norm255=True)
                    # image_bel_F = cv2.bitwise_not(image_bel_F)
                    image_bel_F_clean = image_bel_F.copy()

                    image_bel_O = augm_to_image_gray_8(belief_O_prediction_np, mode_norm255=True)
                    image_bel_O = cv2.bitwise_not(image_bel_O)
                    image_bel_O_clean = image_bel_O.copy()

                    image_z_max_detections = augm_to_image_gray_8(z_max_detections_prediction_np, mode_norm255=False)
                    image_z_max_detections = cv2.bitwise_not(image_z_max_detections)
                    image_z_max_detections_clean = image_z_max_detections.copy()

                    # image_vis_color = augm_to_image_rgb(detections_drivingCorridor_prediction_np, belief_F_prediction_np, z_max_detections_prediction_np)
                    image_vis_color = augm_to_image_rg(belief_F_prediction_np, z_max_detections_prediction_np)

                    image_vis_color_inv = cv2.bitwise_not(image_vis_color)

                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    image_vis = np.zeros((image_stacked.shape[1], image_stacked.shape[2], 3), dtype=np.uint8)
                    for (v, u), val in np.ndenumerate(observation_mask):
                        if val:
                            image_vis[v, u, :] = 50
                    image_vis_inv = cv2.bitwise_not(image_vis)
                    for (v, u), val in np.ndenumerate(det_mask):
                        if val:
                            image_vis[v, u] = 255
                            image_vis_inv[v, u] = 0

                    # Draw inclined detection box
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_color_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    print(image_vis.shape)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_vis_inv,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    print(image_bel_F.shape)

                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_bel_F,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)


                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_bel_O,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_z_max_detections,
                        np.squeeze(boxes_aligned),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_index,
                        boxes_3d=np.squeeze(boxes_inclined),
                        min_score_thresh=0.23,
                        use_normalized_coordinates=True,
                        line_thickness=3)

                    # Save image
                    print(filename_prefix.split('/')[-1])
                    output_path = os.path.join(scene_dir, filename_prefix.split('/')[-1] + '.png')
                    cv2.imwrite(output_path, image_vis)

                    output_path_inv = os.path.join(folder_inverse, filename_prefix.split('/')[-1] + 'inv.png')
                    output_color_path = os.path.join(folder_color, filename_prefix.split('/')[-1] + 'color.png')
                    output_color_path_inv = os.path.join(folder_color_inverse, filename_prefix.split('/')[-1] + 'colorInv.png')

                    output_path_belO = os.path.join(folder_belO, filename_prefix.split('/')[-1] + 'belo.png')
                    output_path_belF = os.path.join(folder_belF, filename_prefix.split('/')[-1] + 'belf.png')
                    output_path_zMaxDet = os.path.join(folder_zMaxDet, filename_prefix.split('/')[-1] + 'zmax.png')

                    output_path_belO_clean = os.path.join(folder_belO_clean, filename_prefix.split('/')[-1] + 'belo_clean.png')
                    output_path_belF_clean = os.path.join(folder_belF_clean, filename_prefix.split('/')[-1] + 'belf_clean.png')
                    output_path_zMaxDet_clean = os.path.join(folder_zMaxDet_clean, filename_prefix.split('/')[-1] + 'zmax_clean.png')

                    cv2.imwrite(output_path_inv, image_vis_inv)
                    cv2.imwrite(output_color_path, image_vis_color)
                    cv2.imwrite(output_color_path_inv, image_vis_color_inv)

                    cv2.imwrite(output_path_belO, image_bel_O)
                    cv2.imwrite(output_path_belF, image_bel_F)
                    cv2.imwrite(output_path_zMaxDet, image_z_max_detections)

                    cv2.imwrite(output_path_belO_clean, image_bel_O_clean)
                    cv2.imwrite(output_path_belF_clean, image_bel_F_clean)
                    cv2.imwrite(output_path_zMaxDet_clean, image_z_max_detections_clean)
                    current_sample_token = sample['next']
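
visualize() above creates each of its output folders with os.system('mkdir ...'). A standard-library alternative that avoids spawning a shell and tolerates pre-existing directories (folder names taken from the snippet; the replacement itself is just a suggestion):

# Equivalent of the os.system('mkdir {}'.format(...)) calls, without a shell.
for folder in (folder_inverse, folder_color, folder_color_inverse,
               folder_belF, folder_belO, folder_zMaxDet):
    os.makedirs(folder, exist_ok=True)
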
Example #24
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf  # noqa
        from object_detection.utils import label_map_util  # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md"
        )  # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2  # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # Set up Tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []

    for camera in config[CONF_SOURCE]:
        entities.append(
            TensorFlowImageProcessor(hass, camera[CONF_ENTITY_ID],
                                     camera.get(CONF_NAME), session,
                                     detection_graph, category_index, config))

    add_entities(entities)
Example #25
def object_detection_funct(image_path, api_call=True, video_file=False):
    #initialize model graph path and labels map path
    path_name = 'model'
    path_to_frozen_det_graph = path_name + '/frozen_inference_graph.pb'
    path_to_labels = os.path.join('model', 'mscoco_label_map.pbtxt')
    num_of_classes = 90

    #load the frozen tensorflow model
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_frozen_det_graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    #load the label maps for example 1=person, 2=cat etc. etc.
    label_map = label_map_util.load_labelmap(path_to_labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_of_classes, use_display_name=True)
    category_map = label_map_util.create_category_index(categories)

    #actual prediction for single image
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            if video_file == False:
                image = Image.open(image_path)
                #convert image into numpy array using the function written
                image_np = load_image_into_numpy_array(image)

                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')

                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})

                #decode the predictions and convert it into a dictionary
                output_dict = {'class': classes[0], 'score': scores[0]}

                if api_call == True:
                    #return final dictionary
                    scores_predicted = output_dict['score'][
                        output_dict['score'] > 0.4]
                    classes_predicted = output_dict['class'][
                        0:len(scores_predicted)].astype(np.int32)
                    results_dict = dict()
                    for i in range(0, len(classes_predicted)):
                        results_dict[category_map[
                            classes_predicted[i]]['name']] = str(
                                round(scores_predicted[i] * 100, 2)) + " %"
                    return results_dict
                else:
                    #save image with boxes and return image path
                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_map,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    DIR = 'static/img_results'
                    num_of_files_in_dir = len([
                        name for name in os.listdir(DIR)
                        if os.path.isfile(os.path.join(DIR, name))
                    ])
                    final_image_path = DIR + '/image' + str(
                        num_of_files_in_dir) + '.png'
                    result_img = Image.fromarray(image_np, 'RGB')
                    result_img.save(final_image_path)
                    return final_image_path
            else:
                reader = imageio.get_reader(VIDEO_PATH)
                fps = reader.get_meta_data()['fps']
                VID_OP_DIR = 'static/vid_results'
                num_of_files_in_dir = len([
                    name for name in os.listdir(VID_OP_DIR)
                    if os.path.isfile(os.path.join(VID_OP_DIR, name))
                ])
                final_video_path = VID_OP_DIR + '/output_video' + str(
                    num_of_files_in_dir) + '.mp4'
                writer = imageio.get_writer(final_video_path, fps=fps)
                for i, frame in enumerate(reader):
                    image_np = frame
                    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                    image_np_expanded = np.expand_dims(image_np, axis=0)
                    image_tensor = detection_graph.get_tensor_by_name(
                        'image_tensor:0')

                    boxes = detection_graph.get_tensor_by_name(
                        'detection_boxes:0')
                    scores = detection_graph.get_tensor_by_name(
                        'detection_scores:0')
                    classes = detection_graph.get_tensor_by_name(
                        'detection_classes:0')
                    num_detections = detection_graph.get_tensor_by_name(
                        'num_detections:0')
                    (boxes, scores, classes, num_detections) = sess.run(
                        [boxes, scores, classes, num_detections],
                        feed_dict={image_tensor: image_np_expanded})

                    vis_util.visualize_boxes_and_labels_on_image_array(
                        image_np,
                        np.squeeze(boxes),
                        np.squeeze(classes).astype(np.int32),
                        np.squeeze(scores),
                        category_map,
                        use_normalized_coordinates=True,
                        line_thickness=8)
                    writer.append_data(image_np)
                writer.close()
                return final_video_path
Example #26
def run(video_source, path_object_model, path_encoder_model, path_labels,
        min_score_thresh, nms_max_overlap, max_cosine_distance, nn_budget,
        display, time_profile):
    """Run multi-target tracker on a particular sequence.

    Parameters
    ----------
    video_source : str
        Path to the video source to process.
    path_object_model : str
        Path to object recognition model.
    path_encoder_model : str
        Path to encoder model.
    path_labels : str
        Path to object labels.
    min_score_thresh : float
        Detection confidence threshold. Disregard all detections that have a
        confidence lower than this value.
    nms_max_overlap : float
        Maximum detection overlap (non-maxima suppression threshold).
    max_cosine_distance : float
        Gating threshold for cosine distance metric (object appearance).
    nn_budget : Optional[int]
        Maximum size of the appearance descriptor gallery. If None, no budget
        is enforced.
    display : bool
        If True, show visualization of intermediate tracking results.
    time_profile : bool
        If True, show timing information.
    """
    def timeit(method):
        def timed(*args, **kw):
            ts = timer()
            result = method(*args, **kw)
            te = timer()

            if time_profile:
                print('%r %2.3f sec' % (method.__name__, te - ts))
            return result

        return timed
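
    # Functions decorated with @timeit below print their name and elapsed time when
    # time_profile is True, e.g. "'object_detection' 0.150 sec" (value illustrative).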

    # Open video stream
    cap = cv2.VideoCapture(video_source)
    frame_count = 0
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Deep SORT stuff
    metric = nn_matching.NearestNeighborDistanceMetric("cosine",
                                                       max_cosine_distance,
                                                       nn_budget)
    tracker = Tracker(metric)

    if not os.path.exists(path_encoder_model):
        print("%s: No such file or directory" % path_encoder_model)
        sys.exit(1)
    encoder = generate_detections.create_box_encoder(path_encoder_model)

    # Object detection

    # ## Check if model exist otherwise download it
    OBJECT_MODEL_PATH = os.path.join(path_object_model, '')
    OBJECT_MODEL_FILE = os.path.join(OBJECT_MODEL_PATH,
                                     'frozen_inference_graph.pb')

    if not os.path.exists(OBJECT_MODEL_PATH):

        DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

        DOWNLOAD_FILE = str.split(OBJECT_MODEL_PATH, '/')[-2] + '.tar.gz'

        DOWNLOAD_TO = os.path.join(str.split(OBJECT_MODEL_PATH, '/')[0], '')

        print('Model \"%s\" not on disk' % OBJECT_MODEL_PATH)
        print('Download it from %s' % (DOWNLOAD_BASE + DOWNLOAD_FILE))

        opener = urllib.request.URLopener()
        opener.retrieve(os.path.join(DOWNLOAD_BASE, DOWNLOAD_FILE),
                        os.path.join(DOWNLOAD_TO, DOWNLOAD_FILE))

        # Extract tar the model from the tar file
        print('Extract frozen tensorflow model')
        tar_file = tarfile.open(os.path.join(DOWNLOAD_TO, DOWNLOAD_FILE))
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, DOWNLOAD_TO)

    # ## Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(OBJECT_MODEL_FILE, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        # Keep the session open: it is reused later by object_detection(); a
        # "with tf.Session()" block would close it before the first frame is read.
        sess = tf.Session(graph=detection_graph)

        # Get handles to the input and output tensors of detection_graph.
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name(
            'num_detections:0')

        tensor_list = [
            detection_boxes, detection_scores, detection_classes,
            num_detections
        ]

    # ## Loading label map
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `airplane`.
    # Here we use internal utility functions, but anything that returns a
    # dictionary mapping integers to appropriate string labels would be fine
    if not os.path.exists(path_labels):
        print("%s: No such file or directory" % path_labels)
        sys.exit(1)

    label_map = label_map_util.load_labelmap(path_labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    #
    # ## Select some category to display
    # 1 : person
    # 2 : bycicle
    # 3 : car
    # 4 : motorcicle
    # 6 : bus
    # 8 : truck
    #idx_to_keep = [1,2,3,4,6,8]
    #category_index = { i: category_index[i] for i in idx_to_keep}

    # end of initialization

    # # Detection
    @timeit
    def object_detection(image, graph):

        (boxes, scores, classes,
         num) = sess.run(tensor_list,
                         feed_dict={image_tensor: np.expand_dims(image, 0)})

        mask = scores > min_score_thresh
        classes = classes[mask]
        boxes = boxes[mask]
        scores = scores[mask]

        return (classes, boxes, scores)

    @timeit
    def extract_features(image, boxes):

        image_pil = Image.fromarray(np.uint8(image)).convert('RGB')
        im_width, im_height = image_pil.size
        detections = []

        for box in boxes:

            ymin, xmin, ymax, xmax = box
            (left, right, bottom, top) = (xmin * im_width, xmax * im_width,
                                          ymin * im_height, ymax * im_height)
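            # Despite the variable names, the box appended below is Deep SORT's tlwh
            # format: (x_min, y_min, width, height) in pixels, converted from the
            # detector's normalized [ymin, xmin, ymax, xmax] output.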

            detections.append(
                np.array([left, bottom, right - left, top - bottom]))
            #scores.append(score)

        detections = np.array(detections)

        features = encoder(image, detections)

        detections = [
            Detection(bbox, 1.0, feature)
            for bbox, feature in zip(detections, features)
        ]

        # Run non-maxima suppression.
        boxes = np.array([d.tlwh for d in detections])

        scores = np.array([d.confidence for d in detections])

        indices = preprocessing.non_max_suppression(boxes, nms_max_overlap,
                                                    scores)

        detections = [detections[i] for i in indices]

        return detections

    @timeit
    def tracking(detections):
        tracker.predict()
        tracker.update(detections)
        return tracker

    @timeit
    def frame_callback():
        ret, frame_np = cap.read()

        # Resize frame of video to 1/4 size for faster face recognition processing
        #frame_np = cv2.resize(frame_np, (0, 0), fx=0.25, fy=0.25)

        # Skip bad read frames
        if not ret:
            return

        # Do things here
        if time_profile:
            t_obj_start = timer()

        # Actual detection.
        tf_classes, tf_boxes, tf_scores = object_detection(
            frame_np, detection_graph)

        # Do things here
        if time_profile:
            t_obj_stop = timer()
            t_feat_start = timer()

        detections = extract_features(frame_np, tf_boxes)

        # Update tracker.
        tracker = tracking(detections)

        for track, tf_class, tf_score in zip(tracker.tracks, tf_classes,
                                             tf_scores):

            bbox = track.to_tlbr()

            if display:

                h, w, _ = frame_np.shape
                thick = int((h + w) / 300.)

                cv2.rectangle(
                    frame_np, (int(bbox[0]), int(bbox[1])),
                    (int(bbox[2]), int(bbox[3])),
                    visualization.create_unique_color_uchar(track.track_id,
                                                            hue_step=0.41),
                    thick)
                #(255,255,255), thick)

                cv2.putText(
                    frame_np,
                    str('id: %i, class: %s, score: %.2f' %
                        (track.track_id, category_index[tf_class]['name'],
                         tf_score)), (int(bbox[0]), int(bbox[1]) - 12), 0,
                    1e-3 * h, (255, 0, 0), int(thick / 3))

                cv2.imshow('object detection',
                           cv2.resize(frame_np, (800, 450)))
                #cv2.imshow('object detection', frame_np)

    while True:

        print('Frame %i, %s' % (frame_count, datetime.now()))

        frame_callback()

        frame_count += 1

        if cv2.waitKey(10) & 0xFF == ord('q'):
            break

    cap.release()
    sess.close()
    cv2.destroyAllWindows()
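
# A minimal usage sketch for run(); every path and threshold below is a placeholder
# assumption, not taken from the original script:
#
#   run(video_source='input.mp4',
#       path_object_model='ssd_mobilenet_v1_coco_2017_11_17/',
#       path_encoder_model='mars-small128.pb',
#       path_labels='data/mscoco_label_map.pbtxt',
#       min_score_thresh=0.5, nms_max_overlap=1.0,
#       max_cosine_distance=0.2, nn_budget=None,
#       display=True, time_profile=False)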
def get_label_map(PATH_TO_LABELS, NUM_CLASSES):
  print("Debug info: Inside get_label_map()")
  label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
  categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
  category_index = label_map_util.create_category_index(categories)
  return category_index
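
# Example call (label map path and ids are assumptions based on the MS COCO map):
#   category_index = get_label_map('data/mscoco_label_map.pbtxt', 90)
#   category_index[1]  # -> {'id': 1, 'name': 'person'}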
Example #28
    def __init__(self, model_name, label_file='data/mscoco_label_map.pbtxt'):
        # Initialize some variables
        print("ObjectDetector('%s', '%s')" % (model_name, label_file))
        self.process_this_frame = True

        # download model
        self.graph_file = model_name + '/' + self.GRAPH_FILE_NAME
        if not os.path.isfile(self.graph_file):
            self.download_model(model_name)

        # Load a (frozen) Tensorflow model into memory.
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.graph_file, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            graph = self.detection_graph

            ops = graph.get_operations()
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            tensor_dict = {}
            for key in [
                  'num_detections', 'detection_boxes', 'detection_scores',
                  'detection_classes', 'detection_masks'
              ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, 480, 640)
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            self.tensor_dict = tensor_dict

        self.sess = tf.Session(graph=self.detection_graph)

        # Loading label map
        # Label maps map indices to category names,
        # so that when our convolution network predicts `5`,
        # we know that this corresponds to `airplane`.
        # Here we use internal utility functions,
        # but anything that returns a dictionary mapping integers to appropriate string labels would be fine
        label_map = label_map_util.load_labelmap(label_file)
        categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)
        self.output_dict = None

        self.last_inference_time = 0
Example #29
def image_input():
    image_path = './static/input_images'
    try:
        test_record_fname = './static/annotations/test.record'
        train_record_fname = './static/annotations/train.record'
        label_map_pbtxt_fname = './static/annotations/label_map.pbtxt'
        pb_fname = './static/inference_graphs/engie.pb'
        IMAGE_SIZE = (12, 8)
        PATH_TO_CKPT = pb_fname
        PATH_TO_LABELS = label_map_pbtxt_fname
        num_classes = get_num_classes(label_map_pbtxt_fname)
        assert os.path.isfile(pb_fname)
        assert os.path.isfile(PATH_TO_LABELS)
        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.compat.v1.GraphDef()
            with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
        label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=num_classes, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        for direct, subdirect, images in os.walk(image_path):
            for i, image_name in enumerate(images):
                image = Image.open(str(direct) + '/' + str(image_name))
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)
                output_dict = run_inference_for_single_image(
                    image_np, detection_graph)
                image_res, class_name = vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    output_dict['detection_boxes'],
                    output_dict['detection_classes'],
                    output_dict['detection_scores'],
                    category_index,
                    instance_masks=output_dict.get('detection_masks'),
                    use_normalized_coordinates=True,
                    line_thickness=10)
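                # NOTE: the stock vis_util.visualize_boxes_and_labels_on_image_array
                # returns only the annotated image; unpacking (image_res, class_name)
                # assumes a locally modified version that also returns the top label.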
                image_np = cv2.cvtColor(image_np, cv2.COLOR_BGR2RGB)
                if class_name == 'Critical':
                    print('Critical', image_name)
                    cv2.imwrite('./static/critical/image_' + str(i) + '.jpg',
                                image_np)
                elif class_name == 'High':
                    print('High', image_name)
                    cv2.imwrite('./static/high/image_' + str(i) + '.jpg',
                                image_np)
                elif class_name == 'Less':
                    print('less', image_name)
                    cv2.imwrite('./static/less/image_' + str(i) + '.jpg',
                                image_np)
                else:
                    pass
        return '', 204

    except Exception as e:
        print(
            "Please select another image because the current image gives an irregular array shape",
            e)
        return '', 301
def evaluate_detection_results_pascal_voc(result_lists,
                                          categories,
                                          label_id_offset=0,
                                          iou_thres=0.5,
                                          corloc_summary=False):
  """Computes Pascal VOC detection metrics given groundtruth and detections.

  This function computes Pascal VOC metrics. This function by default
  takes detections and groundtruth boxes encoded in result_lists and writes
  evaluation results to tf summaries which can be viewed on tensorboard.

  Args:
    result_lists: a dictionary holding lists of groundtruth and detection
      data corresponding to each image being evaluated.  The following keys
      are required:
        'image_id': a list of string ids
        'detection_boxes': a list of float32 numpy arrays of shape [N, 4]
        'detection_scores': a list of float32 numpy arrays of shape [N]
        'detection_classes': a list of int32 numpy arrays of shape [N]
        'groundtruth_boxes': a list of float32 numpy arrays of shape [M, 4]
        'groundtruth_classes': a list of int32 numpy arrays of shape [M]
      and the remaining fields below are optional:
        'difficult': a list of boolean arrays of shape [M] indicating the
          difficulty of groundtruth boxes. Some datasets like PASCAL VOC provide
          this information and it is used to remove difficult examples from eval
          in order to not penalize the models on them.
      Note that it is okay to have additional fields in result_lists --- they
      are simply ignored.
    categories: a list of dictionaries representing all possible categories.
      Each dict in this list has the following keys:
          'id': (required) an integer id uniquely identifying this category
          'name': (required) string representing category name
            e.g., 'cat', 'dog', 'pizza'
    label_id_offset: an integer offset for the label space.
    iou_thres: float determining the IoU threshold at which a box is considered
        correct. Defaults to the standard 0.5.
    corloc_summary: boolean. If True, also outputs CorLoc metrics.

  Returns:
    A dictionary of metric names to scalar values.

  Raises:
    ValueError: if the set of keys in result_lists is not a superset of the
      expected list of keys.  Unexpected keys are ignored.
    ValueError: if the lists in result_lists have inconsistent sizes.
  """
  # check for expected keys in result_lists
  expected_keys = [
      'detection_boxes', 'detection_scores', 'detection_classes', 'image_id'
  ]
  expected_keys += ['groundtruth_boxes', 'groundtruth_classes']
  if not set(expected_keys).issubset(set(result_lists.keys())):
    raise ValueError('result_lists does not have expected key set.')
  num_results = len(result_lists[expected_keys[0]])
  for key in expected_keys:
    if len(result_lists[key]) != num_results:
      raise ValueError('Inconsistent list sizes in result_lists')

  # Pascal VOC evaluator assumes foreground index starts from zero.
  categories = copy.deepcopy(categories)
  for idx in range(len(categories)):
    categories[idx]['id'] -= label_id_offset

  # num_classes (maybe encoded as categories)
  num_classes = max([cat['id'] for cat in categories]) + 1
  logging.info('Computing Pascal VOC metrics on results.')
  if all(image_id.isdigit() for image_id in result_lists['image_id']):
    image_ids = [int(image_id) for image_id in result_lists['image_id']]
  else:
    image_ids = range(num_results)

  evaluator = object_detection_evaluation.ObjectDetectionEvaluation(
      num_classes, matching_iou_threshold=iou_thres)

  difficult_lists = None
  if 'difficult' in result_lists and result_lists['difficult']:
    difficult_lists = result_lists['difficult']
  for idx, image_id in enumerate(image_ids):
    difficult = None
    if difficult_lists is not None and difficult_lists[idx].size:
      difficult = difficult_lists[idx].astype(np.bool)
    evaluator.add_single_ground_truth_image_info(
        image_id, result_lists['groundtruth_boxes'][idx],
        result_lists['groundtruth_classes'][idx] - label_id_offset,
        difficult)
    evaluator.add_single_detected_image_info(
        image_id, result_lists['detection_boxes'][idx],
        result_lists['detection_scores'][idx],
        result_lists['detection_classes'][idx] - label_id_offset)
  per_class_ap, mean_ap, _, _, per_class_corloc, mean_corloc = (
      evaluator.evaluate())

  metrics = {'Precision/mAP@{}IOU'.format(iou_thres): mean_ap}
  category_index = label_map_util.create_category_index(categories)
  for idx in range(per_class_ap.size):
    if idx in category_index:
      display_name = ('PerformanceByCategory/mAP@{}IOU/{}'
                      .format(iou_thres, category_index[idx]['name']))
      metrics[display_name] = per_class_ap[idx]

  if corloc_summary:
    metrics['CorLoc/CorLoc@{}IOU'.format(iou_thres)] = mean_corloc
    for idx in range(per_class_corloc.size):
      if idx in category_index:
        display_name = (
            'PerformanceByCategory/CorLoc@{}IOU/{}'.format(
                iou_thres, category_index[idx]['name']))
        metrics[display_name] = per_class_corloc[idx]
  return metrics
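
# A hedged sketch (not from the source) of calling evaluate_detection_results_pascal_voc
# on a single image; the boxes, scores and category list below are illustrative values:
#
#   result_lists = {
#       'image_id': ['0'],
#       'detection_boxes': [np.array([[10., 10., 50., 50.]], dtype=np.float32)],
#       'detection_scores': [np.array([0.9], dtype=np.float32)],
#       'detection_classes': [np.array([1], dtype=np.int32)],
#       'groundtruth_boxes': [np.array([[10., 10., 50., 50.]], dtype=np.float32)],
#       'groundtruth_classes': [np.array([1], dtype=np.int32)],
#   }
#   categories = [{'id': 1, 'name': 'person'}]
#   metrics = evaluate_detection_results_pascal_voc(result_lists, categories,
#                                                   label_id_offset=1)
#   # metrics['Precision/mAP@0.5IOU'] is the mean AP over the single category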
def main():

    PATH_TO_LABELS = r'data/sim_udacity_label_map.pbtxt'
    NUM_CLASSES = 3

    #frozen_model_file = "./models/freeze/frozen_inference_graph.pb"
    frozen_model_file = "./models/sim_freeze_tf1.3/frozen_inference_graph.pb"

    # test_img_dir = "/Users/donchan/Documents/UdaCity/MyProject/bstld/data/train/rgb/train/2015-10-05-11-26-32_bag/jpeg"
    #test_img_dir = "alex-lechner-udacity-traffic-light-dataset/udacity_testarea_rgb"
    test_img_dir = "dataset-sdcnd-capstone/data/sim_training_data/sim_data_capture"
    test_image = "left0546.jpg"
    image_path = os.path.join(test_img_dir, test_image)

    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    tfc = TrafficLightClassifier(frozen_model_file)

    boxes, scores, classes, num = tfc.get_classification(image_np)

    print("length of boxes", len(boxes))
    print(scores)
    print(classes)
    print("predicted numbers ", num)
    print("categories", categories)
    print("category index", category_index)

    IMAGE_SIZE = (8, 6)
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    plt.show()

    test_image = "left0030.jpg"  #RED
    image_path = os.path.join(test_img_dir, test_image)
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    boxes, scores, classes, num = tfc.get_classification(image_np)

    print("length of boxes", np.squeeze(boxes))
    print(scores)
    print(classes)
    print("predicted numbers ", num)
    print("categories", categories)
    print("category index", category_index)

    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    plt.show()

    test_image = "left0021.jpg"  #YELLOW
    image_path = os.path.join(test_img_dir, test_image)
    image = Image.open(image_path)
    image_np = load_image_into_numpy_array(image)
    boxes, scores, classes, num = tfc.get_classification(image_np)

    print("length of boxes", np.squeeze(boxes))
    print(scores)
    print(classes)
    print("predicted numbers ", num)
    print("categories", categories)
    print("category index", category_index)

    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        np.squeeze(boxes),
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        use_normalized_coordinates=True,
        max_boxes_to_draw=5,
        line_thickness=8)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    plt.show()
Example #32
def create_kitti_labels(output_path, label_map_path, calib_dir, image_dir,
                        image_ground_dir, graph_dir, examples):
    grid_map_data_resolution = 0.15
    grid_map_data_origin_idx = np.array([60, 30])

    label_map = label_map_util.load_labelmap(label_map_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=6, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    path_to_graph = graph_dir
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(path_to_graph, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            detection_boxes_3d = detection_graph.get_tensor_by_name(
                'detection_boxes_3d:0')
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')
            for idx, example in enumerate(examples):
                label_calib_name = '%06d' % (int(example)) + '.txt'
                label_name = '%06d' % (idx) + '.txt'
                output_label = os.path.join(output_path, 'label', label_name)
                path_calib = os.path.join(calib_dir, label_calib_name)
                velo_to_cam = read_calib(path_calib, 6)
                P2 = read_calib(path_calib, 3)
                R0_rect = read_calib(path_calib, 5)
                trans_image = P2.dot(R0_rect)
                image_to_velo = np.array([[0, -1, grid_map_data_origin_idx[0]],
                                          [-1, 0, grid_map_data_origin_idx[1]],
                                          [0, 0, 1]])
                image_name_hits = example + '_detections_cartesian.png'
                image_path_hits = os.path.join(image_dir, image_name_hits)
                image_name_obs = example + '_observations_cartesian.png'
                image_path_obs = os.path.join(image_dir, image_name_obs)
                image_name_int = example + '_intensity_cartesian.png'
                image_path_int = os.path.join(image_dir, image_name_int)
                image_name_zmin = example + '_z_min_detections_cartesian.png'
                image_path_zmin = os.path.join(image_dir, image_name_zmin)
                image_name_zmax = example + '_z_max_detections_cartesian.png'
                image_path_zmax = os.path.join(image_dir, image_name_zmax)
                image_name_prob = example + '_decay_rate_cartesian.png'
                image_path_prob = os.path.join(image_dir, image_name_prob)
                image_name_occ = example + '_z_max_occlusions_cartesian.png'
                image_path_occ = os.path.join(image_dir, image_name_occ)
                image_name_ground = example + '_ground_surface_cartesian.png'
                #image_path_ground = os.path.join(image_ground_dir, image_name_ground)
                image_path_ground = os.path.join(image_dir, image_name_ground)
                image_hits = cv2.imread(image_path_hits, 0)
                image_obs = cv2.imread(image_path_obs, 0)
                image_int = cv2.imread(image_path_int, 0)
                image_zmin = cv2.imread(image_path_zmin, 0)
                image_zmax = cv2.imread(image_path_zmax, 0)
                image_prob = cv2.imread(image_path_prob, 0)
                image_occ = cv2.imread(image_path_occ, 0)

                inv_image_hits = cv2.bitwise_not(image_hits)
                inv_image_obs = cv2.bitwise_not(image_obs)
                image_vis = np.stack(
                    [inv_image_hits, inv_image_obs, inv_image_hits], axis=-1)
                #image_occlusion = cv2.imread(image_path_occlusion, 0)

                for x in range(0, image_vis.shape[0]):
                    for y in range(0, image_vis.shape[1]):
                        if image_vis[x, y, 0] < 255:
                            image_vis[x, y, 0] = 0
                            image_vis[x, y, 1] = 0
                            image_vis[x, y, 2] = 0
                        elif image_vis[x, y, 1] < 255:
                            value = 255 - image_vis[x, y, 1]
                            value = value * 0.7
                            value = 255 - value
                            value = 220
                            image_vis[x, y, 0] = value
                            image_vis[x, y, 2] = value
                            image_vis[x, y, 1] = value
                image_stacked = np.stack([
                    image_hits, image_occ, image_obs, image_int, image_zmin,
                    image_zmax
                ],
                                         axis=-1)
                #image_stacked = np.stack([image_prob, image_int, image_zmin, image_zmax,
                #                          image_rgb[:,:,0], image_rgb[:,:,1], image_rgb[:,:,2]], axis=-1)
                image_ground = cv2.imread(image_path_ground, 0)
                image_np_expanded = np.expand_dims(image_stacked, axis=0)
                mask_np_expanded = np.expand_dims(image_hits, axis=0)
                mask_np_expanded = np.expand_dims(mask_np_expanded, axis=3)
                start_time = time.time()
                (boxes, boxes_3d, scores, classes,
                 num) = sess.run([
                     detection_boxes, detection_boxes_3d, detection_scores,
                     detection_classes, num_detections
                 ],
                                 feed_dict={image_tensor: image_np_expanded})
                print('Inference time:', time.time() - start_time)
                boxes_3d_np = np.squeeze(boxes_3d)
                boxes_np = np.squeeze(boxes)
                scores_np = np.squeeze(scores)
                classes_np = np.squeeze(classes)
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_vis,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    boxes_3d=np.squeeze(boxes_3d),
                    use_normalized_coordinates=True,
                    line_thickness=2,
                    max_boxes_to_draw=100,
                    skip_labels=True)
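                # NOTE: the boxes_3d keyword is not part of the stock vis_util API;
                # this call assumes a customized visualization utility that can draw
                # the rotated 3D boxes produced by this graph.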
                test_image_name = 'image' + str(idx) + '.png'
                test_image_path = os.path.join(output_path, test_image_name)
                cv2.imwrite(test_image_path, image_vis)
                file_output = open(output_label, 'w')
                for i in range(scores_np.shape[0]):
                    if scores_np[i] > .3:
                        object_class = category_index[int(
                            classes_np[i])]['name']
                        box = tuple(boxes_np[i])
                        y_min = box[0] * image_stacked.shape[0]
                        x_min = box[1] * image_stacked.shape[1]
                        y_max = box[2] * image_stacked.shape[0]
                        x_max = box[3] * image_stacked.shape[1]
                        box_rot = tuple(boxes_3d_np[i])
                        x_c = box_rot[0] * image_stacked.shape[1]
                        y_c = box_rot[1] * image_stacked.shape[0]
                        w_s = box_rot[2]
                        h_s = box_rot[3]
                        sin_angle = box_rot[4]
                        cos_angle = box_rot[5]
                        angle_rad = math.atan2(sin_angle, cos_angle) / 2
                        vec_h_x = h_s * math.cos(angle_rad) / 2.0
                        vec_h_y = h_s * math.sin(angle_rad) / 2.0
                        vec_w_x = -w_s * math.sin(angle_rad) / 2.0
                        vec_w_y = w_s * math.cos(angle_rad) / 2.0
                        x1 = (x_c - vec_w_x - vec_h_x) * image_stacked.shape[1]
                        x2 = (x_c - vec_w_x + vec_h_x) * image_stacked.shape[1]
                        x3 = (x_c + vec_w_x + vec_h_x) * image_stacked.shape[1]
                        y1 = (y_c - vec_w_y - vec_h_y) * image_stacked.shape[0]
                        y2 = (y_c - vec_w_y + vec_h_y) * image_stacked.shape[0]
                        y3 = (y_c + vec_w_y + vec_h_y) * image_stacked.shape[0]
                        l = math.sqrt((x2 - x1) * (x2 - x1) + (y2 - y1) *
                                      (y2 - y1))
                        w = math.sqrt((x3 - x2) * (x3 - x2) + (y3 - y2) *
                                      (y3 - y2))
                        image_ground_box = image_ground[
                            int(round(y_min)):int(round(y_max)),
                            int(round(x_min)):int(round(x_max))]
                        mean_ground = image_ground_box.mean()
                        image_height_max_box = image_zmax[
                            int(round(y_min)):int(round(y_max)),
                            int(round(x_min)):int(round(x_max))]
                        height_max = image_height_max_box.max()
                        height_max_m = 2.6 * height_max / 255 - 2.2
                        mean_ground_m = 2.6 * mean_ground / 255 - 2.2

                        object_length_rot = l * grid_map_data_resolution
                        object_width_rot = w * grid_map_data_resolution
                        object_height = height_max_m - mean_ground_m
                        if object_class == 'Car':
                            if object_height < 1.3 or object_height > 1.9:
                                object_height = 1.56
                        object_t_rot = np.array([
                            x_c * grid_map_data_resolution,
                            y_c * grid_map_data_resolution
                        ])
                        object_t_velo_rot = image_to_velo.dot(
                            np.append(object_t_rot, 1))
                        object_t_velo_rot[2] = mean_ground_m
                        object_t_cam_rot = velo_to_cam.dot(
                            np.append(object_t_velo_rot, 1))
                        object_corners_rot = np.array(
                            [[
                                object_length_rot / 2, object_length_rot / 2,
                                -object_length_rot / 2, -object_length_rot / 2,
                                object_length_rot / 2, object_length_rot / 2,
                                -object_length_rot / 2, -object_length_rot / 2
                            ],
                             [
                                 0, 0, 0, 0, -object_height, -object_height,
                                 -object_height, -object_height
                             ],
                             [
                                 object_width_rot / 2, -object_width_rot / 2,
                                 -object_width_rot / 2, object_width_rot / 2,
                                 object_width_rot / 2, -object_width_rot / 2,
                                 -object_width_rot / 2, object_width_rot / 2
                             ], [1, 1, 1, 1, 1, 1, 1, 1]])
                        corners_to_cam = np.array([[
                            math.cos(angle_rad), 0,
                            math.sin(angle_rad), object_t_cam_rot[0]
                        ], [0, 1, 0, object_t_cam_rot[1]],
                                                   [
                                                       -math.sin(angle_rad), 0,
                                                       math.cos(angle_rad),
                                                       object_t_cam_rot[2]
                                                   ], [0, 0, 0, 1]])
                        object_corners_cam = corners_to_cam.dot(
                            object_corners_rot)
                        object_corners_image = trans_image.dot(
                            object_corners_cam)
                        object_corners_image_2d = np.array([
                            object_corners_image[0] / object_corners_image[2],
                            object_corners_image[1] / object_corners_image[2]
                        ])
                        write_labels(file_output, object_class,
                                     object_corners_image_2d, object_height,
                                     object_width_rot, object_length_rot,
                                     object_t_cam_rot, angle_rad, scores_np[i])
Example #33
def load_label_map(pbtxt_path):
    label_map = label_map_util.load_labelmap(pbtxt_path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=6, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
Example #34
def main():

    parser = argparse.ArgumentParser(
        description="run inference by using specified model",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('model_name', help="specify the model name")
    parser.add_argument('work_dir', help="specify the work space directory")
    parser.add_argument('--model_dir',
                        default=None,
                        help="specify the dir storing models.")

    args = parser.parse_args()

    model_dir = args.model_dir
    if model_dir is None:
        assert os.getenv('MODEL_INPUT_DIR') is not None
        model_dir = os.path.join(os.getenv('MODEL_INPUT_DIR'),
                                 'object_detection')

    model_name = args.model_name
    model_file = model_name + '.tar.gz'
    tar_file = tarfile.open(os.path.join(model_dir, model_file))
    recorded_name = model_name
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            recorded_name = file.name
            tar_file.extract(file, args.work_dir)

    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')
    PATH_TO_CKPT = os.path.join(args.work_dir, recorded_name)
    NUM_CLASSES = 90

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name=model_name)

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    PATH_TO_TEST_IMAGES_DIR = 'test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(1, 2)
    ]

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:

            image_tensor = detection_graph.get_tensor_by_name(
                '{}/image_tensor:0'.format(model_name))
            detection_boxes = detection_graph.get_tensor_by_name(
                '{}/detection_boxes:0'.format(model_name))
            detection_scores = detection_graph.get_tensor_by_name(
                '{}/detection_scores:0'.format(model_name))
            detection_classes = detection_graph.get_tensor_by_name(
                '{}/detection_classes:0'.format(model_name))
            num_detections = detection_graph.get_tensor_by_name(
                '{}/num_detections:0'.format(model_name))

            for image_path in TEST_IMAGE_PATHS:
                image = Image.open(image_path)
                image_np = load_image_into_numpy_array(image)
                image_np_expanded = np.expand_dims(image_np, axis=0)

                options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
                run_metadata = tf.RunMetadata()

                results = sess.run([
                    detection_boxes, detection_scores, detection_classes,
                    num_detections
                ],
                                   feed_dict={image_tensor: image_np_expanded},
                                   options=options,
                                   run_metadata=run_metadata)
                cg = CompGraph(model_name,
                               run_metadata,
                               detection_graph,
                               keyword_filter="while")

                cg_tensor_dict = cg.get_tensors()
                cg_sorted_keys = sorted(cg_tensor_dict.keys())
                #cg_sorted_shape = []
                #for cg_key in cg_sorted_keys:
                #    print(cg_key)
                #    t = tf.shape(cg_tensor_dict[cg_key])
                #    cg_sorted_shape.append(t.eval(feed_dict={image_tensor: image_np_expanded},
                #                                  session=sess))

                cg_sorted_items = []
                for cg_key in cg_sorted_keys:
                    cg_sorted_items.append(tf.shape(cg_tensor_dict[cg_key]))

                cg_sorted_shape = sess.run(
                    cg_sorted_items,
                    feed_dict={image_tensor: image_np_expanded})
                cg.op_analysis(dict(zip(cg_sorted_keys, cg_sorted_shape)),
                               '{}.pickle'.format(model_name))

                print('Image: {}, number of detected: {}'.format(
                    image_path, int(results[3][0])))
def find_labels(image_path, image_name, stub, request, model, n):
    """
    Args:
        image_path: path dell'immagine in input
        image_name: nome dell'immagine ottenuto con la funzione time di python
        stub: viene utilizzato per la comunicazione client-server
        request: richiesta da inviare al server
        model: nome del modello di object detection, puo' essere pet model o people model
        n: numero massimo delle labels che si vogliono considerare
    """
    labels = []  # vettore con le labels del dataset specifico
    bbx = []  # vettore con le coordinate dei bounding box trovati
    request.model_spec.name = model
    result = stub.Predict(
        request,
        10.0)  # risultati della richiesta di prediction, 10 secs timeout
    classes = result.outputs[
        'detection_classes'].float_val  # id delle classi trovate, in ordine dalla classe con score piu' alto
    scores = result.outputs[
        'detection_scores'].float_val  # score delle classi,dallo score piu' alto
    #print zip(classes, scores)
    boxes = result.outputs[
        'detection_boxes'].float_val  # posizione dei bounding box
    # trasformo il vettore in modo che ogni elemento sia una quadrupla che identifica il bounding box
    boxes = np.reshape(boxes, [100, 4])

    # to save the image with the bounding boxes, we open the image and use TensorFlow's vis_util library
    im = misc.imread(
        image_path)  # reads the image as a multidimensional array
    if (model == "pets_model"):
        label_map_path = "Label_maps/pets_label_map.pbtxt"  # mappa delle label
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=37)
    else:
        label_map_path = "Label_maps/people_label_map.pbtxt"
        label_map = label_map_util.load_labelmap(label_map_path)
        categories = label_map_util.convert_label_map_to_categories(
            label_map=label_map, max_num_classes=2)
    category_index = label_map_util.create_category_index(
        categories)  # dictionary of key ("id") / value ("class name") pairs

    # creates an array (img_height, img_width, 3) with the bounding boxes overlaid
    image_vis = vis_util.visualize_boxes_and_labels_on_image_array(
        im,
        boxes,
        np.squeeze(classes).astype(np.int32),
        np.squeeze(scores),
        category_index,
        max_boxes_to_draw=10,  # maximum number of bounding boxes to display
        min_score_thresh=.6,  # minimum score threshold for boxes to be displayed
        use_normalized_coordinates=True,
        line_thickness=5)  # line width of the box outlines

    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    port_result = sock.connect_ex(('localhost', 50070))
    client_hdfs = InsecureClient(
        'http://localhost:50070')  # client used to access HDFS
    if (model == "pets_model"):
        misc.imsave("Images_bbx/{}_pets.jpg".format(image_name),
                    image_vis)  # saves the array locally as a JPEG image
        if port_result == 0:  # if HDFS is connected, move the image there
            client_hdfs.upload(
                '/zora-object-detection/images/{}_pets.jpg'.format(image_name),
                'Images_bbx/{}_pets.jpg'.format(image_name))
            os.remove("Images_bbx/{}_pets.jpg".format(image_name))
    else:
        misc.imsave("Images_bbx/{}_people.jpg".format(image_name), image_vis)
        if port_result == 0:
            client_hdfs.upload(
                '/zora-object-detection/images/{}_people.jpg'.format(
                    image_name), 'Images_bbx/{}_people.jpg'.format(image_name))
            os.remove("Images_bbx/{}_people.jpg".format(image_name))

    # put the labels found by the detection into a vector passed to the obj_detection script, which builds the string
    # the robot will speak. The bounding box coordinates are instead saved in the HDFS log file.
    boxes = boxes.tolist()  # converts the multidimensional array into a list
    for i in range(0, n):
        # only consider labels with a score >= 0.6 and exclude those that identify a bounding box already added
        # with a higher score
        if (scores[i] >= 0.6 and boxes[i] not in bbx):
            bbx.append(boxes[i])
            labels.append(str(category_index[int(classes[i])]['name']))

    return labels, bbx
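
# A hedged sketch of how the gRPC stub and request passed to find_labels are typically
# built for TensorFlow Serving; the channel address and input tensor name are
# assumptions, and image_np_expanded stands for a [1, H, W, 3] uint8 image batch:
#
#   import grpc
#   from tensorflow_serving.apis import predict_pb2, prediction_service_pb2_grpc
#
#   channel = grpc.insecure_channel('localhost:8500')
#   stub = prediction_service_pb2_grpc.PredictionServiceStub(channel)
#   request = predict_pb2.PredictRequest()
#   request.inputs['inputs'].CopyFrom(
#       tf.make_tensor_proto(image_np_expanded, shape=image_np_expanded.shape))
#   labels, bbx = find_labels(image_path, image_name, stub, request, 'pets_model', 10)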
Example #36
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf # noqa
        from object_detection.utils import label_map_util # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md") # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2 # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # setup tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []

    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))

    add_entities(entities)
class GymObjectDetector(object):
    videoPath = "/home/eamonn/FYP/Videos/" \
                "videoplayback"
    rotater = IR(videoPath)
    print("Rotation: " + IR.detectRotation())
    cap = cv2.VideoCapture(videoPath)
    gymObjects = {
        'Gym_Plate': {
            'Location': '',
            'Frame': 0
        },
        'FootWear': {
            'Location': [],
            'Frame': 0
        }
    }
    MODEL_PATH = '/home/eamonn/FYP/models/research/object_detection/'
    MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    MODEL_FILE = MODEL_PATH + MODEL_NAME + '.tar.gz'
    PATH_TO_CKPT = MODEL_PATH + 'gym_plate_inference_graph' + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = os.path.join(MODEL_PATH,
                                  'training/object-detection.pbtxt')
    NUM_CLASSES = 4

    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
        file_name = os.path.basename(file.name)
        if 'frozen_inference_graph.pb' in file_name:
            tar_file.extract(file, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.\
        convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            plate_detected = False
            while True:
                ret, image_np = cap.read()
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Each score represent how level of confidence for each of the objects.
                # Score is shown on the result image, together with the class label.
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
                if np.squeeze(scores)[0] > 0.5:
                    gymObjects['Gym_Plate']['Location'] = (
                        np.squeeze(boxes)[0])
                    gymObjects['Gym_Plate']['Frame'] = (cap.get(
                        cv2.CAP_PROP_POS_FRAMES))
                    cv2.destroyAllWindows()
                    break

    PATH_TO_CKPT = MODEL_PATH + 'FDG_inference_graph' + '/frozen_inference_graph.pb'

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            plate_detected = False
            while True:
                ret, image_np = cap.read()
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Each score represents the level of confidence for each of the objects.
                # The score is shown on the result image, together with the class label.
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)
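                # As above, only the top-scoring detection is inspected; class id 4 is
                # assumed here to be the footwear class in this project's label map.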
                if np.squeeze(scores)[0] > 0.5 and np.squeeze(classes).astype(
                        np.int32)[0] == 4:
                    gymObjects['FootWear']['Location'] = (np.squeeze(boxes)[0])
                    gymObjects['FootWear']['Frame'] = (cap.get(
                        cv2.CAP_PROP_POS_FRAMES))
                    break

    for obj in ['Gym_Plate', 'FootWear']:
        normalisedCoordinates = gymObjects[obj]['Location']
        if not rotater.toBeRotated:
            width = cap.get(cv2.CAP_PROP_FRAME_WIDTH)  # float
            height = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
        else:
            width = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)  # float
            height = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
        # Scale the normalised [ymin, xmin, ymax, xmax] box to pixel coordinates.
        normalisedCoordinates[0] = normalisedCoordinates[0] * height
        normalisedCoordinates[1] = normalisedCoordinates[1] * width
        normalisedCoordinates[2] = normalisedCoordinates[2] * height
        normalisedCoordinates[3] = normalisedCoordinates[3] * width
        gymObjects[obj]['Location'] = normalisedCoordinates
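    # A minimal helper capturing the same scaling arithmetic as the loop above;
    # the name and signature are illustrative and not part of the original code.
    def denormalise_box(box, frame_width, frame_height):
        """Convert a normalised [ymin, xmin, ymax, xmax] box to pixel coordinates."""
        ymin, xmin, ymax, xmax = box
        return [ymin * frame_height, xmin * frame_width,
                ymax * frame_height, xmax * frame_width]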
Example #38
    def _load_label_map(self):
        label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        category_index = label_map_util.create_category_index(categories)
        return category_index
Example #39
def main():
    camera_left = cv2.VideoCapture(0)
    print(camera_left.set(cv2.CAP_PROP_FRAME_WIDTH, FRAME_WIDTH))
    print(camera_left.set(cv2.CAP_PROP_FRAME_HEIGHT, FRAME_HEIGHT))

    # A plain string comparison mis-orders versions such as '1.10.0', so compare
    # parsed version components instead.
    if tuple(int(p) for p in tf.__version__.split('.')[:2]) < (1, 4):
        raise ImportError(
            'Please upgrade your tensorflow installation to v1.4.* or later!')

    # In a notebook, '%matplotlib inline' would be needed here to display the images.

    # This is needed since the notebook is stored in the object_detection folder.
    sys.path.append("..")

    DATASET_PATH = '/Users/marco/Documents/Datasets/drAIver/object_detector/'

    # What model to download.
    MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_CKPT = DATASET_PATH + MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = os.path.join('data', 'mscoco_label_map.pbtxt')

    NUM_CLASSES = 90

    #TODO fix download path
    # opener = urllib.request.URLopener()
    # opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
    # tar_file = tarfile.open(MODEL_FILE)
    # for file in tar_file.getmembers():
    #     file_name = os.path.basename(file.name)
    #     if 'frozen_inference_graph.pb' in file_name:
    #         tar_file.extract(file, os.getcwd())

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # For the sake of simplicity we will use only 2 images:
    # image1.jpg
    # image2.jpg
    # If you want to test the code with your images, just add path to the images to the TEST_IMAGE_PATHS.
    PATH_TO_TEST_IMAGES_DIR = DATASET_PATH + 'test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(1, 3)
    ]

    # Size, in inches, of the output images.
    IMAGE_SIZE = (12, 8)

    while True:

        if camera_left.isOpened():
            _, image_np = camera_left.read()
            #cv2.imshow("camera_left", frame_left)
            #print(frame_left.shape)

            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Actual detection.
            output_dict = run_inference_for_single_image(
                image_np, detection_graph)
            # Visualization of the results of a detection.
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                output_dict['detection_boxes'],
                output_dict['detection_classes'],
                output_dict['detection_scores'],
                category_index,
                instance_masks=output_dict.get('detection_masks'),
                use_normalized_coordinates=True,
                line_thickness=8)

            cv2.imshow("obj_detection", image_np)

        # Exit the preview loop cleanly when 'q' is pressed instead of looping forever.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
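    # Cleanup added as a sketch (not in the original snippet): release the camera
    # and close the preview window once the loop exits.
    camera_left.release()
    cv2.destroyAllWindows()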
Example #40
def main():
    print("Creating eval directory")
    os.makedirs(OUT_PATH_EVAL_IMAGES, exist_ok=True)

    # load frozen graph in memory
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # load label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    ordered_test_set = [
        i for i in sorted(
            filter(lambda a: a.endswith(".jpeg"),
                   os.listdir(PATH_TO_TEST_IMAGES_DIR)))
    ]
    print("We are going to run the inference for {} images".format(
        len(ordered_test_set)))

    for i in range(len(ordered_test_set)):
        print("running {}: {}".format(i, ordered_test_set[i]))
        im_current_path = os.path.join(PATH_TO_TEST_IMAGES_DIR,
                                       ordered_test_set[i])
        im_prev_path = im_current_path if i == 0 else os.path.join(
            PATH_TO_TEST_IMAGES_DIR, ordered_test_set[i - 1])

        current_frame = skimage.io.imread(im_current_path)
        prev_frame = skimage.io.imread(im_prev_path)
        image_s = cv2.subtract(current_frame, prev_frame)
        image_s = cv2.cvtColor(image_s, cv2.COLOR_BGR2GRAY)
        image_s = np.expand_dims(image_s, axis=2)
        four_channels_im = np.concatenate((current_frame, image_s, image_s),
                                          axis=2)
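        # Note: current_frame has 3 channels and image_s has 1, so the concatenation
        # above actually produces a 5-channel array despite the variable name.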

        # Image.fromarray(image_s).save("tmp.jpeg")

        out_debug_image_path = os.path.join(OUT_PATH_EVAL_IMAGES,
                                            os.path.basename(im_current_path))
        # if os.path.isfile(out_debug_image_path):
        #    continue

        # image = Image.open(image_path)
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        # image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_s, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(four_channels_im,
                                                     detection_graph)
        # Visualization of the results of a detection.

        # draw only poles

        vis_util.visualize_boxes_and_labels_on_image_array(
            current_frame,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=4)
        plt.figure(figsize=IMAGE_SIZE)
        plt.imshow(current_frame)

        # draw_court_lines_from_detections(image_np, output_dict['detection_boxes'],
        #                                 output_dict['detection_classes'],
        #                                 output_dict['detection_scores'])

        Image.fromarray(current_frame).save(out_debug_image_path)
def run_inference(image_path, output_filename):

    PATH_TO_CKPT = Config.PATH_FROZEN_INFERENCE_GRAPH
    PATH_TO_LABELS = 'label_map.pbtxt'
    NUM_CLASSES = 1

    ## Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)
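    # Note: for an RGB PIL image, np.asarray(image) yields the same (H, W, 3) uint8
    # array and is considerably faster than reshaping image.getdata().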

    # add path to the images to the TEST_IMAGE_PATHS.
    PATH_TO_TEST_IMAGES_DIR = 'object_detection/test_images'
    TEST_IMAGE_PATHS = [
        os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i))
        for i in range(10, 11)
    ]

    # Size, in inches, of the output images.
    IMAGE_SIZE = (10, 8)

    def run_inference_for_single_image(image, graph):
        with graph.as_default():
            with tf.Session() as sess:
                # Get handles to input and output tensors
                ops = tf.get_default_graph().get_operations()
                all_tensor_names = {
                    output.name
                    for op in ops for output in op.outputs
                }
                tensor_dict = {}
                for key in [
                        'num_detections', 'detection_boxes',
                        'detection_scores', 'detection_classes',
                        'detection_masks'
                ]:
                    tensor_name = key + ':0'
                    if tensor_name in all_tensor_names:
                        tensor_dict[key] = tf.get_default_graph(
                        ).get_tensor_by_name(tensor_name)
                if 'detection_masks' in tensor_dict:
                    # The following processing is only for single image
                    detection_boxes = tf.squeeze(
                        tensor_dict['detection_boxes'], [0])
                    detection_masks = tf.squeeze(
                        tensor_dict['detection_masks'], [0])
                    # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                    real_num_detection = tf.cast(
                        tensor_dict['num_detections'][0], tf.int32)
                    detection_boxes = tf.slice(detection_boxes, [0, 0],
                                               [real_num_detection, -1])
                    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                               [real_num_detection, -1, -1])
                    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                        detection_masks, detection_boxes, image.shape[0],
                        image.shape[1])
                    detection_masks_reframed = tf.cast(
                        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                    # Follow the convention by adding back the batch dimension
                    tensor_dict['detection_masks'] = tf.expand_dims(
                        detection_masks_reframed, 0)

                image_tensor = tf.get_default_graph().get_tensor_by_name(
                    'image_tensor:0')

                # Run inference
                output_dict = sess.run(
                    tensor_dict,
                    feed_dict={image_tensor: np.expand_dims(image, 0)})

                # all outputs are float32 numpy arrays, so convert types as appropriate
                output_dict['num_detections'] = int(
                    output_dict['num_detections'][0])
                output_dict['detection_classes'] = output_dict[
                    'detection_classes'][0].astype(np.uint8)
                output_dict['detection_boxes'] = output_dict[
                    'detection_boxes'][0]
                output_dict['detection_scores'] = output_dict[
                    'detection_scores'][0]
                if 'detection_masks' in output_dict:
                    output_dict['detection_masks'] = output_dict[
                        'detection_masks'][0]
            return output_dict

    image = Image.open(image_path)
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    image_np = load_image_into_numpy_array(image)
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    # Actual detection.
    output_dict = run_inference_for_single_image(image_np, detection_graph)
    # Visualization of the results of a detection.
    vis_util.visualize_boxes_and_labels_on_image_array(
        image_np,
        output_dict['detection_boxes'],
        output_dict['detection_classes'],
        output_dict['detection_scores'],
        category_index,
        instance_masks=output_dict.get('detection_masks'),
        use_normalized_coordinates=True,
        line_thickness=8)

    #print(output_dict['detection_scores'])
    final_score = np.squeeze(output_dict['detection_scores'])
    count = 0
    for score in final_score:
        if score > 0.5:
            count = count + 1
    print("coconut count = %d" % count)
    plt.figure(figsize=IMAGE_SIZE)
    plt.imshow(image_np)
    #print(image.filename)
    plt.savefig('static/out/' + output_filename, bbox_inches='tight')
    output_dict = None
    return count
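
# A hypothetical invocation of run_inference; the image path and output filename
# below are illustrative only.
if __name__ == '__main__':
    n_coconuts = run_inference('object_detection/test_images/image10.jpg',
                               'image10_detections.jpg')
    print("coconuts detected: %d" % n_coconuts)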