Example #1
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                  overwrite=True)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)
    for name, config in [('model.config', FLAGS.model_config_path),
                         ('eval.config', FLAGS.eval_config_path),
                         ('input.config', FLAGS.input_config_path)]:
      tf.gfile.Copy(config,
                    os.path.join(FLAGS.eval_dir, name),
                    overwrite=True)

  model_config = configs['model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']
  if FLAGS.eval_training_data:
    input_config = configs['train_input_config']

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=False)

  def get_next(config):
    return dataset_util.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  create_input_dict_fn = functools.partial(get_next, input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  evaluator.evaluate(
      create_input_dict_fn,
      model_fn,
      eval_config,
      categories,
      FLAGS.checkpoint_dir,
      FLAGS.eval_dir,
      graph_hook_fn=graph_rewriter_fn)
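The load_labelmap / convert_label_map_to_categories pattern above recurs in almost every example below. A minimal sketch of what those helpers return, assuming a hypothetical two-class label_map.pbtxt:

from object_detection.utils import label_map_util

label_map = label_map_util.load_labelmap('label_map.pbtxt')  # hypothetical path
max_num_classes = max(item.id for item in label_map.item)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes)
# categories is a list of dicts, e.g. [{'id': 1, 'name': 'dog'}, {'id': 2, 'name': 'cat'}]
category_index = label_map_util.create_category_index(categories)
# category_index re-keys the same dicts by id: {1: {'id': 1, 'name': 'dog'}, ...}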
Example #2
    def __init__(self):
        logger.info('Loading Tensorflow Detection API')

        weights_path = get_file(config.SSD_INCEPTION_FILENAME, config.SSD_INCEPTION_URL,
                                cache_dir=os.path.abspath(config.WEIGHT_PATH),
                                cache_subdir='models')

        extract_path = weights_path.replace('.tar.gz', '')
        if not os.path.exists(extract_path):
            tar = tarfile.open(weights_path, "r:gz")
            tar.extractall(path=os.path.join(config.WEIGHT_PATH, 'models'))
            tar.close()
        pb_path = os.path.join(extract_path, self.PB_NAME)

        self.graph = tf.Graph()
        with self.graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(pb_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
        self.categories = label_map_util.convert_label_map_to_categories(self.label_map,
                                                                         max_num_classes=self.NUM_CLASSES,
                                                                         use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)
Example #3
def read_data_and_evaluate(input_config, eval_config):
  """Reads pre-computed object detections and groundtruth from tf_record.

  Args:
    input_config: input config proto of type
      object_detection.protos.InputReader.
    eval_config: evaluation config proto of type
      object_detection.protos.EvalConfig.

  Returns:
    Evaluated detections metrics.

  Raises:
    ValueError: if input_reader type is not supported or metric type is unknown.
  """
  if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
    input_paths = input_config.tf_record_input_reader.input_path

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    object_detection_evaluators = evaluator.get_evaluators(
        eval_config, categories)
    # Support a single evaluator
    object_detection_evaluator = object_detection_evaluators[0]

    skipped_images = 0
    processed_images = 0
    for input_path in _generate_filenames(input_paths):
      tf.logging.info('Processing file: {0}'.format(input_path))

      record_iterator = tf.python_io.tf_record_iterator(path=input_path)
      data_parser = tf_example_parser.TfExampleDetectionAndGTParser()

      for string_record in record_iterator:
        tf.logging.log_every_n(tf.logging.INFO, 'Processed %d images...', 1000,
                               processed_images)
        processed_images += 1

        example = tf.train.Example()
        example.ParseFromString(string_record)
        decoded_dict = data_parser.parse(example)

        if decoded_dict:
          object_detection_evaluator.add_single_ground_truth_image_info(
              decoded_dict[standard_fields.DetectionResultFields.key],
              decoded_dict)
          object_detection_evaluator.add_single_detected_image_info(
              decoded_dict[standard_fields.DetectionResultFields.key],
              decoded_dict)
        else:
          skipped_images += 1
          tf.logging.info('Skipped images: {0}'.format(skipped_images))

    return object_detection_evaluator.evaluate()

  raise ValueError('Unsupported input_reader_config.')
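A hedged usage sketch for read_data_and_evaluate above, assuming hypothetical file paths; the proto types are the ones named in the docstring:

from object_detection.protos import eval_pb2, input_reader_pb2

input_config = input_reader_pb2.InputReader()
input_config.label_map_path = 'label_map.pbtxt'  # hypothetical path
input_config.tf_record_input_reader.input_path.append(
    'detections_and_groundtruth.tfrecord')  # hypothetical path

eval_config = eval_pb2.EvalConfig()
eval_config.metrics_set.append('pascal_voc_detection_metrics')

metrics = read_data_and_evaluate(input_config, eval_config)
print(metrics)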
Example #4
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    configs = config_util.get_configs_from_pipeline_file(
        FLAGS.pipeline_config_path)
  else:
    configs = config_util.get_configs_from_multiple_files(
        model_config_path=FLAGS.model_config_path,
        eval_config_path=FLAGS.eval_config_path,
        eval_input_config_path=FLAGS.input_config_path)

  pipeline_proto = config_util.create_pipeline_proto_from_configs(configs)
  config_text = text_format.MessageToString(pipeline_proto)
  tf.gfile.MakeDirs(FLAGS.eval_dir)
  with tf.gfile.Open(os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                     'wb') as f:
    f.write(config_text)

  model_config = configs['model']
  lstm_config = configs['lstm_model']
  eval_config = configs['eval_config']
  input_config = configs['eval_input_config']

  if FLAGS.eval_training_data:
    input_config.external_input_reader.CopyFrom(
        configs['train_input_config'].external_input_reader)
    lstm_config.eval_unroll_length = lstm_config.train_unroll_length

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      lstm_config=lstm_config,
      is_training=False)

  def get_next(config, model_config, lstm_config, unroll_length):
    return seq_dataset_builder.build(config, model_config, lstm_config,
                                     unroll_length)

  create_input_dict_fn = functools.partial(get_next, input_config, model_config,
                                           lstm_config,
                                           lstm_config.eval_unroll_length)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)
Example #5
  def test_load_bad_label_map(self):
    label_map_string = """
      item {
        id:0
        name:'class that should not be indexed at zero'
      }
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)

    with self.assertRaises(ValueError):
      label_map_util.load_labelmap(label_map_path)
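For contrast with the failing test above, a companion snippet in the same test-case context showing a label map that load_labelmap accepts; ids must start at 1, since id 0 is reserved for the background label:

    valid_label_map_string = """
      item {
        id: 1
        name: 'dog'
      }
      item {
        id: 2
        name: 'cat'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'valid_label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(valid_label_map_string)
    label_map = label_map_util.load_labelmap(label_map_path)  # no ValueError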
Example #6
  def __init__(self):
    self.detection_graph = tf.Graph()
    with self.detection_graph.as_default():
      od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    with self.detection_graph.as_default():
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
      if 'detection_masks' in tensor_dict:
        # The following processing is only for a single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframing is required to translate masks from box coordinates to
        # image coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        # `image` is not defined in this scope (a bug in the original); use the
        # dynamic shape of the input placeholder for the image size instead.
        image_shape = tf.shape(image_tensor)
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image_shape[1], image_shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)

    self.tensor_dict = tensor_dict
    self.image_tensor = image_tensor
    self.label_map = label_map
    self.category_index = category_index
    self.session = tf.Session(graph=self.detection_graph)
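A hedged usage sketch for the class above, assuming the enclosing class is named Detector (the snippet only shows __init__) and that numpy is imported as np:

detector = Detector()                               # hypothetical class name
image_np = np.zeros((480, 640, 3), dtype=np.uint8)  # stand-in RGB frame
output = detector.session.run(
    detector.tensor_dict,
    feed_dict={detector.image_tensor: np.expand_dims(image_np, 0)})
print('detections:', int(output['num_detections'][0]))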
Example #7
def main(unused_argv):
  assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'
  if FLAGS.pipeline_config_path:
    model_config, eval_config, input_config = get_configs_from_pipeline_file()
  else:
    model_config, eval_config, input_config = get_configs_from_multiple_files()

  model_fn = functools.partial(
      model_builder.build,
      model_config=model_config,
      is_training=False)

  create_input_dict_fn = functools.partial(
      input_reader_builder.build,
      input_config)

  label_map = label_map_util.load_labelmap(input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(
      label_map, max_num_classes)

  evaluator.evaluate(create_input_dict_fn, model_fn, eval_config, categories,
                     FLAGS.checkpoint_dir, FLAGS.eval_dir)
Example #8
    # python infer1.py --model_file /media/htic/NewVolume1/murali/Object_detection/models/research/models/model_glaucoma/graph/frozen_inference_graph.pb --input_path /media/htic/Balamurali/Sharath/Gl_challenge/REFUGE-Validation400/360_572 --inp_img_ext jpg --output_path /media/htic/Balamurali/Sharath/Gl_challenge/REFUGE-Validation400/360_572_out/ --label_file /media/htic/NewVolume1/murali/Object_detection/models/research/data/glaucoma_label_map.pbtxt

    # Arguments parsed and assigned
    opt = parser.parse_args()
    model_file  = opt.model_file
    inp_img_ext = opt.inp_img_ext
    label_file  = opt.label_file
    detection_out_path = opt.output_path
    input_path  = opt.input_path 
    NUM_CLASSES = 1
    print(vars(opt))

    val_img_path = os.path.join(input_path ,'*.' + inp_img_ext)
    img_paths = glob.glob(val_img_path)
    
    label_map = label_map_util.load_labelmap(label_file)
    categories = label_map_util.convert_label_map_to_categories(label_map,max_num_classes=NUM_CLASSES,use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Initializing the graph
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_file,'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def,name='')

    with detection_graph.as_default():
        with tf.Session() as sess:
            ops = tf.get_default_graph().get_operations()
Example #9
def create_category_index(labels_path, max_classes):
  """Create an index from category id to name."""
  label_map = label_map_util.load_labelmap(labels_path)
  categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=max_classes, use_display_name=True)
  category_index = label_map_util.create_category_index(categories)
  return category_index
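A hedged usage sketch for the helper above, assuming an MSCOCO-style label map with 90 classes:

category_index = create_category_index('mscoco_label_map.pbtxt', 90)  # hypothetical path
print(category_index[1])  # e.g. {'id': 1, 'name': 'person'}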
Example #10
PATH_TO_FROZEN_GRAPH = "E:/SOFTWARES/Object Detection/ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb"

detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Loading label map
# Label maps map indices to category names, so that when our convolutional network predicts `5`,
# we know that this corresponds to `airplane`. Here we use internal utility functions, but anything
# that returns a dictionary mapping integers to appropriate string labels would be fine.
NUM_CLASSES = 90
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# To Start Live Cam Object Detection
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        while True:
            ret, image_np = cap.read()
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
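            # The snippet breaks off here; the lines below are a hedged sketch of
            # how such a capture loop typically continues (the standard tutorial
            # pattern, not necessarily this author's exact code).
            scores = detection_graph.get_tensor_by_name('detection_scores:0')
            classes = detection_graph.get_tensor_by_name('detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name('num_detections:0')
            (boxes_out, scores_out, classes_out, _) = sess.run(
                [boxes, scores, classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            vis_util.visualize_boxes_and_labels_on_image_array(
                image_np,
                np.squeeze(boxes_out),
                np.squeeze(classes_out).astype(np.int32),
                np.squeeze(scores_out),
                category_index,
                use_normalized_coordinates=True,
                line_thickness=8)
            cv2.imshow('object detection', image_np)
            if cv2.waitKey(25) & 0xFF == ord('q'):
                cv2.destroyAllWindows()
                break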
Example #11
def facedet_objdet_as_service():
    Logger.info("Running face and object detection as a service...")

    # networking
    global sock
    sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    try:
        sock.bind((HOST, PORT))
    except socket.error as msg:
        # exception objects are not indexable on Python 3; log the whole error
        Logger.error("Socket bind failed. Error: " + str(msg))
        return
    sock.listen(10)

    # load models
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    g_facedet = tf.Graph()
    g_objdet = tf.Graph()
    g_facenet = tf.Graph()

    with g_facedet.as_default():
        Logger.debug("Loading face detection model: " + PATH_TO_FACEDET_MODEL)
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FACEDET_MODEL, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        image_tensor_face = g_facedet.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        boxes_tensor_face = g_facedet.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        scores_tensor_face = g_facedet.get_tensor_by_name('detection_scores:0')
        classes_tensor_face = g_facedet.get_tensor_by_name(
            'detection_classes:0')
        num_detections_tensor_face = g_facedet.get_tensor_by_name(
            'num_detections:0')

    with g_objdet.as_default():
        Logger.debug("Loading object detection model: " + PATH_TO_OBJDET_MODEL)
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_OBJDET_MODEL, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        # Define input and output Tensors for detection_graph
        image_tensor_obj = g_objdet.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes_obj = g_objdet.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        detection_scores_obj = g_objdet.get_tensor_by_name(
            'detection_scores:0')
        detection_classes_obj = g_objdet.get_tensor_by_name(
            'detection_classes:0')
        num_detections_obj = g_objdet.get_tensor_by_name('num_detections:0')

    with g_facenet.as_default():
        Logger.debug("Loading feature extraction model: " +
                     PATH_TO_FACENET_MODEL)
        facenet.load_model(PATH_TO_FACENET_MODEL)
        faces_placeholder = g_facenet.get_tensor_by_name('input:0')
        embeddings = g_facenet.get_tensor_by_name('embeddings:0')
        phase_train_placeholder = g_facenet.get_tensor_by_name('phase_train:0')
        embedding_size = embeddings.get_shape()[1]
        Logger.debug("Loading face classifier: " + PATH_TO_FACENET_CLASSIFIER)
        with open(PATH_TO_FACENET_CLASSIFIER, 'rb') as infile:
            (facenet_model, facenet_class_names) = pickle.load(infile)

    label_map = label_map_util.load_labelmap(PATH_TO_OBJDET_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES_OBJDET, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    ''' Server '''
    while server_flag:
        Logger.info('Waiting for connections...')
        try:
            conn, addr = sock.accept()
        except socket.error as msg:
            break

        start_time = time.time()
        img_data = []
        try:
            operation = conn.recv(32).decode()
            if operation == "train":
                conn.send("OK".encode())
                try:
                    tokens = conn.recv(32).decode().split(' ')
                    no_objects = int(tokens[0])
                    no_images = int(tokens[1])
                except Exception as e:
                    conn.send(("Error with integer values: " + str(e)).encode())
                    continue
                conn.send("OK".encode())
                Logger.info(
                    "Request to perform training on {} aliases with {} images each."
                    .format(no_objects, no_images))
                for i in range(no_objects):
                    alias = conn.recv(32).decode().replace(" ", "_")
                    Logger.debug("Alias: " + alias)
                    folder = PATH_TO_LFW_IMAGES + "/" + alias
                    if os.path.exists(folder):
                        # os.rmdir fails on non-empty folders; needs `import shutil`
                        shutil.rmtree(folder)
                    os.makedirs(folder)
                    conn.send("OK".encode())
                    img_data, _ = Connection.download_images(conn,
                                                             in_mem=True,
                                                             close_conn=False)
                    n = 0
                    for img_buf in img_data:
                        img = cv2.imdecode(
                            np.asarray(bytearray(img_buf), dtype=np.uint8), 0)
                        cv2.imwrite(folder + "/" + alias + str(n) + ".png",
                                    img)
                        n += 1  # counter was never incremented in the original

                dataset = facenet.get_dataset(PATH_TO_LFW_IMAGES)
                paths, labels = facenet.get_image_paths_and_labels(dataset)
                Logger.debug('Number of classes: %d' % len(dataset))
                Logger.debug('Number of images: %d' % len(paths))
                nrof_images = len(paths)
                nrof_batches_per_epoch = int(
                    math.ceil(1.0 * nrof_images / TRAIN_BATCH_SIZE))
                emb_array = np.zeros((nrof_images, embedding_size))
                with tf.Session(config=config, graph=g_facenet) as sess:
                    for i in range(nrof_batches_per_epoch):
                        start_index = i * TRAIN_BATCH_SIZE
                        end_index = min((i + 1) * TRAIN_BATCH_SIZE,
                                        nrof_images)
                        paths_batch = paths[start_index:end_index]
                        images = facenet.load_data(paths_batch, False, False,
                                                   TRAIN_IMG_SIZE)
                        feed_dict = {
                            faces_placeholder: images,
                            phase_train_placeholder: False
                        }
                        emb_array[start_index:end_index, :] = sess.run(
                            embeddings, feed_dict=feed_dict)
                model = SVC(kernel='linear', probability=True)
                model.fit(emb_array, labels)
                class_names = [cls.name.replace('_', ' ') for cls in dataset]
                with open(PATH_TO_FACENET_CLASSIFIER, 'wb') as outfile:
                    pickle.dump((model, class_names), outfile)
                Logger.debug('Saved classifier model to file "%s"' %
                             PATH_TO_FACENET_CLASSIFIER)

            elif operation == "objdet" or operation == "facerec":
                conn.send("OK".encode())
                img_data, _, img_ids = Connection.download_images(
                    conn, in_mem=True, close_conn=False)
                pprint(img_ids)
            else:
                conn.send("No such operation".encode())
        except Exception as e:
            Logger.error(str(e))
        Logger.info('Images download took {} s'.format(time.time() -
                                                       start_time))
        Logger.info('Received {} images'.format(len(img_data)))

        send_buf = ""
        start_time = time.time()
        images = []
        for i in range(len(img_data)):
            img_buf = img_data[i]
            if img_buf is None or len(img_buf) == 0:
                continue
            img = cv2.imdecode(np.asarray(bytearray(img_buf), dtype=np.uint8),
                               0)
            if img.ndim < 2:
                Logger.error('Unable to align image')
                continue
            elif img.ndim == 2:
                img = facenet.to_rgb(img)
            elif len(img.shape) > 2 and img.shape[2] == 4:
                img = cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)

            [h, w] = np.asarray(img.shape)[0:2]

            if operation == "objdet":
                image_np_expanded = np.expand_dims(img, axis=0)
                images.append(image_np_expanded)
            elif operation == "facerec":
                scaled = misc.imresize(img, (160, 160), interp='bilinear')
                prew = facenet.prewhiten(scaled)
                images.append(prew)

        if operation == "objdet":
            for i in range(len(images)):
                image_np_expanded = images[i]
                img_id = img_ids[i]
                # Face detection - running in both cases
                with tf.Session(config=config, graph=g_facedet) as sess:
                    np.random.seed(777)
                    (bounding_boxes, scores, classes,
                     num_detections) = sess.run(
                         [
                             boxes_tensor_face, scores_tensor_face,
                             classes_tensor_face, num_detections_tensor_face
                         ],
                         feed_dict={image_tensor_face: image_np_expanded})
                    bounding_boxes = np.squeeze(bounding_boxes)
                    scores = np.squeeze(scores)
                    for j in range(bounding_boxes.shape[0]):
                        if scores[j] < FACEDET_THRESH:
                            continue
                        send_buf = send_buf + "face;{};{};{};{};{}:".format(
                            img_id, bounding_boxes[j, 1], bounding_boxes[j, 0],
                            bounding_boxes[j, 3], bounding_boxes[j, 2])

                # Object detection
                with tf.Session(config=config, graph=g_objdet) as sess:
                    np.random.seed(777)
                    (bounding_boxes, scores, classes,
                     num_detections) = sess.run(
                         [
                             detection_boxes_obj, detection_scores_obj,
                             detection_classes_obj, num_detections_obj
                         ],
                         feed_dict={image_tensor_obj: image_np_expanded})
                    bounding_boxes = np.squeeze(bounding_boxes)
                    scores = np.squeeze(scores)
                    classes = np.squeeze(classes)
                    for j in range(bounding_boxes.shape[0]):
                        if scores[j] < FACEDET_THRESH:
                            continue
                        send_buf = send_buf + "{};{};{};{};{};{}:".format(
                            category_index[int(classes[j])]['name'], img_id,
                            bounding_boxes[j, 1], bounding_boxes[j, 0],
                            bounding_boxes[j, 3], bounding_boxes[j, 2])

        elif operation == "facerec":
            with tf.Session(config=config, graph=g_facenet) as sess:
                feed_dict = {
                    faces_placeholder: images,
                    phase_train_placeholder: False
                }
                emb_array = np.zeros((len(img_data), embedding_size))
                emb_array[:] = sess.run(embeddings, feed_dict=feed_dict)
                predictions = facenet_model.predict_proba(emb_array)
                best_class_indices = np.argmax(predictions, axis=1)
                best_class_probabilities = predictions[
                    np.arange(len(best_class_indices)), best_class_indices]
                for i in range(len(best_class_indices)):
                    send_buf = send_buf + "%d;%s;%.3f:" % (
                        i, facenet_class_names[best_class_indices[i]],
                        best_class_probabilities[i])

        send_buf = send_buf + "END"
        Logger.info('Operation {} took {} s'.format(operation,
                                                    time.time() - start_time))
        Logger.debug('Sending: ' + send_buf)
        conn.send(str.encode(send_buf))

    sock.close()
    Logger.info("Service stopped.")
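A hedged client-side sketch for the service above; the host/port values are assumed, and the image-upload step depends on the unseen counterpart of Connection.download_images, so it stays elided:

import socket

client = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
client.connect(('127.0.0.1', 5000))  # assumed address and port
client.send('objdet'.encode())       # pick one of the supported operations
assert client.recv(32).decode() == 'OK'
# ... upload images here (counterpart of Connection.download_images) ...
reply = client.recv(65536).decode()  # "label;img_id;x1;y1;x2;y2:...END"
print(reply)
client.close()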
Example #12
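	# Letterbox the image: scale it to fit 640x640, then center it on a black canvas.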
	size = img.size
	ratio = float(640) / max(size)
	new_image_size = tuple([int(x*ratio) for x in size])
	img = img.resize(new_image_size, Image.ANTIALIAS)
	new_im = Image.new("RGB", (640, 640))
	new_im.paste(img, ((640-new_image_size[0])//2, (640-new_image_size[1])//2))
	return np.asarray(new_im)


# Choose GPU or CPU here. You can first check which devices are available with sess.list_devices().
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '1'
os.environ['TF_FORCE_GPU_ALLOW_GROWTH'] = 'true'
title = "MURAT_EREN"

label_map = label_map_util.load_labelmap('v3/label_map.pbtxt')
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=14, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

detection_graph = tf.Graph()
with detection_graph.as_default():
	od_graph_def = tf.GraphDef()
	with tf.gfile.GFile('v3/201725.pb', 'rb') as fid:
		serialized_graph = fid.read()
		od_graph_def.ParseFromString(serialized_graph)
		tf.import_graph_def(od_graph_def, name='')

cap = cv2.VideoCapture(str(sys.argv[1]))
xwidth = cap.get(cv2.CAP_PROP_FRAME_WIDTH)
xheight = cap.get(cv2.CAP_PROP_FRAME_HEIGHT)
Example #13
def _load_label_map(self):
    label_map = label_map_util.load_labelmap(self.PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
Example #14
def function1():
    
    import pyttsx3
    import pytesseract
    import numpy as np
    import os
    import six.moves.urllib as urllib
    import sys
    import tarfile
    import tensorflow as tf
    import zipfile
    from collections import defaultdict
    from io import StringIO
    from matplotlib import pyplot as plt
    from PIL import Image
    sys.path.append("..")
    pytesseract.pytesseract.tesseract_cmd = 'C:/Program Files (x86)/Tesseract-OCR/tesseract'
    from object_detection.utils import ops as utils_ops
    if tf.__version__ < '1.4.0':
      raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')
    #get_ipython().run_line_magic('matplotlib', 'inline')
    from object_detection.utils import label_map_util
    from object_detection.utils import visualization_utils as vis_util

    import cv2

    # Capture video from file
    """cap = cv2.VideoCapture(0)

    while True:

        ret, frame = cap.read()

        if ret == True:

            #gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

            cv2.imshow('frame',frame)


            if cv2.waitKey(30) & 0xFF == ord('q'):
                cv2.imwrite('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/test_images/image21.jpg',frame)
                break

        else:
            break

    cap.release()
    cv2.destroyAllWindows()
    """



    MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'
    PATH_TO_LABELS = os.path.join('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/data', 'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90
    tar_file = tarfile.open(MODEL_FILE)
    for file in tar_file.getmembers():
      file_name = os.path.basename(file.name)
      if 'frozen_inference_graph.pb' in file_name:
        tar_file.extract(file, os.getcwd())


    detection_graph = tf.Graph()
    with detection_graph.as_default():
      od_graph_def = tf.GraphDef()
      with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
        
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)




    def load_image_into_numpy_array(image):
      (im_width, im_height) = image.size
      return np.array(image.getdata()).reshape(
          (im_height, im_width, 3)).astype(np.uint8)

    PATH_TO_TEST_IMAGES_DIR = 'C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/test_images'
    TEST_IMAGE_PATHS = [ os.path.join(PATH_TO_TEST_IMAGES_DIR, 'image{}.jpg'.format(i)) for i in range(90, 91) ]

    # Size, in inches, of the output images.
    IMAGE_SIZE = (12, 8)




    def run_inference_for_single_image(image, graph):
      with graph.as_default():
        with tf.Session() as sess:
          # Get handles to input and output tensors
          ops = tf.get_default_graph().get_operations()
          all_tensor_names = {output.name for op in ops for output in op.outputs}
          tensor_dict = {}
          for key in [
              'num_detections', 'detection_boxes', 'detection_scores',
              'detection_classes', 'detection_masks'
          ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
              tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                  tensor_name)
          if 'detection_masks' in tensor_dict:
            # The following processing is only for a single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image.shape[0], image.shape[1])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
          image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

          # Run inference
          output_dict = sess.run(tensor_dict,
                                 feed_dict={image_tensor: np.expand_dims(image, 0)})

          # all outputs are float32 numpy arrays, so convert types as appropriate
          output_dict['num_detections'] = int(output_dict['num_detections'][0])
          output_dict['detection_classes'] = output_dict[
              'detection_classes'][0].astype(np.uint8)
          output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
          output_dict['detection_scores'] = output_dict['detection_scores'][0]
          if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
      return output_dict



    with detection_graph.as_default():
      with tf.Session(graph=detection_graph) as sess:
        # Define input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name('detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name('detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in TEST_IMAGE_PATHS:
            image = Image.open(image_path)
              # the array based representation of the image will be used later in order to prepare the
              # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            image_np_expanded = np.expand_dims(image_np, axis=0)
            
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes, num_detections],
                feed_dict={image_tensor: image_np_expanded})
            
            
            # class ids come back as float32, while category_index is keyed by int
            li = [category_index.get(int(value)).get('name')
                  for index, value in enumerate(classes[0]) if scores[0, index] > 0.5]
            li1 = []
            for i in li:
                if i not in li1:
                    li1.append(i)
            
            
            str1 = " ".join(li1)
            print("Object Detected - ",str1)
            #print(str1)
            engine = pyttsx3.init()
            engine.say(str1)
            engine.runAndWait()
            #myobj = gTTS(text=str1, lang=language, slow=False)
            #myobj.save("welcome.mp3")
            #os.system("welcome.mp3")
            
            
            output_dict = run_inference_for_single_image(image_np, detection_graph)
            vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
            #plt.figure(figsize=IMAGE_SIZE)
            cv2.imshow('image',image_np)
            cv2.imwrite('image.jpg',image_np)
            #plt.show(image_np)
            
    DEBUG=0
    global img_x,img_y
    image = cv2.imread('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/object_detection/test_images/image90.jpg')
    #im=Image.open('start.jpg')
    img_y = len(image)
    img_x = len(image[0])
    #print(img_x,img_y)
    #w2,h2=im.size
    #print(w2,h2)
    gray = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
    #cv2.imshow('gray', gray)
    #cv2.waitKey(0)
    def ii(xx, yy):
        #global image, img_y, img_x
        if yy >= img_y or xx >= img_x:
            #print "pixel out of bounds ("+str(y)+","+str(x)+")"
            return 0
        pixel = image[yy][xx]
        return 0.30 * pixel[2] + 0.59 * pixel[1] + 0.11 * pixel[0]
    def connected(contour):
        first = contour[0][0]
        last = contour[len(contour) - 1][0]
        return abs(first[0] - last[0]) <= 1 and abs(first[1] - last[1]) <= 1

    """def keep(contour):
        return keep_box(contour) #and connected(contour)

    """

    # Whether we should keep the containing box of this
    # contour based on it's shape
    """def keep_box(contour):
        xx, yy, w_, h_ = cv2.boundingRect(contour)

        # width and height need to be floats
        w_ *= 1.0
        h_ *= 1.0

        # Test it's shape - if it's too oblong or tall it's
        # probably not a real character
        if w_ / h_ < 0.1 or w_ / h_ > 10:
            if DEBUG:
                print "\t Rejected because of shape: (" + str(xx) + "," + str(yy) + "," + str(w_) + "," + str(h_) + ")" + \
                      str(w_ / h_)
            return False
        
        # check size of the box
        if ((w_ * h_) > ((img_x * img_y) / 5)) or ((w_ * h_) < 15):
            if DEBUG:
                print "\t Rejected because of size"
            return False

        return True"""


    #binary
    ret,thresh = cv2.threshold(gray,127,255,cv2.THRESH_BINARY_INV)
    #cv2.imshow('second', thresh)
    #cv2.waitKey(0)
     
    #dilation
    kernel = np.ones((1,1), np.uint8)
    img_dilation = cv2.dilate(thresh, kernel, iterations=1)
    #ret1,thresh1 = cv2.threshold(img_dilation,10,255,cv2.THRESH_OTSU)
    #cv2.imshow('dilated', img_dilation)

    img1=cv2.bitwise_not(img_dilation)
    #cv2.imshow('inv', img1)
    #cv2.waitKey(0)
    #find contours

    # OpenCV 3.x API: returns (image, contours, hierarchy); OpenCV 4 returns only (contours, hierarchy)
    im2, ctrs, hier = cv2.findContours(img_dilation.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    #print ctrs,hier
    #sort contours
    sorted_ctrs = sorted(ctrs, key=lambda ctr: cv2.boundingRect(ctr)[0])
    keepers=[]
    #print(sorted_ctrs)
    for i, ctr in enumerate(sorted_ctrs):
        """if(i==len(sorted_ctrs)-5):
            break;"""
        #print(ctr)
        # Get bounding box
        #if (w > 15 and h > 15):
        
        x, y, w, h = cv2.boundingRect(ctr)
        # keep boxes that are neither near-full-image nor tiny
        if (w < img_x - 8) and (h < img_y - 8) and (w > 5 and h > 5):
            keepers.append([ctr, [x, y, w, h]])
            roi = image[y:y + h, x:x + w]
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
            cv2.imwrite('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/pics/{}.png'.format(i), roi)
    #ret2,thresh2 = cv2.threshold(image,10,255,cv2.THRESH_OTSU)
    new_image = im2.copy()
    new_image.fill(255)
    #cv2.imwrite("C:/Users/hi/AppData/Local/Programs/Python/Python36/models/filledge.jpg",new_image)
    boxes = []

    # For each box, find the foreground and background intensities
    for index_, (contour_, box) in enumerate(keepers):

        # Find the average intensity of the edge pixels to
        # determine the foreground intensity
        fg_int = 0.0
        for p in contour_:
            fg_int += ii(p[0][0], p[0][1])

        fg_int /= len(contour_)
        if DEBUG:
            print("FG Intensity for #%d = %d" % (index_, fg_int))

        # Find the intensity of three pixels going around the
        # outside of each corner of the bounding box to determine
        # the background intensity
        x_, y_, width, height = box
        bg_int = \
            [
                # top left corner 3 pixels (image y grows downward)
                ii(x_ - 1, y_ - 1),
                ii(x_ - 1, y_),
                ii(x_, y_ - 1),

                # top right corner 3 pixels
                ii(x_ + width + 1, y_ - 1),
                ii(x_ + width, y_ - 1),
                ii(x_ + width + 1, y_),

                # bottom left corner 3 pixels
                ii(x_ - 1, y_ + height + 1),
                ii(x_ - 1, y_ + height),
                ii(x_, y_ + height + 1),

                # bottom right corner 3 pixels
                ii(x_ + width + 1, y_ + height + 1),
                ii(x_ + width, y_ + height + 1),
                ii(x_ + width + 1, y_ + height)
            ]

        # Find the median of the background
        # pixels determined above
        bg_int = np.median(bg_int)

        if DEBUG:
            print("BG Intensity for #%d = %s" % (index_, repr(bg_int)))

        # Determine if the box should be inverted
        if fg_int >= bg_int:
            fg = 255
            bg = 0
        else:
            fg = 0
            bg = 255

        # Loop through every pixel in the box and color the
        # pixel accordingly
        for x in range(x_, x_ + width):
            for y in range(y_, y_ + height):
                if y >= img_y or x >= img_x:
                    if DEBUG:
                        print("pixel out of bounds (%d,%d)" % (y, x))
                    continue
                if ii(x, y) > fg_int:
                    new_image[y][x] = bg
                else:
                    new_image[y][x] = fg

    # blur a bit to improve ocr accuracy
    #new_image = cv2.blur(new_image, (1, 1))
    ###kernel = np.ones((1,1), np.uint8)
    #new_image = cv2.dilate(new_image, kernel, iterations=1)
    ###new_image = cv2.blur(new_image, (1, 1))
    cv2.imwrite('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/text.jpg', new_image)
    #print(pytesseract.image_to_string(Image.open('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/text.jpg')))
    engine = pyttsx3.init()
    img11=Image.open('C:/Users/hi/AppData/Local/Programs/Python/Python36/models/text.jpg')
    print("Text Detected - ",pytesseract.image_to_string(img11))
    engine.say(pytesseract.image_to_string(img11))
    engine.runAndWait()
    cv2.imshow('marked areas',image)
    #cv2.imshow('marked',thresh2)
    img1=cv2.bitwise_not(image)
    #cv2.imshow('inv_img', img1)
    #cv2.imwrite('marked.png',image)
    #cv2.imshow('roi1.png',roi1)
    cv2.waitKey(0)
Example #15
def main(args):
    ap = argparse.ArgumentParser()

    ap.add_argument("-f",
                    "--frozen_inference_graph",
                    help='Path to frozen_inference_graph .pb file')
    ap.add_argument(
        "-l",
        "--label_map",
        help=
        "A .pbtxt file that contains all unique classes and their int map by given format"
    )
    ap.add_argument("-n",
                    "--num_classes",
                    help="Path to number of classes txt file")
    ap.add_argument("-i",
                    "--images",
                    default=os.path.join(DATASET_DIR, 'test', 'images'),
                    help="Path to images")
    args = vars(ap.parse_args())

    PATH_TO_TEST_IMAGES_DIR = args['images']
    PATH_TO_CKPT = args['frozen_inference_graph']
    num_classes = args['num_classes']
    PATH_TO_LABELS = args['label_map']

    if PATH_TO_CKPT is None:
        PATH_TO_CKPT = get_last_frozen_inference_graph()
        num_classes = os.path.abspath(
            os.path.join(PATH_TO_CKPT, '../', '../', 'ssd_num_classes.txt'))
        PATH_TO_LABELS = os.path.abspath(
            os.path.join(PATH_TO_CKPT, '../', '../', 'ssd_label_map.pbtxt'))
    # resolve the class count in both branches; the original only set
    # NUM_CLASSES when falling back to the last frozen graph
    NUM_CLASSES = get_num_class(num_classes)

    print('frozen_inference_graph:', PATH_TO_CKPT)
    print('NUM_CLASSES:', NUM_CLASSES)
    print('label_map:', PATH_TO_LABELS)
    print('PATH_TO_TEST_IMAGES_DIR:', PATH_TO_TEST_IMAGES_DIR)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    TEST_IMAGE_PATHS = sorted(
        glob.glob(os.path.join(PATH_TO_TEST_IMAGES_DIR, '**', '*.jpg'),
                  recursive=True))

    # Size, in inches, of the output images.
    IMAGE_SIZE = (12, 8)

    counter = 0
    for image_path in TEST_IMAGE_PATHS:
        image = Image.open(image_path)
        # the array based representation of the image will be used later in order to prepare the
        # result image with boxes and labels on it.
        image_np = load_image_into_numpy_array(image)
        # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
        image_np_expanded = np.expand_dims(image_np, axis=0)
        # Actual detection.
        output_dict = run_inference_for_single_image(image_np, detection_graph)
        # Visualization of the results of a detection.
        vis_util.visualize_boxes_and_labels_on_image_array(
            image_np,
            output_dict['detection_boxes'],
            output_dict['detection_classes'],
            output_dict['detection_scores'],
            category_index,
            instance_masks=output_dict.get('detection_masks'),
            use_normalized_coordinates=True,
            line_thickness=8)
        plt.figure(figsize=IMAGE_SIZE)
        # plt.imshow(image_np)
        outputs = os.path.join(ROOT_DIR, 'outputs')
        plt.imsave(fname=(outputs + '/' + str(counter) + '.jpg'), arr=image_np)
        counter += 1
Example #16
def get_category_index(path, number_of_classes):
    label_map = label_map_util.load_labelmap(path)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=number_of_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    return category_index
Example #17
def main():
    CWD_PATH = os.getcwd()

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'
    PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME,
                                'frozen_inference_graph.pb')
    PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data',
                                  'mscoco_label_map.pbtxt')
    NUM_CLASSES = 90

    # Loading label map
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # First test on images
    PATH_TO_TEST_IMAGES_DIR = '/Users/chinmayiprasad/Documents/DeepLearning/Project/object_detection/image_no_bags'
    TEST_IMAGE_PATHS = glob.glob(
        os.path.join(PATH_TO_TEST_IMAGES_DIR + "/*.jpg"))
    IMAGE_SIZE = (12, 8)
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    countBagImages = 0

    count = 0
    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            for image_path in TEST_IMAGE_PATHS:
                try:
                    image = Image.open(image_path)
                    image_np = load_image_into_numpy_array(image)
                    image_process, classes, scores, boxes = detect_objects(
                        image_np, sess, detection_graph, category_index)

                    if 1 in set(classes[:5]) and 31 in set(classes):
                        countBagImages += 1

                    classesBag = classes[np.where(scores >= 0.30)]
                    classesPerson = classes[np.where(scores >= 0.75)]
                    if 1 in set(classesPerson) and (31 in set(classesBag)
                                                    or 27 in set(classesBag)):
                        shutil.copy(
                            image_path,
                            '/Users/chinmayiprasad/Documents/DeepLearning/Project/object_detection/proc_humanNoBag'
                        )
                        count += 1
                        print(count)
                        if count % 100 == 0:
                            print(count)
                            plt.figure(figsize=IMAGE_SIZE)
                            plt.imshow(image_process)
                except Exception as e:
                    print("Skipping {} and {}".format(image_path, e))
                    time.sleep(1.5)
Example #18
def main(input_path, output_path, config_path, ckpt_path):
    # we recover our saved model here

    cwd = os.path.abspath(os.getcwd())
    # gets the last ckpt from the ckpt folder automatically (lexical order),
    # and builds full paths for the ckpt and pipeline files
    ckpt_name = sorted(os.listdir(ckpt_path))[-1].split(".")[0]
    model_dir = ckpt_path + ckpt_name
    config_path = cwd + "/" + config_path
    model_dir = cwd + "/" + model_dir

    print("[INFO]: Last checkpoint is:", model_dir)
    print()
    print("[INFO]: Config path is:", config_path)
    print()

    configs = config_util.get_configs_from_pipeline_file(config_path)
    print(configs)
    print()
    model_config = configs["model"]

    detection_model = model_builder.build(model_config=model_config,
                                          is_training=False)

    # Restore checkpoint
    ckpt = tf.compat.v2.train.Checkpoint(model=detection_model)

    ckpt.restore(model_dir)
    print("[INFO]: Done restoring model...")
    detect_fn = get_model_detection_function(detection_model)

    #map labels for inference decoding
    label_map_path = configs['eval_input_config'].label_map_path
    label_map = label_map_util.load_labelmap(label_map_path)
    print("[INFO]: Done")

    categories = label_map_util.convert_label_map_to_categories(
        label_map,
        max_num_classes=label_map_util.get_max_label_map_index(label_map),
        use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    label_map_dict = label_map_util.get_label_map_dict(label_map,
                                                       use_display_name=True)

    #run detector on test image
    #it takes a little longer on the first run and then runs at normal speed.
    print("[INFO]: Loaded labels...")
    print()

    # input video for object detection inference; the original had this
    # inverted, sending file paths to the webcam stream and camera indices
    # to the file stream
    if isinstance(input_path, int):
        vid = WebcamVideoStream(src=input_path).start()
    else:
        vid = FileVideoStream(input_path).start()
    time.sleep(1.0)

    # output video writer; `im` was undefined here in the original, so the
    # writer is now created lazily once the first processed frame is available
    videoOut = None

    print("[INFO] loading model...")
    print("[INFO] starting video play...")
    fps = FPS().start()

    while True:

        frame = vid.read()
        frame = imutils.resize(frame, width=450)

        (im_width, im_height) = (frame.shape[1], frame.shape[0])

        image_np = np.array(frame).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

        input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0),
                                            dtype=tf.float32)
        detections, predictions_dict, shapes = detect_fn(input_tensor)

        label_id_offset = 1
        image_np_with_detections = image_np.copy()

        viz_utils.visualize_boxes_and_labels_on_image_array(
            image_np_with_detections,
            detections['detection_boxes'][0].numpy(),
            (detections['detection_classes'][0].numpy() +
             label_id_offset).astype(int),
            detections['detection_scores'][0].numpy(),
            category_index,
            use_normalized_coordinates=True,
            max_boxes_to_draw=100,
            min_score_thresh=.5,
            agnostic_mode=False,
        )

        cv2.imshow("frame", image_np_with_detections)

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

        if output_path is not None:
            if videoOut is None:
                fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
                videoOut = cv2.VideoWriter(
                    output_path, fourcc, 30.0,
                    (image_np_with_detections.shape[1],
                     image_np_with_detections.shape[0]))
            videoOut.write(image_np_with_detections)

        fps.update()

    fps.stop()

    print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
    print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

    cv2.destroyAllWindows()
    vid.stop()

    if videoOut is not None:
        videoOut.release()
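Example #18 above calls get_model_detection_function without showing it; a sketch matching the definition commonly used in the TF2 object-detection tutorials (an assumption, since the author's own helper is not included):

def get_model_detection_function(model):
    """Wraps a detection model in a tf.function for inference."""

    @tf.function
    def detect_fn(image):
        image, shapes = model.preprocess(image)
        prediction_dict = model.predict(image, shapes)
        detections = model.postprocess(prediction_dict, shapes)
        return detections, prediction_dict, tf.reshape(shapes, [-1])

    return detect_fn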
Example #19
class PersonLoB:
    # ## Env setup

    # In[ ]:

    # What model to download.
    MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = os.path.join(os.environ["TENSORFLOW_MODELS"], 'research',
                                  'object_detection', 'data',
                                  'mscoco_label_map.pbtxt')

    NUM_CLASSES = 90

    # ## Download Model

    # In[ ]:
    if not os.path.isdir(MODEL_NAME):
        logger.info("The model directory %s does not yet exist", MODEL_NAME)
        if not os.path.isfile(MODEL_FILE):
            logger.info("The model file %s does not yet exist", MODEL_FILE)
            opener = urllib.request.URLopener()
            opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
        logger.info("The model file %s has been downloaded", MODEL_FILE)
        tar_file = tarfile.open(MODEL_FILE)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, os.getcwd())
                logger.info("The model %s has been extracted", file)

    # ## Load a (frozen) Tensorflow model into memory.

    # In[ ]:
    logger.info("tf.Graph()")
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        logger.info("tf.GraphDef()")
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            logger.info("serialized_graph")
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # ## Loading label map
    # Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine

    # In[ ]:
    logger.info("Loading label maps")
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
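
    # For illustration (actual entries come from mscoco_label_map.pbtxt),
    # the category_index built above is a plain dict keyed by class id:
    #   {1: {'id': 1, 'name': 'person'},
    #    2: {'id': 2, 'name': 'bicycle'},
    #    ...,
    #    90: {'id': 90, 'name': 'toothbrush'}}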

    # Allocate GPU memory
    logger.info("Initializing TensorFlow session")
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(graph=detection_graph, config=config)

    logger.info("PersonLoB class ready")

    # In[ ]:
    def lob(self, ins):
        # `ins` carries the image, camera fov, compass heading, target
        # class id, and score threshold
        resp = {
            "aob": None,
            "time": None,
            "object_found": None,
            "object_score": 0,
            "compass": None
        }
        start = datetime.now()

        # Identify the client
        peer = ins['peer']
        timestamp = ins['timestamp']
        idclass = float(ins['idclass'] or 1)
        threshhold = float(ins['threshhold'] or 0.20)

        fov = 120.0
        if 'fov' in ins:
            if ins['fov']:
                fov = float(ins['fov'])

        # logger.info("[%s/%ld] fov = %f", peer, timestamp, fov)

        ch = 0.0
        if 'compass' in ins:
            if ins['compass']:
                ch = float(ins['compass'])
        resp["compass"] = ch

        # decode the base64 payload of the "data:image/...;base64," URL
        image_string = io.BytesIO(
            base64.b64decode(ins['image'].split(",")[1]))
        image = Image.open(image_string)

        # This is needed to display the images.
        # get_ipython().magic(u'matplotlib inline')

        # This is needed since the notebook is stored in the object_detection folder.
        #        sys.path.append("..")

        # ## Object detection imports
        # Here are the imports from the object detection module.

        # In[ ]:

        # # Model preparation

        # ## Variables
        #
        # Any model exported using the `export_inference_graph.py` tool can be loaded here simply by changing `PATH_TO_CKPT` to point to a new .pb file.
        #
        # By default we use an "SSD with Mobilenet" model here. See the [detection model zoo](https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md) for a list of other models that can be run out-of-the-box with varying speeds and accuracies.

        # ## Helper code

        # In[ ]:

        def load_image_into_numpy_array(image):
            #logger.info("[%s/%ld] Loading image into numpy array", peer, timestamp)
            (im_width, im_height) = image.size
            return np.array(image.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)

        # # Detection

        # Size, in inches, of the output images.
        IMAGE_SIZE = (12, 8)

        # In[ ]:

        # Apply algorithm to images

        with self.__class__.detection_graph.as_default():
            #logger.info("[%s/%ld] Applying algorithm to images", peer, timestamp)
            with self.__class__.session.as_default():
                #logger.info("[%s/%ld] Opened TensorFlow detection_graph session", peer, timestamp)
                # Definite input and output Tensors for detection_graph
                image_tensor = self.__class__.detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                detection_boxes = self.__class__.detection_graph.get_tensor_by_name(
                    'detection_boxes:0')
                # Each score represents the level of confidence for each of the objects.
                # Score is shown on the result image, together with the class label.
                detection_scores = self.__class__.detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                detection_classes = self.__class__.detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = self.__class__.detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Open Image and get height and width for angle of object
                # image = Image.open(image)
                width, height = image.size
                #logger.info("[%s/%ld] Loading image of size %d by %d", peer, timestamp, width, height)

                # the array based representation of the image will be used later in order to prepare the
                # result image with boxes and labels on it.
                image_np = load_image_into_numpy_array(image)
                #logger.info("[%s/%ld] Numpy array loaded", peer, timestamp)
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                #logger.info("[%s/%ld] Running actual detection", peer, timestamp)
                (boxes, scores, classes, num) = self.__class__.session.run(
                    [
                        detection_boxes, detection_scores, detection_classes,
                        num_detections
                    ],
                    feed_dict={image_tensor: image_np_expanded})
                #logger.info("[%s/%ld] Detection run complete", peer, timestamp)
                # Visualization of the results of a detection.
                # vis_util.visualize_boxes_and_labels_on_image_array(
                # image_np,
                # np.squeeze(boxes),
                # np.squeeze(classes).astype(np.int32),
                # np.squeeze(scores),
                # category_index,
                # use_normalized_coordinates=True,
                # line_thickness=8)
                # plt.figure(figsize=IMAGE_SIZE)
                # plt.imshow(image_np)

        #logger.info("[%s/%ld] We have a result", peer, timestamp)
        # Angle of view, image dimensions, image center pixels, and the
        # angular width of a single pixel
        imageHeight = height
        imageWidth = width
        imageHeightCenter = imageHeight / 2
        imageWidthCenter = imageWidth / 2
        pixelDegree = float(fov) / imageWidth

        # Convert tensorflow output to pandas data frames
        df = pd.DataFrame(boxes.reshape(100, 4),
                          columns=['y_min', 'x_min', 'y_max', 'x_max'])
        df1 = pd.DataFrame(classes.reshape(100, 1),
                           columns=['classes'],
                           dtype=int)
        df2 = pd.DataFrame(scores.reshape(100, 1), columns=['scores'])
        df5 = pd.concat([df, df1, df2], axis=1)

        # Transform box bound coordinates to pixel coordinates
        df5['y_min_t'] = df5['y_min'].apply(lambda x: x * imageHeight)
        df5['x_min_t'] = df5['x_min'].apply(lambda x: x * imageWidth)
        df5['y_max_t'] = df5['y_max'].apply(lambda x: x * imageHeight)
        df5['x_max_t'] = df5['x_max'].apply(lambda x: x * imageWidth)

        # Create objects pixel location

        # Create objects pixel location x and y
        # X
        df5['ob_wid_x'] = df5['x_max_t'] - df5["x_min_t"]
        df5['ob_mid_x'] = df5['ob_wid_x'] / 2
        df5['x_loc'] = df5["x_min_t"] + df5['ob_mid_x']
        # Y
        df5['ob_hgt_y'] = df5['y_max_t'] - df5["y_min_t"]
        df5['ob_mid_y'] = df5['ob_hgt_y'] / 2
        df5['y_loc'] = df5["y_min_t"] + df5['ob_mid_y']

        # Compute each object's angle from the image center; detections are
        # sorted by score, so the first matching row is the highest-scoring one
        df5['object_angle'] = df5['x_loc'].apply(
            lambda x: -(imageWidthCenter - x) * pixelDegree)
        df6 = df5.loc[(df5['classes'] == idclass)
                      & (df5['scores'] > threshhold)]

        #         dfLabeled = pd.concat(category_index[df5['classes']], df5['scores'])
        #         resp['object_scores'] = dfLabeled
        resp['object_scores'] = df5[1:10].to_string(
            columns=['classes', 'scores'])
        #resp["object_scores"] = ' '.join(str(e) for e in df5['classes']) + ' : ' + ' '.join(str(e) for e in df5['scores'])
        #resp["object_classes"] = df6['classes']

        # session.close()

        #logger.info("[%s/%ld] Returning AOB", peer, timestamp)

        if df6.empty:

            AOB = None

        else:

            df7 = df6.iloc[0]['object_angle']
            AOB = df7 + ch

            resp["object_found"] = True
            try:
                resp["object_score"] = str(df6.iloc[0]['scores'])
            except (KeyError, IndexError):
                resp["object_score"] = str(-1)
            resp["object_identified"] = "person"

        end = datetime.now()

        delta = end - start

        resp["aob"] = AOB
        resp["time"] = delta.total_seconds()

        logger.info("[%s/%ld] AOB response: %s", peer, timestamp,
                    json.dumps(resp))
        return resp
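
# A worked sketch of the angle-of-bearing arithmetic used in lob() above,
# with assumed values: a 120-degree field of view, a 640-pixel-wide image,
# an object centered at pixel x = 480, and a compass heading of 90 degrees.
fov = 120.0
image_width = 640
compass = 90.0

pixel_degree = fov / image_width        # 0.1875 degrees per pixel
image_width_center = image_width / 2    # 320
x_loc = 480                             # detected object's center pixel

object_angle = -(image_width_center - x_loc) * pixel_degree  # +30.0 degrees
aob = object_angle + compass            # 120.0 degrees absolute bearing
print(aob)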
Beispiel #20
0
def processimages(path_images_dir, path_labels_map, save_directory):
    pathcpkt = 'data/output_inference_graph.pb/frozen_inference_graph.pb'
    csv_file = 'data/csvfile.csv'
    num_classes = 6

    detection_graph = tf.Graph()

    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(pathcpkt, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    label_map = label_map_util.load_labelmap(path_labels_map)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    f = open(csv_file, 'w')

    #f.write(
    #    'timestamp,number cars in bike lane, number trucks in bike lane, '
    #    'number cars in bus stop, number trucks in bus stop\n')

    def load_image_into_numpy_array(imageconvert):
        (im_width, im_height) = imageconvert.size
        try:
            return np.array(imageconvert.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)
        except ValueError:
            return np.array([])

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph) as sess:
            # Definite input and output Tensors for detection_graph
            image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
            # Each box represents a part of the image where a particular object was detected.
            detection_boxes = detection_graph.get_tensor_by_name(
                'detection_boxes:0')
            # Each score represents the level of confidence for each of the objects.
            # Score is shown on the result image, together with the class label.
            detection_scores = detection_graph.get_tensor_by_name(
                'detection_scores:0')
            detection_classes = detection_graph.get_tensor_by_name(
                'detection_classes:0')
            num_detections = detection_graph.get_tensor_by_name(
                'num_detections:0')

            polygon_right_lane = [(178, 122), (188, 240), (231, 240),
                                  (187, 125)]
            polygon_left_lane = [(108, 143), (0, 215), (0, 233), (123, 142),
                                 (108, 97)]
            polygon_bus_lane = [(200, 155), (230, 240), (292, 240), (225, 157)]

            pathrightlane = mpltPath.Path(polygon_right_lane)
            pathleftlane = mpltPath.Path(polygon_left_lane)
            pathbuslane = mpltPath.Path(polygon_bus_lane)
            for testpath in os.listdir(path_images_dir):

                start_time = time.time()
                timestamp = testpath.split(".jpg")[0]

                try:
                    image = Image.open(path_images_dir + '/' + testpath)
                    image_np = load_image_into_numpy_array(image)
                except IOError:
                    print("Issue opening " + testpath)
                    continue

                if image_np.size == 0:
                    print("Skipping image " + testpath)
                    continue
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                # Actual detection.
                (boxes, scores, classes,
                 num) = sess.run([
                     detection_boxes, detection_scores, detection_classes,
                     num_detections
                 ],
                                 feed_dict={image_tensor: image_np_expanded})

                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    min_score_thresh=0.4,
                    use_normalized_coordinates=True,
                    line_thickness=2)
                scores = np.squeeze(scores)
                boxes = np.squeeze(boxes)
                num_cars_in_bikelane, num_cars_in_bus_stop, num_trucks_in_bike_lane, num_trucks_in_bus_stop = 0, 0, 0, 0
                for i in range(boxes.shape[0]):
                    if scores[i] > .4:
                        box = tuple(boxes[i].tolist())

                        ymin, xmin, ymax, xmax = box

                        # frames are assumed to be 352x240 pixels (matching
                        # the zone polygons above); convert the normalized
                        # box to its pixel-space center
                        center_x = (((xmax * 352) -
                                     (xmin * 352)) / 2) + (xmin * 352)
                        center_y = (((ymax * 240) -
                                     (ymin * 240)) / 2) + (ymin * 240)
                        classes = np.squeeze(classes).astype(np.int32)
                        if classes[i] in category_index.keys():
                            class_name = category_index[classes[i]]['name']
                        else:
                            class_name = 'N/A'

                        if class_name == 'car':
                            points = [(center_x, center_y)]
                            if pathrightlane.contains_points(
                                    points) or pathleftlane.contains_points(
                                        points):
                                num_cars_in_bikelane += 1
                            elif pathbuslane.contains_points(points):
                                num_cars_in_bus_stop += 1

                        elif class_name == 'truck' or class_name == 'police' or class_name == 'ups':
                            points = [(center_x, center_y)]
                            if pathrightlane.contains_points(
                                    points) or pathleftlane.contains_points(
                                        points):
                                num_trucks_in_bike_lane += 1
                            elif pathbuslane.contains_points(points):
                                num_trucks_in_bus_stop += 1

                # write one CSV row per frame: timestamp plus vehicle counts
                # by type and zone
                f.write(timestamp + ',' + str(num_cars_in_bikelane) + ',' +
                        str(num_trucks_in_bike_lane) + ',' +
                        str(num_cars_in_bus_stop) + ',' +
                        str(num_trucks_in_bus_stop) + '\n')
                print("Process Time " + str(time.time() - start_time))
                # note: scipy.misc.imsave was removed in SciPy 1.2; newer
                # environments can use imageio.imwrite instead
                scipy.misc.imsave(save_directory + testpath, image_np)

        f.close()
        return csv_file
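
# A minimal standalone sketch of the point-in-polygon test that drives the
# zone counting above; the polygon and test points here are made up.
import matplotlib.path as mpltPath

square = mpltPath.Path([(0, 0), (10, 0), (10, 10), (0, 10)])
print(square.contains_points([(5, 5)]))    # [ True]  -> inside the zone
print(square.contains_points([(15, 5)]))   # [False]  -> outside the zone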
Beispiel #21
0
    def __init__(self):
        self.seq = 0
        self.ready = False
        self.counter = 0
        self.bridge = CvBridge()

        self.camera_topic = rospy.get_param('~camera_topic',
                                            "/image_raw")
        self.image_sub = rospy.Subscriber(self.camera_topic, Image,
                                          self.callback, queue_size=1)
        self.render = rospy.get_param('~render', True)
        if self.render:
            self.image_pub = rospy.Publisher("detections/image_raw/compressed", CompressedImage, queue_size=5)
        self.model_name = rospy.get_param('~model_name')
        self.models_dir = rospy.get_param('~models_dir')
        self.path_to_ckpt = self.models_dir + '/' + self.model_name + '/frozen_inference_graph.pb'
        self.path_to_labels = rospy.get_param('~path_to_labels')
        self.num_classes = rospy.get_param('~num_classes', 90)
        self.threshold = rospy.get_param('~threshold', 0.5)
        self.rotate = rospy.get_param('~rotate', False)
        self.debug = rospy.get_param('~debug', False)
        self.bbox_pub = rospy.Publisher(self.camera_topic+"/detections", BBoxArray, queue_size=5)

        print("path_to_ckpt:",self.path_to_ckpt)
        print("path_to_labels:",self.path_to_labels)

        if self.path_to_ckpt == '' or self.path_to_labels == '':
            print("\n\nProvide requiered args: path_to_ckpt, path_to_labels")
            print("Shutting down.")
            exit(-1)

        self.label_map = label_map_util.load_labelmap(self.path_to_labels)
        self.categories = label_map_util.convert_label_map_to_categories(
            self.label_map, max_num_classes=self.num_classes,
            use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)

        print("Category map loaded:")
        for i, n in zip(self.category_index.keys(),
                        [str(_['name']) for _ in self.category_index.values()]):
            print("%4d %s" % (i, n))

        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            print("Loading model")

            with tf.gfile.GFile(self.path_to_ckpt, 'rb') as fid:
                serialized_graph = fid.read()
                print("Parsing")
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')
            config = tf.ConfigProto()
            config.gpu_options.allow_growth = True
            self.sess = tf.Session(config=config, graph=self.detection_graph)

            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            print("Outputs:")
            all_tensor_names = {output.name for op in ops for output in op.outputs}
            self.tensor_dict = {}
            for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    print("  "+key)
                    self.tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
            self.image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

        self.ready = True
        print("Model loaded. Waiting for messages on topic:",self.camera_topic)
Beispiel #22
0
def load_labels(path_to_labels, num_classes):
    label_map = label_map_util.load_labelmap(path_to_labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True
    )
    return label_map_util.create_category_index(categories)
def distance_function(cap):
    # # Model preparation
    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_FROZEN_GRAPH = 'object_detection/utils/ssd_mobilenet_v1_coco_2017_11_17/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    PATH_TO_LABELS = 'object_detection/utils/data/mscoco_label_map.pbtxt'

    NUM_CLASSES = 90
    # ## Loading label map
    # Label maps map indices to category names, so that when our convolution network predicts `5`, we know that this corresponds to `airplane`.  Here we use internal utility functions, but anything that returns a dictionary mapping integers to appropriate string labels would be fine
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # ## Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')
        with tf.Session(graph=detection_graph) as sess:
            while True:
                #screen = cv2.resize(grab_screen(region=(0,40,1280,745)), (WIDTH,HEIGHT))
                # screen = cv2.resize(grab_screen(region=(0,40,1280,745)), (800,450))
                image_np = cv2.cvtColor(cap, cv2.COLOR_BGR2RGB)
                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Each box represents a part of the image where a particular object was detected.
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Each score represents the level of confidence for each of the objects.
                # Score is shown on the result image, together with the class label.
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=8)

                for i, b in enumerate(boxes[0]):
                    #                 car                    bus                  truck
                    if classes[0][i] == 3 or classes[0][i] == 6 or classes[0][
                            i] == 8:
                        if scores[0][i] >= 0.5:
                            mid_x = (boxes[0][i][1] + boxes[0][i][3]) / 2
                            mid_y = (boxes[0][i][0] + boxes[0][i][2]) / 2
                            apx_distance = round(
                                ((1 - (boxes[0][i][3] - boxes[0][i][1]))**4),
                                1)
                            cv2.putText(image_np, '{}'.format(apx_distance),
                                        (int(mid_x * 800), int(mid_y * 450)),
                                        cv2.FONT_HERSHEY_SIMPLEX, 0.7,
                                        (255, 255, 255), 2)

                            if apx_distance <= 0.5:
                                if mid_x > 0.3 and mid_x < 0.7:
                                    cv2.putText(image_np, 'WARNING!!!',
                                                (50, 50),
                                                cv2.FONT_HERSHEY_SIMPLEX, 1.0,
                                                (0, 0, 255), 3)
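
# A quick check of the apx_distance heuristic above: it shrinks toward zero
# as the detected box fills more of the frame width. Assumed box edges:
x_min, x_max = 0.35, 0.65                    # box spans 30% of the width
print(round((1 - (x_max - x_min)) ** 4, 1))  # 0.2 -> still far away
x_min, x_max = 0.10, 0.90                    # box spans 80% of the width
print(round((1 - (x_max - x_min)) ** 4, 1))  # 0.0 -> close enough to warn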
Beispiel #24
0
import tensorflow as tf
import object_detection
from object_detection.utils import label_map_util

import cv2
import numpy as np
import os
import sys

model_path = "./data/models/ssdlite_mobilenet_v2_coco_2018_05_09/frozen_inference_graph.pb"

NUM_CLASSES = 90
# label_map = label_map_util.load_labelmap('/home/ruth/Documents/Bumblebee/ML/models/label_map.pbtxt')
label_map = label_map_util.load_labelmap('./data/labels/mscoco_label_map.pbtxt')

class HumanDetector:
    def __init__(self, min_score_thresh=.5):
        self.min_score_thresh = min_score_thresh
        self.load_model()


    def load_model(self):
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(model_path, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

        self.categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(self.categories)
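
    # A hedged sketch of the inference step that would complete this class;
    # the method name `detect` and its return shape are choices made here,
    # not part of the original snippet.
    def detect(self, image_np):
        with self.detection_graph.as_default():
            with tf.Session(graph=self.detection_graph) as sess:
                image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
                boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
                scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
                classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
                (boxes, scores, classes) = sess.run(
                    [boxes, scores, classes],
                    feed_dict={image_tensor: np.expand_dims(image_np, axis=0)})
                # keep only detections above the configured threshold
                keep = np.squeeze(scores) > self.min_score_thresh
                return np.squeeze(boxes)[keep], np.squeeze(classes)[keep]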
Beispiel #25
0
    def run(self):

        time1 = time.time()
        MIN_ratio = 0.8

        #MODEL_NAME = 'ssd_mobilenet_v1_coco_2017_11_17'
        MODEL_NAME = 'faster_rcnn_inception_v2_coco_2018_01_28'
        GRAPH_FILE_NAME = 'frozen_inference_graph.pb'
        LABEL_FILE = 'data/mscoco_label_map.pbtxt'
        NUM_CLASSES = 90
        #end define

        label_map = lmu.load_labelmap(LABEL_FILE)
        categories = lmu.convert_label_map_to_categories(
            label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
        categories_index = lmu.create_category_index(categories)

        print("call label_map & categories : %0.5f" % (time.time() - time1))

        graph_file = MODEL_NAME + '/' + GRAPH_FILE_NAME

        #thread function
        def find_detection_target(categories_index, classes, scores):
            time1_1 = time.time()  # thread function start time
            print("thread started")

            objects = []  # build a list of {class name: score} dicts
            for index, value in enumerate(classes[0]):
                object_dict = {}
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                            scores[0][index]
                    objects.append(object_dict)
            print(objects)

            print("thread function processing time %0.5f" % (time.time() - time1_1))

        #end thread function

        detection_graph = tf.Graph()
        with detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(graph_file, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            sses = tf.Session(graph=detection_graph)

        print("store in memoey time : %0.5f" % (time.time() - time1))

        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')

        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')

        num_detections = detection_graph.get_tensor_by_name('num_detections:0')

        print("make tensor time : %0.5f" % (time.time() - time1))

        #capture = cv2.VideoCapture(0)
        capture = cv2.VideoCapture("20190916_162900.mp4")
        prevtime = 0

        # thread_1 = Process(target=find_detection_target, args=(categories_index, classes, scores))  # create worker process
        print("road Video time : %0.5f" % (time.time() - time1))

        while True:
            ret, frame = capture.read()
            if not ret:  # end of video
                break
            frame_expanded = np.expand_dims(frame, axis=0)
            height, width, channel = frame.shape

            # FPS overlay
            curtime = time.time()
            sec = curtime - prevtime
            prevtime = curtime
            fps = 1 / sec
            fps_text = "FPS : %0.1f" % fps
            cv2.putText(frame, fps_text, (0, 30), cv2.FONT_HERSHEY_SIMPLEX, 1,
                        (0, 255, 0))
            # end FPS overlay

            (boxes, scores, classes, nums) = sses.run(  #np.ndarray
                [
                    detection_boxes, detection_scores, detection_classes,
                    num_detections
                ],
                feed_dict={image_tensor: frame_expanded})  #end sses.run()

            vis_util.visualize_boxes_and_labels_on_image_array(
                frame,
                np.squeeze(boxes),
                np.squeeze(classes).astype(np.int32),
                np.squeeze(scores),
                categories_index,
                use_normalized_coordinates=True,
                min_score_thresh=MIN_ratio,  # minimum detection confidence
                line_thickness=2)

            # objects = []  # create list
            for index, value in enumerate(classes[0]):
                object_dict = {}  # dictionary
                if scores[0][index] > MIN_ratio:
                    object_dict[(categories_index.get(value)).get('name').encode('utf8')] = \
                        scores[0][index]
                    # objects.append(object_dict)  # append to list
                    # visualize_boxes_and_labels_on_image_array unpacks each
                    # box as: ymin, xmin, ymax, xmax = box, i.e. indices
                    # [0], [1], [2], [3]; the same layout is used below

                    ymin = int((boxes[0][index][0] * height))
                    xmin = int((boxes[0][index][1] * width))
                    ymax = int((boxes[0][index][2] * height))
                    xmax = int((boxes[0][index][3] * width))

                    Result = frame[ymin:ymax, xmin:xmax]
                    cv2.imwrite('car.jpg', Result)
                    try:
                        # print(NP.check())
                        NP.number_recognition('car.jpg')
                    except Exception:
                        print("plate recognition failed")
                    cv2.imshow('re', Result)
            # print(objects)

            key = cv2.waitKey(1) & 0xFF

            if ret:
                # https://stackoverflow.com/a/55468544/6622587
                rgbImage = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                h, w, ch = rgbImage.shape
                bytesPerLine = ch * w
                convertToQtFormat = QtGui.QImage(rgbImage.data, w, h,
                                                 bytesPerLine,
                                                 QtGui.QImage.Format_RGB888)
                p = convertToQtFormat.scaled(640, 480, Qt.KeepAspectRatio)
                self.changePixmap.emit(p)

            if key == ord("q"):
                break
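
# The crop at `frame[ymin:ymax, xmin:xmax]` above relies on scaling the
# normalized (ymin, xmin, ymax, xmax) box by the frame size; a standalone
# sketch of that conversion with a dummy frame:
import numpy as np

def crop_box(frame, box):
    # convert a normalized box to pixel indices and slice out the region
    height, width = frame.shape[:2]
    ymin, xmin, ymax, xmax = box
    return frame[int(ymin * height):int(ymax * height),
                 int(xmin * width):int(xmax * width)]

dummy = np.zeros((240, 352, 3), dtype=np.uint8)
print(crop_box(dummy, (0.25, 0.25, 0.75, 0.75)).shape)  # (120, 176, 3)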
Beispiel #26
0
min_confidence = 0.5
num_classes = 2

lr_model = joblib.load('output/models/model_LR.pkl')

model = tf.Graph()
with model.as_default():
    graphDef = tf.GraphDef()
    with tf.gfile.GFile(
            "C:/Users/SKS/Desktop/AAIC/Malaria_Detection_TS/experiments/exported_model/frozen_inference_graph.pb",
            "rb") as f:
        serializedGraph = f.read()
        graphDef.ParseFromString(serializedGraph)
        tf.import_graph_def(graphDef, name="")

labelMap = label_map_util.load_labelmap(labels_loc)
categories = label_map_util.convert_label_map_to_categories(
    labelMap, max_num_classes=num_classes, use_display_name=True)
categoryIdx = label_map_util.create_category_index(categories)
classes = ['gametocyte', 'leukocyte', 'ring', 'schizont', 'trophozoite']

testing_predicition_rnn = {}
testing_predicition_vgg = {}
predicted_boxes_stacked_test = defaultdict(dict)

with model.as_default():
    with tf.Session(graph=model) as sess:
        imageTensor = model.get_tensor_by_name("image_tensor:0")
        boxesTensor = model.get_tensor_by_name("detection_boxes:0")
        scoresTensor = model.get_tensor_by_name("detection_scores:0")
        classesTensor = model.get_tensor_by_name("detection_classes:0")
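
        # A hedged sketch of the inference call that typically follows the
        # tensor handles above; the image path is a placeholder.
        image = cv2.imread("cell_image.png")
        image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        (boxes, scores, classes) = sess.run(
            [boxesTensor, scoresTensor, classesTensor],
            feed_dict={imageTensor: np.expand_dims(image_rgb, axis=0)})
        keep = np.squeeze(scores) > min_confidence
        print(np.squeeze(boxes)[keep])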
Beispiel #27
0
def main(unused_argv):
    assert FLAGS.checkpoint_dir, '`checkpoint_dir` is missing.'
    assert FLAGS.eval_dir, '`eval_dir` is missing.'
    tf.gfile.MakeDirs(FLAGS.eval_dir)

    # Block until a checkpoint with at least one completed training step
    # exists, polling every five minutes.
    wait_time = 300
    while wait_time > 0:
        latest_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
        if latest_checkpoint:
            num_steps = latest_checkpoint.split('-')[-1]
            if int(num_steps) > 0:
                wait_time = 0
        if wait_time > 0:
            tf.logging.info("waiting for checkpoint...")
            time.sleep(wait_time)

    if FLAGS.pipeline_config_path:
        configs = config_util.get_configs_from_pipeline_file(
            FLAGS.pipeline_config_path)
        tf.gfile.Copy(FLAGS.pipeline_config_path,
                      os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                      overwrite=True)
    else:
        configs = config_util.get_configs_from_multiple_files(
            model_config_path=FLAGS.model_config_path,
            eval_config_path=FLAGS.eval_config_path,
            eval_input_config_path=FLAGS.input_config_path)
        for name, config in [('model.config', FLAGS.model_config_path),
                             ('eval.config', FLAGS.eval_config_path),
                             ('input.config', FLAGS.input_config_path)]:
            tf.gfile.Copy(config,
                          os.path.join(FLAGS.eval_dir, name),
                          overwrite=True)

    model_config = configs['model']
    eval_config = configs['eval_config']
    input_config = configs['eval_input_config']
    if FLAGS.eval_training_data:
        input_config = configs['train_input_config']

    do_augmentation = False

    if input_config.WhichOneof('input_reader') == 'tf_record_input_reader':
        input_reader_config = input_config.tf_record_input_reader
        input_path = input_reader_config.input_path
        if not input_path or not input_path[0]:
            do_augmentation = True
            train_input_config = configs['train_input_config']
            train_input_reader_config = train_input_config.tf_record_input_reader
            input_reader_config.input_path[:] = train_input_reader_config.input_path[:]

    model_fn = functools.partial(model_builder.build,
                                 model_config=model_config,
                                 is_training=False)

    def get_next(config):
        return dataset_util.make_initializable_iterator(
            dataset_builder.build(config)).get_next()

    create_input_dict_fn = functools.partial(get_next, input_config)

    label_map = label_map_util.load_labelmap(input_config.label_map_path)
    max_num_classes = max([item.id for item in label_map.item])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes)

    if FLAGS.run_once:
        eval_config.max_evals = 1

    evaluator.evaluate(create_input_dict_fn,
                       model_fn,
                       eval_config,
                       categories,
                       FLAGS.checkpoint_dir,
                       FLAGS.eval_dir,
                       do_augmentation=do_augmentation)
def predictImages(modelArg, labelsArg, imagePathArg, num_classesArg,
                  min_confidenceArg, image_displayArg, pred_stagesArg):

    # initialize the model
    model = tf.Graph()

    # create a context manager that makes this model the default one for
    # execution
    with model.as_default():
        # initialize the graph definition
        graphDef = tf.GraphDef()

        # load the graph from disk
        with tf.gfile.GFile(modelArg, "rb") as f:
            serializedGraph = f.read()
            graphDef.ParseFromString(serializedGraph)
            tf.import_graph_def(graphDef, name="")

    # load the class labels from disk
    labelMap = label_map_util.load_labelmap(labelsArg)
    categories = label_map_util.convert_label_map_to_categories(
        labelMap, max_num_classes=num_classesArg, use_display_name=True)
    categoryIdx = label_map_util.create_category_index(categories)

    # create a plateFinder
    plateFinder = PlateFinder(min_confidenceArg,
                              categoryIdx,
                              rejectPlates=False,
                              charIOUMax=0.3)

    # create plate displayer
    plateDisplay = PlateDisplay()

    # create a session to perform inference
    with model.as_default():
        with tf.Session(graph=model) as sess:
            # create a predicter, used to predict plates and chars
            predicter = Predicter(model, sess, categoryIdx)

            imagePaths = paths.list_images(imagePathArg)
            frameCnt = 0
            start_time = time.time()
            # Loop over all the images
            for imagePath in imagePaths:
                frameCnt += 1

                # load the image from disk
                print("[INFO] Loading image \"{}\"".format(imagePath))
                image = cv2.imread(imagePath)
                (H, W) = image.shape[:2]

                # If prediction stages == 2, then perform prediction on full image, find the plates, crop the plates from the image,
                # and then perform prediction on the plate images
                if pred_stagesArg == 2:
                    # Perform inference on the full image, and then select only the plate boxes
                    boxes, scores, labels = predicter.predictPlates(
                        image, preprocess=True)
                    licensePlateFound_pred, plateBoxes_pred, plateScores_pred = plateFinder.findPlatesOnly(
                        boxes, scores, labels)
                    # loop over the plate boxes, find the chars inside the plate boxes,
                    # and then scrub the chars with 'processPlates', resulting in a list of final plateBoxes, char texts, char boxes, char scores and complete plate scores
                    plates = []
                    for plateBox in plateBoxes_pred:
                        boxes, scores, labels = predicter.predictChars(
                            image, plateBox)
                        chars = plateFinder.findCharsOnly(
                            boxes, scores, labels, plateBox, image.shape[0],
                            image.shape[1])
                        if len(chars) > 0:
                            plates.append(chars)
                        else:
                            plates.append(None)
                    plateBoxes_pred, charTexts_pred, charBoxes_pred, charScores_pred, plateAverageScores_pred = plateFinder.processPlates(
                        plates, plateBoxes_pred, plateScores_pred)

                # If prediction stages == 1, then predict the plates and characters in one pass
                elif pred_stagesArg == 1:
                    # Perform inference on the full image, and then find the plate text associated with each plate
                    boxes, scores, labels = predicter.predictPlates(
                        image, preprocess=False)
                    licensePlateFound_pred, plateBoxes_pred, charTexts_pred, charBoxes_pred, charScores_pred, plateScores_pred = plateFinder.findPlates(
                        boxes, scores, labels)
                else:
                    print(
                        "[ERROR] --pred_stages {}. The number of prediction stages must be either 1 or 2"
                        .format(pred_stagesArg))
                    quit()

                # Print plate text
                for charText in charTexts_pred:
                    print("    Found: ", charText)

                # Display the full image with predicted plates and chars
                if image_displayArg:
                    imageLabelled = plateDisplay.labelImage(
                        image, plateBoxes_pred, charBoxes_pred, charTexts_pred)
                    cv2.imshow("Labelled Image", imageLabelled)
                    cv2.waitKey(0)

            # print some performance statistics
            curTime = time.time()
            processingTime = curTime - start_time
            fps = frameCnt / processingTime
            print(
                "[INFO] Processed {} frames in {:.2f} seconds. Frame rate: {:.2f} Hz"
                .format(frameCnt, processingTime, fps))
            platesReturn = {}
            for i, plateBox in enumerate(plateBoxes_pred):
                #platesReturn[i] = { 'plateBoxLoc': plateBox, 'plateText': charTexts_pred[i], 'charBoxLocs': charBoxes_pred[i]}
                platesReturn[i] = {
                    'plateText': charTexts_pred[i],
                    'plateBoxLoc': list(plateBox),
                    'charBoxLocs': list([list(x) for x in charBoxes_pred[i]])
                }

            #results = results.encode('utf-8')
            return platesReturn
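
# A hedged usage sketch for predictImages(); every path and parameter below
# is a placeholder.
plates = predictImages(modelArg="data/frozen_inference_graph.pb",
                       labelsArg="data/classes.pbtxt",
                       imagePathArg="images/test",
                       num_classesArg=37,
                       min_confidenceArg=0.5,
                       image_displayArg=False,
                       pred_stagesArg=2)
for idx, plate in plates.items():
    print(idx, plate['plateText'])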
Beispiel #29
0
def _build_center_net_model(center_net_config, is_training, add_summaries):
    """Build a CenterNet detection model.

  Args:
    center_net_config: A CenterNet proto object with model configuration.
    is_training: True if this model is being built for training purposes.
    add_summaries: Whether to add tf summaries in the model.

  Returns:
    CenterNetMetaArch based on the config.

  """

    image_resizer_fn = image_resizer_builder.build(
        center_net_config.image_resizer)
    _check_feature_extractor_exists(center_net_config.feature_extractor.type)
    feature_extractor = _build_center_net_feature_extractor(
        center_net_config.feature_extractor)
    object_center_params = object_center_proto_to_params(
        center_net_config.object_center_params)

    object_detection_params = None
    if center_net_config.HasField('object_detection_task'):
        object_detection_params = object_detection_proto_to_params(
            center_net_config.object_detection_task)

    keypoint_params_dict = None
    if center_net_config.keypoint_estimation_task:
        label_map_proto = label_map_util.load_labelmap(
            center_net_config.keypoint_label_map_path)
        keypoint_map_dict = {
            item.name: item
            for item in label_map_proto.item if item.keypoints
        }
        keypoint_params_dict = {}
        keypoint_class_id_set = set()
        all_keypoint_indices = []
        for task in center_net_config.keypoint_estimation_task:
            kp_params = keypoint_proto_to_params(task, keypoint_map_dict)
            keypoint_params_dict[task.task_name] = kp_params
            all_keypoint_indices.extend(kp_params.keypoint_indices)
            if kp_params.class_id in keypoint_class_id_set:
                raise ValueError(
                    ('Multiple keypoint tasks map to the same class id is '
                     'not allowed: %d' % kp_params.class_id))
            else:
                keypoint_class_id_set.add(kp_params.class_id)
        if len(all_keypoint_indices) > len(set(all_keypoint_indices)):
            raise ValueError('Some keypoint indices are used more than once.')

    mask_params = None
    if center_net_config.HasField('mask_estimation_task'):
        mask_params = mask_proto_to_params(
            center_net_config.mask_estimation_task)

    densepose_params = None
    if center_net_config.HasField('densepose_estimation_task'):
        densepose_params = densepose_proto_to_params(
            center_net_config.densepose_estimation_task)

    track_params = None
    if center_net_config.HasField('track_estimation_task'):
        track_params = tracking_proto_to_params(
            center_net_config.track_estimation_task)

    temporal_offset_params = None
    if center_net_config.HasField('temporal_offset_task'):
        temporal_offset_params = temporal_offset_proto_to_params(
            center_net_config.temporal_offset_task)

    return center_net_meta_arch.CenterNetMetaArch(
        is_training=is_training,
        add_summaries=add_summaries,
        num_classes=center_net_config.num_classes,
        feature_extractor=feature_extractor,
        image_resizer_fn=image_resizer_fn,
        object_center_params=object_center_params,
        object_detection_params=object_detection_params,
        keypoint_params_dict=keypoint_params_dict,
        mask_params=mask_params,
        densepose_params=densepose_params,
        track_params=track_params,
        temporal_offset_params=temporal_offset_params,
        use_depthwise=center_net_config.use_depthwise,
        compute_heatmap_sparse=center_net_config.compute_heatmap_sparse)
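
# Builders like the one above are normally reached through
# model_builder.build() on a parsed pipeline config. A minimal sketch,
# with a placeholder config path:
from object_detection.builders import model_builder
from object_detection.utils import config_util

configs = config_util.get_configs_from_pipeline_file(
    'centernet_pipeline.config')
detection_model = model_builder.build(model_config=configs['model'],
                                      is_training=True)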
from object_detection.utils import label_map_util
from object_detection.utils import visualization_utils as vis_util

CWD_PATH = os.getcwd()

# Path to frozen detection graph. This is the actual model that is used for the object detection.
MODEL_NAME = 'ssd_mobilenet_v1_coco_11_06_2017'
PATH_TO_CKPT = os.path.join(CWD_PATH, 'object_detection', MODEL_NAME, 'frozen_inference_graph.pb')

# List of the strings that is used to add correct label for each box.
PATH_TO_LABELS = os.path.join(CWD_PATH, 'object_detection', 'data', 'mscoco_label_map.pbtxt')

NUM_CLASSES = 90

# Loading label map
label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=NUM_CLASSES,
                                                            use_display_name=True)
category_index = label_map_util.create_category_index(categories)


def detect_objects(image_np, sess, detection_graph):
    # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
    image_np_expanded = np.expand_dims(image_np, axis=0)
    image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')

    # Each box represents a part of the image where a particular object was detected.
    boxes = detection_graph.get_tensor_by_name('detection_boxes:0')

    # Each score represents the level of confidence for each of the objects.
    # Score is shown on the result image, together with the class label.
)
parser.add_argument(
    "-c",
    "--csv_path",
    help="Path of output .csv file. If none provided, then no file will be "
    "written.",
    type=str,
    default=None,
)

args = parser.parse_args()

if args.image_dir is None:
    args.image_dir = args.xml_dir

label_map = label_map_util.load_labelmap(args.labels_path)
label_map_dict = label_map_util.get_label_map_dict(label_map)
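
# For illustration, get_label_map_dict() returns a plain name-to-id mapping,
# e.g. {'person': 1, 'bicycle': 2, ...}; the actual entries depend on the
# label map passed via --labels_path.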


def xml_to_csv(path):
    """Iterates through all .xml files (generated by labelImg) in a given directory and combines
    them in a single Pandas dataframe.

    Parameters:
    ----------
    path : str
        The path containing the .xml files
    Returns
    -------
    Pandas DataFrame
        The produced dataframe
    """

# %%

import tensorflow.compat.v1 as tf
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# %%

label_map = label_map_util.load_labelmap(PATH_TO_LABELS)

categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
category_index = label_map_util.create_category_index(categories)


# %%
def load_image_into_numpy_array(image):
    (im_width, im_height) = image.size
    return np.array(image.getdata()).reshape(
        (im_height, im_width, 3)).astype(np.uint8)


# %%
PATH_TO_TEST_IMAGES_DIR = 'test_images'
Beispiel #33
0
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.uint8)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict

detection_graph = tf.Graph()
with detection_graph.as_default():
  od_graph_def = tf.GraphDef()
  with tf.gfile.GFile("output_inference_graph/frozen_inference_graph.pb", 'rb') as fid:
    serialized_graph = fid.read()
    od_graph_def.ParseFromString(serialized_graph)
    tf.import_graph_def(od_graph_def, name='')
label_map = label_map_util.load_labelmap("data/burgers_label_map.pb.txt")
                                      
categories = label_map_util.convert_label_map_to_categories(label_map, max_num_classes=6, use_display_name=True)
category_index = label_map_util.create_category_index(categories)
# Size, in inches, of the output images.
IMAGE_SIZE = (12, 8)

def eval(layer):
  image_path = os.path.join("canonical", layer + ".png")
  image = Image.open(image_path)
  # the array based representation of the image will be used later in order to prepare the
  # result image with boxes and labels on it.
  image_np = load_image_into_numpy_array(image)
  # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
  image_np_expanded = np.expand_dims(image_np, axis=0)
  # Actual detection.
Beispiel #34
0
import numpy as np
import argparse
import time

import tensorflow as tf
from imutils.video import VideoStream, FPS
from object_detection.utils import label_map_util

ap = argparse.ArgumentParser()
ap.add_argument("-m", "--model", required=True,
                help="path to the frozen inference graph (.pb)")
ap.add_argument("-l", "--label_map", required=True,
                help="path to the label map (.pbtxt)")
args = vars(ap.parse_args())
with tf.gfile.FastGFile(args["model"], 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Session() as sess:
    # Restore session
    sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')
    label_map = label_map_util.load_labelmap(args["label_map"])
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=3, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)
    # Read and preprocess an image.
    #img = cv.imread('example2.jpg')
    ##rows = img.shape[0]
    #cols = img.shape[1]
    #inp = cv.resize(img, (300, 300))
    #inp = inp[:, :, [2, 1, 0]]  # BGR2RGB
    vs = VideoStream(src=0).start()
    time.sleep(2.0)
    fps = FPS().start()
    while True:
        # grab the frame from the threaded video stream and resize it
        # to have a maximum width of 400 pixels
Beispiel #35
0
def setup_platform(hass, config, add_entities, discovery_info=None):
    """Set up the TensorFlow image processing platform."""
    model_config = config.get(CONF_MODEL)
    model_dir = model_config.get(CONF_MODEL_DIR) \
        or hass.config.path('tensorflow')
    labels = model_config.get(CONF_LABELS) \
        or hass.config.path('tensorflow', 'object_detection',
                            'data', 'mscoco_label_map.pbtxt')

    # Make sure locations exist
    if not os.path.isdir(model_dir) or not os.path.exists(labels):
        _LOGGER.error("Unable to locate tensorflow models or label map")
        return

    # append custom model path to sys.path
    sys.path.append(model_dir)

    try:
        # Verify that the TensorFlow Object Detection API is pre-installed
        # pylint: disable=unused-import,unused-variable
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        import tensorflow as tf # noqa
        from object_detection.utils import label_map_util # noqa
    except ImportError:
        # pylint: disable=line-too-long
        _LOGGER.error(
            "No TensorFlow Object Detection library found! Install or compile "
            "for your system following instructions here: "
            "https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/installation.md") # noqa
        return

    try:
        # Display warning that PIL will be used if no OpenCV is found.
        # pylint: disable=unused-import,unused-variable
        import cv2 # noqa
    except ImportError:
        _LOGGER.warning(
            "No OpenCV library found. TensorFlow will process image with "
            "PIL at reduced resolution")

    # setup tensorflow graph, session, and label map to pass to processor
    # pylint: disable=no-member
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(model_config.get(CONF_GRAPH), 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    session = tf.Session(graph=detection_graph)
    label_map = label_map_util.load_labelmap(labels)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=90, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    entities = []

    for camera in config[CONF_SOURCE]:
        entities.append(TensorFlowImageProcessor(
            hass, camera[CONF_ENTITY_ID], camera.get(CONF_NAME),
            session, detection_graph, category_index, config))

    add_entities(entities)
Beispiel #36
0
    detection_thresh = cfg['pred_thresh']
    num_classes = cfg['pred_classes']
    csv_path = cfg['pred_csv_path']
    images_path = cfg['pred_image_path']
    dataset_name = cfg['pred_dataset_name']
    project_name = cfg['pred_project_name']
    client = GraphQLClient('https://api.labelbox.com/graphql')
    client.inject_token('Bearer ' + cfg['pred_api_key'])
    model_name = cfg['pred_model']
    classes_filter = cfg['pred_classes_filter']
    begin = cfg['pred_begin']
    end = cfg['pred_end']

    cv_bridge = CvBridge()

    label_map = label_map_util.load_labelmap("models/" + model_name +
                                             "/label_map.pbtxt")
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=num_classes, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(
                "models/" + model_name + "/frozen_inference_graph.pb",
                'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

        config = tf.ConfigProto()
Beispiel #37
0
OBJECT_DETECTION_LABELS_PATH = OBJECT_DETECTION_MODEL_PATH + object_detection_model_name + '/oid_v5_label_map_customised.pbtxt'

# define the max number of classes of objects to be detected
object_detection_setting_file.readline()
max_num_classes_object = int(object_detection_setting_file.readline())

# define which classes of objects to be detected
selected_classes_object = []
object_detection_setting_file.readline()
for i in range(max_num_classes_object):
    object_detection_setting_file.readline()
    class_setting = int(object_detection_setting_file.readline())
    if class_setting == 1:
        selected_classes_object.append(i + 1)
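
# For illustration, the settings file parsed above is assumed to alternate
# header lines (read and discarded) with value lines, roughly:
#
#   max number of classes:
#   10
#   per-class switches (1 = detect, 0 = ignore):
#   class 1
#   1
#   class 2
#   0
#   ...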

label_map_object = label_map_util.load_labelmap(OBJECT_DETECTION_LABELS_PATH)
categories_object = label_map_util.convert_label_map_to_categories(
    label_map_object,
    max_num_classes=max_num_classes_object,
    use_display_name=True)
category_index_object = label_map_util.create_category_index(categories_object)

# load the object detection model into memory
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(OBJECT_DETECTION_CKPT_PATH, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')
sess_object = tf.Session(graph=detection_graph)
def detect_images(img_paths,
                  save_detected_images=False,
                  detection_threshold=0.5):

    # Define the video stream
    #cap = cv2.VideoCapture(0)  # Change only if you have more than one webcam
    print(f'TensorFlow version {tf.__version__}')

    # What model to download.
    # Models can be found here: https://github.com/tensorflow/models/blob/master/research/object_detection/g3doc/detection_model_zoo.md
    # {model name for downloading} {model name} {speed in ms} {detection in COCO measurement units}
    #MODEL_NAME = 'ssd_inception_v2_coco_2017_11_17' # ssd_inception_v2_coco 42ms 24COCO mAP
    #MODEL_NAME = 'ssd_resnet50_v1_fpn_shared_box_predictor_640x640_coco14_sync_2018_07_03'  # slower than ssd_inception_v2_coco_2017_11_17, same detection # ssd_resnet_50_fpn_coco 76ms 35 COCO mAP
    MODEL_NAME = 'ssdlite_mobilenet_v2_coco_2018_05_09'  # fastest # same detection as ssd_inception_v2_coco_2017_11_17 # ssdlite_mobilenet_v2_coco 27ms 22 COCO mAP
    #MODEL_NAME = 'faster_rcnn_nas_coco_2018_01_28' # faster_rcnn_nas 1833ms 43 COCO mAP # DOES NOT WORK, it gets killed for some unknown reason

    MODEL_FILE = MODEL_NAME + '.tar.gz'
    DOWNLOAD_BASE = 'http://download.tensorflow.org/models/object_detection/'

    # Path to frozen detection graph. This is the actual model that is used for the object detection.
    PATH_TO_CKPT = MODEL_NAME + '/frozen_inference_graph.pb'

    # List of the strings that is used to add correct label for each box.
    path_to_research_folder = "/home/nikola/Git/models/research/object_detection/data/"
    # os.path.join('data', <absolute path>) would discard 'data', so join the
    # label-map filename onto the research folder directly.
    PATH_TO_LABELS = os.path.join(path_to_research_folder,
                                  'mscoco_label_map.pbtxt')

    # Number of classes to detect
    NUM_CLASSES = 90

    # Download Model
    if not os.path.exists(MODEL_FILE):
        print(f"Downloading {MODEL_NAME} model...")
        opener = urllib.request.URLopener()
        opener.retrieve(DOWNLOAD_BASE + MODEL_FILE, MODEL_FILE)
        tar_file = tarfile.open(MODEL_FILE)
        for file in tar_file.getmembers():
            file_name = os.path.basename(file.name)
            if 'frozen_inference_graph.pb' in file_name:
                tar_file.extract(file, os.getcwd())
    else:
        print(f"Model {MODEL_NAME} already downloaded")

    # Load a (frozen) Tensorflow model into memory.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.compat.v1.GraphDef()
        with tf.io.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
            serialized_graph = fid.read()
            od_graph_def.ParseFromString(serialized_graph)
            tf.import_graph_def(od_graph_def, name='')

    # Loading label map
    # Label maps map indices to category names, so that when our convolution
    # network predicts `5`, we know that this corresponds to `airplane`. Here we
    # use internal utility functions, but anything that returns a dictionary
    # mapping integers to appropriate string labels would be fine.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    # Helper code
    def load_image_into_numpy_array(image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    def load_images(img_paths):
        ''' Load images via generator for less memory usage '''

        for img_path in img_paths:
            if not os.path.exists(img_path):
                print(
                    f"File could not be found. Check path and file extension. Entered path is {img_path}"
                )
                exit(1)  # non-zero exit code: this is an error condition

            if not os.path.isfile(img_path):
                print(
                    f"File is not a valid file. Check path and file extension. Entered path is {img_path}"
                )
                exit(1)

            #width, height =  img.size[0], img.size[1]
            #print('Frame size: width, height:', width, height)
            yield Image.open(img_path)

    # Detection
    with detection_graph.as_default():
        with tf.compat.v1.Session(graph=detection_graph) as sess:
            for counter, img in enumerate(load_images(img_paths), 1):

                if img is None:
                    print("Image is None")
                    exit(1)

                image_np = load_image_into_numpy_array(img)
                #image_np = load_image_into_numpy_array(image_np)
                #cv2.imshow('Loaded image', image_np)
                #cv2.waitKey(0)

                # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
                image_np_expanded = np.expand_dims(image_np, axis=0)

                # Extract image tensor
                image_tensor = detection_graph.get_tensor_by_name(
                    'image_tensor:0')
                # Extract detection boxes
                boxes = detection_graph.get_tensor_by_name('detection_boxes:0')
                # Extract detection scores
                scores = detection_graph.get_tensor_by_name(
                    'detection_scores:0')
                # Extract detection classes
                classes = detection_graph.get_tensor_by_name(
                    'detection_classes:0')
                # Extract number of detections
                num_detections = detection_graph.get_tensor_by_name(
                    'num_detections:0')
                # Actual detection.
                (boxes, scores, classes, num_detections) = sess.run(
                    [boxes, scores, classes, num_detections],
                    feed_dict={image_tensor: image_np_expanded})
                # Visualization of the results of a detection.
                vis_util.visualize_boxes_and_labels_on_image_array(
                    image_np,
                    np.squeeze(boxes),
                    np.squeeze(classes).astype(np.int32),
                    np.squeeze(scores),
                    category_index,
                    use_normalized_coordinates=True,
                    line_thickness=4,
                    min_score_thresh=.5)

                # Print detected classes (above threshold level) # TODO: Count the same classes
                # Scores come back sorted in descending order, so zipping the full
                # class-name list with the filtered scores keeps the pairs aligned.
                class_names = [
                    category_index[int(i)]['name'] for i in classes[0]
                ]
                above_threshold_scores = [
                    x for x in scores[0] if x > detection_threshold
                ]
                print(
                    f"Detected classes: {list(zip(class_names, above_threshold_scores))}"
                )

                img_filename_with_ext = img.filename.split('/')[-1]
                filename = img_filename_with_ext.split('.')[0]
                file_ext = img.format

                # Print current progress
                print_progress_bar(
                    counter,
                    len(img_paths),
                    prefix=f'Detecting image {img_filename_with_ext}')

                # Display output
                #cv2.imshow(f"{img_filename_with_ext} (press 'q' to exit)", cv2.resize(image_np, (800, 600)))

                # Save output
                if save_detected_images:
                    img_save_path = str(filename + '_detected_output(' +
                                        str(counter) + ').' + file_ext)
                    print(f'Saving detected output image to {img_save_path}')
                    ret = cv2.imwrite(img_save_path, image_np)

                    if not ret:
                        print(f'Warning. imwrite returned: {ret}')
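                # Hypothetical follow-up for the TODO above: count how often each
                # class cleared the threshold (the import would normally sit at
                # the top of the file).
                from collections import Counter
                detected = [name for name, score in zip(class_names, scores[0])
                            if score > detection_threshold]
                print(f"Class counts: {Counter(detected)}")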
Beispiel #39
0
  def test_load_bad_label_map(self):
    label_map_string = """
      item {
        id:0
        name:'class that should not be indexed at zero'
      }
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'dog'
      }
    """
    label_map_path = os.path.join(self.get_temp_dir(), 'label_map.pbtxt')
    with tf.gfile.Open(label_map_path, 'wb') as f:
      f.write(label_map_string)

    with self.assertRaises(ValueError):
      label_map_util.load_labelmap(label_map_path)

  def test_keep_categories_with_unique_id(self):
    label_map_proto = string_int_label_map_pb2.StringIntLabelMap()
    label_map_string = """
      item {
        id:2
        name:'cat'
      }
      item {
        id:1
        name:'child'
      }
      item {
        id:1
        name:'person'
      }
      item {
        id:1
        name:'n00007846'
      }
    """
    text_format.Merge(label_map_string, label_map_proto)
    categories = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=3)
    self.assertListEqual([{
        'id': 2,
        'name': u'cat'
    }, {
        'id': 1,
        'name': u'child'
    }], categories)

  def test_convert_label_map_to_categories_no_label_map(self):
    categories = label_map_util.convert_label_map_to_categories(
        None, max_num_classes=3)
    expected_categories_list = [{
        'name': u'category_1',
        'id': 1
    }, {
        'name': u'category_2',
        'id': 2
    }, {
        'name': u'category_3',
        'id': 3
    }]
    self.assertListEqual(expected_categories_list, categories)

  def test_convert_label_map_to_coco_categories(self):
    label_map_proto = self._generate_label_map(num_classes=4)
    categories = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=3)
    expected_categories_list = [{
        'name': u'1',
        'id': 1
    }, {
        'name': u'2',
        'id': 2
    }, {
        'name': u'3',
        'id': 3
    }]
    self.assertListEqual(expected_categories_list, categories)

  def test_convert_label_map_to_coco_categories_with_few_classes(self):
    label_map_proto = self._generate_label_map(num_classes=4)
    cat_no_offset = label_map_util.convert_label_map_to_categories(
        label_map_proto, max_num_classes=2)
    expected_categories_list = [{
        'name': u'1',
        'id': 1
    }, {
        'name': u'2',
        'id': 2
    }]
    self.assertListEqual(expected_categories_list, cat_no_offset)

  def test_create_category_index(self):
    categories = [{'name': u'1', 'id': 1}, {'name': u'2', 'id': 2}]
    category_index = label_map_util.create_category_index(categories)
    self.assertDictEqual({
        1: {
            'name': u'1',
            'id': 1
        },
        2: {
            'name': u'2',
            'id': 2
        }
    }, category_index)
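# The methods above presumably live in a tf.test.TestCase subclass; a minimal
# harness (an assumption about the surrounding file) would end with:
if __name__ == '__main__':
    tf.test.main()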
Beispiel #40
0
    for file in files:
        if file.endswith(".jpg"):
            images.append(os.path.join(root, file))
images.sort()

# Load a (frozen) Tensorflow model into memory.
detection_graph = tf.Graph()
with detection_graph.as_default():
    od_graph_def = tf.GraphDef()
    with tf.gfile.GFile(model_path, 'rb') as fid:
        serialized_graph = fid.read()
        od_graph_def.ParseFromString(serialized_graph)
        tf.import_graph_def(od_graph_def, name='')

# Load label map
label_map = label_map_util.load_labelmap(label_path)
categories = label_map_util.convert_label_map_to_categories(
    label_map, max_num_classes=num_classes, use_display_name=True)
category_index = label_map_util.create_category_index(categories)

# Detection
with detection_graph.as_default():
    with tf.Session(graph=detection_graph) as sess:
        # Definite input and output Tensors for detection_graph
        image_tensor = detection_graph.get_tensor_by_name('image_tensor:0')
        # Each box represents a part of the image where a particular object was detected.
        detection_boxes = detection_graph.get_tensor_by_name(
            'detection_boxes:0')
        # Each score represents the level of confidence for each of the objects.
        # The score is shown on the result image, together with the class label.
        detection_scores = detection_graph.get_tensor_by_name(
            'detection_scores:0')
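        # The example is truncated here; mirroring the identical pattern in the
        # previous example, a plausible continuation (an assumption, and it
        # presumes the usual `numpy as np` / PIL imports) would be:
        detection_classes = detection_graph.get_tensor_by_name(
            'detection_classes:0')
        num_detections = detection_graph.get_tensor_by_name('num_detections:0')
        for image_path in images:
            image = Image.open(image_path)
            (im_width, im_height) = image.size
            image_np = np.array(image.getdata()).reshape(
                (im_height, im_width, 3)).astype(np.uint8)
            (boxes, scores, classes, num) = sess.run(
                [detection_boxes, detection_scores, detection_classes,
                 num_detections],
                feed_dict={image_tensor: np.expand_dims(image_np, axis=0)})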
Beispiel #41
0
def main(_):
  assert FLAGS.train_dir, '`train_dir` is missing.'
  assert FLAGS.pipeline_config_path, '`pipeline_config_path` is missing'
  assert FLAGS.eval_dir, '`eval_dir` is missing.'

  configs = config_util.get_configs_from_pipeline_file(
      FLAGS.pipeline_config_path)
  if FLAGS.task == 0:
    tf.gfile.MakeDirs(FLAGS.train_dir)
    tf.gfile.Copy(FLAGS.pipeline_config_path,
                  os.path.join(FLAGS.train_dir, 'pipeline.config'),
                  overwrite=True)

  tf.gfile.MakeDirs(FLAGS.eval_dir)
  tf.gfile.Copy(FLAGS.pipeline_config_path,
                os.path.join(FLAGS.eval_dir, 'pipeline.config'),
                overwrite=True)

  model_config = configs['model']

  train_config = configs['train_config']
  train_input_config = configs['train_input_config']

  eval_config = configs['eval_config']
  if FLAGS.eval_training_data:
    eval_input_config = configs['train_input_config']
  else:
    eval_input_config = configs['eval_input_config']

  # Run evaluation after every FLAGS.epochs_between_evals epochs of training.
  # The total number of training steps is taken from num_steps in the config.
  if train_config.num_steps:
    total_num_epochs = train_config.num_steps
    train_config.num_steps = FLAGS.epochs_between_evals
    total_training_cycle = total_num_epochs // train_config.num_steps
  else:
    # TODO(mehdi): make it run indefinitely instead of using this large stand-in.
    total_num_epochs = 20000000
    train_config.num_steps = FLAGS.epochs_between_evals
    total_training_cycle = total_num_epochs // train_config.num_steps

  train_model_fn = functools.partial(model_builder.build,
                                     model_config=model_config,
                                     is_training=True)
  eval_model_fn = functools.partial(model_builder.build,
                                    model_config=model_config,
                                    is_training=False)

  def get_next(config):
    return dataset_util.make_initializable_iterator(
        dataset_builder.build(config)).get_next()

  # functions to create a tensor input dictionary for both training & evaluation
  train_input_dict_fn = functools.partial(get_next, train_input_config)
  eval_input_dict_fn = functools.partial(get_next, eval_input_config)

  # If not explicitly specified in the constructor and the TF_CONFIG
  # environment variable is present, load cluster_spec from TF_CONFIG.
  env = json.loads(os.environ.get('TF_CONFIG', '{}'))
  cluster_data = env.get('cluster', None)
  cluster = tf.train.ClusterSpec(cluster_data) if cluster_data else None
  task_data = env.get('task', {'type': 'master', 'index': 0})
  task_info = type('TaskSpec', (object,), task_data)
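  # A sample TF_CONFIG for reference (an assumption -- the hosts are
  # illustrative, but the {"cluster": ..., "task": ...} shape is the standard
  # TensorFlow convention):
  # TF_CONFIG='{"cluster": {"master": ["host0:2222"], "worker": ["host1:2222"],
  #             "ps": ["host2:2222"]}, "task": {"type": "worker", "index": 0}}'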

  # Parameters for a single worker.
  parameter_server_tasks = 0
  worker_replicas = 1
  worker_job_name = 'lonely_worker'
  task = 0
  is_chief = True
  master = ''

  if cluster_data and 'worker' in cluster_data:
    # Number of total worker replicas include "worker"s and the "master".
    worker_replicas = len(cluster_data['worker']) + 1
  if cluster_data and 'ps' in cluster_data:
    parameter_server_tasks = len(cluster_data['ps'])

  if worker_replicas > 1 and parameter_server_tasks < 1:
    raise ValueError('At least 1 ps task is needed for distributed training.')

  if worker_replicas >= 1 and parameter_server_tasks > 0:
    # Set up distributed training.
    # `cluster` is already a ClusterSpec, so it can be passed to the server directly.
    server = tf.train.Server(cluster, protocol='grpc',
                             job_name=task_info.type,
                             task_index=task_info.index)
    if task_info.type == 'ps':
      server.join()
      return

    worker_job_name = '%s/task:%d' % (task_info.type, task_info.index)
    task = task_info.index
    is_chief = (task_info.type == 'master')
    master = server.target

  label_map = label_map_util.load_labelmap(eval_input_config.label_map_path)
  max_num_classes = max([item.id for item in label_map.item])
  categories = label_map_util.convert_label_map_to_categories(label_map,
                                                              max_num_classes)

  if FLAGS.run_once:
    eval_config.max_evals = 1

  train_graph_rewriter_fn = eval_graph_rewriter_fn = None
  if 'graph_rewriter_config' in configs:
    train_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=True)
    eval_graph_rewriter_fn = graph_rewriter_builder.build(
        configs['graph_rewriter_config'], is_training=False)

  def train():
    return trainer.train(create_tensor_dict_fn=train_input_dict_fn,
                         create_model_fn=train_model_fn,
                         train_config=train_config, master=master, task=task,
                         num_clones=FLAGS.num_clones,
                         worker_replicas=worker_replicas,
                         clone_on_cpu=FLAGS.clone_on_cpu,
                         ps_tasks=parameter_server_tasks,
                         worker_job_name=worker_job_name,
                         is_chief=is_chief, train_dir=FLAGS.train_dir,
                         graph_hook_fn=train_graph_rewriter_fn)

  def evaluate():
    return evaluator.evaluate(eval_input_dict_fn, eval_model_fn, eval_config,
                              categories, FLAGS.train_dir, FLAGS.eval_dir,
                              graph_hook_fn=eval_graph_rewriter_fn)

  for cycle_index in range(total_training_cycle):
    tf.logging.info('Starting a training cycle: %d/%d',
                    cycle_index, total_training_cycle)
    train()
    tf.logging.info('Starting to evaluate.')
    eval_metrics = evaluate()
    if stopping_criteria_met(eval_metrics, FLAGS.mask_min_ap, FLAGS.box_min_ap):
      tf.logging.info('Stopping criteria met. Training stopped')
      break
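# stopping_criteria_met is not defined in this excerpt. A plausible sketch
# (the metric key names are assumptions, not taken from the original):
def stopping_criteria_met(eval_metrics, mask_min_ap, box_min_ap):
    box_ap = eval_metrics.get('DetectionBoxes_Precision/mAP', 0.0)
    mask_ap = eval_metrics.get('DetectionMasks_Precision/mAP', 0.0)
    return box_ap >= box_min_ap and mask_ap >= mask_min_ap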
Beispiel #42
0
    def __init__(self, model_name, label_file='data/mscoco_label_map.pbtxt'):
        # Initialize some variables
        print("ObjectDetector('%s', '%s')" % (model_name, label_file))
        self.process_this_frame = True

        # download model
        self.graph_file = model_name + '/' + self.GRAPH_FILE_NAME
        if not os.path.isfile(self.graph_file):
            self.download_model(model_name)

        # Load a (frozen) Tensorflow model into memory.
        self.detection_graph = tf.Graph()
        with self.detection_graph.as_default():
            od_graph_def = tf.GraphDef()
            with tf.gfile.GFile(self.graph_file, 'rb') as fid:
                serialized_graph = fid.read()
                od_graph_def.ParseFromString(serialized_graph)
                tf.import_graph_def(od_graph_def, name='')

            graph = self.detection_graph

            ops = graph.get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = graph.get_tensor_by_name(tensor_name)

            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, 480, 640)  # assumes fixed 480x640 frames
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)

            self.tensor_dict = tensor_dict

        self.sess = tf.Session(graph=self.detection_graph)

        # Loading label map
        # Label maps map indices to category names,
        # so that when our convolution network predicts `5`,
        # we know that this corresponds to `airplane`.
        # Here we use internal utility functions,
        # but anything that returns a dictionary mapping integers to appropriate string labels would be fine
        label_map = label_map_util.load_labelmap(label_file)
        categories = label_map_util.convert_label_map_to_categories(
            label_map, max_num_classes=self.NUM_CLASSES, use_display_name=True)
        self.category_index = label_map_util.create_category_index(categories)
        self.output_dict = None

        self.last_inference_time = 0
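        # A hypothetical usage sketch (the model name is illustrative, and
        # GRAPH_FILE_NAME / NUM_CLASSES are class attributes assumed to be
        # defined elsewhere on ObjectDetector):
        #   detector = ObjectDetector('ssdlite_mobilenet_v2_coco_2018_05_09')
        #   detections = detector.sess.run(
        #       detector.tensor_dict,
        #       feed_dict={'image_tensor:0': image_np[None, ...]})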