예제 #1
 def testZeroImageOnEmptyMask(self):
   """Reframing an all-zero box mask must produce an all-zero image mask."""
   box_masks = tf.constant([[[0, 0],
                             [0, 0]]], dtype=tf.float32)
   boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32)
   # The expected array below is 4x4, so the masks are reframed onto a
   # 4x4 image.
   image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                      image_height=4,
                                                      image_width=4)
   np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                        [0, 0, 0, 0],
                                        [0, 0, 0, 0],
                                        [0, 0, 0, 0]]], dtype=np.float32)
   with self.test_session() as sess:
     np_image_masks = sess.run(image_masks)
     self.assertAllClose(np_image_masks, np_expected_image_masks)
예제 #2
 def testMaskIsCenteredInImageWhenBoxIsCentered(self):
   """A full mask in a centered box must fill the central image cells."""
   box_masks = tf.constant([[[1, 1],
                             [1, 1]]], dtype=tf.float32)
   boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32)
   # The expected array below is 4x4, so the masks are reframed onto a
   # 4x4 image.
   image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                      image_height=4,
                                                      image_width=4)
   np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                        [0, 1, 1, 0],
                                        [0, 1, 1, 0],
                                        [0, 0, 0, 0]]], dtype=np.float32)
   with self.test_session() as sess:
     np_image_masks = sess.run(image_masks)
     self.assertAllClose(np_image_masks, np_expected_image_masks)
예제 #3
 def testMaskOffCenterRemainsOffCenterInImage(self):
   """A mask in an off-center box must stay off-center after reframing."""
   box_masks = tf.constant([[[1, 0],
                             [0, 1]]], dtype=tf.float32)
   boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32)
   # The expected array below is 4x4, so the masks are reframed onto a
   # 4x4 image.
   image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes,
                                                      image_height=4,
                                                      image_width=4)
   # Fractional values come from resampling the 2x2 mask onto the image grid.
   np_expected_image_masks = np.array([[[0, 0, 0, 0],
                                        [0, 0, 0.6111111, 0.16666669],
                                        [0, 0, 0.3888889, 0.83333337],
                                        [0, 0, 0, 0]]], dtype=np.float32)
   with self.test_session() as sess:
     np_image_masks = sess.run(image_masks)
     self.assertAllClose(np_image_masks, np_expected_image_masks)
def run_inference_for_single_image(image, graph):
    """Run the detection graph on one image and return un-batched outputs.

    Args:
        image: Single image array; ``image.shape[0]`` and ``image.shape[1]``
            are passed to the mask reframing op as image height and width.
        graph: Graph holding the ``image_tensor:0`` input and the detection
            output tensors.

    Returns:
        dict: ``num_detections`` as an int, plus ``detection_boxes``,
        ``detection_scores``, ``detection_classes`` and, when the graph
        exposes them, ``detection_masks``, each with the batch axis removed.
    """
    # NOTE(review): `sess` is not defined in this function; it is presumably
    # a module-level tf.Session bound to `graph` -- confirm against the file.
    with graph.as_default():

        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        # Only fetch the outputs this particular graph actually exposes.
        for key in [
                'num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
        if 'detection_masks' in tensor_dict:

            # Drop the batch axis: this processing handles a single image.
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Keep only the first `num_detections` rows before reframing the
            # masks from box coordinates to image coordinates.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                         tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image.shape[0],
                image.shape[1])
            # Binarise the reframed masks at 0.5.
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Add the batch axis back so every fetched tensor is batched.
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name(
            'image_tensor:0')

        output_dict = sess.run(
            tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})

        # Strip the batch axis and convert types as appropriate.
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        # int64 keeps class ids above 255 from wrapping around.
        output_dict['detection_classes'] = output_dict['detection_classes'][
            0].astype(np.int64)
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
예제 #5
def run_inference_for_single_image(model, image):
    """Runs detection on a single image

    Args:
        model (model): Model
        image (byte): Numpy image array

    Returns:
        dict: output_dict with labels/confidence
    """
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]

    # Run inference
    output_dict = model(input_tensor)

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    }
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(
        np.int64)

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        # Binarise at 0.5 and expose the result under its own key.
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict[
            'detection_masks_reframed'] = detection_masks_reframed.numpy()

    return output_dict
예제 #6
def run_inference_for_single_image(image, graph):
  """Run the detection graph on one image inside a fresh session.

  Args:
    image: Single image array; `image.shape[0]` and `image.shape[1]` are
      passed to the mask reframing op as image height and width.
    graph: Graph holding the `image_tensor:0` input and the detection output
      tensors.

  Returns:
    dict with `num_detections` as an int, plus `detection_boxes`,
    `detection_scores`, `detection_classes` and, when the graph exposes them,
    `detection_masks`, each with the batch axis removed.
  """
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0],
                                   [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                   [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: np.expand_dims(image, 0)})

      # Strip the batch axis and convert types as appropriate.
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      # int64 keeps class ids above 255 from wrapping around.
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.int64)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict
def run_inference_for_single_image(image, graph):
  """Run the detection graph on an already-batched image in a fresh session.

  Unlike variants that call `np.expand_dims`, this one feeds `image` to
  `image_tensor:0` unchanged and takes the mask size from `image.shape[1]`
  and `image.shape[2]`, so `image` must already carry a leading batch axis.

  Args:
    image: Batched image array fed directly to the graph.
    graph: Graph holding the `image_tensor:0` input and the detection output
      tensors.

  Returns:
    dict with `num_detections` as an int, plus `detection_boxes`,
    `detection_scores`, `detection_classes` and, when the graph exposes them,
    `detection_masks`, each reduced to the first batch entry.
  """
  with graph.as_default():
    with tf.Session() as sess:
      # input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # individual image processing
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0],
                                   [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                   [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[1], image.shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Batch Dim
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

      # Processing inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: image})

      # numpy arrays
      output_dict['num_detections'] = int(output_dict['num_detections'][0])
      # int64 keeps class ids above 255 from wrapping around.
      output_dict['detection_classes'] = output_dict[
          'detection_classes'][0].astype(np.int64)
      output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
      output_dict['detection_scores'] = output_dict['detection_scores'][0]
      if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
  return output_dict
예제 #8
def expand_this_graph_by_process_detection_masks(tensor_dict, image_H,
                                                 image_W):
    """Replace ``tensor_dict['detection_masks']`` with image-sized binary masks.

    Adds ops that squeeze the batch axis, keep the first ``num_detections``
    rows, reframe the box-relative masks to the image and threshold them at
    0.5. ``tensor_dict`` is modified in place; nothing is returned.

    Args:
        tensor_dict: Mapping that must contain the ``detection_boxes``,
            ``detection_masks`` and ``num_detections`` tensors.
        image_H: Image height passed to the reframing op.
        image_W: Image width passed to the reframing op.
    """
    # The following processing is only for single image
    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
    # Reframe is required to translate mask from box coordinates to image
    # coordinates and fit the image size.
    real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
    detection_boxes = tf.slice(detection_boxes, [0, 0],
                               [real_num_detection, -1])
    detection_masks = tf.slice(detection_masks, [0, 0, 0],
                               [real_num_detection, -1, -1])
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_H, image_W)
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    # Follow the convention by adding back the batch dimension
    tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed,
                                                    0)
예제 #9
def detection(model):
    """Run `model` on frames read from `cap` in an endless loop and show them.

    Each frame is converted to a batched tensor, passed through the model's
    'serving_default' signature, and displayed resized to 800x600 in an
    OpenCV window titled 'object detection'.

    NOTE(review): several statements in this excerpt are cut off (unclosed
    dict comprehension, unfinished `output_dict[` / `tf.cast(` expressions,
    an orphaned `instance_masks=` keyword argument and an `if` without a
    body), so the function does not parse as shown -- restore the missing
    lines from the original source before use.
    """
    # NOTE(review): `cap` is not defined in this function; presumably a
    # module-level cv2.VideoCapture -- confirm.
    # the array based representation of the image will be used later in order to prepare the
    # result image with boxes and labels on it.
    while True:
        # NOTE(review): `ret` is never checked, so a failed read is not handled.
        ret, image_np = cap.read()
        input_tensor = tf.convert_to_tensor(image_np)
        input_tensor = input_tensor[tf.newaxis, ...]
        # Run inference
        model_fn = model.signatures['serving_default']
        output_dict = model_fn(input_tensor)
        # All outputs are batched tensors.
        # Convert to numpy arrays, and take index [0] to remove the batch dimension.
        # We're only interested in the first num_detections.
        num_detections = int(output_dict.pop('num_detections'))
        output_dict = {
            key: value[0, :num_detections].numpy()
            for key, value in output_dict.items()
        output_dict['num_detections'] = num_detections
        # detection_classes should be ints.
        output_dict['detection_classes'] = output_dict[
        # Handle models with masks:
        if 'detection_masks' in output_dict:
            # Reframe the bbox mask to the image size.
            # NOTE(review): `image` is not defined in this function; the frame
            # is called `image_np` here -- probably the intended variable.
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                output_dict['detection_masks'], output_dict['detection_boxes'],
                image.shape[0], image.shape[1])
            detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                'detection_masks_reframed'] = detection_masks_reframed.numpy()
            # NOTE(review): the line below looks like a keyword argument of a
            # drawing call whose opening line is missing from this excerpt.
            instance_masks=output_dict.get('detection_masks_reframed', None),
        cv2.imshow('object detection', cv2.resize(image_np, (800, 600)))
        # NOTE(review): the body of this `if` (presumably the loop exit when
        # 'q' is pressed) is missing from this excerpt.
        if cv2.waitKey(25) & 0xFF == ord('q'):
예제 #10
def run_inference_for_single_image(model, image):
    """Run `model` on one image, print the inference time, return numpy outputs.

    Args:
        model: Callable that takes a batched image tensor and returns a dict
            of batched output tensors including ``num_detections``.
        image: Image convertible with ``np.asarray``; ``shape[0]`` and
            ``shape[1]`` are passed to the mask reframing op as height and
            width.

    Returns:
        dict: Each model output reduced to the first ``num_detections`` rows
        of the first batch entry as numpy arrays, plus ``num_detections``
        (int) and, for mask models, ``detection_masks_reframed``.
    """
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]

    # Run inference; perf_counter is monotonic, so the measured interval is
    # not affected by system clock adjustments.
    start = time.perf_counter()
    output_dict = model(input_tensor)
    end = time.perf_counter()
    print("Inference time: {} s".format(end - start))

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    }
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(
        np.int64)

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict[
            'detection_masks_reframed'] = detection_masks_reframed.numpy()

    return output_dict
예제 #11
def get_output_dict(image, detection_graph, sess, max_detections=100):
    """Run a mask-detection graph on a batched image with an existing session.

    Instead of the graph's ``num_detections`` output, a fixed number of rows
    (``max_detections``) is kept for boxes, masks, scores and classes.

    Args:
        image: Batched image array fed directly to ``image_tensor:0``;
            ``image.shape[1]`` and ``image.shape[2]`` are passed to the mask
            reframing op as height and width.
        detection_graph: Graph holding the input and detection tensors.
        sess: Session used to run the graph.
        max_detections: Number of detection rows to keep (default 100, the
            value that was previously hard-coded).

    Returns:
        dict: ``num_detections`` as an int, plus ``detection_boxes``,
        ``detection_scores``, ``detection_classes`` and ``detection_masks``
        for the first batch entry.

    Raises:
        KeyError: If the graph lacks any of the detection outputs, including
            ``detection_masks`` (they are all accessed unconditionally).
    """
    with detection_graph.as_default():
        ops = tf.get_default_graph().get_operations()
        all_tensor_names = {output.name for op in ops for output in op.outputs}
        tensor_dict = {}
        for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
        ]:
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
                    tensor_name)
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(max_detections, tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        # Scores and classes keep their batch axis, so slice along axis 1.
        detection_scores = tf.slice(tensor_dict['detection_scores'], [0, 0], [-1, real_num_detection])
        detection_classes = tf.slice(tensor_dict['detection_classes'], [0, 0], [-1, real_num_detection])
        tensor_dict['detection_scores'] = detection_scores
        tensor_dict['detection_classes'] = detection_classes
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[1], image.shape[2])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
        # Run inference
        output_dict = sess.run(tensor_dict,
                               feed_dict={image_tensor: image})

        # Strip the batch axis and convert types as appropriate.
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        # int64 keeps class ids above 255 from wrapping around.
        output_dict['detection_classes'] = output_dict[
            'detection_classes'][0].astype(np.int64)
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
예제 #12
def run_inference_for_single_image(model, image):
  """Run `model` on one image and return its outputs as numpy arrays.

  Args:
    model: Callable that takes a batched image tensor and returns a dict of
      batched output tensors including `num_detections`.
    image: Image convertible with `np.asarray`; `shape[0]` and `shape[1]`
      are passed to the mask reframing op as height and width.

  Returns:
    dict with each model output reduced to the first `num_detections` rows of
    the first batch entry, plus `num_detections` (int) and, for mask models,
    `detection_masks_reframed`.
  """
  image = np.asarray(image)
  input_tensor = tf.convert_to_tensor(image)
  # The model expects a batch, so prepend a batch axis.
  input_tensor = input_tensor[tf.newaxis, ...]

  output_dict = model(input_tensor)
  num_detections = int(output_dict.pop('num_detections'))
  # Drop the batch axis and keep only the first `num_detections` rows.
  output_dict = {key: value[0, :num_detections].numpy()
                 for key, value in output_dict.items()}
  output_dict['num_detections'] = num_detections
  output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)
  if 'detection_masks' in output_dict:
    # Reframe the box-relative masks to the image size and binarise at 0.5.
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        output_dict['detection_masks'], output_dict['detection_boxes'],
        image.shape[0], image.shape[1])
    detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                       tf.uint8)
    output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()
  return output_dict
예제 #13
    def run_inference_for_single_image(self, image):
        """Run ``self.model`` on one image and keep only class-17 detections.

        Args:
            image: Image convertible with ``np.asarray``; ``shape[0]`` and
                ``shape[1]`` are passed to the mask reframing op as height
                and width.

        Returns:
            dict: Model outputs as numpy arrays with the batch axis removed.
            ``detection_boxes``, ``detection_classes`` and
            ``detection_scores`` are filtered to the rows whose class id is
            17; ``num_detections`` and any mask entries are left unfiltered.
        """
        image = np.asarray(image)
        input_tensor = tf.convert_to_tensor(image)
        input_tensor = input_tensor[tf.newaxis, ...]

        # Run inference
        model_fn = self.model.signatures['serving_default']
        output_dict = model_fn(input_tensor)
        # All outputs are batched tensors.
        num_detections = int(output_dict.pop('num_detections'))
        output_dict = {key: value[0, :num_detections].numpy()
                       for key, value in output_dict.items()}

        output_dict['num_detections'] = num_detections

        # detection_classes should be ints.
        output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64)

        # Handle models with masks:
        if 'detection_masks' in output_dict:
            # Reframe the bbox mask to the image size.
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                output_dict['detection_masks'], output_dict['detection_boxes'],
                image.shape[0], image.shape[1])
            detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                               tf.uint8)
            output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy()

        # We only want to detect cats.
        box = np.asarray(output_dict['detection_boxes'])
        cls = np.asarray(output_dict['detection_classes']).astype(np.int64)
        scr = np.asarray(output_dict['detection_scores'])
        # One boolean mask filters all three arrays, so boxes stay aligned
        # with classes and scores. The previous index loop started at 1 and
        # so dropped a matching detection at index 0 from the boxes only.
        bl = (cls == 17)
        output_dict['detection_boxes'] = box[bl]
        output_dict['detection_classes'] = np.extract(bl, cls)
        output_dict['detection_scores'] = np.extract(bl, scr)

        return output_dict
예제 #14
    def load_tensors(self, image_shape):
        """Look up the graph's detection tensors and cache them on ``self``.

        Sets ``self.tensor_dict`` (the detection outputs present in
        ``self.graph``) and ``self.image_tensor`` (the ``image_tensor:0``
        input). When the graph exposes ``detection_masks``, that entry is
        replaced by masks reframed to the image size and binarised at 0.5.

        Args:
            image_shape: Sequence whose first two items are passed to the
                mask reframing op as image height and width.
        """
        with self.graph.as_default():
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                output.name
                for op in ops for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
                    ).get_tensor_by_name(tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image_shape[0],
                    image_shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
                'image_tensor:0')

            self.tensor_dict = tensor_dict
            self.image_tensor = image_tensor
예제 #15
def detection(image, sess):
    """Collect the detection output tensors of ``detection_graph``.

    Looks up whichever of the standard detection outputs exist in the
    module-level ``detection_graph``. When masks are present, the
    ``detection_masks`` entry is replaced by masks reframed to the image
    size (``image.shape[1]`` by ``image.shape[2]``), binarised at 0.5 and
    given a batch axis again.

    Args:
        image: Array whose ``shape[1]`` and ``shape[2]`` give the mask size.
        sess: Not used by this function.

    Returns:
        dict: Output name -> tensor, for the outputs the graph exposes.
    """
    wanted_keys = ('num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks')
    graph_ops = detection_graph.get_operations()
    known_names = {out.name for graph_op in graph_ops for out in graph_op.outputs}

    tensor_dict = {}
    for output_key in wanted_keys:
        full_name = output_key + ':0'
        if full_name not in known_names:
            continue
        tensor_dict[output_key] = detection_graph.get_tensor_by_name(full_name)

    # Graphs without a mask head need no further processing.
    if 'detection_masks' not in tensor_dict:
        return tensor_dict

    # Ops are created in the same order as before so graph op names match.
    boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
    masks = tf.squeeze(tensor_dict['detection_masks'], [0])
    valid_count = tf.cast(tensor_dict['num_detections'][0], tf.int32)
    boxes = tf.slice(boxes, [0, 0], [valid_count, -1])
    masks = tf.slice(masks, [0, 0, 0], [valid_count, -1, -1])
    reframed = utils_ops.reframe_box_masks_to_image_masks(
        masks, boxes, image.shape[1], image.shape[2])
    binary_masks = tf.cast(tf.greater(reframed, 0.5), tf.uint8)
    tensor_dict['detection_masks'] = tf.expand_dims(binary_masks, 0)

    return tensor_dict
def run_inference_for_single_image(image, graph):
    """Run the prepared detection tensors on one image.

    NOTE(review): `tensor_dict` and `sess` are not defined in this function
    and `graph` is never used; both names are presumably module-level
    objects created elsewhere in the file -- confirm. If `tensor_dict` is
    shared between calls, the mask branch below re-processes the entry it
    overwrote on the previous call.

    Args:
        image: Single image array; ``image.shape[0]`` and ``image.shape[1]``
            are passed to the mask reframing op as height and width.
        graph: Unused.

    Returns:
        dict: ``num_detections`` as an int, plus ``detection_boxes``,
        ``detection_scores``, ``detection_classes`` and, when present,
        ``detection_masks``, each with the batch axis removed.
    """
    if 'detection_masks' in tensor_dict:
        # The following processing is only for single image
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Reframe is required to translate mask from box coordinates to image
        # coordinates and fit the image size.
        real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                     tf.int32)
        detection_boxes = tf.slice(detection_boxes, [0, 0],
                                   [real_num_detection, -1])
        detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                   [real_num_detection, -1, -1])
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
    image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

    # Run inference
    output_dict = sess.run(tensor_dict,
                           feed_dict={image_tensor: np.expand_dims(image, 0)})

    # Strip the batch axis and convert types as appropriate.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    # int64 keeps class ids above 255 from wrapping around.
    output_dict['detection_classes'] = output_dict['detection_classes'][
        0].astype(np.int64)
    # Boxes are [top, left, bottom, right] in normalised 0..1 coordinates.
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]

    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = output_dict['detection_masks'][0]
    return output_dict
예제 #17
    def inference_image(self, image):
        """Run ``self.model`` on one image and return numpy outputs.

        Args:
            image: Image accepted by ``tf.convert_to_tensor`` that also
                exposes ``.shape``; ``shape[0]`` and ``shape[1]`` are passed
                to the mask reframing op as height and width.

        Returns:
            dict: Each model output reduced to the first ``num_detections``
            rows of the first batch entry as numpy arrays, plus
            ``num_detections`` (int) and, for mask models,
            ``detection_masks_reframed``.
        """
        # The input must be a tensor; convert it with `tf.convert_to_tensor`.
        input_tensor = tf.convert_to_tensor(image)

        # The model expects a batch of images, so add an axis with `tf.newaxis`.
        input_tensor = input_tensor[tf.newaxis, ...]

        # Run inference
        model_fn = self.model.signatures['serving_default']
        output_dict = model_fn(input_tensor)

        # All outputs are batched tensors.
        # Convert to numpy arrays and take index [0] to drop the batch axis.
        # We are only interested in the first num_detections.
        num_detections = int(output_dict.pop('num_detections'))
        output_dict = {
            key: value[0, :num_detections].numpy()
            for key, value in output_dict.items()
        }
        output_dict['num_detections'] = num_detections

        # detection_classes should be ints.
        output_dict['detection_classes'] = output_dict[
            'detection_classes'].astype(np.int64)

        # Handle models with masks:
        if 'detection_masks' in output_dict:
            # Reframe the bbox mask to the image size.
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                output_dict['detection_masks'], output_dict['detection_boxes'],
                image.shape[0], image.shape[1])
            detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                               tf.uint8)
            output_dict[
                'detection_masks_reframed'] = detection_masks_reframed.numpy()

        return output_dict
예제 #18
def run_inference(images, graph):
  """Run the detection graph on a batch of images inside a fresh session.

  Args:
    images: Batched image array fed directly to `image_tensor:0`;
      `images.shape[1]` and `images.shape[2]` are passed to the mask
      reframing op as height and width.
    graph: Graph holding the `image_tensor:0` input and the detection output
      tensors.

  Returns:
    dict of batched numpy outputs. `num_detections` is cast to int64 and
    `detection_classes` to uint8. For mask models, the reframed binary masks
    of image `i` are stored under `'detection_masks' + str(i)`, next to the
    raw `detection_masks` output.
  """
  with graph.as_default():
    with tf.Session() as sess:
      # Get handles to input and output tensors
      ops = tf.get_default_graph().get_operations()
      all_tensor_names = {output.name for op in ops for output in op.outputs}
      tensor_dict = {}
      for key in [
          'num_detections', 'detection_boxes', 'detection_scores',
          'detection_classes', 'detection_masks'
      ]:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
          tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
              tensor_name)
      if 'detection_masks' in tensor_dict:
        # Build one reframing sub-graph per image, because the number of
        # valid detections differs from image to image.
        for i in range(len(images)):
          detection_boxes = tensor_dict['detection_boxes'][i]
          detection_masks = tensor_dict['detection_masks'][i]
          # Reframe is required to translate mask from box coordinates to
          # image coordinates and fit the image size.
          real_num_detection = tf.cast(tensor_dict['num_detections'][i], tf.int32)
          detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
          detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
          detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
              detection_masks, detection_boxes, images.shape[1], images.shape[2])
          detection_masks_reframed = tf.cast(
              tf.greater(detection_masks_reframed, 0.5), tf.uint8)
          tensor_dict['detection_masks'+str(i)] = detection_masks_reframed

      image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')
      # Run inference
      output_dict = sess.run(tensor_dict,
                             feed_dict={image_tensor: images})
      output_dict['num_detections'] = output_dict['num_detections'].astype(np.int64)
      # NOTE(review): uint8 wraps class ids above 255 -- confirm the label
      # map stays below that before relying on this dtype.
      output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.uint8)

  return output_dict
예제 #19
    def predict(self, input_image, size=None, threshold=0.5, top_k=10):
        """Detect objects in ``input_image`` and return the kept predictions.

        Args:
            input_image: Image passed to ``self._process_input_image``.
            size: Passed through to ``self._process_input_image``.
            threshold: Minimum score for a detection to be kept; also the
                cut-off used to binarise reframed masks.
            top_k: Maximum number of predictions returned.

        Returns:
            tuple: ``(img_arr, predictions)`` where ``predictions`` is a list
            of at most ``top_k`` ``BoundingBox`` objects in detection order.
        """
        img_arr, img_width, img_height, scale_factor = self._process_input_image(
            input_image, size)
        output_dict = self.output_dict(img_arr)
        boxes = output_dict["detection_boxes"]
        scores = output_dict["detection_scores"]
        classes = output_dict["detection_classes"].astype(np.int64)
        num_detections = output_dict['num_detections']
        # Placeholder masks so the zip below works for box-only models.
        masks = [None] * num_detections
        predictions = []
        if 'detection_masks' in output_dict:
            # Adjust mask coordinates based on the image dimensions.
            masks = utils_ops.reframe_box_masks_to_image_masks(
                output_dict["detection_masks"], boxes, img_height, img_width)
            masks_tensor = tf.cast(masks > threshold, tf.uint8)
            if tf.executing_eagerly():
                masks = masks_tensor.numpy()
            else:
                # Graph mode: evaluate in a session that is closed afterwards
                # instead of leaking a new session on every call.
                with tf.Session() as sess:
                    masks = masks_tensor.eval(session=sess)

        for box, mask, score, label in zip(boxes, masks, scores, classes):
            if score < threshold:
                continue
            if self._labels_map_dict and label in self._labels_map_dict:
                label = self._labels_map_dict[label]["name"]
            # Boxes are normalised (y_min, x_min, y_max, x_max); scale them
            # to pixel coordinates.
            start_y, start_x, end_y, end_x = box
            start_x = int(start_x * img_width)
            start_y = int(start_y * img_height)
            end_x = int(end_x * img_width)
            end_y = int(end_y * img_height)
            predictions.append(
                BoundingBox(start_x, start_y, end_x, end_y, label,
                            round(float(score), 2), scale_factor, mask))
        return img_arr, predictions[:top_k]
예제 #20
def detect_objects_in_image(image_np, detect_fn):
    """Run ``detect_fn`` on one image and return its (batched) outputs.

    Args:
        image_np: Image array; a batch axis is added and it is converted to
            a float32 tensor. ``shape[0]`` and ``shape[1]`` are passed to
            the mask reframing op as height and width.
        detect_fn: Callable that takes the batched tensor and returns a dict
            of batched output tensors including ``num_detections``.

    Returns:
        dict: The outputs of ``detect_fn`` (still batched), with
        ``num_detections`` replaced by an int and, for mask models, a
        ``detection_masks_reframed`` uint8 tensor for the first batch entry.
    """
    input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32)

    detections = detect_fn(input_tensor)

    # All outputs are batched tensors.
    num_detections = int(detections.pop('num_detections'))

    # Keep every output key. The previous `itertools.islice` over the dict
    # items truncated the set of keys (not the detection rows), silently
    # dropping outputs whenever num_detections was smaller than the number
    # of keys.
    detections = dict(detections)
    detections['num_detections'] = num_detections
    # Handle models with masks:
    if "detection_masks" in detections:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detections["detection_masks"][0], detections["detection_boxes"][0],
            image_np.shape[0], image_np.shape[1])
        detections['detection_masks_reframed'] = tf.cast(detection_masks_reframed > 0.5,
                                                         tf.uint8)

    return detections
예제 #21
def run_inference_for_single_image(model, image):
    # Runs `model` on one image and returns a dict of per-detection outputs
    # with the batch dimension removed (see the inline comments below).
    # NOTE(review): several lines of this function appear to have been lost
    # (unterminated calls and brackets below), so it does not parse as-is.
    # TODO: restore the missing lines from the original source.
    image = np.asarray(image)
    # The input needs to be a tensor, convert it using `tf.convert_to_tensor`.
    input_tensor = tf.convert_to_tensor(image)
    # The model expects a batch of images, so add an axis with `tf.newaxis`.
    input_tensor = input_tensor[tf.newaxis, ...]
    # NOTE(review): the TensorBoard(...) call is never closed; its arguments
    # are missing here.
    tensorboard_callback = keras.callbacks.TensorBoard(
    # Run inference
    # NOTE(review): confirm that `model` accepts a `callback=` keyword.
    output_dict = model(input_tensor, callback=[tensorboard_callback])

    # All outputs are batched tensors.
    # Convert to numpy arrays, and take index [0] to remove the batch dimension.
    # We're only interested in the first num_detections.
    num_detections = int(output_dict.pop('num_detections'))
    # NOTE(review): the closing brace of this dict comprehension is missing.
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints.
    # NOTE(review): the dtype argument of astype(...) is missing.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        # NOTE(review): the end of the tf.cast(...) call and the start of the
        # following assignment are missing.
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
            'detection_masks_reframed'] = detection_masks_reframed.numpy()

    return output_dict
예제 #22
    def run_inference_for_single_image(self, image):
        # Runs self.sess on one image (a batch axis is added when feeding) and
        # returns the fetched outputs with the batch dimension removed.
        # NOTE(review): several continuation lines below are missing
        # (unterminated calls), so this method does not parse as-is.
        # TODO: restore them from the original source.
        if 'detection_masks' in self.tensor_dict:
            # The following processing is only for single image
            detection_boxes = tf.squeeze(self.tensor_dict['detection_boxes'],
            detection_masks = tf.squeeze(self.tensor_dict['detection_masks'],
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(self.tensor_dict['num_detections'][0],
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, image.shape[0],
            # Binarize the reframed masks at 0.5.
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            # NOTE(review): this overwrites self.tensor_dict['detection_masks']
            # with the reframed tensor, so a later call would start from the
            # already-reframed masks. Confirm this method is only called once
            # per instance, or build the fetch dict locally.
            self.tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)

        # Run inference
        # NOTE(review): the fetches argument of sess.run(...) is missing.
        output_dict = self.sess.run(
            feed_dict={self.image_tensor: np.expand_dims(image, 0)})
        # all outputs are float32 numpy arrays, so convert types as appropriate
        output_dict['num_detections'] = int(output_dict['num_detections'][0])
        # NOTE(review): the rest of this subscript expression is missing.
        output_dict['detection_classes'] = output_dict['detection_classes'][
        output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
        output_dict['detection_scores'] = output_dict['detection_scores'][0]
        if 'detection_masks' in output_dict:
            output_dict['detection_masks'] = output_dict['detection_masks'][0]
        return output_dict
예제 #23
def run_inference_for_single_image(model, image):
    """Run the model's ``serving_default`` signature on a single image.

    Args:
        model: Loaded model exposing ``signatures['serving_default']``.
        image: Array-like image; converted with ``np.asarray``.

    Returns:
        A dict mapping each model output to a numpy array with the batch
        dimension removed and only the first ``num_detections`` entries kept,
        plus ``'num_detections'`` as an ``int``. ``'detection_classes'`` is
        converted to an integer dtype. When the model emits
        ``'detection_masks'``, ``'detection_masks_reframed'`` holds uint8
        masks (threshold 0.5) reframed to the image height and width.
    """
    image = np.asarray(image)
    # The model expects a batch of images, so add a leading axis.
    input_tensor = tf.convert_to_tensor(image)[tf.newaxis, ...]

    # Run inference
    model_fn = model.signatures['serving_default']
    output_dict = model_fn(input_tensor)

    # All outputs are batched tensors. Convert to numpy arrays, taking
    # index [0] to remove the batch dimension and keeping only the first
    # num_detections entries.
    num_detections = int(output_dict.pop('num_detections'))
    output_dict = {
        key: value[0, :num_detections].numpy()
        for key, value in output_dict.items()
    }
    output_dict['num_detections'] = num_detections

    # detection_classes should be ints. The dtype argument was truncated in
    # the original; int64 is used so no class id can overflow.
    output_dict['detection_classes'] = output_dict['detection_classes'].astype(
        np.int64)

    # Handle models with masks:
    if 'detection_masks' in output_dict:
        # Reframe the bbox mask to the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            output_dict['detection_masks'], output_dict['detection_boxes'],
            image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5,
                                           tf.uint8)
        output_dict[
            'detection_masks_reframed'] = detection_masks_reframed.numpy()

    return output_dict
예제 #24
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
        if 'detection_masks' in tensor_dict:
            # The following processing is only for single image
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0],
            detection_boxes = tf.slice(detection_boxes, [0, 0],
                                       [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                       [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, IMAGE_SIZE[0], IMAGE_SIZE[1])
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name(

def helloworld():
    """Return the static HTML greeting markup."""
    greeting = '<h1>Hello World!</h1>'
    return greeting

# Image upload
예제 #25
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  NOTE(review): the signature tail, the `else:` branches and several closing
  lines were missing from this block; they have been restored from the
  parameter list and return contract documented below. Confirm against the
  original source.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of batched detections; element [0] of each field
      is used.
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional). Cast to uint8 in place in the passed dictionary.
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] uint8 tensor of binarized
      masks, reframed to full image masks. (Optional)
    'detection_keypoints': keypoints of the detections, scaled to absolute
      coordinates when `scale_to_absolute` is set. (Optional)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      (Optional)
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D uint8 tensor of instance masks.
      (Optional)
  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  output_dict = {
      input_data_fields.original_image: image,
      input_data_fields.key: key,
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes][0]
  image_shape = tf.shape(image)
  detection_scores = detections[detection_fields.detection_scores][0]

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes][0]) +
        label_id_offset)

  # Keep only the valid detections (the outputs are padded past this count).
  num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
  detection_boxes = tf.slice(
      detection_boxes, begin=[0, 0], size=[num_detections, -1])
  detection_classes = tf.slice(
      detection_classes, begin=[0], size=[num_detections])
  detection_scores = tf.slice(
      detection_scores, begin=[0], size=[num_detections])

  if scale_to_absolute:
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
    output_dict[detection_fields.detection_boxes] = (
        absolute_detection_boxlist.get())
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks][0]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_shape[1], image_shape[2])
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    output_dict[detection_fields.detection_masks] = detection_masks_reframed
  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints][0]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      absolute_detection_keypoints = keypoint_ops.scale(
          detection_keypoints, image_shape[1], image_shape[2])
      output_dict[detection_fields.detection_keypoints] = (
          absolute_detection_keypoints)

  if groundtruth:
    if input_data_fields.groundtruth_instance_masks in groundtruth:
      groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
          groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
    # Copy every groundtruth field into the result, as the return contract
    # above promises; the overrides below then replace boxes/classes.
    output_dict.update(groundtruth)
    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
          box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
      output_dict[input_data_fields.groundtruth_boxes] = (
          absolute_gt_boxlist.get())
    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

  return output_dict
예제 #26
# Collect whichever of the standard detection outputs the loaded graph exposes.
tensor_dict = {}
for key in ('num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'):
    tensor_name = key + ':0'
    if tensor_name not in all_tensor_names:
        continue
    tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name)

if 'detection_masks' in tensor_dict:
    # Single-image processing: drop the batch axis from boxes and masks.
    detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
    detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
    # Masks are expressed in box coordinates; keep the valid detections and
    # reframe them to image coordinates at the image size.
    real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
    detection_boxes = tf.slice(
        detection_boxes, begin=[0, 0], size=[real_num_detection, -1])
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[real_num_detection, -1, -1])
    detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image.shape[1], image.shape[2])
    # Binarize at 0.5, then restore the batch axis to follow the convention.
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    tensor_dict['detection_masks'] = tf.expand_dims(
        detection_masks_reframed, 0)

image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

cap = cv2.VideoCapture(
    '/share/jproject/sakkas/smoke-detection/videos/FAST FORWARD 102nd Running of the Indianapolis 500.mp4')

# Timing accumulators, all starting at zero.
total_time = capture_time = preproc_time = infer_time = postproc_time = 0
예제 #27
        for key in [
            'num_detections', 'detection_boxes', 'detection_scores',
            'detection_classes', 'detection_masks'
            tensor_name = key + ':0'
            if tensor_name in all_tensor_names:
                tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
        if 'detection_masks' in tensor_dict:
            detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
            detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
            # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
            real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32)
            detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1])
            detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
            detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                detection_masks, detection_boxes, height, width)
            detection_masks_reframed = tf.cast(
                tf.greater(detection_masks_reframed, 0.5), tf.uint8)
            # Follow the convention by adding back the batch dimension
            tensor_dict['detection_masks'] = tf.expand_dims(
                detection_masks_reframed, 0)
        image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0')

        while True:
            ret, image_np = cap.read()
            if ret == False:
            frame = frame + 1
            print("frame", frame)

            image_np_expanded = np.expand_dims(image_np, axis=0)
예제 #28
def build_inference_graph(image_tensor, inference_graph_path, override_num_detections=None):
  """Loads the inference graph and connects it to the input image.

  NOTE(review): the graph parse/import statements and the `else:` of the
  num_detections override were missing from this block and have been
  restored; confirm against the original source.

  Args:
    image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3]
    inference_graph_path: Path to the inference graph with embedded weights
    override_num_detections: Optional value that, when not None, is cast to
      int32 and used instead of the graph's own `num_detections` output to
      truncate the results.

  Returns:
    A 6-tuple of:
    detected_boxes_tensor: Detected boxes. Float tensor,
        shape=[num_detections, 4]
    detected_scores_tensor: Detected scores. Float tensor, truncated to the
        first num_detections entries.
    detected_labels_tensor: Detected labels. Int64 tensor, truncated to the
        first num_detections entries.
    detection_masks_reframed: uint8 tensor of masks reframed to the image
        size and binarized at 0.5.
    detection_masks_encoded: String tensor with one PNG encoding per mask
        fed through `ph`.
    ph: uint8 placeholder, shape=[None, None, None], that feeds
        `detection_masks_encoded`.
  """
  # A serialized GraphDef is binary protobuf data, so read it as bytes; text
  # mode ('r') tries to decode it as a string under Python 3.
  with tf.gfile.Open(inference_graph_path, 'rb') as graph_def_file:
    graph_content = graph_def_file.read()
  graph_def = tf.GraphDef()
  graph_def.ParseFromString(graph_content)

  # Splice the loaded graph onto the caller's image tensor.
  tf.import_graph_def(
      graph_def, name='', input_map={'image_tensor': image_tensor})

  g = tf.get_default_graph()

  if override_num_detections is not None:
    num_detections_tensor = tf.cast(override_num_detections, tf.int32)
  else:
    num_detections_tensor = tf.squeeze(
        g.get_tensor_by_name('num_detections:0'), 0)
    num_detections_tensor = tf.cast(num_detections_tensor, tf.int32)

  detected_boxes_tensor = tf.squeeze(
      g.get_tensor_by_name('detection_boxes:0'), 0)
  detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor]

  detected_scores_tensor = tf.squeeze(
      g.get_tensor_by_name('detection_scores:0'), 0)
  detected_scores_tensor = detected_scores_tensor[:num_detections_tensor]

  detected_labels_tensor = tf.squeeze(
      g.get_tensor_by_name('detection_classes:0'), 0)
  detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64)
  detected_labels_tensor = detected_labels_tensor[:num_detections_tensor]

  detected_masks_tensor = tf.squeeze(
      g.get_tensor_by_name('detection_masks:0'), 0)

  # Masks are box-relative; keep the valid ones and reframe to image size.
  image_shape = tf.shape(image_tensor)
  detected_masks_tensor = tf.slice(
      detected_masks_tensor, begin=[0, 0, 0],
      size=[num_detections_tensor, -1, -1])
  detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
      detected_masks_tensor, detected_boxes_tensor,
      image_shape[1], image_shape[2])
  detection_masks_reframed = tf.cast(
      tf.greater(detection_masks_reframed, 0.5), tf.uint8)

  # For some unknown reason directly feeding in detection_masks_reframed makes
  # tensorflow stuck, so PNG encoding is driven from a separate placeholder
  # that the caller feeds with the evaluated masks.
  ph = tf.placeholder(dtype=tf.uint8, shape=[None, None, None])
  detection_masks_encoded = tf.map_fn(
      lambda x: tf.image.encode_png(x), tf.expand_dims(ph, axis=-1),
      dtype=tf.string)

  return (detected_boxes_tensor, detected_scores_tensor,
          detected_labels_tensor, detection_masks_reframed,
          detection_masks_encoded, ph)
예제 #29
def execute_inference_for_image(image, graph):
    """Call the tensors to process the images using the graph.

    This method may be called concurrently from multiple threads (each call
    opens its own ``tf.Session``).

    NOTE(review): several continuation lines of this block were missing; they
    have been restored to match the complete form of the same code found
    elsewhere in this file. The ``sess.run`` fetches and the integer dtype of
    ``detection_classes`` should be confirmed against the original source.

    Args:
        image: The image to pass to the tensor. A batch axis is added before
            it is fed to ``image_tensor:0``.
        graph: The model to process the image.

    Returns:
        A dict with ``'num_detections'`` as an int and the available
        ``'detection_boxes'``, ``'detection_scores'``, ``'detection_classes'``
        and ``'detection_masks'`` outputs with the batch dimension removed.
    """
    # Create a default context manager
    with graph.as_default():
        # Create a new TensorFlow Session from the generated graph
        with tf.Session() as sess:
            default_graph = tf.get_default_graph()
            # Names of every tensor produced by an operation in the graph.
            all_tensor_names = {
                output.name
                for op in default_graph.get_operations()
                for output in op.outputs
            }
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
            ]:
                tensor_name = key + ':0'
                # Only fetch the outputs this graph actually provides.
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = default_graph.get_tensor_by_name(
                        tensor_name)
            # If a detection_masks tensor exists in the dictionary
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                                             [0])
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                                             [0])
                # Reframe is required to translate mask from box coordinates
                # to image coordinates and fit the image size
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                                             tf.int32)
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                    image.shape[1])
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            # Get the image tensor, and run the inference on it.
            image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

            # Run inference
            output_dict = sess.run(
                tensor_dict,
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(
                output_dict['num_detections'][0])
            output_dict['detection_classes'] = output_dict[
                'detection_classes'][0].astype(np.int64)
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
                0]
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
                    'detection_masks'][0]
    return output_dict
예제 #30
def run_inference_for_single_image(image, graph, evaluator):
    # Runs inference for one image inside a fresh tf.Session on `graph`, calls
    # `evaluator.evaluate()` and returns the output dict.
    # NOTE(review): many continuation lines of this function are missing
    # (unterminated calls and brackets), so it does not parse as-is.
    # TODO: restore them from the original source.
    with graph.as_default():
        with tf.Session() as sess:
            # NOTE(review): the pipeline config path argument is missing.
            configs = config_util.get_configs_from_pipeline_file(

            def get_next(config):
                # NOTE(review): the argument of this call is missing.
                return dataset_util.make_initializable_iterator(

            create_input_dict_fn = functools.partial(
                get_next, configs['eval_input_config'])

            eval_config = configs['eval_config']
            # NOTE(review): the remaining arguments of partial(...) are missing.
            model_fn = functools.partial(model_builder.build,
            model = model_fn()
            # NOTE(review): `evtor` is not defined in this function; confirm
            # where it comes from. The call's arguments are missing too.
            tensor_dict = evtor._extract_prediction_tensors(

            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            # NOTE(review): the element expression and closing brace of this
            # set comprehension are missing.
            all_tensor_names = {
                for op in ops for output in op.outputs
            #tensor_dict = {}
            # for key in [
            #     'num_detections', 'detection_boxes', 'detection_scores',
            #     'detection_classes', 'detection_masks'
            # ]:
            #   tensor_name = key + ':0'
            #   if tensor_name in all_tensor_names:
            #     tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(
            #         tensor_name)
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                # Binarize the reframed masks at 0.5.
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            # NOTE(review): the tensor name argument is missing.
            image_tensor = tf.get_default_graph().get_tensor_by_name(
            # Run inference
            # NOTE(review): the fetches argument of sess.run(...) is missing.
            output_dict = sess.run(
                feed_dict={image_tensor: np.expand_dims(image, 0)})

            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(
            output_dict['detection_classes'] = output_dict[
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[

                # NOTE(review): the two lines below are the tails of calls
                # whose heads are missing; they pass the same output_dict as
                # both groundtruth and detections for image_id=1. Confirm the
                # intended callee and arguments.
                image_id=1, groundtruth_dict=output_dict)
                image_id=1, detections_dict=output_dict)
            metrics = evaluator.evaluate()
    return output_dict
예제 #31
def infer(path_list,
    ''' Use your exported model to infer on a path list of images. 

                path_list: A list of images paths to infer on.
                exported_model_dir: The path used to saved your model.
                label_mapt_path: The path to your label_map file.
                results_dir: The directory where you want to save your infered images.

                disp: Set to false if you are not in an interactive python environment. Will display image in the environment if set to True.
                num_infer: The number of images you want to infer on. 
                min_score_tresh: The minimal confidence treshold to keep the detection.

    # NOTE(review): the rest of the signature and the closing quotes of the
    # docstring above are missing, as are several statements below, so this
    # function does not parse as-is. TODO: restore from the original source.
    saved_model_path = os.path.join(exported_model_dir, "saved_model")
    predict_fn = tf.contrib.predictor.from_saved_model(saved_model_path)
    # Only the first num_infer paths are processed.
    path_list = path_list[:num_infer]
    with tf.Session() as sess:
        # NOTE(review): the label map argument of this call is missing.
        category_index = label_map_util.create_category_index_from_labelmap(
        for img_path in path_list:
            # OpenCV loads BGR; convert to RGB and add a batch axis.
            img = cv2.imread(img_path)
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img_tensor = np.expand_dims(img, 0)
            output_dict = predict_fn({"inputs": img_tensor})

            # Drop the batch axis and keep the first num_detections entries.
            num_detections = int(output_dict.pop('num_detections'))
            # NOTE(review): the closing brace of this comprehension is missing.
            output_dict = {
                key: value[0, :num_detections]
                for key, value in output_dict.items()
            output_dict['num_detections'] = num_detections
            # NOTE(review): the rest of this expression is missing.
            output_dict['detection_classes'] = output_dict[

            if 'detection_masks' in output_dict:
                # Reframe the bbox mask to the image size.
                # NOTE(review): the first argument line of this call (the
                # masks) appears to be missing; only boxes and the image
                # dimensions are passed here.
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    output_dict['detection_boxes'], img.shape[0], img.shape[1])
                detection_masks_reframed = tf.cast(
                    detection_masks_reframed > 0.5, tf.uint8)
                # Evaluate the reframed masks to a concrete value via the session.
                mask_refr = sess.run(detection_masks_reframed)
                output_dict['detection_masks_reframed'] = mask_refr
            masks = output_dict.get('detection_masks_reframed', None)
            boxes = output_dict["detection_boxes"]
            classes = output_dict["detection_classes"]
            scores = output_dict["detection_scores"]

            # NOTE(review): b/c/s/m are never appended to in the visible code;
            # the body of the loop below (presumably the min_score_tresh
            # filter) is missing. Confirm against the original source.
            b = []
            c = []
            s = []
            m = []
            k = 0
            for classe in classes:
                if masks is not None:
                k += 1
            boxes = np.array(b)
            classes = np.array(c)
            scores = np.array(s)
            if masks is not None:
                masks = np.array(m)


            # Save the image under its base file name in results_dir.
            img_name = img_path.split("/")[-1]
            Image.fromarray(img).save(os.path.join(results_dir, img_name))

            # NOTE(review): the body of this `if` is missing.
            if disp == True:
def run_inference_for_single_image(image, graph):
    # Runs inference for one image inside a fresh tf.Session on `graph`,
    # prints the inference time and the collected label/score results, and
    # returns the output dict.
    # NOTE(review): many continuation lines of this function are missing
    # (unterminated calls and brackets), so it does not parse as-is.
    # TODO: restore them from the original source.
    with graph.as_default():
        with tf.Session() as sess:
            results = []
            labels = []
            # Get handles to input and output tensors
            ops = tf.get_default_graph().get_operations()
            all_tensor_names = {
                for op in ops for output in op.outputs
            tensor_dict = {}
            for key in [
                    'num_detections', 'detection_boxes', 'detection_scores',
                    'detection_classes', 'detection_masks'
                tensor_name = key + ':0'
                if tensor_name in all_tensor_names:
                    tensor_dict[key] = tf.get_default_graph(
            if 'detection_masks' in tensor_dict:
                # The following processing is only for single image
                detection_boxes = tf.squeeze(tensor_dict['detection_boxes'],
                detection_masks = tf.squeeze(tensor_dict['detection_masks'],
                # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size.
                real_num_detection = tf.cast(tensor_dict['num_detections'][0],
                detection_boxes = tf.slice(detection_boxes, [0, 0],
                                           [real_num_detection, -1])
                detection_masks = tf.slice(detection_masks, [0, 0, 0],
                                           [real_num_detection, -1, -1])
                # NOTE(review): `image.shape` is read here, but `image` is
                # only opened with Image.open(...) further down. Confirm what
                # type callers pass in.
                detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
                    detection_masks, detection_boxes, image.shape[0],
                detection_masks_reframed = tf.cast(
                    tf.greater(detection_masks_reframed, 0.5), tf.uint8)
                # Follow the convention by adding back the batch dimension
                tensor_dict['detection_masks'] = tf.expand_dims(
                    detection_masks_reframed, 0)
            image_tensor = tf.get_default_graph().get_tensor_by_name(
            image = Image.open(image).convert('RGB')
            # the array based representation of the image will be used later in order to prepare the
            # result image with boxes and labels on it.
            image_np = load_image_into_numpy_array(image)
            # Expand dimensions since the model expects images to have shape: [1, None, None, 3]
            image_np_expanded = np.expand_dims(image_np, axis=0)
            # Run inference
            start = time.time()
            # NOTE(review): the feed below expands `image` (the opened image
            # object), not `image_np`; `image_np_expanded` computed above is
            # not used in the visible code. Confirm which one is intended.
            output_dict = sess.run(
                feed_dict={image_tensor: np.expand_dims(image, 0)})
            end = time.time()
            print("Inference Time: %s" % str(end - start))
            # all outputs are float32 numpy arrays, so convert types as appropriate
            output_dict['num_detections'] = int(
            output_dict['detection_classes'] = output_dict[
            output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
            output_dict['detection_scores'] = output_dict['detection_scores'][
            if 'detection_masks' in output_dict:
                output_dict['detection_masks'] = output_dict[
            # Collect one {'label_text', 'score'} entry per label not already
            # in `labels`.
            # NOTE(review): `category_index` is not defined in this function,
            # and `labels` is never appended to in the visible code.
            for i in range(output_dict['num_detections']):
                label = category_index[output_dict['detection_classes']
                score = output_dict['detection_scores'][i]
                if label not in labels:
                    results.append({'label_text': label, 'score': str(score)})
                '%d objects found in the given image, plotting the image... ' %
            print([result for result in results])
            print('close the image to continue')
            return output_dict
예제 #33
def _resize_detection_masks(args):
    """Reframe box-relative masks onto the image and binarize them.

    Args:
        args: A 3-item sequence ``(detection_boxes, detection_masks,
            image_shape)``. ``image_shape[0]`` and ``image_shape[1]`` are
            passed to the reframing op as the target size.

    Returns:
        A uint8 tensor that is 1 where the reframed mask value is greater
        than 0.5 and 0 elsewhere.
    """
    boxes, box_masks, shape = args
    target_height, target_width = shape[0], shape[1]
    image_masks = ops.reframe_box_masks_to_image_masks(
        box_masks, boxes, target_height, target_width)
    is_foreground = tf.greater(image_masks, 0.5)
    return tf.cast(is_foreground, tf.uint8)
예제 #34
def run_inference_for_single_image(image, sess, graph, class_id=None):
    """Feed forward an image into the object detection model.

    Args:
        image (ndarray): Input image in numpy format (OpenCV format).
        sess: TF session.
        graph: Object detection model loaded before. This argument is not
            read by the function body; tensors are looked up on the current
            default graph.
        class_id (list): Optional. Id's of the classes you want to detect.
            Refer to 'mscoco_label_map.pbtxt' to find out more.

    Returns:
        output_dict (dict): Contains the info related to the detections.
            With the batch dimension removed it holds 'num_detections'
            (int), 'detection_classes' (uint8 array), 'detection_boxes',
            'detection_scores' and, when the graph exposes them,
            'detection_masks' (float32 array).
    """
    # Get handles to input and output tensors.
    default_graph = tf.get_default_graph()
    all_tensor_names = {
        output.name
        for op in default_graph.get_operations()
        for output in op.outputs
    }
    tensor_dict = {}
    for key in ['num_detections', 'detection_boxes', 'detection_scores',
                'detection_classes', 'detection_masks']:
        tensor_name = key + ':0'
        if tensor_name in all_tensor_names:
            tensor_dict[key] = default_graph.get_tensor_by_name(tensor_name)

    if 'detection_masks' in tensor_dict:
        # The following processing is only for single image.
        detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0])
        detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0])
        # Keep only the first `num_detections` boxes and masks.
        real_num_detection = tf.cast(
            tensor_dict['num_detections'][0], tf.int32)
        detection_boxes = tf.slice(
            detection_boxes, [0, 0], [real_num_detection, -1])
        detection_masks = tf.slice(
            detection_masks, [0, 0, 0], [real_num_detection, -1, -1])
        # Reframe is required to translate mask from box coordinates to
        # image coordinates and fit the image size.
        detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(
            detection_masks, detection_boxes, image.shape[0], image.shape[1])
        detection_masks_reframed = tf.cast(
            tf.greater(detection_masks_reframed, 0.5), tf.uint8)
        # Follow the convention by adding back the batch dimension.
        tensor_dict['detection_masks'] = tf.expand_dims(
            detection_masks_reframed, 0)
    image_tensor = default_graph.get_tensor_by_name('image_tensor:0')

    # Run inference on a batch containing just this image.
    output_dict = sess.run(
        tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)})

    # All outputs are float32 numpy arrays, so convert types as appropriate
    # and drop the batch dimension.
    output_dict['num_detections'] = int(output_dict['num_detections'][0])
    # NOTE(review): uint8 cannot hold class ids above 255; kept because
    # callers may depend on this dtype.
    output_dict['detection_classes'] = (
        output_dict['detection_classes'][0].astype(np.uint8))
    output_dict['detection_boxes'] = output_dict['detection_boxes'][0]
    output_dict['detection_scores'] = output_dict['detection_scores'][0]
    if 'detection_masks' in output_dict:
        output_dict['detection_masks'] = (
            output_dict['detection_masks'][0].astype(np.float32))
    if class_id is not None:
        # `discrimine_class` is defined elsewhere. Its return value is
        # discarded, so it presumably filters `output_dict` in place --
        # TODO confirm.
        discrimine_class(class_id, output_dict)
    return output_dict
예제 #35
def result_dict_for_single_example(image,
                                   key,
                                   detections,
                                   groundtruth=None,
                                   class_agnostic=False,
                                   scale_to_absolute=False):
  """Merges all detection and groundtruth information for a single example.

  Note that evaluation tools require classes that are 1-indexed, and so this
  function performs the offset. If `class_agnostic` is True, all output classes
  have label 1.

  Args:
    image: A single 4D uint8 image tensor of shape [1, H, W, C].
    key: A single string tensor identifying the image.
    detections: A dictionary of batched detection tensors keyed by
      `fields.DetectionResultFields` names (typically the output of the
      model's postprocess step). Only batch element 0 is read.
    groundtruth: (Optional) Dictionary of groundtruth items, with fields:
      'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
        normalized coordinates.
      'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
      'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
      'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
      'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
      'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
      'groundtruth_instance_masks': 3D int64 tensor of instance masks
        (Optional). When present, this entry of the passed-in dictionary is
        replaced in place by its uint8 cast.
    class_agnostic: Boolean indicating whether the detections are class-agnostic
      (i.e. binary). Default False.
    scale_to_absolute: Boolean indicating whether boxes and keypoints should be
      scaled to absolute coordinates. Note that for IoU based evaluations, it
      does not matter whether boxes are expressed in absolute or relative
      coordinates. Default False.

  Returns:
    A dictionary with:
    'original_image': A [1, H, W, C] uint8 image tensor.
    'key': A string tensor with image identifier.
    'detection_boxes': [max_detections, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`.
    'detection_scores': [max_detections] float32 tensor of scores.
    'detection_classes': [max_detections] int64 tensor of 1-indexed classes.
    'detection_masks': [max_detections, H, W] uint8 tensor of binarized
      masks, reframed to full image masks. (Only when `detections` has masks.)
    'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in
      normalized or absolute coordinates, depending on the value of
      `scale_to_absolute`. (Optional)
    'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes.
    'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional)
    'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional)
    'groundtruth_difficult': [num_boxes] int64 tensor. (Optional)
    'groundtruth_group_of': [num_boxes] int64 tensor. (Optional)
    'groundtruth_instance_masks': 3D uint8 tensor of instance masks
      (Optional).
  """
  label_id_offset = 1  # Applying label id offset (b/63711816)

  input_data_fields = fields.InputDataFields
  output_dict = {
      input_data_fields.original_image: image,
      input_data_fields.key: key,
  }

  detection_fields = fields.DetectionResultFields
  detection_boxes = detections[detection_fields.detection_boxes][0]
  image_shape = tf.shape(image)
  detection_scores = detections[detection_fields.detection_scores][0]

  if class_agnostic:
    detection_classes = tf.ones_like(detection_scores, dtype=tf.int64)
  else:
    detection_classes = (
        tf.to_int64(detections[detection_fields.detection_classes][0]) +
        label_id_offset)

  # Trim the padded outputs down to the detections actually produced.
  num_detections = tf.to_int32(detections[detection_fields.num_detections][0])
  detection_boxes = tf.slice(
      detection_boxes, begin=[0, 0], size=[num_detections, -1])
  detection_classes = tf.slice(
      detection_classes, begin=[0], size=[num_detections])
  detection_scores = tf.slice(
      detection_scores, begin=[0], size=[num_detections])

  if scale_to_absolute:
    absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
        box_list.BoxList(detection_boxes), image_shape[1], image_shape[2])
    output_dict[detection_fields.detection_boxes] = (
        absolute_detection_boxlist.get())
  else:
    output_dict[detection_fields.detection_boxes] = detection_boxes
  output_dict[detection_fields.detection_classes] = detection_classes
  output_dict[detection_fields.detection_scores] = detection_scores

  if detection_fields.detection_masks in detections:
    detection_masks = detections[detection_fields.detection_masks][0]
    # TODO(rathodv): This should be done in model's postprocess
    # function ideally.
    detection_masks = tf.slice(
        detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1])
    # Masks are reframed against the normalized (unscaled) boxes.
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks, detection_boxes, image_shape[1], image_shape[2])
    detection_masks_reframed = tf.cast(
        tf.greater(detection_masks_reframed, 0.5), tf.uint8)
    output_dict[detection_fields.detection_masks] = detection_masks_reframed
  if detection_fields.detection_keypoints in detections:
    detection_keypoints = detections[detection_fields.detection_keypoints][0]
    output_dict[detection_fields.detection_keypoints] = detection_keypoints
    if scale_to_absolute:
      absolute_detection_keypoints = keypoint_ops.scale(
          detection_keypoints, image_shape[1], image_shape[2])
      output_dict[detection_fields.detection_keypoints] = (
          absolute_detection_keypoints)

  if groundtruth:
    if input_data_fields.groundtruth_instance_masks in groundtruth:
      groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast(
          groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8)
    # NOTE(review): this merge was absent from the damaged copy; it is
    # required for the documented groundtruth_* outputs to be returned.
    # Confirm against the upstream implementation.
    output_dict.update(groundtruth)
    if scale_to_absolute:
      groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes]
      absolute_gt_boxlist = box_list_ops.to_absolute_coordinates(
          box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2])
      output_dict[input_data_fields.groundtruth_boxes] = (
          absolute_gt_boxlist.get())
    # For class-agnostic models, groundtruth classes all become 1.
    if class_agnostic:
      groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes]
      groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64)
      output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes

  return output_dict
예제 #36
def _resize_detection_masks(args):
  """Reframes box-relative masks onto the image and thresholds them.

  Args:
    args: A 3-element sequence `(detection_boxes, detection_masks,
      image_shape)`. Elements 0 and 1 of `image_shape` are forwarded as the
      last two arguments of `ops.reframe_box_masks_to_image_masks`.

  Returns:
    A `tf.uint8` tensor holding 1 wherever the reframed mask value is greater
    than 0.5 and 0 elsewhere.
  """
  boxes, masks, shape = args
  target_dim_0, target_dim_1 = shape[0], shape[1]
  reframed = ops.reframe_box_masks_to_image_masks(
      masks, boxes, target_dim_0, target_dim_1)
  above_threshold = tf.greater(reframed, 0.5)
  return tf.cast(above_threshold, tf.uint8)
예제 #37
def _extract_prediction_tensors(model,
                                create_input_dict_fn,
                                ignore_groundtruth=False):
  """Builds the tensors holding one example's detections and groundtruth.

  Dequeues one input example from a prefetch queue, runs it through the
  model's preprocess / predict / postprocess steps, and collects the results
  (with boxes scaled to absolute image coordinates) in a dictionary.

  NOTE(review): the signature and several continuation lines were truncated
  in the copy this was restored from. The parameter order follows the
  original docstring; the default for `ignore_groundtruth` is an assumption
  and should be confirmed.

  Args:
    model: model to perform predictions with.
    create_input_dict_fn: function to create input tensor dictionaries.
    ignore_groundtruth: whether groundtruth should be ignored.

  Returns:
    tensor_dict: A tensor dictionary with evaluations. Always contains
      'original_image', 'image_id', 'detection_boxes', 'detection_scores' and
      'detection_classes'; 'detection_masks' when the model emits masks; and,
      unless `ignore_groundtruth` is set, 'groundtruth_boxes',
      'groundtruth_classes', 'area', 'is_crowd', 'difficult' and (with masks)
      'groundtruth_instance_masks'.
  """
  input_dict = create_input_dict_fn()
  prefetch_queue = prefetcher.prefetch(input_dict, capacity=500)
  input_dict = prefetch_queue.dequeue()
  # Add a leading batch dimension of 1 before handing the image to the model.
  original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0)
  preprocessed_image = model.preprocess(tf.to_float(original_image))
  prediction_dict = model.predict(preprocessed_image)
  detections = model.postprocess(prediction_dict)

  original_image_shape = tf.shape(original_image)
  absolute_detection_boxlist = box_list_ops.to_absolute_coordinates(
      box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)),
      original_image_shape[1], original_image_shape[2])
  # Detection classes are shifted by this offset before being reported.
  label_id_offset = 1
  tensor_dict = {
      'original_image': original_image,
      'image_id': input_dict[fields.InputDataFields.source_id],
      'detection_boxes': absolute_detection_boxlist.get(),
      'detection_scores': tf.squeeze(detections['detection_scores'], axis=0),
      'detection_classes': (
          tf.squeeze(detections['detection_classes'], axis=0) +
          label_id_offset),
  }
  if 'detection_masks' in detections:
    detection_masks = tf.squeeze(detections['detection_masks'],
                                 axis=0)
    detection_boxes = tf.squeeze(detections['detection_boxes'],
                                 axis=0)
    # TODO: This should be done in model's postprocess function ideally.
    detection_masks_reframed = ops.reframe_box_masks_to_image_masks(
        detection_masks,
        detection_boxes,
        original_image_shape[1], original_image_shape[2])
    detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed,
                                                      0.5))

    tensor_dict['detection_masks'] = detection_masks_reframed
  # load groundtruth fields into tensor_dict
  if not ignore_groundtruth:
    normalized_gt_boxlist = box_list.BoxList(
        input_dict[fields.InputDataFields.groundtruth_boxes])
    # NOTE(review): the scale arguments were missing from the damaged copy;
    # image height/width is assumed, mirroring the detection-box scaling.
    gt_boxlist = box_list_ops.scale(normalized_gt_boxlist,
                                    original_image_shape[1],
                                    original_image_shape[2])
    groundtruth_boxes = gt_boxlist.get()
    groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes]
    tensor_dict['groundtruth_boxes'] = groundtruth_boxes
    tensor_dict['groundtruth_classes'] = groundtruth_classes
    tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area]
    # NOTE(review): the two field names below were truncated in the damaged
    # copy and are inferred from the output keys -- confirm.
    tensor_dict['is_crowd'] = input_dict[
        fields.InputDataFields.groundtruth_is_crowd]
    tensor_dict['difficult'] = input_dict[
        fields.InputDataFields.groundtruth_difficult]
    if 'detection_masks' in tensor_dict:
      tensor_dict['groundtruth_instance_masks'] = input_dict[
          fields.InputDataFields.groundtruth_instance_masks]
  return tensor_dict