def testZeroImageOnEmptyMask(self): box_masks = tf.constant([[[0, 0], [0, 0]]], dtype=tf.float32) boxes = tf.constant([[0.0, 0.0, 1.0, 1.0]], dtype=tf.float32) image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, image_height=4, image_width=4) np_expected_image_masks = np.array([[[0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0], [0, 0, 0, 0]]], dtype=np.float32) with self.test_session() as sess: np_image_masks = sess.run(image_masks) self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskIsCenteredInImageWhenBoxIsCentered(self): box_masks = tf.constant([[[1, 1], [1, 1]]], dtype=tf.float32) boxes = tf.constant([[0.25, 0.25, 0.75, 0.75]], dtype=tf.float32) image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, image_height=4, image_width=4) np_expected_image_masks = np.array([[[0, 0, 0, 0], [0, 1, 1, 0], [0, 1, 1, 0], [0, 0, 0, 0]]], dtype=np.float32) with self.test_session() as sess: np_image_masks = sess.run(image_masks) self.assertAllClose(np_image_masks, np_expected_image_masks)
def testMaskOffCenterRemainsOffCenterInImage(self): box_masks = tf.constant([[[1, 0], [0, 1]]], dtype=tf.float32) boxes = tf.constant([[0.25, 0.5, 0.75, 1.0]], dtype=tf.float32) image_masks = ops.reframe_box_masks_to_image_masks(box_masks, boxes, image_height=4, image_width=4) np_expected_image_masks = np.array([[[0, 0, 0, 0], [0, 0, 0.6111111, 0.16666669], [0, 0, 0.3888889, 0.83333337], [0, 0, 0, 0]]], dtype=np.float32) with self.test_session() as sess: np_image_masks = sess.run(image_masks) self.assertAllClose(np_image_masks, np_expected_image_masks)
def run_inference_for_single_image(image, graph): with graph.as_default(): ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') output_dict = sess.run( tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict['detection_classes'][ 0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def run_inference_for_single_image(model, image): """Runs detection on a single image Args: model (model): Model image (byte): Numpy image array Returns: dict: output_dict with labels/confidence """ image = np.asarray(image) # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so add an axis with `tf.newaxis`. input_tensor = input_tensor[tf.newaxis, ...] # Run inference output_dict = model(input_tensor) # All outputs are batches tensors. # Convert to numpy arrays, and take index [0] to remove the batch dimension. # We're only interested in the first num_detections. num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict['detection_classes'].astype( np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
def run_inference_for_single_image(image, graph): with graph.as_default(): with tf.Session() as sess: # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def run_inference_for_single_image(image, graph): with graph.as_default(): with tf.Session() as sess: # input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: # individual image processing detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[1], image.shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Batch Dim tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Processing inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image}) # numpy arrays output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.int64) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def expand_this_graph_by_process_detection_masks(tensor_dict, image_H, image_W): # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_H, image_W) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0)
def detection(model): # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. while True: ret, image_np = cap.read() input_tensor = tf.convert_to_tensor(image_np) input_tensor = input_tensor[tf.newaxis, ...] # Run inference model_fn = model.signatures['serving_default'] output_dict = model_fn(input_tensor) # All outputs are batches tensors. # Convert to numpy arrays, and take index [0] to remove the batch dimension. # We're only interested in the first num_detections. num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict[ 'detection_classes'].astype(np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() vis_util.visualize_boxes_and_labels_on_image_array( image_np, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, instance_masks=output_dict.get('detection_masks_reframed', None), use_normalized_coordinates=True, line_thickness=8) cv2.imshow('object detection', cv2.resize(image_np, (800, 600))) if cv2.waitKey(25) & 0xFF == ord('q'): break
def run_inference_for_single_image(model, image): image = np.asarray(image) # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so add an axis with `tf.newaxis`. input_tensor = input_tensor[tf.newaxis, ...] # Run inference # start = time.time_ns() start = time.time() output_dict = model(input_tensor) # end = time.time_ns() end = time.time() # print("Inference time: {}".format((end-start)/(10**9))) print("Inference time: {} s".format(end - start)) # All outputs are batches tensors. # Convert to numpy arrays, and take index [0] to remove the batch dimension. # We're only interested in the first num_detections. num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict['detection_classes'].astype( np.int64) # return output_dict # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
def get_output_dict(image, detection_graph, sess): with detection_graph.as_default(): ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. # real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) real_num_detection = tf.cast(100, tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_scores = tf.slice(tensor_dict['detection_scores'], [0, 0], [-1, real_num_detection]) detection_classes = tf.slice(tensor_dict['detection_classes'], [0, 0], [-1, real_num_detection]) tensor_dict['detection_scores'] = detection_scores tensor_dict['detection_classes'] = detection_classes detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[1], image.shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: image}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def run_inference_for_single_image(model, image): image = np.asarray(image) input_tensor = tf.convert_to_tensor(image) input_tensor = input_tensor[tf.newaxis,...] output_dict = model(input_tensor) num_detections = int(output_dict.pop('num_detections')) output_dict = {key:value[0, :num_detections].numpy() for key,value in output_dict.items()} output_dict['num_detections'] = num_detections output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64) if 'detection_masks' in output_dict: detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
def run_inference_for_single_image(self, image): image = np.asarray(image) input_tensor = tf.convert_to_tensor(image) input_tensor = input_tensor[tf.newaxis, ...] # Run inference model_fn = self.model.signatures['serving_default'] output_dict = model_fn(input_tensor) # All outputs are batches tensors. num_detections = int(output_dict.pop('num_detections')) output_dict = {key: value[0, :num_detections].numpy() for key, value in output_dict.items()} output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict['detection_masks_reframed'] = detection_masks_reframed.numpy() # We only want to detect cats. box = np.asarray(output_dict['detection_boxes']) cls = np.asarray(output_dict['detection_classes']).astype(np.int64) scr = np.asarray(output_dict['detection_scores']) boxes = [] for i in range(1, num_detections): if cls[i] == 17: boxes.append(box[i]) output_dict['detection_boxes'] = np.array(boxes) bl = (cls == 17) output_dict['detection_classes'] = np.extract(bl, cls) output_dict['detection_scores'] = np.extract(bl, scr) return output_dict
def load_tensors(self, image_shape): with self.graph.as_default(): # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = { output.name for op in ops for output in op.outputs } tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph( ).get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[0], image_shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') self.tensor_dict = tensor_dict self.image_tensor = image_tensor
def detection(image, sess): ops = detection_graph.get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks']: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = detection_graph.get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks(detection_masks, detection_boxes, image.shape[1], image.shape[2]) detection_masks_reframed = tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8) tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0) return tensor_dict
def run_inference_for_single_image(image, graph): if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict['detection_classes'][ 0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] #print("Top (min 0.0): " + str(output_dict['detection_boxes'][0][0])) #print("Bot (max 1): " + str(output_dict['detection_boxes'][0][2])) #print("Left Bar: " + str(output_dict['detection_boxes'][0][1])) #print("Right Bar: " + str(output_dict['detection_boxes'][0][3])) output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def inference_image(self, image): # image = np.asarray(image) # 입력은 텐서여야하며 'tf.convert_to_tensor'를 사용하여 변환 input_tensor = tf.convert_to_tensor(image) # 모델은 이미지 배치를 예상하므로 tf.newaxis로 축을 추가 input_tensor = input_tensor[tf.newaxis, ...] # 추론 실행 model_fn = self.model.signatures['serving_default'] output_dict = model_fn(input_tensor) # 모든 출력은 배치 텐서 # numpy 배열로 변환하고 인덱스 [0]을 사용하여 배치 차원을 제거 # 우리는 처음 num_detections에만 관심이 있음 num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict[ 'detection_classes'].astype(np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
def run_inference(images, graph): with graph.as_default(): with tf.Session() as sess: # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: for i in range(len(images)): detection_boxes = tensor_dict['detection_boxes'][i] detection_masks = tensor_dict['detection_masks'][i] # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][i], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, images.shape[1], images.shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) tensor_dict['detection_masks'+str(i)] = detection_masks_reframed image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: images}) output_dict['num_detections'] = output_dict['num_detections'].astype(np.int64) output_dict['detection_classes'] = output_dict['detection_classes'].astype(np.uint8) return output_dict
def predict(self, input_image, size=None, threshold=0.5, top_k=10): img_arr, img_width, img_height, scale_factor = self._process_input_image( input_image, size) output_dict = self.output_dict(img_arr) boxes = output_dict["detection_boxes"] scores = output_dict["detection_scores"] classes = output_dict["detection_classes"].astype(np.int64) num_detections = output_dict['num_detections'] masks = [None for _ in range(num_detections)] predictions = [] if 'detection_masks' in output_dict: # get masks masks = output_dict["detection_masks"] # adjust mask coordinates based on the images dimensions masks = utils_ops.reframe_box_masks_to_image_masks( masks, boxes, img_height, img_width) # check eager execution mode if tf.executing_eagerly(): masks = tf.cast(masks > threshold, tf.uint8).numpy() else: masks_tensor = tf.cast(masks > threshold, tf.uint8) masks = masks_tensor.eval(session=tf.Session()) for box, mask, score, label in zip(boxes, masks, scores, classes): if score >= threshold: if self._labels_map_dict and label in self._labels_map_dict: label = self._labels_map_dict[label]["name"] start_y, start_x, end_y, end_x = box start_x = int(start_x * img_width) start_y = int(start_y * img_height) end_x = int(end_x * img_width) end_y = int(end_y * img_height) predictions.append( BoundingBox(start_x, start_y, end_x, end_y, label, round(float(score), 2), scale_factor, mask)) if len(predictions) > 0 and len(predictions) > top_k: predictions = predictions[:top_k] return img_arr, predictions
def detect_objects_in_image(image_np, detect_fn): print('================================') input_tensor = tf.convert_to_tensor(np.expand_dims(image_np, 0), dtype=tf.float32) detections = detect_fn(input_tensor) # All outputs are batches tensors. # Convert to numpy arrays, and take index [0] to remove the batch dimension. # We're only interested in the first num_detections. num_detections = int(detections.pop('num_detections')) detections = dict(itertools.islice(detections.items(), num_detections)) detections['num_detections'] = num_detections # Handle models with masks: if "detection_masks" in detections: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detections["detection_masks"][0], detections["detection_boxes"][0], image_np.shape[0], image_np.shape[1]) detections['detection_masks_reframed'] = tf.cast(detection_masks_reframed > 0.5, tf.uint8) return detections
def run_inference_for_single_image(model, image): image = np.asarray(image) # The input needs to be a tensor, convert it using `tf.convert_to_tensor`. input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so add an axis with `tf.newaxis`. input_tensor = input_tensor[tf.newaxis, ...] tensorboard_callback = keras.callbacks.TensorBoard( log_dir='object_detection/log') # Run inference output_dict = model(input_tensor, callback=[tensorboard_callback]) # All outputs are batches tensors. # Convert to numpy arrays, and take index [0] to remove the batch dimension. # We're only interested in the first num_detections. num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict['detection_classes'].astype( np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
def run_inference_for_single_image(self, image): if 'detection_masks' in self.tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(self.tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(self.tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(self.tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension self.tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) # Run inference output_dict = self.sess.run( self.tensor_dict, feed_dict={self.image_tensor: np.expand_dims(image, 0)}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict['detection_classes'][ 0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0] return output_dict
def run_inference_for_single_image(model, image): image = np.asarray(image) # The input needs to be a tensor input_tensor = tf.convert_to_tensor(image) # The model expects a batch of images, so adding a useless axis input_tensor = input_tensor[tf.newaxis, ...] # Run inference model_fn = model.signatures['serving_default'] output_dict = model_fn(input_tensor) # All outputs are batches tensors. # Convert to numpy arrays, and taking index [0] to remove the batch dimension num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections].numpy() for key, value in output_dict.items() } output_dict['num_detections'] = num_detections # detection_classes should be ints. output_dict['detection_classes'] = output_dict['detection_classes'].astype( np.int64) # Handle models with masks: if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast(detection_masks_reframed > 0.5, tf.uint8) output_dict[ 'detection_masks_reframed'] = detection_masks_reframed.numpy() return output_dict
tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, IMAGE_SIZE[0], IMAGE_SIZE[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') @app.route("/") def helloworld(): return '<h1>Hello World!</h1>' # 图片上传
def result_dict_for_single_example(image, key, detections, groundtruth=None, class_agnostic=False, scale_to_absolute=False): """Merges all detection and groundtruth information for a single example. Note that evaluation tools require classes that are 1-indexed, and so this function performs the offset. If `class_agnostic` is True, all output classes have label 1. Args: image: A single 4D uint8 image tensor of shape [1, H, W, C]. key: A single string tensor identifying the image. detections: A dictionary of detections, returned from DetectionModel.postprocess(). groundtruth: (Optional) Dictionary of groundtruth items, with fields: 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in normalized coordinates. 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) 'groundtruth_instance_masks': 3D int64 tensor of instance masks (Optional). class_agnostic: Boolean indicating whether the detections are class-agnostic (i.e. binary). Default False. scale_to_absolute: Boolean indicating whether boxes and keypoints should be scaled to absolute coordinates. Note that for IoU based evaluations, it does not matter whether boxes are expressed in absolute or relative coordinates. Default False. Returns: A dictionary with: 'original_image': A [1, H, W, C] uint8 image tensor. 'key': A string tensor with image identifier. 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in normalized or absolute coordinates, depending on the value of `scale_to_absolute`. 'detection_scores': [max_detections] float32 tensor of scores. 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. 'detection_masks': [max_detections, H, W] float32 tensor of binarized masks, reframed to full image masks. 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in normalized or absolute coordinates, depending on the value of `scale_to_absolute`. (Optional) 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. (Optional) 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) 'groundtruth_instance_masks': 3D int64 tensor of instance masks (Optional). """ label_id_offset = 1 # Applying label id offset (b/63711816) input_data_fields = fields.InputDataFields output_dict = { input_data_fields.original_image: image, input_data_fields.key: key, } detection_fields = fields.DetectionResultFields detection_boxes = detections[detection_fields.detection_boxes][0] image_shape = tf.shape(image) detection_scores = detections[detection_fields.detection_scores][0] if class_agnostic: detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) else: detection_classes = ( tf.to_int64(detections[detection_fields.detection_classes][0]) + label_id_offset) num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) detection_boxes = tf.slice( detection_boxes, begin=[0, 0], size=[num_detections, -1]) detection_classes = tf.slice( detection_classes, begin=[0], size=[num_detections]) detection_scores = tf.slice( detection_scores, begin=[0], size=[num_detections]) if scale_to_absolute: absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) output_dict[detection_fields.detection_boxes] = ( absolute_detection_boxlist.get()) else: output_dict[detection_fields.detection_boxes] = detection_boxes output_dict[detection_fields.detection_classes] = detection_classes output_dict[detection_fields.detection_scores] = detection_scores if detection_fields.detection_masks in detections: detection_masks = detections[detection_fields.detection_masks][0] # TODO(rathodv): This should be done in model's postprocess # function ideally. detection_masks = tf.slice( detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[1], image_shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) output_dict[detection_fields.detection_masks] = detection_masks_reframed if detection_fields.detection_keypoints in detections: detection_keypoints = detections[detection_fields.detection_keypoints][0] output_dict[detection_fields.detection_keypoints] = detection_keypoints if scale_to_absolute: absolute_detection_keypoints = keypoint_ops.scale( detection_keypoints, image_shape[1], image_shape[2]) output_dict[detection_fields.detection_keypoints] = ( absolute_detection_keypoints) if groundtruth: if input_data_fields.groundtruth_instance_masks in groundtruth: groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) output_dict.update(groundtruth) if scale_to_absolute: groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) output_dict[input_data_fields.groundtruth_boxes] = ( absolute_gt_boxlist.get()) # For class-agnostic models, groundtruth classes all become 1. if class_agnostic: groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes return output_dict
tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks']: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[1], image.shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') cap = cv2.VideoCapture('/share/jproject/sakkas/smoke-detection/videos/FAST FORWARD 102nd Running of the Indianapolis 500.mp4') total_time = 0 capture_time = 0 preproc_time = 0 infer_time = 0 postproc_time = 0
for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( tensor_name) if 'detection_masks' in tensor_dict: detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, height, width) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') while True: ret, image_np = cap.read() if ret == False: continue frame = frame + 1 print("frame", frame) image_np_expanded = np.expand_dims(image_np, axis=0)
def build_inference_graph(image_tensor, inference_graph_path, override_num_detections=None): """Loads the inference graph and connects it to the input image. Args: image_tensor: The input image. uint8 tensor, shape=[1, None, None, 3] inference_graph_path: Path to the inference graph with embedded weights Returns: detected_boxes_tensor: Detected boxes. Float tensor, shape=[num_detections, 4] detected_scores_tensor: Detected scores. Float tensor, shape=[num_detections] detected_labels_tensor: Detected labels. Int64 tensor, shape=[num_detections] """ with tf.gfile.Open(inference_graph_path, 'r') as graph_def_file: graph_content = graph_def_file.read() graph_def = tf.GraphDef() graph_def.MergeFromString(graph_content) tf.import_graph_def( graph_def, name='', input_map={'image_tensor': image_tensor}) g = tf.get_default_graph() if override_num_detections is not None: num_detections_tensor = tf.cast(override_num_detections, tf.int32) else: num_detections_tensor = tf.squeeze( g.get_tensor_by_name('num_detections:0'), 0) num_detections_tensor = tf.cast(num_detections_tensor, tf.int32) detected_boxes_tensor = tf.squeeze( g.get_tensor_by_name('detection_boxes:0'), 0) detected_boxes_tensor = detected_boxes_tensor[:num_detections_tensor] detected_scores_tensor = tf.squeeze( g.get_tensor_by_name('detection_scores:0'), 0) detected_scores_tensor = detected_scores_tensor[:num_detections_tensor] detected_labels_tensor = tf.squeeze( g.get_tensor_by_name('detection_classes:0'), 0) detected_labels_tensor = tf.cast(detected_labels_tensor, tf.int64) detected_labels_tensor = detected_labels_tensor[:num_detections_tensor] detected_masks_tensor = tf.squeeze( g.get_tensor_by_name('detection_masks:0'), 0) # detected_masks_tensor = tf.cast(detected_masks_tensor, tf.int32) image_shape = tf.shape(image_tensor) detected_masks_tensor = tf.slice( detected_masks_tensor, begin=[0, 0, 0], size=[num_detections_tensor, -1, -1]) detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detected_masks_tensor, detected_boxes_tensor, image_shape[1], image_shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # For some unknown reason directly feeding in detection_masks_reframed makes tensorflow stuck... ph = tf.placeholder(dtype=tf.uint8, shape=[None, None, None]) detection_masks_encoded = tf.map_fn(lambda x: tf.image.encode_png(x), tf.expand_dims(ph, axis=-1), dtype=tf.string) return detected_boxes_tensor, detected_scores_tensor, detected_labels_tensor, detection_masks_reframed, detection_masks_encoded, ph
def execute_inference_for_image(image, graph): """Call the tensors to process the images using the graph. This method may be called concurrently from multiple threads. Args: image: The image to pass to the tensor graph: The model to process the image. """ # Create a default context manager with graph.as_default(): # Create a new Tensor Flow Session from the generated graph with tf.Session() as sess: # Get operations from the input graph. ops = tf.get_default_graph().get_operations() # Get the names of the tensors all_tensor_names = { output.name for op in ops for output in op.outputs } tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' # If any of the built-in visualization tensors exist in the operations of the graph if tensor_name in all_tensor_names: # Add it to our dictionary tensor_dict[key] = tf.get_default_graph( ).get_tensor_by_name(tensor_name) # If a detection_masks tensor exists in the dictionary if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) # Get the image tensor, and run the inference on it. image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') # Run inference output_dict = sess.run( tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int( output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][ 0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict[ 'detection_masks'][0] return output_dict
def run_inference_for_single_image(image, graph, evaluator): with graph.as_default(): with tf.Session() as sess: configs = config_util.get_configs_from_pipeline_file( PIPELINE_CONFIG_PATH) def get_next(config): return dataset_util.make_initializable_iterator( dataset_builder.build(config)).get_next() create_input_dict_fn = functools.partial( get_next, configs['eval_input_config']) eval_config = configs['eval_config'] model_fn = functools.partial(model_builder.build, model_config=configs['model'], is_training=False) model = model_fn() tensor_dict = evtor._extract_prediction_tensors( model=model, create_input_dict_fn=create_input_dict_fn, ignore_groundtruth=eval_config.ignore_groundtruth) # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = { output.name for op in ops for output in op.outputs } #tensor_dict = {} # for key in [ # 'num_detections', 'detection_boxes', 'detection_scores', # 'detection_classes', 'detection_masks' # ]: # tensor_name = key + ':0' # if tensor_name in all_tensor_names: # tensor_dict[key] = tf.get_default_graph().get_tensor_by_name( # tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') # Run inference output_dict = sess.run( tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int( output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][ 0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict[ 'detection_masks'][0] evaluator.add_single_ground_truth_image_info( image_id=1, groundtruth_dict=output_dict) evaluator.add_single_detected_image_info( image_id=1, detections_dict=output_dict) metrics = evaluator.evaluate() evaluator.clear() return output_dict
def infer(path_list, exported_model_dir, label_map_path, results_dir, disp=True, num_infer=5, min_score_thresh=0.7): ''' Use your exported model to infer on a path list of images. Args: Required: path_list: A list of images paths to infer on. exported_model_dir: The path used to saved your model. label_mapt_path: The path to your label_map file. results_dir: The directory where you want to save your infered images. Optional: disp: Set to false if you are not in an interactive python environment. Will display image in the environment if set to True. num_infer: The number of images you want to infer on. min_score_tresh: The minimal confidence treshold to keep the detection. ''' saved_model_path = os.path.join(exported_model_dir, "saved_model") predict_fn = tf.contrib.predictor.from_saved_model(saved_model_path) random.shuffle(path_list) path_list = path_list[:num_infer] with tf.Session() as sess: category_index = label_map_util.create_category_index_from_labelmap( label_map_path) for img_path in path_list: img = cv2.imread(img_path) img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) img_tensor = np.expand_dims(img, 0) output_dict = predict_fn({"inputs": img_tensor}) num_detections = int(output_dict.pop('num_detections')) output_dict = { key: value[0, :num_detections] for key, value in output_dict.items() } output_dict['num_detections'] = num_detections output_dict['detection_classes'] = output_dict[ 'detection_classes'].astype(np.int64) if 'detection_masks' in output_dict: # Reframe the the bbox mask to the image size. detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( output_dict['detection_masks'], output_dict['detection_boxes'], img.shape[0], img.shape[1]) detection_masks_reframed = tf.cast( detection_masks_reframed > 0.5, tf.uint8) mask_refr = sess.run(detection_masks_reframed) output_dict['detection_masks_reframed'] = mask_refr masks = output_dict.get('detection_masks_reframed', None) boxes = output_dict["detection_boxes"] classes = output_dict["detection_classes"] scores = output_dict["detection_scores"] b = [] c = [] s = [] m = [] k = 0 for classe in classes: b.append(boxes[k]) c.append(classe) s.append(scores[k]) if masks is not None: m.append(masks[k]) k += 1 boxes = np.array(b) classes = np.array(c) scores = np.array(s) if masks is not None: masks = np.array(m) vis_util.visualize_boxes_and_labels_on_image_array( img, boxes, classes, scores, category_index, instance_masks=masks, use_normalized_coordinates=True, line_thickness=3, min_score_thresh=min_score_thresh, max_boxes_to_draw=None) img_name = img_path.split("/")[-1] Image.fromarray(img).save(os.path.join(results_dir, img_name)) if disp == True: display(Image.fromarray(img))
def run_inference_for_single_image(image, graph): with graph.as_default(): with tf.Session() as sess: results = [] labels = [] # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = { output.name for op in ops for output in op.outputs } tensor_dict = {} for key in [ 'num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks' ]: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph( ).get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims( detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name( 'image_tensor:0') image = Image.open(image).convert('RGB') # the array based representation of the image will be used later in order to prepare the # result image with boxes and labels on it. image_np = load_image_into_numpy_array(image) # Expand dimensions since the model expects images to have shape: [1, None, None, 3] image_np_expanded = np.expand_dims(image_np, axis=0) # Run inference start = time.time() output_dict = sess.run( tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) end = time.time() print("Inference Time: %s" % str(end - start)) # all outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int( output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict[ 'detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][ 0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict[ 'detection_masks'][0] for i in range(output_dict['num_detections']): label = category_index[output_dict['detection_classes'] [i]]['name'] score = output_dict['detection_scores'][i] if label not in labels: results.append({'label_text': label, 'score': str(score)}) labels.append(label) print( '%d objects found in the given image, plotting the image... ' % (output_dict['num_detections'])) print([result for result in results]) print('close the image to continue') ''' vis_util.visualize_boxes_and_labels_on_image_array( image_np, output_dict['detection_boxes'], output_dict['detection_classes'], output_dict['detection_scores'], category_index, instance_masks=output_dict.get('detection_masks'), use_normalized_coordinates=True, line_thickness=8) plt.figure(figsize=IMAGE_SIZE) plt.imshow(image_np) plt.show() ''' return output_dict
def _resize_detection_masks(args): detection_boxes, detection_masks, image_shape = args detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[0], image_shape[1]) return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
def run_inference_for_single_image(image, sess, graph, class_id=None): """Feed forward an image into the object detection model. Args: image (ndarray): Input image in numpy format (OpenCV format). sess: TF session. graph: Object detection model loaded before. class_id (list): Optional. Id's of the classes you want to detect. Refer to mscoco_label_map.pbtxt' to find out more. Returns: output_dict (dict): Contains the info related to the detections. """ # Get handles to input and output tensors ops = tf.get_default_graph().get_operations() all_tensor_names = {output.name for op in ops for output in op.outputs} tensor_dict = {} for key in ['num_detections', 'detection_boxes', 'detection_scores', 'detection_classes', 'detection_masks']: tensor_name = key + ':0' if tensor_name in all_tensor_names: tensor_dict[key] = tf.get_default_graph().get_tensor_by_name(tensor_name) if 'detection_masks' in tensor_dict: # The following processing is only for single image detection_boxes = tf.squeeze(tensor_dict['detection_boxes'], [0]) detection_masks = tf.squeeze(tensor_dict['detection_masks'], [0]) # Reframe is required to translate mask from box coordinates to image coordinates and fit the image size. real_num_detection = tf.cast(tensor_dict['num_detections'][0], tf.int32) detection_boxes = tf.slice(detection_boxes, [0, 0], [real_num_detection, -1]) detection_masks = tf.slice(detection_masks, [0, 0, 0], [real_num_detection, -1, -1]) detection_masks_reframed = utils_ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image.shape[0], image.shape[1]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) # Follow the convention by adding back the batch dimension tensor_dict['detection_masks'] = tf.expand_dims(detection_masks_reframed, 0) image_tensor = tf.get_default_graph().get_tensor_by_name('image_tensor:0') # Run inference output_dict = sess.run(tensor_dict, feed_dict={image_tensor: np.expand_dims(image, 0)}) # All outputs are float32 numpy arrays, so convert types as appropriate output_dict['num_detections'] = int(output_dict['num_detections'][0]) output_dict['detection_classes'] = output_dict['detection_classes'][0].astype(np.uint8) output_dict['detection_boxes'] = output_dict['detection_boxes'][0] output_dict['detection_scores'] = output_dict['detection_scores'][0] if 'detection_masks' in output_dict: output_dict['detection_masks'] = output_dict['detection_masks'][0].astype(np.float32) if class_id is not None: discrimine_class(class_id, output_dict) ############ #print("{}, {}, {}".format(output_dict['num_detections'], output_dict['detection_scores'], output_dict['detection_boxes'][0])) return output_dict
def result_dict_for_single_example(image, key, detections, groundtruth=None, class_agnostic=False, scale_to_absolute=False): """Merges all detection and groundtruth information for a single example. Note that evaluation tools require classes that are 1-indexed, and so this function performs the offset. If `class_agnostic` is True, all output classes have label 1. Args: image: A single 4D uint8 image tensor of shape [1, H, W, C]. key: A single string tensor identifying the image. detections: A dictionary of detections, returned from DetectionModel.postprocess(). groundtruth: (Optional) Dictionary of groundtruth items, with fields: 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in normalized coordinates. 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) 'groundtruth_instance_masks': 3D int64 tensor of instance masks (Optional). class_agnostic: Boolean indicating whether the detections are class-agnostic (i.e. binary). Default False. scale_to_absolute: Boolean indicating whether boxes and keypoints should be scaled to absolute coordinates. Note that for IoU based evaluations, it does not matter whether boxes are expressed in absolute or relative coordinates. Default False. Returns: A dictionary with: 'original_image': A [1, H, W, C] uint8 image tensor. 'key': A string tensor with image identifier. 'detection_boxes': [max_detections, 4] float32 tensor of boxes, in normalized or absolute coordinates, depending on the value of `scale_to_absolute`. 'detection_scores': [max_detections] float32 tensor of scores. 'detection_classes': [max_detections] int64 tensor of 1-indexed classes. 'detection_masks': [max_detections, H, W] float32 tensor of binarized masks, reframed to full image masks. 'groundtruth_boxes': [num_boxes, 4] float32 tensor of boxes, in normalized or absolute coordinates, depending on the value of `scale_to_absolute`. (Optional) 'groundtruth_classes': [num_boxes] int64 tensor of 1-indexed classes. (Optional) 'groundtruth_area': [num_boxes] float32 tensor of bbox area. (Optional) 'groundtruth_is_crowd': [num_boxes] int64 tensor. (Optional) 'groundtruth_difficult': [num_boxes] int64 tensor. (Optional) 'groundtruth_group_of': [num_boxes] int64 tensor. (Optional) 'groundtruth_instance_masks': 3D int64 tensor of instance masks (Optional). """ label_id_offset = 1 # Applying label id offset (b/63711816) input_data_fields = fields.InputDataFields output_dict = { input_data_fields.original_image: image, input_data_fields.key: key, } detection_fields = fields.DetectionResultFields detection_boxes = detections[detection_fields.detection_boxes][0] image_shape = tf.shape(image) detection_scores = detections[detection_fields.detection_scores][0] if class_agnostic: detection_classes = tf.ones_like(detection_scores, dtype=tf.int64) else: detection_classes = ( tf.to_int64(detections[detection_fields.detection_classes][0]) + label_id_offset) num_detections = tf.to_int32(detections[detection_fields.num_detections][0]) detection_boxes = tf.slice( detection_boxes, begin=[0, 0], size=[num_detections, -1]) detection_classes = tf.slice( detection_classes, begin=[0], size=[num_detections]) detection_scores = tf.slice( detection_scores, begin=[0], size=[num_detections]) if scale_to_absolute: absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( box_list.BoxList(detection_boxes), image_shape[1], image_shape[2]) output_dict[detection_fields.detection_boxes] = ( absolute_detection_boxlist.get()) else: output_dict[detection_fields.detection_boxes] = detection_boxes output_dict[detection_fields.detection_classes] = detection_classes output_dict[detection_fields.detection_scores] = detection_scores if detection_fields.detection_masks in detections: detection_masks = detections[detection_fields.detection_masks][0] # TODO(rathodv): This should be done in model's postprocess # function ideally. detection_masks = tf.slice( detection_masks, begin=[0, 0, 0], size=[num_detections, -1, -1]) detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[1], image_shape[2]) detection_masks_reframed = tf.cast( tf.greater(detection_masks_reframed, 0.5), tf.uint8) output_dict[detection_fields.detection_masks] = detection_masks_reframed if detection_fields.detection_keypoints in detections: detection_keypoints = detections[detection_fields.detection_keypoints][0] output_dict[detection_fields.detection_keypoints] = detection_keypoints if scale_to_absolute: absolute_detection_keypoints = keypoint_ops.scale( detection_keypoints, image_shape[1], image_shape[2]) output_dict[detection_fields.detection_keypoints] = ( absolute_detection_keypoints) if groundtruth: if input_data_fields.groundtruth_instance_masks in groundtruth: groundtruth[input_data_fields.groundtruth_instance_masks] = tf.cast( groundtruth[input_data_fields.groundtruth_instance_masks], tf.uint8) output_dict.update(groundtruth) if scale_to_absolute: groundtruth_boxes = groundtruth[input_data_fields.groundtruth_boxes] absolute_gt_boxlist = box_list_ops.to_absolute_coordinates( box_list.BoxList(groundtruth_boxes), image_shape[1], image_shape[2]) output_dict[input_data_fields.groundtruth_boxes] = ( absolute_gt_boxlist.get()) # For class-agnostic models, groundtruth classes all become 1. if class_agnostic: groundtruth_classes = groundtruth[input_data_fields.groundtruth_classes] groundtruth_classes = tf.ones_like(groundtruth_classes, dtype=tf.int64) output_dict[input_data_fields.groundtruth_classes] = groundtruth_classes return output_dict
def _resize_detection_masks(args): detection_boxes, detection_masks, image_shape = args detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, image_shape[0], image_shape[1]) return tf.cast(tf.greater(detection_masks_reframed, 0.5), tf.uint8)
def _extract_prediction_tensors(model, create_input_dict_fn, ignore_groundtruth=False): """Restores the model in a tensorflow session. Args: model: model to perform predictions with. create_input_dict_fn: function to create input tensor dictionaries. ignore_groundtruth: whether groundtruth should be ignored. Returns: tensor_dict: A tensor dictionary with evaluations. """ input_dict = create_input_dict_fn() prefetch_queue = prefetcher.prefetch(input_dict, capacity=500) input_dict = prefetch_queue.dequeue() original_image = tf.expand_dims(input_dict[fields.InputDataFields.image], 0) preprocessed_image = model.preprocess(tf.to_float(original_image)) prediction_dict = model.predict(preprocessed_image) detections = model.postprocess(prediction_dict) original_image_shape = tf.shape(original_image) absolute_detection_boxlist = box_list_ops.to_absolute_coordinates( box_list.BoxList(tf.squeeze(detections['detection_boxes'], axis=0)), original_image_shape[1], original_image_shape[2]) label_id_offset = 1 tensor_dict = { 'original_image': original_image, 'image_id': input_dict[fields.InputDataFields.source_id], 'detection_boxes': absolute_detection_boxlist.get(), 'detection_scores': tf.squeeze(detections['detection_scores'], axis=0), 'detection_classes': ( tf.squeeze(detections['detection_classes'], axis=0) + label_id_offset), } if 'detection_masks' in detections: detection_masks = tf.squeeze(detections['detection_masks'], axis=0) detection_boxes = tf.squeeze(detections['detection_boxes'], axis=0) # TODO: This should be done in model's postprocess function ideally. detection_masks_reframed = ops.reframe_box_masks_to_image_masks( detection_masks, detection_boxes, original_image_shape[1], original_image_shape[2]) detection_masks_reframed = tf.to_float(tf.greater(detection_masks_reframed, 0.5)) tensor_dict['detection_masks'] = detection_masks_reframed # load groundtruth fields into tensor_dict if not ignore_groundtruth: normalized_gt_boxlist = box_list.BoxList( input_dict[fields.InputDataFields.groundtruth_boxes]) gt_boxlist = box_list_ops.scale(normalized_gt_boxlist, tf.shape(original_image)[1], tf.shape(original_image)[2]) groundtruth_boxes = gt_boxlist.get() groundtruth_classes = input_dict[fields.InputDataFields.groundtruth_classes] tensor_dict['groundtruth_boxes'] = groundtruth_boxes tensor_dict['groundtruth_classes'] = groundtruth_classes tensor_dict['area'] = input_dict[fields.InputDataFields.groundtruth_area] tensor_dict['is_crowd'] = input_dict[ fields.InputDataFields.groundtruth_is_crowd] tensor_dict['difficult'] = input_dict[ fields.InputDataFields.groundtruth_difficult] if 'detection_masks' in tensor_dict: tensor_dict['groundtruth_instance_masks'] = input_dict[ fields.InputDataFields.groundtruth_instance_masks] return tensor_dict