def test_inference_with_bad_input_size(self):
    engine = BasicEngine(
        test_utils.test_data_path('mobilenet_v1_1.0_224_quant.tflite'))
    expected_size = engine.required_input_array_size()
    input_data = test_utils.generate_random_input(1, expected_size - 1)
    error_message = None
    try:
        engine.run_inference(input_data, expected_size - 1)
    except AssertionError as e:
        error_message = str(e)
    self.assertEqual(
        'Wrong input size={}, expected={}.'.format(expected_size - 1,
                                                   expected_size),
        error_message)
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', help='Path of the segmentation model.', required=True)
    parser.add_argument(
        '--input', help='File path of the input image.', required=True)
    parser.add_argument('--output', help='File path of the output image.')
    parser.add_argument(
        '--keep_aspect_ratio',
        dest='keep_aspect_ratio',
        action='store_true',
        help=(
            'keep the image aspect ratio when down-sampling the image by adding '
            'black pixel padding (zeros) on bottom or right. '
            'By default the image is resized and reshaped without cropping. This '
            'option should be the same as what is applied on input images during '
            'model training. Otherwise the accuracy may be affected and the '
            'bounding box of detection result may be stretched.'))
    parser.set_defaults(keep_aspect_ratio=False)
    args = parser.parse_args()

    if not args.output:
        output_name = 'semantic_segmentation_result.jpg'
    else:
        output_name = args.output

    # Initialize engine.
    engine = BasicEngine(args.model)
    _, height, width, _ = engine.get_input_tensor_shape()

    # Open image.
    img = Image.open(args.input)
    if args.keep_aspect_ratio:
        resized_img, ratio = image_processing.resampling_with_original_ratio(
            img, (width, height), Image.NEAREST)
    else:
        resized_img = img.resize((width, height))
        ratio = (1., 1.)

    input_tensor = np.asarray(resized_img).flatten()
    _, raw_result = engine.run_inference(input_tensor)
    result = np.reshape(raw_result, (height, width))
    new_width, new_height = int(width * ratio[0]), int(height * ratio[1])

    # If keep_aspect_ratio, we need to remove the padding area.
    result = result[:new_height, :new_width]
    vis_result = label_to_color_image(result.astype(int)).astype(np.uint8)
    vis_result = Image.fromarray(vis_result)

    vis_img = resized_img.crop((0, 0, new_width, new_height))

    # Concatenate the resized input image and the segmentation result
    # side by side. Paste the result at new_width (not width) so it lands
    # in the right half of the canvas when padding was removed.
    concated_image = Image.new('RGB', (new_width * 2, new_height))
    concated_image.paste(vis_img, (0, 0))
    concated_image.paste(vis_result, (new_width, 0))
    concated_image.save(output_name)
    print('Please check', output_name)
def _run_benchmark_for_model(model_name):
    """Runs benchmark for given model with a random input.

    Args:
      model_name: string, file name of the model.

    Returns:
      float, average inference time in milliseconds.
    """
    iterations = 200 if 'edgetpu' in model_name else 20
    print('Benchmark for [', model_name, ']')
    print('model path = ', test_utils.test_data_path(model_name))
    engine = BasicEngine(test_utils.test_data_path(model_name))
    print('Shape of input tensor : ', engine.get_input_tensor_shape())

    # Prepare a randomly generated input.
    input_size = engine.required_input_array_size()
    random_input = test_utils.generate_random_input(1, input_size)

    # Convert it to a numpy.array.
    input_data = np.array(random_input, dtype=np.uint8)

    benchmark_time = timeit.timeit(
        lambda: engine.run_inference(input_data),
        number=iterations)

    # Time consumed for each iteration (milliseconds).
    time_per_inference = (benchmark_time / iterations) * 1000
    print(time_per_inference, 'ms (iterations = ', iterations, ')')
    return time_per_inference
def test_append_fully_connected_and_softmax_layer_to_model(self):
    in_model_name = (
        'mobilenet_v1_1.0_224_quant_embedding_extractor_edgetpu.tflite')
    in_model_path = test_utils.test_data_path(in_model_name)
    in_engine = BasicEngine(in_model_path)

    # Generate random input tensor.
    np.random.seed(12345)
    input_tensor = np.random.randint(
        0, 255, size=in_engine.get_input_tensor_shape(),
        dtype=np.uint8).flatten()

    # Set up weights, biases and FC output tensor range.
    _, embedding_vector = in_engine.run_inference(input_tensor)
    embedding_vector_dim = embedding_vector.shape[0]
    num_classes = 10
    weights = np.random.randn(embedding_vector_dim,
                              num_classes).astype(np.float32)
    biases = np.random.randn(num_classes).astype(np.float32)
    fc_output = embedding_vector.dot(weights) + biases
    fc_output_min = float(np.min(fc_output))
    fc_output_max = float(np.max(fc_output))

    try:
        # Create a temporary directory and remove it when the test
        # finishes. Otherwise, the test may fail because of files
        # generated by a previous run.
        tmp_dir = tempfile.mkdtemp()

        # Append FC and softmax layers and save the model.
        out_model_path = os.path.join(
            tmp_dir, in_model_name[:-7] + '_bp_retrained.tflite')
        AppendFullyConnectedAndSoftmaxLayerToModel(
            in_model_path, out_model_path,
            weights.transpose().flatten(), biases,
            fc_output_min, fc_output_max)
        self.assertTrue(os.path.exists(out_model_path))

        # Run the saved model on the same input.
        out_engine = BasicEngine(out_model_path)
        _, result = out_engine.run_inference(input_tensor)

        # Calculate the expected result (softmax over the FC output).
        expected = np.exp(fc_output - np.max(fc_output))
        expected = expected / np.sum(expected)
        np.testing.assert_almost_equal(result, expected, decimal=2)
    finally:
        shutil.rmtree(tmp_dir)
class MobileNet:
    def __init__(self, tfLiteFilename):
        self.basic_engine = BasicEngine(tfLiteFilename)

    def predict(self, images, max=5):
        raw = self.basic_engine.run_inference(images)
        predictions = raw[1]
        results = {'predictions': []}
        # Only a batch size of 1 is supported, so build a single top-N list
        # from the score vector. (The original looped over every score and
        # appended the same top-N list once per score.)
        ordered = (-predictions).argsort()
        topn = []
        for j in ordered[:max]:
            # Convert numpy.int64 to int for JSON serialization later.
            # The -1 offset presumably skips the model's background class.
            topn.append(int(j - 1))
        results['predictions'].append(topn)
        return results
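# Usage sketch for the MobileNet wrapper above. Illustrative only: the model
# filename and image path are hypothetical, and it assumes a 224x224
# quantized classifier compiled for the Edge TPU.
import numpy as np
from PIL import Image

mobilenet = MobileNet('mobilenet_v1_1.0_224_quant_edgetpu.tflite')
image = Image.open('cat.jpg').resize((224, 224))
input_tensor = np.asarray(image, dtype=np.uint8).flatten()
print(mobilenet.predict(input_tensor, max=5))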
def test_run_inference(self):
    for model in test_utils.get_model_list():
        print('Testing model :', model)
        engine = BasicEngine(test_utils.test_data_path(model))
        input_data = test_utils.generate_random_input(
            1, engine.required_input_array_size())
        latency, ret = engine.run_inference(input_data)
        self.assertEqual(ret.size, engine.total_output_array_size())

        # Check debugging functions.
        self.assertLess(
            math.fabs(engine.get_inference_time() - latency), 0.001)
        raw_output = engine.get_raw_output()
        self.assertEqual(ret.size, raw_output.size)
        for i in range(ret.size):
            if math.isnan(ret[i]) and math.isnan(raw_output[i]):
                continue
            self.assertLess(math.fabs(ret[i] - raw_output[i]), 0.001)
def run_benchmark(model):
    """Returns average inference time in ms on specified model on random input."""
    print('Benchmark for [%s]' % model)
    print('model path = %s' % test_utils.test_data_path(model))
    engine = BasicEngine(test_utils.test_data_path(model))
    print('input tensor shape = %s' % engine.get_input_tensor_shape())
    iterations = 200 if 'edgetpu' in model else 20
    input_size = engine.required_input_array_size()
    random_input = test_utils.generate_random_input(1, input_size)
    input_data = np.array(random_input, dtype=np.uint8)
    result = 1000 * timeit.timeit(
        lambda: engine.run_inference(input_data),
        number=iterations) / iterations
    print('%.2f ms (iterations = %d)' % (result, iterations))
    return result
class SsdMobileNet:
    def __init__(self, tfLiteFilename):
        self.basic_engine = BasicEngine(tfLiteFilename)

    def predict(self, images, max=5):
        raw = self.basic_engine.run_inference(images)
        # The output is an array with 61 elements: the first 40 are bounding
        # box coordinates, the next 10 are classes, the next 10 are their
        # respective confidence values, and the last 1 is the number of
        # detections.
        raw_split = np.split(raw[1], [40, 50, 60, 61])
        boxes = raw_split[0]
        classes = raw_split[1]
        scores = raw_split[2]

        threshold = 0.3
        # Number of scores above the selected threshold; scores come back
        # sorted in descending order, so the first `count` detections are
        # the ones that pass.
        count = (scores > threshold).sum()

        results = {'predictions': []}
        # Currently only support batch size of 1.
        for i in range(1):
            thisResult = []
            for d in range(int(count)):
                x = {
                    'score': float(scores[d]),
                    'box': [
                        float(boxes[4 * d + 1]),
                        float(boxes[4 * d + 0]),
                        float(boxes[4 * d + 3]),
                        float(boxes[4 * d + 2])
                    ],
                    'class': int(classes[d] + 1)
                }
                thisResult.append(x)
            results['predictions'].append(thisResult)
        return results
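# Usage sketch for the SsdMobileNet wrapper above (hypothetical filenames;
# assumes an SSD MobileNet compiled for the Edge TPU with a 300x300 uint8
# input and a postprocessing output limited to 10 detections, matching the
# 61-element output layout parsed in predict()).
import numpy as np
from PIL import Image

detector = SsdMobileNet('ssd_mobilenet_v2_coco_quant_postprocess_edgetpu.tflite')
image = Image.open('street.jpg').resize((300, 300))
input_tensor = np.asarray(image, dtype=np.uint8).flatten()
for detection in detector.predict(input_tensor)['predictions'][0]:
    print(detection['class'], detection['score'], detection['box'])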
def deep_inferencer(results, frameBuffer, model, device):
    deep_engine = BasicEngine(model, device)
    print("Loaded Graphs!!! (Deeplab)")

    while True:
        if frameBuffer.empty():
            continue

        # Run inference.
        color_image = frameBuffer.get()
        prepimg_deep = color_image[:, :, ::-1].copy()  # BGR -> RGB
        prepimg_deep = prepimg_deep.flatten()
        tinf = time.perf_counter()
        latency, result_deep = deep_engine.run_inference(prepimg_deep)
        print(time.perf_counter() - tinf, "sec (Deeplab)")
        results.put(result_deep)
class image_segmentation():
    MODEL_PASCAL = 'models/deeplabv3_mnv2_pascal_quant_edgetpu.tflite'
    MODEL_DM05 = 'models/deeplabv3_mnv2_dm05_pascal_quant_edgetpu.tflite'

    def __init__(self, model=MODEL_PASCAL):
        self.engine = BasicEngine(model)
        _, self.width, self.height, _ = self.engine.get_input_tensor_shape()

    def segment(self, img):
        self.input_image = cv2.resize(img, (self.width, self.height))
        _, raw_result = self.engine.run_inference(self.input_image.flatten())
        result = np.reshape(raw_result, (self.height, self.width))
        self.segmented_img = self.label_to_color_image(
            result.astype(int)).astype(np.uint8)

    def get_original_image(self):
        return self.input_image

    def get_segmented_image(self):
        return self.segmented_img

    def label_to_color_image(self, label):
        # Build the PASCAL VOC color map: each label index is mapped to an
        # RGB color by spreading its bits across the three channels.
        colormap = np.zeros((256, 3), dtype=int)
        indices = np.arange(256, dtype=int)
        for shift in reversed(range(8)):
            for channel in range(3):
                colormap[:, channel] |= ((indices >> channel) & 1) << shift
            indices >>= 3
        return colormap[label]

    def write_image_to_file(self, filename):
        orig = Image.fromarray(self.input_image)
        segm = Image.fromarray(self.segmented_img)
        # Place the original and segmented images side by side.
        concated_image = Image.new('RGB', (self.width * 2, self.height))
        concated_image.paste(orig, (0, 0))
        concated_image.paste(segm, (self.width, 0))
        concated_image.save(filename)
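# Usage sketch for the image_segmentation class above ('street.jpg' and the
# output filename are hypothetical; assumes the model files listed in the
# class exist relative to the working directory).
import cv2

segmenter = image_segmentation()
rgb_img = cv2.cvtColor(cv2.imread('street.jpg'), cv2.COLOR_BGR2RGB)
segmenter.segment(rgb_img)
segmenter.write_image_to_file('segmentation_result.jpg')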
import collections
import enum
import math
import time

import cv2

# Imports the original snippet relied on being in scope.
from edgetpu.basic.basic_engine import BasicEngine
from edgetpu.basic.edgetpu_utils import (EDGE_TPU_STATE_UNASSIGNED,
                                         ListEdgeTpuPaths)

print(ListEdgeTpuPaths(EDGE_TPU_STATE_UNASSIGNED))
engine = BasicEngine(
    'posenet_mobilenet_v1_075_481_641_quant_decoder_edgetpu.tflite')
_, height, width, _ = engine.get_input_tensor_shape()

cam = cv2.VideoCapture(2)
while True:
    ret, frame = cam.read()
    frame = cv2.resize(frame, (width, height),
                       interpolation=cv2.INTER_NEAREST)
    canvas = frame.copy()
    start = time.time()
    _, result = engine.run_inference(frame[:, :, ::-1].flatten())  # BGR -> RGB
    end = time.time()
    # The decoder outputs (y, x) keypoint pairs; draw each as a circle.
    for i in range(0, len(result) - 1, 2):
        cv2.circle(canvas, (int(result[i + 1]), int(result[i])), 10,
                   (0, 255, 0), -1)
    print(1 / (end - start))  # FPS
    cv2.imshow("Result", canvas)
    if cv2.waitKey(1) == ord('q'):
        break

cv2.destroyAllWindows()
cam.release()
image = cv2.resize(image, (xdim, ydim))

##### CONVERT TO GRAYSCALE
#image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

##### INVERT
#image = cv2.bitwise_not(image)

##### CONVERT TO BINARY (OTHER OPTIONS MAY MAKE MORE SENSE)
#_, image = cv2.threshold(image, 180, 255, cv2.THRESH_BINARY)

##### FLATTEN INPUT (TPU REQUIREMENT)
input_data = image.flatten()

##### RUN ON TPU
results = engine.run_inference(input_data)

##### REFORMAT RESULTS
results = (results[1].reshape(xdim, ydim) * 255).astype(np.uint8)

##### CONVERT TO BINARY (OTHER OPTIONS MAY MAKE MORE SENSE)
#_, results = cv2.threshold(results, 128, 255, cv2.THRESH_BINARY)

##### PLOT RESULTS
#mp.gca().cla()
#mp.bar(np.arange(10), engine.get_raw_output())
#mp.axis([-0.5, 9.5, 0, 1])
#mp.pause(0.001)

##### SHOW IMAGE THAT WAS FORWARDED TO TPU MODEL
#image = cv2.resize(image, (560, 560))
class EdgeTPUSemanticSegmenter(ConnectionBasedTransport):

    def __init__(self, namespace='~'):
        # get image_transport before ConnectionBasedTransport subscribes ~input
        self.transport_hint = rospy.get_param(
            namespace + 'image_transport', 'raw')
        rospy.loginfo("Using transport {}".format(self.transport_hint))

        super(EdgeTPUSemanticSegmenter, self).__init__()
        rospack = rospkg.RosPack()
        pkg_path = rospack.get_path('coral_usb')
        self.bridge = CvBridge()
        self.classifier_name = rospy.get_param(
            namespace + 'classifier_name', rospy.get_name())
        model_file = os.path.join(
            pkg_path, './models/deeplabv3_mnv2_pascal_quant_edgetpu.tflite')
        model_file = rospy.get_param(namespace + 'model_file', model_file)
        label_file = rospy.get_param(namespace + 'label_file', None)
        if model_file is not None:
            self.model_file = get_filename(model_file, False)
        if label_file is not None:
            label_file = get_filename(label_file, False)
        self.duration = rospy.get_param(namespace + 'visualize_duration', 0.1)
        self.enable_visualization = rospy.get_param(
            namespace + 'enable_visualization', True)

        device_id = rospy.get_param(namespace + 'device_id', None)
        if device_id is None:
            device_path = None
        else:
            device_path = ListEdgeTpuPaths(EDGE_TPU_STATE_NONE)[device_id]
            assigned_device_paths = ListEdgeTpuPaths(EDGE_TPU_STATE_ASSIGNED)
            if device_path in assigned_device_paths:
                rospy.logwarn('device {} is already assigned: {}'.format(
                    device_id, device_path))
        self.engine = BasicEngine(self.model_file, device_path)
        self.input_shape = self.engine.get_input_tensor_shape()[1:3]

        if label_file is None:
            self.label_names = [
                'background', 'aeroplane', 'bicycle', 'bird', 'boat',
                'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
                'diningtable', 'dog', 'horse', 'motorbike', 'person',
                'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
            ]
            self.label_ids = list(range(len(self.label_names)))
        else:
            self.label_ids, self.label_names = self._load_labels(label_file)

        self.namespace = namespace
        self.pub_label = self.advertise(
            namespace + 'output/label', Image, queue_size=1)

        # visualize timer
        if self.enable_visualization:
            self.lock = threading.Lock()
            self.pub_image = self.advertise(
                namespace + 'output/image', Image, queue_size=1)
            self.pub_image_compressed = self.advertise(
                namespace + 'output/image/compressed',
                CompressedImage, queue_size=1)
            self.timer = rospy.Timer(
                rospy.Duration(self.duration), self.visualize_cb)
            self.img = None
            self.header = None
            self.label = None

    def start(self):
        self.engine = BasicEngine(self.model_file)
        self.input_shape = self.engine.get_input_tensor_shape()[1:3]
        self.subscribe()
        if self.enable_visualization:
            self.timer = rospy.Timer(
                rospy.Duration(self.duration), self.visualize_cb)

    def stop(self):
        self.unsubscribe()
        del self.sub_image
        if self.enable_visualization:
            self.timer.shutdown()
            del self.timer
        del self.engine

    def subscribe(self):
        if self.transport_hint == 'compressed':
            self.sub_image = rospy.Subscriber(
                '{}/compressed'.format(rospy.resolve_name('~input')),
                CompressedImage, self.image_cb,
                queue_size=1, buff_size=2**26)
        else:
            self.sub_image = rospy.Subscriber(
                '~input', Image, self.image_cb,
                queue_size=1, buff_size=2**26)

    def unsubscribe(self):
        self.sub_image.unregister()

    @property
    def visualize(self):
        return self.pub_image.get_num_connections() > 0 or \
            self.pub_image_compressed.get_num_connections() > 0

    def config_callback(self, config, level):
        self.score_thresh = config.score_thresh
        self.top_k = config.top_k
        return config

    def _load_labels(self, path):
        p = re.compile(r'\s*(\d+)(.+)')
        with open(path, 'r', encoding='utf-8') as f:
            lines = (p.match(line).groups() for line in f.readlines())
            labels = {int(num): text.strip() for num, text in lines}
        return list(labels.keys()), list(labels.values())

    def _segment_step(self, img):
        H, W = img.shape[:2]
        input_H, input_W = self.input_shape
        input_tensor = cv2.resize(img, (input_W, input_H))
        input_tensor = input_tensor.flatten()
        _, label = self.engine.run_inference(input_tensor)
        label = label.reshape(self.input_shape)
        label = cv2.resize(label, (W, H), interpolation=cv2.INTER_NEAREST)
        label = label.astype(np.int32)
        return label

    def _segment(self, img):
        return self._segment_step(img)

    def image_cb(self, msg):
        if not hasattr(self, 'engine'):
            return
        if self.transport_hint == 'compressed':
            np_arr = np.fromstring(msg.data, np.uint8)
            img = cv2.imdecode(np_arr, cv2.IMREAD_COLOR)
            img = img[:, :, ::-1]  # BGR -> RGB
        else:
            img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        label = self._segment(img)
        label_msg = self.bridge.cv2_to_imgmsg(label, '32SC1')
        label_msg.header = msg.header
        self.pub_label.publish(label_msg)
        if self.enable_visualization:
            with self.lock:
                self.img = img
                self.header = msg.header
                self.label = label

    def visualize_cb(self, event):
        if (not self.visualize or self.img is None
                or self.header is None or self.label is None):
            return

        with self.lock:
            img = self.img.copy()
            header = copy.deepcopy(self.header)
            label = self.label.copy()

        fig = plt.figure(tight_layout={'pad': 0})
        ax = plt.subplot(1, 1, 1)
        ax.axis('off')
        ax, legend_handles = vis_semantic_segmentation(
            img.transpose((2, 0, 1)), label,
            label_names=self.label_names, alpha=0.7,
            all_label_names_in_legend=True, ax=ax)
        ax.legend(handles=legend_handles, bbox_to_anchor=(1, 1), loc=2)
        fig.canvas.draw()
        w, h = fig.canvas.get_width_height()
        vis_img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8)
        vis_img.shape = (h, w, 3)
        fig.clf()
        plt.close()
        if self.pub_image.get_num_connections() > 0:
            vis_msg = self.bridge.cv2_to_imgmsg(vis_img, 'rgb8')
            # BUG: https://answers.ros.org/question/316362/sensor_msgsimage-generates-float-instead-of-int-with-python3/  # NOQA
            vis_msg.step = int(vis_msg.step)
            vis_msg.header = header
            self.pub_image.publish(vis_msg)
        if self.pub_image_compressed.get_num_connections() > 0:
            # publish compressed
            # http://wiki.ros.org/rospy_tutorials/Tutorials/WritingImagePublisherSubscriber  # NOQA
            vis_compressed_msg = CompressedImage()
            vis_compressed_msg.header = header
            vis_compressed_msg.format = "jpeg"
            vis_img_rgb = cv2.cvtColor(vis_img, cv2.COLOR_BGR2RGB)
            vis_compressed_msg.data = np.array(
                cv2.imencode('.jpg', vis_img_rgb)[1]).tostring()
            self.pub_image_compressed.publish(vis_compressed_msg)
# if our frame dimensions are None, we still need to compute the
# ratio of old frame dimensions to new frame dimensions
if W is None or H is None:
    (H, W) = frame.shape[:2]
    rW = W / float(newW)
    rH = H / float(newH)

# resize the frame, this time ignoring aspect ratio
frame = cv2.resize(frame, (newW, newH))

# construct a blob from the frame and then perform a forward pass
# of the model to obtain the two output layer sets
frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
frame = np.expand_dims(frame, axis=0)
frame = frame.astype(np.uint8)
inference_time, output = engine.run_inference(frame.flatten())
outputs = [output[i:j] for i, j in zip(output_offsets, output_offsets[1:])]
scores = outputs[0].reshape(
    1, int(args["height"] / 4), int(args["width"] / 4), 1)
geometry1 = outputs[1].reshape(
    1, int(args["height"] / 4), int(args["width"] / 4), 4)
geometry2 = outputs[2].reshape(
    1, int(args["height"] / 4), int(args["width"] / 4), 1)
scores = np.transpose(scores, [0, 3, 1, 2])
geometry1 = np.transpose(geometry1, [0, 3, 1, 2])
geometry2 = np.transpose(geometry2, [0, 3, 1, 2])

# decode the predictions, then apply non-maxima suppression to
# suppress weak, overlapping bounding boxes
(rects, confidences, angles) = decode_predictions(scores, geometry1, geometry2)
class FaceEmbeddingEngine:
    '''
    class FaceEmbeddingEngine

    Purpose: generate embeddings for images of faces
    '''

    def __init__(self, embedding_model):
        '''
        function constructor

        Constructor for FaceEmbeddingEngine

        Args:
            embedding_model (FaceEmbeddingModelEnum): The model to use for
                generating embeddings for face images

        Returns:
            None
        '''
        # We only want to import these modules at run-time since
        # they will only be installed on certain platforms.
        # pylint: disable=import-outside-toplevel, import-error
        self.embedding_model = embedding_model
        self.required_image_shape = get_image_dimensions_for_embedding_model(
            embedding_model) + (3, )  # need 3 channels for RGB
        if self.embedding_model == FaceEmbeddingModelEnum.CELEBRITY_KERAS:
            print("Using Celebrity trained Keras model for face embeddings")
            from keras.models import load_model
            self.face_embedding_engine = load_model(
                FACE_EMBEDDING_CELEBRITY_KERAS_MODEL_PATH, compile=False)
        elif self.embedding_model == FaceEmbeddingModelEnum.CELEBRITY_TFLITE:
            print("Using Celebrity trained tflite model for face embeddings")
            from edgetpu.basic.basic_engine import BasicEngine
            self.face_embedding_engine = BasicEngine(
                FACE_EMBEDDING_CELEBRITY_TFLITE_MODEL_PATH)
            print("Embedding model input tensor shape: {}".format(
                self.face_embedding_engine.get_input_tensor_shape()))
            print("Embedding model input size: {}".format(
                self.face_embedding_engine.required_input_array_size()))
        else:
            raise Exception(
                "Invalid embedding model: {}".format(embedding_model))

    def get_embedding_model(self):
        '''
        function get_embedding_model

        Get the embedding model being used by this instance of the
        FaceEmbeddingEngine

        Args:
            None

        Returns:
            The FaceEmbeddingModelEnum being used by this instance of
            FaceEmbeddingEngine
        '''
        return self.embedding_model

    # get the face embedding for one face
    def get_embedding(self, face_pixels):
        '''
        function get_embedding

        Generate an embedding for the given face

        Args:
            face_pixels (cv2 image): The image of the face to generate the
                embedding for. The dimensions of the image must match the
                dimensions required by the selected embedding model.

        Returns:
            A numpy array with the embedding that was generated
        '''
        # Confirm we're using a properly sized image to generate the
        # embedding.
        if face_pixels.shape != self.required_image_shape:
            raise Exception(
                "Invalid shape: {} for embedding model: {}".format(
                    face_pixels.shape, self.embedding_model))
        # scale pixel values
        face_pixels = face_pixels.astype('float32')
        # standardize pixel values across channels (global)
        mean, std = face_pixels.mean(), face_pixels.std()
        face_pixels = (face_pixels - mean) / std
        # transform face into one sample
        sample = expand_dims(face_pixels, axis=0)
        # get embedding
        if self.embedding_model == FaceEmbeddingModelEnum.CELEBRITY_KERAS:
            embeddings = self.face_embedding_engine.predict(sample)
            result = embeddings[0]
        else:
            sample = sample.flatten()
            # normalize values to between 0 and 255 (uint8 range)
            sample *= 255.0 / sample.max()
            # convert to uint8
            sample = sample.astype(np_uint8)
            embeddings = self.face_embedding_engine.run_inference(sample)
            result = embeddings[1]
        return result
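# Usage sketch for FaceEmbeddingEngine (illustrative; 'face.jpg' is a
# hypothetical, already-cropped face image, and the required input size
# comes from get_image_dimensions_for_embedding_model in this module).
import cv2

embedder = FaceEmbeddingEngine(FaceEmbeddingModelEnum.CELEBRITY_TFLITE)
height, width = embedder.required_image_shape[:2]
face = cv2.resize(cv2.imread('face.jpg'), (width, height))
embedding = embedder.get_embedding(face)
print(embedding.shape)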
while True:
    t1 = time.perf_counter()

    ret, color_image = cam.read()
    if not ret:
        continue

    # Normalization
    prepimg_deep = cv2.resize(color_image, (model_width, model_height))
    prepimg_deep = cv2.cvtColor(prepimg_deep, cv2.COLOR_BGR2RGB)
    prepimg_deep = np.expand_dims(prepimg_deep, axis=0)

    # Run model
    prepimg_deep = prepimg_deep.astype(np.uint8)
    inference_time, predictions = engine.run_inference(prepimg_deep.flatten())

    # Segmentation
    predictions = predictions.reshape(model_height, model_width, 19)
    predictions = np.argmax(predictions, axis=-1)
    imdraw = decode_prediction_mask(predictions)
    imdraw = cv2.cvtColor(imdraw, cv2.COLOR_RGB2BGR)
    imdraw = cv2.resize(imdraw, (camera_width, camera_height))

    cv2.putText(imdraw, fps, (camera_width - 170, 15),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (38, 0, 255), 1, cv2.LINE_AA)
    cv2.imshow(window_name, imdraw)
    if cv2.waitKey(waittime) & 0xFF == ord('q'):
        break

    # FPS calculation
class EdgeTPUSemanticSegmenter(ConnectionBasedTransport):

    def __init__(self):
        super(EdgeTPUSemanticSegmenter, self).__init__()
        rospack = rospkg.RosPack()
        pkg_path = rospack.get_path('coral_usb')
        self.bridge = CvBridge()
        self.classifier_name = rospy.get_param(
            '~classifier_name', rospy.get_name())
        model_file = os.path.join(
            pkg_path, './models/deeplabv3_mnv2_pascal_quant_edgetpu.tflite')
        model_file = rospy.get_param('~model_file', model_file)
        label_file = rospy.get_param('~label_file', None)

        self.engine = BasicEngine(model_file)
        self.input_shape = self.engine.get_input_tensor_shape()[1:3]

        if label_file is None:
            self.label_names = [
                'background', 'aeroplane', 'bicycle', 'bird', 'boat',
                'bottle', 'bus', 'car', 'cat', 'chair', 'cow',
                'diningtable', 'dog', 'horse', 'motorbike', 'person',
                'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor'
            ]
            self.label_ids = list(range(len(self.label_names)))
        else:
            self.label_ids, self.label_names = self._load_labels(label_file)

        self.pub_label = self.advertise('~output/label', Image, queue_size=1)
        self.pub_image = self.advertise('~output/image', Image, queue_size=1)

    def subscribe(self):
        self.sub_image = rospy.Subscriber(
            '~input', Image, self.image_cb, queue_size=1, buff_size=2**26)

    def unsubscribe(self):
        self.sub_image.unregister()

    @property
    def visualize(self):
        return self.pub_image.get_num_connections() > 0

    def config_callback(self, config, level):
        self.score_thresh = config.score_thresh
        self.top_k = config.top_k
        return config

    def _load_labels(self, path):
        p = re.compile(r'\s*(\d+)(.+)')
        with open(path, 'r', encoding='utf-8') as f:
            lines = (p.match(line).groups() for line in f.readlines())
            labels = {int(num): text.strip() for num, text in lines}
        return list(labels.keys()), list(labels.values())

    def image_cb(self, msg):
        img = self.bridge.imgmsg_to_cv2(msg, desired_encoding='rgb8')
        H, W = img.shape[:2]
        input_H, input_W = self.input_shape
        input_tensor = cv2.resize(img, (input_W, input_H))
        input_tensor = input_tensor.flatten()
        _, label = self.engine.run_inference(input_tensor)
        label = label.reshape(self.input_shape)
        label = cv2.resize(label, (W, H), interpolation=cv2.INTER_NEAREST)
        label = label.astype(np.int32)
        label_msg = self.bridge.cv2_to_imgmsg(label, '32SC1')
        label_msg.header = msg.header
        self.pub_label.publish(label_msg)

        if self.visualize:
            fig = plt.figure(tight_layout={'pad': 0})
            ax = plt.subplot(1, 1, 1)
            ax.axis('off')
            ax, legend_handles = vis_semantic_segmentation(
                img.transpose((2, 0, 1)), label,
                label_names=self.label_names, alpha=0.7,
                all_label_names_in_legend=True, ax=ax)
            ax.legend(handles=legend_handles, bbox_to_anchor=(1, 1), loc=2)
            fig.canvas.draw()
            w, h = fig.canvas.get_width_height()
            vis_img = np.fromstring(fig.canvas.tostring_rgb(), dtype=np.uint8)
            vis_img.shape = (h, w, 3)
            fig.clf()
            plt.close()
            vis_msg = self.bridge.cv2_to_imgmsg(vis_img, 'rgb8')
            # BUG: https://answers.ros.org/question/316362/sensor_msgsimage-generates-float-instead-of-int-with-python3/  # NOQA
            vis_msg.step = int(vis_msg.step)
            vis_msg.header = msg.header
            self.pub_image.publish(vis_msg)