def validate_yolo_model_onnx(model_path, image_file, anchors, class_names, model_image_size, loop_count):
    """Time a YOLO ONNX model on one image and forward the result to handle_prediction.

    Args:
        model_path: path given to ``onnxruntime.InferenceSession``.
        image_file: path of the image to open with PIL.
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        model_image_size: target size handed to ``preprocess_image``.
        loop_count: number of timed runs; the average divides by this value,
            so 0 raises ``ZeroDivisionError``.
    """
    sess = onnxruntime.InferenceSession(model_path)

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, model_image_size)
    # PIL size is (width, height); it is passed on as-is
    image_shape = img.size

    # pair each model input with the preprocessed array at the same position
    if not isinstance(image_data, list):
        image_data = [image_data]
    feed = {
        input_tensor.name: image_data[index]
        for index, input_tensor in enumerate(sess.get_inputs())
    }

    # predict once first to bypass the model building time
    prediction = sess.run(None, feed)

    start = time.time()
    for _ in range(loop_count):
        prediction = sess.run(None, feed)
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_image_size)
def detect_image(self, image):
    """Detect objects in ``image`` and return a new PIL image with boxes drawn.

    Args:
        image: input image; read through ``image.size`` and ``np.array``.

    Returns:
        ``Image.fromarray`` of the array returned by ``draw_boxes``.
    """
    if self.model_image_size != (None, None):
        # both configured dimensions must be divisible by 32
        for axis in (0, 1):
            assert self.model_image_size[axis] % 32 == 0, 'Multiples of 32 required'

    batch = preprocess_image(image, self.model_image_size)

    # original image shape as a (1, 2) array in (height, width) order
    original_shape = np.expand_dims(
        np.array([image.size[1], image.size[0]]), 0)

    tic = time.time()
    out_boxes, out_classes, out_scores = self.predict(batch, original_shape)
    toc = time.time()

    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    print("Inference time: {:.8f}s".format(toc - tic))

    # draw the result on an array built from the input image
    canvas = draw_boxes(np.array(image, dtype='uint8'), out_boxes,
                        out_classes, out_scores, self.class_names,
                        self.colors)
    return Image.fromarray(canvas)
def yolo_predict_keras(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a Keras YOLO model on one image and post-process the raw output.

    Args:
        model: object exposing ``predict``.
        image: input image; ``image.size`` is forwarded as the image shape.
        anchors: anchor list; a length of 5 selects the YOLOv2 postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        model_image_size: target size for ``preprocess_image``.
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)`` from the postprocess.
    """
    batch = preprocess_image(image, model_image_size)
    original_size = image.size

    raw_output = model.predict([batch])

    # YOLOv2 uses 5 anchors, everything else goes through the YOLOv3 path
    if len(anchors) == 5:
        postprocess = yolo2_postprocess_np
    else:
        postprocess = yolo3_postprocess_np

    pred_boxes, pred_classes, pred_scores = postprocess(
        raw_output,
        original_size,
        anchors,
        num_classes,
        model_image_size,
        max_boxes=100,
        confidence=conf_threshold)
    return pred_boxes, pred_classes, pred_scores
def yolo_predict_onnx(model, image, anchors, num_classes, conf_threshold):
    """Run an ONNX YOLO session on one image and post-process the output.

    The model input size is read from the single input tensor, whose shape is
    unpacked as (batch, height, width, channel).

    Args:
        model: session object exposing ``get_inputs`` and ``run``.
        image: input image; ``image.size`` is reversed to (height, width).
        anchors: anchor list; a length of 5 selects the YOLOv2 postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.
    """
    model_inputs = list(model.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    _, height, width, _ = image_input.shape
    model_image_size = (height, width)

    # prepare input image
    batch = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    original_shape = tuple(image.size[::-1])

    prediction = model.run(None, {image_input.name: batch})

    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        result = yolo2_postprocess_np(
            prediction[0], original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)
    else:
        result = yolo3_postprocess_np(
            prediction, original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)

    pred_boxes, pred_classes, pred_scores = result
    return pred_boxes, pred_classes, pred_scores
def validate_yolo_model(model_path, image_file, anchors, class_names, model_image_size, loop_count):
    """Load a Keras YOLO model, time it on one image and hand off the result.

    Args:
        model_path: path given to ``load_model``.
        image_file: path of the image to open with PIL.
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        model_image_size: target size for ``preprocess_image``.
        loop_count: number of timed ``predict`` calls; used as the divisor of
            the reported average.
    """
    model = load_model(model_path, compile=False,
                       custom_objects=get_custom_objects())

    pil_image = Image.open(image_file)
    pixels = np.array(pil_image, dtype='uint8')
    batch = preprocess_image(pil_image, model_image_size)
    # PIL size, forwarded without reordering
    original_size = pil_image.size

    # predict once first to bypass the model building time
    model.predict([batch])

    tic = time.time()
    for _ in range(loop_count):
        prediction = model.predict([batch])
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction, image_file, pixels, original_size, anchors,
                      class_names, model_image_size)
def detect_image(self, image):
    """Predict keypoints for ``image`` and return a PIL image with the skeleton drawn.

    Args:
        image: input image; read through ``image.size`` and ``np.array``.

    Returns:
        ``Image.fromarray`` of the array returned by ``render_skeleton``.
    """
    batch = preprocess_image(image, self.model_image_size)

    original_size = image.size
    # per-axis ratio between the original image and the model input size
    ratio = (original_size[0] * 1.0 / self.model_image_size[0],
             original_size[1] * 1.0 / self.model_image_size[1])

    tic = time.time()
    keypoints = self.predict(batch)
    toc = time.time()
    print("Inference time: {:.8f}s".format(toc - tic))

    # rescale keypoints back to origin image size
    # NOTE(review): the factor 4 presumably maps heatmap coordinates to the
    # model input resolution - confirm against the model's output stride
    keypoints_dict = {
        self.class_names[index]: (point[0] * ratio[0] * 4,
                                  point[1] * ratio[1] * 4,
                                  point[2])
        for index, point in enumerate(keypoints)
    }

    # draw the keypoint skeleton on an array built from the input image
    canvas = render_skeleton(np.array(image, dtype='uint8'), keypoints_dict,
                             self.skeleton_lines, self.conf_threshold)
    return Image.fromarray(canvas)
def validate_yolo_model(model_path, image_file, anchors, class_names, model_image_size, loop_count):
    """Load a Keras YOLO model, time it on one image and hand off the result.

    The prediction is normalised to a list and sorted by ``len(x[0])`` before
    it is passed to ``handle_prediction``.

    Args:
        model_path: path given to ``load_model``.
        image_file: path of the image to open with PIL.
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        model_image_size: target size for ``preprocess_image``.
        loop_count: number of timed ``predict`` calls; used as the divisor of
            the reported average, so 0 raises ``ZeroDivisionError``.
    """
    custom_object_dict = get_custom_objects()
    model = load_model(model_path, compile=False, custom_objects=custom_object_dict)

    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(img.size))

    # predict once first to bypass the model building time
    model.predict([image_data])

    start = time.time()
    for _ in range(loop_count):
        prediction = model.predict([image_data])
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    # a single-output model does not return a list; isinstance (rather than an
    # exact type comparison) keeps list subclasses from being wrapped again
    if not isinstance(prediction, list):
        prediction = [prediction]

    # order the outputs by the length of each output's first batch entry
    prediction.sort(key=lambda x: len(x[0]))

    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_image_size)
def validate_yolo_model_onnx(model_path, image_file, anchors, class_names, loop_count):
    """Time a YOLO ONNX model on one image and forward the sorted outputs.

    The model input size is read from the single input tensor, whose shape is
    unpacked as (batch, height, width, channel).

    Args:
        model_path: path given to ``onnxruntime.InferenceSession``.
        image_file: path of the image to open with PIL.
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed runs; divisor of the reported average.
    """
    sess = onnxruntime.InferenceSession(model_path)

    model_inputs = list(sess.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    _, height, width, _ = image_input.shape
    model_image_size = (height, width)

    # prepare input image
    pil_image = Image.open(image_file)
    pixels = np.array(pil_image, dtype='uint8')
    batch = preprocess_image(pil_image, model_image_size)
    # origin image shape, in (height, width) format
    original_shape = tuple(pil_image.size[::-1])

    feed = {image_input.name: batch}

    # predict once first to bypass the model building time
    prediction = sess.run(None, feed)

    tic = time.time()
    for _ in range(loop_count):
        prediction = sess.run(None, feed)
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    # order the outputs by the length of each output's first batch entry
    prediction.sort(key=lambda x: len(x[0]))
    handle_prediction(prediction, image_file, pixels, original_shape, anchors,
                      class_names, model_image_size)
def yolo_predict_tflite(interpreter, image, anchors, num_classes, conf_threshold):
    """Run a TFLite YOLO interpreter on one image and post-process the output.

    The model input size is taken from entries 1 and 2 of the first input
    tensor's ``shape``.

    Args:
        interpreter: interpreter object; tensors are used as already set up
            by the caller (no allocation happens here).
        image: input image; ``image.size`` is reversed to (height, width).
        anchors: anchor list; a length of 5 selects the YOLOv2 postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.
    """
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    input_spec = input_details[0]
    model_image_size = (input_spec['shape'][1], input_spec['shape'][2])

    batch = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    original_shape = tuple(image.size[::-1])

    interpreter.set_tensor(input_spec['index'], batch)
    interpreter.invoke()

    # fetch every output tensor in the order the interpreter lists them
    prediction = [interpreter.get_tensor(detail['index'])
                  for detail in output_details]

    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        result = yolo2_postprocess_np(
            prediction[0], original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)
    else:
        result = yolo3_postprocess_np(
            prediction, original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)

    pred_boxes, pred_classes, pred_scores = result
    return pred_boxes, pred_classes, pred_scores
def validate_classifier_model_torch(model, device, image_file, class_names, model_input_shape, loop_count, output_path):
    """Time a PyTorch classifier on one image and forward the prediction.

    Args:
        model: callable model invoked as ``model(tensor)``.
        device: target passed to ``Tensor.to``.
        image_file: path of the image to open with PIL (converted to RGB).
        class_names: optional class name list; when truthy its length must
            equal the last dimension of the model output.
        model_input_shape: passed as ``target_size`` to ``preprocess_image``.
        loop_count: number of timed runs; divisor of the reported average.
        output_path: forwarded unchanged to ``handle_prediction``.
    """
    # prepare input image
    rgb_image = Image.open(image_file).convert('RGB')
    tensor = preprocess_image(rgb_image, target_size=model_input_shape,
                              return_tensor=True)
    tensor = tensor.unsqueeze(0).to(device)

    with torch.no_grad():
        # predict once first to bypass the model building time
        prediction = model(tensor)

        class_count = list(prediction.shape)[-1]
        if class_names:
            # check if classes number match with model prediction
            assert class_count == len(class_names), 'classes number mismatch with model.'

        # get predict output; each run is moved to the CPU as a numpy array
        tic = time.time()
        for _ in range(loop_count):
            prediction = model(tensor).cpu().numpy()
        toc = time.time()
        average_ms = (toc - tic) * 1000 / loop_count
        print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction, image_file, np.array(rgb_image),
                      class_names, output_path)
def detect_image(self, image):
    """Detect objects in ``image`` and return the annotated image with the raw results.

    Args:
        image: input image; read through ``image.size`` and ``np.array``.

    Returns:
        Tuple ``(annotated_image, out_boxes, out_classnames, out_scores)``
        where ``annotated_image`` is a PIL image built from the array returned
        by ``draw_boxes`` and ``out_classnames`` maps each predicted class
        index through ``self.class_names``.
    """
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

    image_data = preprocess_image(image, self.model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    start = time.time()
    out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
    # stop the clock before printing so console I/O is not counted as
    # inference time
    end = time.time()
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    print("Inference time: {:.8f}s".format(end - start))

    # draw result on an array built from the input image
    image_array = np.array(image, dtype='uint8')
    image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                             self.class_names, self.colors)

    out_classnames = [self.class_names[c] for c in out_classes]
    print("Fine detect image 1")
    return Image.fromarray(image_array), out_boxes, out_classnames, out_scores
def validate_hourglass_model(model_path, image_file, class_names, skeleton_lines, model_image_size, loop_count):
    """Load a Keras hourglass model, time it on one image and process the heatmap.

    Args:
        model_path: path given to ``load_model``.
        image_file: path of the image to open with PIL.
        class_names: forwarded unchanged to ``process_heatmap``.
        skeleton_lines: forwarded unchanged to ``process_heatmap``.
        model_image_size: target size for ``preprocess_image``; also the
            divisor of the per-axis scale.
        loop_count: number of timed ``predict`` calls; divisor of the reported
            average, so 0 raises ``ZeroDivisionError``.
    """
    model = load_model(model_path, compile=False)

    img = Image.open(image_file)
    image_data = preprocess_image(img, model_image_size)
    image_size = img.size
    # per-axis ratio between the original image and the model input size
    scale = (image_size[0] * 1.0 / model_image_size[0],
             image_size[1] * 1.0 / model_image_size[1])

    # predict once first to bypass the model building time
    model.predict(image_data)

    start = time.time()
    for _ in range(loop_count):
        prediction = model.predict(image_data)
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    # check to handle multi-output model: keep only the last output
    if isinstance(prediction, list):
        prediction = prediction[-1]
    # first entry of the batch
    heatmap = prediction[0]

    process_heatmap(heatmap, img, scale, class_names, skeleton_lines)
def yolo_predict_pb(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a TF 1.x frozen-graph YOLO model on one image and post-process it.

    Args:
        model: graph object exposing ``get_tensor_by_name``; also used as the
            ``graph`` of the ``tf.Session``.
        image: input image; ``image.size`` is forwarded as the image shape.
        anchors: anchor list; its length (5, 6 or 9) selects the output
            tensor names and the postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        model_image_size: target size for ``preprocess_image``.
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: if ``len(anchors)`` is not 5, 6 or 9.
    """
    # NOTE: TF 1.x frozen pb graph need to specify input/output tensor name
    # so the input/output tensor names are hardcoded here, keyed by the
    # number of anchors
    names_by_anchor_count = {
        6: ['graph/conv2d_1/BiasAdd:0', 'graph/conv2d_3/BiasAdd:0'],
        9: ['graph/conv2d_3/BiasAdd:0', 'graph/conv2d_8/BiasAdd:0',
            'graph/conv2d_13/BiasAdd:0'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['graph/predict_conv/BiasAdd:0'],
    }
    if len(anchors) not in names_by_anchor_count:
        raise ValueError('invalid anchor number')
    output_tensor_names = names_by_anchor_count[len(anchors)]

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # prepare input image
    batch = preprocess_image(image, model_image_size)
    original_size = image.size

    # get input/output tensors
    image_input = model.get_tensor_by_name(input_tensor_name)
    output_tensors = []
    for tensor_name in output_tensor_names:
        output_tensors.append(model.get_tensor_by_name(tensor_name))

    with tf.Session(graph=model) as sess:
        prediction = sess.run(output_tensors, feed_dict={image_input: batch})

    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        result = yolo2_postprocess_np(
            prediction[0], original_size, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)
    else:
        result = yolo3_postprocess_np(
            prediction, original_size, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)

    pred_boxes, pred_classes, pred_scores = result
    return pred_boxes, pred_classes, pred_scores
def validate_deeplab_model_tflite(model_path, image_file, class_names, do_crf, label_file, loop_count):
    """Time a DeepLab TFLite model on one image and forward its outputs.

    The model input size is taken from entries 1 and 2 of the first input
    tensor's ``shape``; the class count is the last entry of the first output
    tensor's ``shape``.

    Args:
        model_path: path given to ``interpreter_wrapper.Interpreter``.
        image_file: path of the image to open with PIL.
        class_names: optional class name list; when truthy its length must
            equal the model's class count.
        do_crf: forwarded unchanged to ``handle_prediction``.
        label_file: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed runs; divisor of the reported average, so
            0 raises ``ZeroDivisionError``.
    """
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    model_image_size = (height, width)

    num_classes = output_details[0]['shape'][-1]
    if class_names:
        # check if classes number match with model prediction
        assert num_classes == len(class_names), 'classes number mismatch with model.'

    # prepare input image
    img = Image.open(image_file)
    image_data = preprocess_image(img, model_image_size)
    # first batch entry of the preprocessed data, cast to uint8
    image = image_data[0].astype('uint8')
    # origin image shape, in (width, height) format
    origin_image_size = img.size

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for _ in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    # fetch every output tensor in the order the interpreter lists them
    prediction = [interpreter.get_tensor(output_detail['index'])
                  for output_detail in output_details]

    handle_prediction(prediction, image, np.array(img), num_classes,
                      class_names, model_image_size, origin_image_size,
                      do_crf, label_file)
def yolo_predict_keras(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a Keras YOLO model on one image and post-process the raw output.

    Args:
        model: object exposing ``predict``.
        image: input image; ``image.size`` is reversed to (height, width).
        anchors: anchor list; a length of 5 selects the YOLOv2 postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        model_image_size: target size for ``preprocess_image``.
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)`` from the postprocess.
    """
    batch = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    original_shape = tuple(image.size[::-1])

    raw_output = model.predict([batch])

    # YOLOv2 use 5 anchors; any other count takes the YOLOv3 path
    if len(anchors) == 5:
        postprocess = yolo2_postprocess_np
    else:
        postprocess = yolo3_postprocess_np

    pred_boxes, pred_classes, pred_scores = postprocess(
        raw_output,
        original_shape,
        anchors,
        num_classes,
        model_image_size,
        max_boxes=100,
        confidence=conf_threshold)
    return pred_boxes, pred_classes, pred_scores
def validate_yolo_model_tflite(model_path, image_file, anchors, class_names, elim_grid_sense, v5_decode, loop_count):
    """Time a YOLO TFLite model on one image and forward the sorted outputs.

    The model input size is taken from entries 1 and 2 of the first input
    tensor's ``shape``.

    Args:
        model_path: path given to ``interpreter_wrapper.Interpreter``.
        image_file: path of the image to open with PIL.
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        elim_grid_sense: forwarded unchanged to ``handle_prediction``.
        v5_decode: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed runs; divisor of the reported average, so
            0 raises ``ZeroDivisionError``.
    """
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')

    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    model_image_size = (height, width)

    image_data = preprocess_image(img, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(img.size))

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for _ in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    # fetch every output tensor in the order the interpreter lists them
    prediction = [interpreter.get_tensor(output_detail['index'])
                  for output_detail in output_details]

    # order the outputs by the length of each output's first batch entry
    prediction.sort(key=lambda x: len(x[0]))
    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_image_size, elim_grid_sense,
                      v5_decode)
def validate_deeplab_model_onnx(model_path, image_file, class_names, do_crf, label_file, loop_count):
    """Time a DeepLab ONNX model on one image and forward its outputs.

    The single input tensor's shape is unpacked as (batch, height, width,
    channel); the class count is the last entry of the single output tensor's
    shape.

    Args:
        model_path: path given to ``onnxruntime.InferenceSession``.
        image_file: path of the image to open with PIL.
        class_names: optional class name list; when truthy its length must
            equal the model's class count.
        do_crf: forwarded unchanged to ``handle_prediction``.
        label_file: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed runs; divisor of the reported average.
    """
    sess = onnxruntime.InferenceSession(model_path)

    model_inputs = list(sess.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    _, height, width, _ = image_input.shape
    model_image_size = (height, width)

    model_outputs = list(sess.get_outputs())
    # assume only 1 output tensor
    assert len(model_outputs) == 1, 'invalid output tensor number.'

    num_classes = model_outputs[0].shape[-1]
    if class_names:
        # check if classes number match with model prediction
        assert num_classes == len(class_names), 'classes number mismatch with model.'

    # prepare input image
    pil_image = Image.open(image_file)
    batch = preprocess_image(pil_image, model_image_size)
    # first batch entry of the preprocessed data, cast to uint8
    resized_pixels = batch[0].astype('uint8')
    # origin image shape, in (width, height) format
    origin_image_size = pil_image.size

    feed = {image_input.name: batch}

    # predict once first to bypass the model building time
    prediction = sess.run(None, feed)

    tic = time.time()
    for _ in range(loop_count):
        prediction = sess.run(None, feed)
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction, resized_pixels, np.array(pil_image),
                      num_classes, class_names, model_image_size,
                      origin_image_size, do_crf, label_file)
def segment_image(self, image):
    """Segment ``image`` and return a PIL image visualising the mask.

    Args:
        image: input image; read through ``image.size`` and ``np.array``.

    Returns:
        ``Image.fromarray`` of the array returned by
        ``visualize_segmentation``.
    """
    batch = preprocess_image(image, self.model_input_shape)
    # origin image shape, in (height, width) format
    original_shape = tuple(image.size[::-1])

    tic = time.time()
    out_mask = self.predict(batch, original_shape)
    toc = time.time()
    print("Inference time: {:.8f}s".format(toc - tic))

    # show segmentation result
    rendered = visualize_segmentation(
        np.array(image),
        out_mask,
        class_names=self.class_names,
        ignore_count_threshold=500)
    return Image.fromarray(rendered)
def validate_hourglass_model_tflite(model_path, image_file, class_names, skeleton_lines, loop_count):
    """Time an hourglass TFLite model on one image and process the heatmap.

    The model input size is taken from entries 1 and 2 of the first input
    tensor's ``shape``.

    Args:
        model_path: path given to ``interpreter_wrapper.Interpreter``.
        image_file: path of the image to open with PIL.
        class_names: forwarded unchanged to ``process_heatmap``.
        skeleton_lines: forwarded unchanged to ``process_heatmap``.
        loop_count: number of timed runs; divisor of the reported average, so
            0 raises ``ZeroDivisionError``.
    """
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    img = Image.open(image_file)

    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    model_image_size = (height, width)

    image_data = preprocess_image(img, model_image_size)
    image_size = img.size
    # per-axis ratio between the original image and the model input size
    scale = (image_size[0] * 1.0 / model_image_size[0],
             image_size[1] * 1.0 / model_image_size[1])

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    start = time.time()
    for _ in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()
    end = time.time()
    print("Average Inference time: {:.8f}ms".format(
        (end - start) * 1000 / loop_count))

    # fetch every output tensor in the order the interpreter lists them
    prediction = [interpreter.get_tensor(output_detail['index'])
                  for output_detail in output_details]

    # last output tensor, first batch entry
    heatmap = prediction[-1][0]
    process_heatmap(heatmap, img, scale, class_names, skeleton_lines)
def validate_classifier_model_onnx(model, image_file, class_names, loop_count, output_path):
    """Time an ONNX classifier session on one image and forward the prediction.

    The single input tensor's shape is unpacked as (batch, channel, height,
    width); the class count is the last entry of the single output tensor's
    shape.

    Args:
        model: session object exposing ``get_inputs``, ``get_outputs`` and
            ``run``.
        image_file: path of the image to open with PIL (converted to RGB).
        class_names: optional class name list; when truthy its length must
            equal the model's class count.
        loop_count: number of timed runs; divisor of the reported average.
        output_path: forwarded unchanged to ``handle_prediction``.
    """
    model_inputs = list(model.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    _, _, height, width = image_input.shape
    model_input_shape = (height, width)

    model_outputs = list(model.get_outputs())
    # assume only 1 output tensor
    assert len(model_outputs) == 1, 'invalid output tensor number.'

    num_classes = model_outputs[0].shape[-1]
    if class_names:
        # check if classes number match with model prediction
        assert num_classes == len(class_names), 'classes number mismatch with model.'

    # prepare input image
    rgb_image = Image.open(image_file).convert('RGB')
    batch = preprocess_image(rgb_image, target_size=model_input_shape,
                             return_tensor=False)
    # add a leading axis to the preprocessed array
    batch = np.expand_dims(batch, axis=0)

    feed = {image_input.name: batch}

    # predict once first to bypass the model building time
    prediction = model.run(None, feed)

    tic = time.time()
    for _ in range(loop_count):
        prediction = model.run(None, feed)
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction[0], image_file, np.array(rgb_image),
                      class_names, output_path)
def validate_hourglass_model_onnx(model_path, image_file, class_names, skeleton_lines, loop_count):
    """Time an hourglass ONNX model on one image and process the heatmap.

    The single input tensor's shape is unpacked as (batch, height, width,
    channel).

    Args:
        model_path: path given to ``onnxruntime.InferenceSession``.
        image_file: path of the image to open with PIL.
        class_names: forwarded unchanged to ``process_heatmap``.
        skeleton_lines: forwarded unchanged to ``process_heatmap``.
        loop_count: number of timed runs; divisor of the reported average.
    """
    sess = onnxruntime.InferenceSession(model_path)

    model_inputs = list(sess.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    _, height, width, _ = image_input.shape
    model_image_size = (height, width)

    model_outputs = list(sess.get_outputs())
    # assume only 1 output tensor
    assert len(model_outputs) == 1, 'invalid output tensor number.'

    # prepare input image
    pil_image = Image.open(image_file)
    batch = preprocess_image(pil_image, model_image_size)
    original_size = pil_image.size
    # per-axis ratio between the original image and the model input size
    ratio = (original_size[0] * 1.0 / model_image_size[0],
             original_size[1] * 1.0 / model_image_size[1])

    feed = {image_input.name: batch}

    # predict once first to bypass the model building time
    prediction = sess.run(None, feed)

    tic = time.time()
    for _ in range(loop_count):
        prediction = sess.run(None, feed)
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    # check to handle multi-output model: keep only the last output
    if isinstance(prediction, list):
        prediction = prediction[-1]
    # first entry of the batch
    heatmap = prediction[0]

    process_heatmap(heatmap, pil_image, ratio, class_names, skeleton_lines)
def detect_image(self, image, apply_constraints=False):
    """Detect objects in ``image``, optionally filter them, and log the timings.

    Each call appends one line with ``[inference_seconds, constraint_seconds]``
    to ``results/time.txt`` (the directory is created when missing). The
    constraint time is 0 when ``apply_constraints`` is false.

    Args:
        image: input image; read through ``image.size`` and ``np.array``.
        apply_constraints: when true, the detections are passed through
            ``constraints.apply_constraints`` before drawing.

    Returns:
        Tuple ``(annotated_image, [out_boxes, out_classes, class_names])``
        where ``annotated_image`` is a PIL image built from the array returned
        by ``draw_boxes``.
    """
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

    image_data = preprocess_image(image, self.model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(image.size))

    start = time.time()
    out_boxes, out_classes, out_scores = self.predict(image_data, image_shape)
    # stop the clock before printing so console I/O is not counted as
    # inference time
    end = time.time()
    print('Found {} boxes for {}'.format(len(out_boxes), 'img'))
    print("Inference time: {:.8f}s".format(end - start))

    start_c = 0
    end_c = 0
    if apply_constraints:
        start_c = time.time()
        print('Applying constraints...')
        out_boxes, out_classes, out_scores = constraints.apply_constraints(
            self.class_names, out_boxes, out_classes, out_scores)
        end_c = time.time()
        print("Constraints time: {:.8f}s".format(end_c - start_c))

    # exist_ok avoids the check-then-create race; the context manager closes
    # the log file even if the write raises
    os.makedirs('results', exist_ok=True)
    with open('results/time.txt', 'a') as time_file:
        time_file.write('{}\n'.format([end - start, end_c - start_c]))

    # draw bounding boxes on an array built from the input image
    image_array = np.array(image, dtype='uint8')
    image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                             self.class_names, self.colors)
    return Image.fromarray(image_array), [
        out_boxes, out_classes, self.class_names
    ]
def validate_yolo_model_onnx(model, image_file, anchors, class_names, elim_grid_sense, v5_decode, loop_count, output_path):
    """Time an ONNX YOLO session on one image and forward its outputs.

    The input layout is treated as NCHW when the second entry of the input
    shape equals 3, and as NHWC otherwise.

    Args:
        model: session object exposing ``get_inputs`` and ``run``.
        image_file: path of the image to open with PIL (converted to RGB).
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        elim_grid_sense: forwarded unchanged to ``handle_prediction``.
        v5_decode: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed runs; divisor of the reported average.
        output_path: forwarded unchanged to ``handle_prediction``.
    """
    model_inputs = list(model.get_inputs())
    # assume only 1 input tensor for image
    assert len(model_inputs) == 1, 'invalid input tensor number.'
    image_input = model_inputs[0]

    # check if input layout is NHWC or NCHW
    is_nchw = image_input.shape[1] == 3
    if is_nchw:
        print("NCHW input layout")
        _, _, height, width = image_input.shape
    else:
        print("NHWC input layout")
        _, height, width, _ = image_input.shape
    model_input_shape = (height, width)

    # prepare input image
    pil_image = Image.open(image_file).convert('RGB')
    pixels = np.array(pil_image, dtype='uint8')
    batch = preprocess_image(pil_image, model_input_shape)
    # origin image shape, in (height, width) format
    original_shape = pil_image.size[::-1]

    if is_nchw:
        # transpose image for NCHW layout
        batch = batch.transpose((0, 3, 1, 2))

    feed = {image_input.name: batch}

    # predict once first to bypass the model building time
    prediction = model.run(None, feed)

    tic = time.time()
    for _ in range(loop_count):
        prediction = model.run(None, feed)
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction, image_file, pixels, original_shape, anchors,
                      class_names, model_input_shape, elim_grid_sense,
                      v5_decode, output_path)
def yolo_predict_pb(model, image, anchors, num_classes, model_image_size, conf_threshold):
    """Run a TF 1.x frozen-graph YOLO model on one image and post-process it.

    The ``model_image_size`` argument is overwritten with the height and width
    read from the graph's input tensor, so the passed-in value is not used.

    Args:
        model: graph object exposing ``get_tensor_by_name``; also used as the
            ``graph`` of the ``tf.Session``.
        image: input image; ``image.size`` is reversed to (height, width).
        anchors: anchor list; its length (5, 6 or 9) selects the output
            tensor names and the postprocess.
        num_classes: number of classes, forwarded to the postprocess.
        model_image_size: replaced by the graph input size (see above).
        conf_threshold: passed as ``confidence`` to the postprocess.

    Returns:
        Tuple ``(pred_boxes, pred_classes, pred_scores)``.

    Raises:
        ValueError: if ``len(anchors)`` is not 5, 6 or 9.
    """
    # NOTE: TF 1.x frozen pb graph need to specify input/output tensor name
    # so the input/output tensor names are hardcoded here, keyed by the
    # number of anchors
    names_by_anchor_count = {
        6: ['graph/predict_conv_1/BiasAdd:0', 'graph/predict_conv_2/BiasAdd:0'],
        9: ['graph/predict_conv_1/BiasAdd:0', 'graph/predict_conv_2/BiasAdd:0',
            'graph/predict_conv_3/BiasAdd:0'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['graph/predict_conv/BiasAdd:0'],
    }
    if len(anchors) not in names_by_anchor_count:
        raise ValueError('invalid anchor number')
    output_tensor_names = names_by_anchor_count[len(anchors)]

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # get input/output tensors
    image_input = model.get_tensor_by_name(input_tensor_name)
    output_tensors = []
    for tensor_name in output_tensor_names:
        output_tensors.append(model.get_tensor_by_name(tensor_name))

    _, height, width, _ = image_input.shape
    model_image_size = (int(height), int(width))

    # prepare input image
    batch = preprocess_image(image, model_image_size)
    # origin image shape, in (height, width) format
    original_shape = tuple(image.size[::-1])

    with tf.Session(graph=model) as sess:
        prediction = sess.run(output_tensors, feed_dict={image_input: batch})

    # order the outputs by the length of each output's first batch entry
    prediction.sort(key=lambda x: len(x[0]))

    if len(anchors) == 5:
        # YOLOv2 use 5 anchors and have only 1 prediction
        assert len(prediction) == 1, 'invalid YOLOv2 prediction number.'
        result = yolo2_postprocess_np(
            prediction[0], original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)
    else:
        result = yolo3_postprocess_np(
            prediction, original_shape, anchors, num_classes,
            model_image_size, max_boxes=100, confidence=conf_threshold)

    pred_boxes, pred_classes, pred_scores = result
    return pred_boxes, pred_classes, pred_scores
def validate_yolo_model(model, image_file, anchors, class_names, model_input_shape, elim_grid_sense, v5_decode, loop_count, output_path):
    """Time an already-loaded YOLO model on one image and forward its outputs.

    Args:
        model: object exposing ``predict``.
        image_file: path of the image to open with PIL (converted to RGB).
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        model_input_shape: target size for ``preprocess_image``.
        elim_grid_sense: forwarded unchanged to ``handle_prediction``.
        v5_decode: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed ``predict`` calls; divisor of the reported
            average, so 0 raises ``ZeroDivisionError``.
        output_path: forwarded unchanged to ``handle_prediction``.
    """
    img = Image.open(image_file).convert('RGB')
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, model_input_shape)
    # origin image shape, in (height, width) format
    image_shape = img.size[::-1]

    # predict once first to bypass the model building time
    model.predict([image_data])

    start = time.time()
    for _ in range(loop_count):
        prediction = model.predict([image_data])
    end = time.time()
    print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    # a single-output model does not return a list; isinstance (rather than an
    # exact type comparison) keeps list subclasses from being wrapped again
    if not isinstance(prediction, list):
        prediction = [prediction]

    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_input_shape, elim_grid_sense,
                      v5_decode, output_path)
def validate_yolo_model_tflite(model_path, image, anchors, class_names, loop_count):
    """Run a YOLO TFLite model on an in-memory image and return the detections.

    The model input size is taken from entries 1 and 2 of the first input
    tensor's ``shape``. The image is converted with ``cv2.COLOR_BGR2RGB``
    before preprocessing, while the unconverted ``image`` is what gets passed
    to ``handle_prediction``.

    Args:
        model_path: path given to ``interpreter_wrapper.Interpreter``.
        image: array whose ``shape`` unpacks into three values, the first two
            being used as (height, width).
        anchors: forwarded unchanged to ``handle_prediction``.
        class_names: forwarded unchanged to ``handle_prediction``.
        loop_count: number of inference runs after the warm-up run.

    Returns:
        Tuple ``(boxes, classes, scores)`` as returned by
        ``handle_prediction``.
    """
    interpreter = interpreter_wrapper.Interpreter(model_path=model_path)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    model_image_size = (height, width)

    imH, imW, _ = image.shape
    image_shape = (imH, imW)

    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image_data = preprocess_image(image_rgb, model_image_size)

    # predict once first to bypass the model building time
    interpreter.set_tensor(input_details[0]['index'], image_data)
    interpreter.invoke()

    for _ in range(loop_count):
        interpreter.set_tensor(input_details[0]['index'], image_data)
        interpreter.invoke()

    # read the outputs of the most recent invocation, in interpreter order
    prediction = [interpreter.get_tensor(output_detail['index'])
                  for output_detail in output_details]

    boxes, classes, scores = handle_prediction(
        prediction, image, image_shape, anchors, class_names,
        model_image_size)
    return boxes, classes, scores
def validate_deeplab_model(model_path, image_file, class_names, model_image_size, do_crf, label_file, loop_count):
    """Load a Keras DeepLab model, time it on one image and forward the output.

    The class count is the last entry of ``model.output.shape``.

    Args:
        model_path: path given to ``load_model``.
        image_file: path of the image to open with PIL.
        class_names: optional class name list; when truthy its length must
            equal the model's class count.
        model_image_size: target size for ``preprocess_image``.
        do_crf: forwarded unchanged to ``handle_prediction``.
        label_file: forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed ``predict`` calls; divisor of the reported
            average.
    """
    # load model
    model = load_model(model_path, compile=False,
                       custom_objects=get_custom_objects())
    K.set_learning_phase(0)

    num_classes = model.output.shape.as_list()[-1]
    if class_names:
        # check if classes number match with model prediction
        assert num_classes == len(class_names), 'classes number mismatch with model.'

    # prepare input image
    pil_image = Image.open(image_file)
    batch = preprocess_image(pil_image, model_image_size)
    # first batch entry of the preprocessed data, cast to uint8
    resized_pixels = batch[0].astype('uint8')
    # origin image shape, in (width, height) format
    origin_image_size = pil_image.size

    # predict once first to bypass the model building time
    model.predict([batch])

    # get predict output
    tic = time.time()
    for _ in range(loop_count):
        prediction = model.predict([batch])
    toc = time.time()
    average_ms = (toc - tic) * 1000 / loop_count
    print("Average Inference time: {:.8f}ms".format(average_ms))

    handle_prediction(prediction, resized_pixels, np.array(pil_image),
                      num_classes, class_names, model_image_size,
                      origin_image_size, do_crf, label_file)
def validate_yolo_model_mnn(model_path, image_file, anchors, class_names, loop_count):
    """Run a YOLO model through the MNN interpreter, time it and hand the
    predictions to ``handle_prediction``.

    Args:
        model_path: path of the MNN model file.
        image_file: path of the image opened with ``Image.open``.
        anchors: anchor list; its length selects the expected outputs
            (6: Tiny YOLOv3, 9: YOLOv3, 5: YOLOv2).
        class_names: class names forwarded unchanged to ``handle_prediction``.
        loop_count: number of timed inference passes after the warm-up pass.
            When it is 0 no timing is printed.

    Raises:
        ValueError: on an unsupported tensor dimension type, an output tensor
            whose height matches none of the expected strides, or an
            unsupported anchor count.
    """
    interpreter = MNN.Interpreter(model_path)
    session = interpreter.createSession()

    # assume only 1 input tensor for image
    input_tensor = interpreter.getSessionInput(session)

    # get input shape; the position of height/width depends on the layout
    input_shape = input_tensor.getShape()
    input_dim_type = input_tensor.getDimensionType()
    if input_dim_type == MNN.Tensor_DimensionType_Tensorflow:
        _, height, width, _ = input_shape
    elif input_dim_type == MNN.Tensor_DimensionType_Caffe:
        _, _, height, width = input_shape
    else:
        # should be MNN.Tensor_DimensionType_Caffe_C4, unsupported now
        raise ValueError('unsupported input tensor dimension type')

    model_image_size = (height, width)

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(img.size))

    # use a temp tensor to copy data
    tmp_input = MNN.Tensor(input_shape, input_tensor.getDataType(),
                           image_data, input_tensor.getDimensionType())

    # predict once first to bypass the model building time
    input_tensor.copyFrom(tmp_input)
    interpreter.runSession(session)

    start = time.time()
    for _ in range(loop_count):
        input_tensor.copyFrom(tmp_input)
        interpreter.runSession(session)
    end = time.time()

    # an average over zero passes is undefined, so only report when passes ran
    if loop_count > 0:
        print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    def get_tensor_list(output_tensors):
        # transform the output tensor dict to ordered tensor list, for further postprocess
        #
        # output tensor list should be like (for YOLOv3):
        # [
        #   (name, tensor) for (13, 13, 3, num_classes+5),
        #   (name, tensor) for (26, 26, 3, num_classes+5),
        #   (name, tensor) for (52, 52, 3, num_classes+5)
        # ]
        #
        # Each tensor gets a rank from the stride its height corresponds to and
        # the list is sorted by that rank, so the result does not depend on the
        # iteration order of the dict.
        strides_by_anchor_count = {
            6: (32, 16),     # Tiny YOLOv3
            9: (32, 16, 8),  # YOLOv3
        }
        num_anchors = len(anchors)

        ranked = []
        for output_tensor_name, output_tensor in output_tensors.items():
            tensor_shape = output_tensor.getShape()
            dim_type = output_tensor.getDimensionType()
            if dim_type == MNN.Tensor_DimensionType_Caffe:
                tensor_height, _ = tensor_shape[2:4]
            else:
                tensor_height, _ = tensor_shape[1:3]

            if num_anchors in strides_by_anchor_count:
                # first stride whose feature-map height matches wins
                for rank, stride in enumerate(strides_by_anchor_count[num_anchors]):
                    if tensor_height == height // stride:
                        break
                else:
                    raise ValueError('invalid tensor shape')
            elif num_anchors == 5:
                # YOLOv2 use 5 anchors and have only 1 prediction
                assert len(output_tensors) == 1, 'YOLOv2 model should have only 1 output tensor.'
                rank = 0
            else:
                raise ValueError('invalid anchor number')

            ranked.append((rank, output_tensor_name, output_tensor))

        # sort on the rank only; list.sort is stable and never compares tensors
        ranked.sort(key=lambda entry: entry[0])
        return [(name, tensor) for _, name, tensor in ranked]

    output_tensors = interpreter.getSessionOutputAll(session)
    output_tensor_list = get_tensor_list(output_tensors)

    prediction = []
    for output_tensor_name, output_tensor in output_tensor_list:
        output_shape = output_tensor.getShape()
        output_elementsize = reduce(mul, output_shape)
        print('output tensor name: {}, shape: {}'.format(output_tensor_name, output_shape))
        assert output_tensor.getDataType() == MNN.Halide_Type_Float

        # copy output tensor to host, for further postprocess
        # NOTE(review): the zero buffer is passed as a tuple of rows rather than
        # as the ndarray itself; presumably required by the MNN binding -- confirm
        host_buffer = tuple(
            np.zeros(output_shape, dtype=float).reshape(output_elementsize, -1))
        tmp_output = MNN.Tensor(output_shape, output_tensor.getDataType(),
                                host_buffer, output_tensor.getDimensionType())
        output_tensor.copyToHostTensor(tmp_output)

        output_data = np.array(tmp_output.getData(), dtype=float).reshape(output_shape)

        # our postprocess code based on TF channel last format, so if the output format
        # doesn't match, we need to transpose
        output_dim_type = output_tensor.getDimensionType()
        if output_dim_type == MNN.Tensor_DimensionType_Caffe:
            output_data = output_data.transpose((0, 2, 3, 1))
        elif output_dim_type == MNN.Tensor_DimensionType_Caffe_C4:
            raise ValueError('unsupported output tensor dimension type')

        prediction.append(output_data)

    # order predictions by the length of their first batch element (ascending)
    prediction.sort(key=lambda x: len(x[0]))
    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_image_size)
def validate_yolo_model_pb(model_path, image_file, anchors, class_names, model_image_size, loop_count):
    """Run a frozen TensorFlow (.pb) YOLO graph, time it and hand the
    predictions to ``handle_prediction``.

    Args:
        model_path: path of the frozen graph file.
        image_file: path of the image opened with ``Image.open``.
        anchors: anchor list; its length selects the hard-coded output tensor
            names (6: two outputs, 9: three outputs, 5: one output).
        class_names: class names forwarded unchanged to ``handle_prediction``.
        model_image_size: ignored on input; it is overwritten with the
            (height, width) read from the graph's image input tensor.
        loop_count: number of timed inference passes after the warm-up pass.
            When it is 0 no timing is printed.

    Raises:
        ValueError: if the anchor count is not 5, 6 or 9.
    """
    # check tf version to be compatible with TF 2.x
    # (this rebinds the module-level name ``tf`` to the v1 compatibility API)
    global tf
    if tf.__version__.startswith('2'):
        import tensorflow.compat.v1 as tf
        tf.disable_eager_execution()

    # NOTE: TF 1.x frozen pb graph need to specify input/output tensor name
    # so we hardcode the input/output tensor names here to get them from model
    output_names_by_anchor_count = {
        6: ['graph/predict_conv_1/BiasAdd:0',
            'graph/predict_conv_2/BiasAdd:0'],
        9: ['graph/predict_conv_1/BiasAdd:0',
            'graph/predict_conv_2/BiasAdd:0',
            'graph/predict_conv_3/BiasAdd:0'],
        # YOLOv2 use 5 anchors and have only 1 prediction
        5: ['graph/predict_conv/BiasAdd:0'],
    }
    if len(anchors) not in output_names_by_anchor_count:
        raise ValueError('invalid anchor number')
    output_tensor_names = output_names_by_anchor_count[len(anchors)]

    # assume only 1 input tensor for image
    input_tensor_name = 'graph/image_input:0'

    # load frozen pb graph
    def load_pb_graph(model_path):
        # We parse the graph_def file
        with tf.gfile.GFile(model_path, "rb") as f:
            graph_def = tf.GraphDef()
            graph_def.ParseFromString(f.read())

        # We load the graph_def into a fresh graph; the "graph" name prefix is
        # what the hard-coded tensor names above rely on
        with tf.Graph().as_default() as graph:
            tf.import_graph_def(
                graph_def,
                input_map=None,
                return_elements=None,
                name="graph",
                op_dict=None,
                producer_op_list=None
            )
        return graph

    graph = load_pb_graph(model_path)

    # To inspect the graph, iterate graph.get_operations(): op.name is the
    # operation name and op.values() the tensors it produces. A tensor name is
    # the op name plus an output index, e.g. prefix/Placeholder/inputs_placeholder:0
    image_input = graph.get_tensor_by_name(input_tensor_name)
    output_tensors = [graph.get_tensor_by_name(output_tensor_name)
                      for output_tensor_name in output_tensor_names]

    # the input tensor shape is unpacked as (batch, height, width, channel)
    _, height, width, _ = image_input.shape
    model_image_size = (int(height), int(width))

    # prepare input image
    img = Image.open(image_file)
    image = np.array(img, dtype='uint8')
    image_data = preprocess_image(img, model_image_size)
    # origin image shape, in (height, width) format
    image_shape = tuple(reversed(img.size))

    # One session serves the warm-up and every timed pass, so session
    # construction is not counted in the reported inference time.
    feed_dict = {image_input: image_data}
    with tf.Session(graph=graph) as sess:
        # predict once first to bypass the model building time
        prediction = sess.run(output_tensors, feed_dict=feed_dict)

        start = time.time()
        for _ in range(loop_count):
            prediction = sess.run(output_tensors, feed_dict=feed_dict)
        end = time.time()

    # an average over zero passes is undefined, so only report when passes ran
    if loop_count > 0:
        print("Average Inference time: {:.8f}ms".format((end - start) * 1000 / loop_count))

    # order predictions by the length of their first batch element (ascending)
    prediction.sort(key=lambda x: len(x[0]))
    handle_prediction(prediction, image_file, image, image_shape, anchors,
                      class_names, model_image_size)
def detect_image(self, image):
    """Detect objects in an image file and split the detected classes into
    two left-to-right strings.

    Args:
        image: anything accepted by ``Image.open`` (e.g. a file path).

    Returns:
        A 3-tuple ``(result, r_number, r_screen)``:

        * with detections: ``result`` is a PIL image with the boxes drawn,
          ``r_number`` / ``r_screen`` are the concatenated ``str()`` of the
          class values of the boxes whose ymin is >= 195 / < 195, ordered by
          the first box coordinate.
        * without detections: ``(image_data, None, None)`` where
          ``image_data`` is the preprocessed model input.

    Raises:
        AssertionError: if ``self.model_image_size`` is set and either side
            is not a multiple of 32.
    """
    if self.model_image_size != (None, None):
        assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required'
        assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required'

    image = Image.open(image)
    image_data = preprocess_image(image, self.model_image_size)
    # PIL reports the size as (width, height)
    image_shape = image.size

    self.interpreter.set_tensor(self.input_details[0]['index'], image_data)

    start = time.time()
    self.interpreter.invoke()

    # outputs are passed to the postprocess in the fixed order 2, 0, 1
    # (presumably the order yolo3_postprocess_np expects for this model -- confirm)
    output_data = [
        self.interpreter.get_tensor(self.output_details[output_idx]['index'])
        for output_idx in (2, 0, 1)
    ]

    out_boxes, out_classes, out_scores = yolo3_postprocess_np(
        output_data, image_shape, self.anchors, len(self.class_names),
        self.model_image_size, max_boxes=20, confidence=0.35)

    # the postprocess result is treated as possibly None below, so do not call
    # len() on it unguarded
    num_boxes = 0 if out_boxes is None else len(out_boxes)
    self.log.info('Found {} boxes for {}'.format(num_boxes, 'img'))
    end = time.time()
    self.log.info("Inference time: {:.8f}s".format(end - start))

    if out_classes is None or len(out_classes) == 0:
        self.log.warning("No boxes found!")
        # NOTE(review): this returns the preprocessed array, not a PIL image
        # like the success path; kept as-is for backward compatibility
        return image_data, None, None

    # sort all detections by the first box coordinate
    # (presumably xmin, i.e. left-to-right reading order -- confirm)
    order = out_boxes[:, 0].argsort()
    out_boxes = out_boxes[order]
    out_classes = out_classes[order]
    out_scores = out_scores[order]

    # boxes starting above this y value are assigned to r_screen, the rest to
    # r_number (units are whatever yolo3_postprocess_np returns)
    screen_ymin_limit = 195

    number_classes = []
    screen_classes = []
    for box, box_class in zip(out_boxes, out_classes):
        _, ymin, _, _ = box
        if ymin < screen_ymin_limit:
            screen_classes.append(box_class)
        else:
            number_classes.append(box_class)

    r_number = "".join(str(n) for n in number_classes)
    r_screen = "".join(str(n) for n in screen_classes)

    # draw result on input image
    image_array = np.array(image, dtype='uint8')
    image_array = draw_boxes(image_array, out_boxes, out_classes, out_scores,
                             self.class_names, self.colors)
    return Image.fromarray(image_array), r_number, r_screen