def testFunctionalSequentialModel(self):
    """Test a Functional tf.keras model containing a Sequential model."""
    with session.Session().as_default():
        model = keras.models.Sequential()
        model.add(keras.layers.Dense(2, input_shape=(3,)))
        model.add(keras.layers.RepeatVector(3))
        model.add(keras.layers.TimeDistributed(keras.layers.Dense(3)))
        model = keras.models.Model(model.input, model.output)

        model.compile(
            loss=keras.losses.MSE,
            optimizer=keras.optimizers.RMSprop(),
            metrics=[keras.metrics.categorical_accuracy],
            sample_weight_mode='temporal')
        x = np.random.random((1, 3))
        y = np.random.random((1, 3, 3))
        model.train_on_batch(x, y)
        model.predict(x)

        model.predict(x)
        fd, keras_file = tempfile.mkstemp('.h5')
        try:
            keras.models.save_model(model, keras_file)
        finally:
            os.close(fd)

        # Convert to TFLite model.
        converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
        tflite_model = converter.convert()
        self.assertTrue(tflite_model)

        # Check tensor details of converted model.
        interpreter = Interpreter(model_content=tflite_model)
        interpreter.allocate_tensors()

        input_details = interpreter.get_input_details()
        self.assertEqual(1, len(input_details))
        self.assertEqual('dense_input', input_details[0]['name'])
        self.assertEqual(np.float32, input_details[0]['dtype'])
        self.assertTrue(([1, 3] == input_details[0]['shape']).all())
        self.assertEqual((0., 0.), input_details[0]['quantization'])

        output_details = interpreter.get_output_details()
        self.assertEqual(1, len(output_details))
        self.assertEqual('time_distributed/Reshape_1', output_details[0]['name'])
        self.assertEqual(np.float32, output_details[0]['dtype'])
        self.assertTrue(([1, 3, 3] == output_details[0]['shape']).all())
        self.assertEqual((0., 0.), output_details[0]['quantization'])

        # Check inference of converted model.
        input_data = np.array([[1, 2, 3]], dtype=np.float32)
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()
        tflite_result = interpreter.get_tensor(output_details[0]['index'])

        keras_model = keras.models.load_model(keras_file)
        keras_result = keras_model.predict(input_data)

        np.testing.assert_almost_equal(tflite_result, keras_result, 5)
        os.remove(keras_file)
def _evaluateTFLiteModel(self, tflite_model, input_data):
    """Evaluates the model on the `input_data`."""
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    for input_tensor, tensor_data in zip(input_details, input_data):
        interpreter.set_tensor(input_tensor['index'], tensor_data.numpy())
    interpreter.invoke()
    return interpreter.get_tensor(output_details[0]['index'])
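# A minimal usage sketch for the helper above, assuming TF 2.x eager mode;
# the inputs must be eager tf.Tensors because the helper calls `.numpy()` on
# each element. The doubling function and shapes here are hypothetical.
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([1, 3], tf.float32)])
def double(x):
    return 2. * x

converter = tf.lite.TFLiteConverter.from_concrete_functions(
    [double.get_concrete_function()])
tflite_model = converter.convert()

input_data = [tf.constant(1., shape=[1, 3])]
# Inside the test case this would be:
#     actual = self._evaluateTFLiteModel(tflite_model, input_data)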
def testSequentialModel(self):
    """Test a Sequential tf.keras model with default inputs."""
    keras_file = self._getSequentialModel()

    converter = lite.TFLiteConverter.from_keras_model_file(keras_file)
    tflite_model = converter.convert()
    self.assertTrue(tflite_model)

    # Check tensor details of converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(1, len(input_details))
    self.assertEqual('dense_input', input_details[0]['name'])
    self.assertEqual(np.float32, input_details[0]['dtype'])
    self.assertTrue(([1, 3] == input_details[0]['shape']).all())
    self.assertEqual((0., 0.), input_details[0]['quantization'])

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    self.assertEqual('time_distributed/Reshape_1', output_details[0]['name'])
    self.assertEqual(np.float32, output_details[0]['dtype'])
    self.assertTrue(([1, 3, 3] == output_details[0]['shape']).all())
    self.assertEqual((0., 0.), output_details[0]['quantization'])

    # Check inference of converted model.
    input_data = np.array([[1, 2, 3]], dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()
    tflite_result = interpreter.get_tensor(output_details[0]['index'])

    keras_model = keras.models.load_model(keras_file)
    keras_result = keras_model.predict(input_data)

    np.testing.assert_almost_equal(tflite_result, keras_result, 5)
    os.remove(keras_file)
def testScalarValid(self):
    # Construct a graph using a scalar (empty shape) input.
    in_tensor = array_ops.placeholder(dtype=dtypes.float32, shape=[])
    out_tensor = in_tensor + in_tensor
    sess = session.Session()

    # Test conversion with the scalar input shape.
    converter = lite.TFLiteConverter.from_session(sess, [in_tensor],
                                                  [out_tensor])
    tflite_model = mlir_convert_and_check_for_unsupported(self, converter)
    if tflite_model is None:
        return

    # Check values from converted model.
    interpreter = Interpreter(model_content=tflite_model)
    interpreter.allocate_tensors()

    input_details = interpreter.get_input_details()
    self.assertEqual(1, len(input_details))
    self.assertEqual('Placeholder', input_details[0]['name'])
    self.assertEqual(np.float32, input_details[0]['dtype'])
    self.assertEqual(len(input_details[0]['shape']), 0)

    output_details = interpreter.get_output_details()
    self.assertEqual(1, len(output_details))
    self.assertEqual('add', output_details[0]['name'])
    self.assertEqual(np.float32, output_details[0]['dtype'])
    self.assertEqual(len(output_details[0]['shape']), 0)

    # Validate inference using the scalar inputs/outputs.
    test_input = np.array(4.0, dtype=np.float32)
    expected_output = np.array(8.0, dtype=np.float32)
    interpreter.set_tensor(input_details[0]['index'], test_input)
    interpreter.invoke()

    output_data = interpreter.get_tensor(output_details[0]['index'])
    self.assertTrue((expected_output == output_data).all())
ret = video.set(4, imH)

while True:
    # Acquire frame and resize to expected shape [1xHxWx3]
    ret, frame = video.read()
    frame_resized = cv2.resize(frame, (width, height))
    input_data = np.expand_dims(frame_resized, axis=0)

    # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
    if floating_model:
        input_data = (np.float32(input_data) - input_mean) / input_std

    # Perform the actual detection by running the model with the image as input
    interpreter.set_tensor(input_details[0]['index'], input_data)
    interpreter.invoke()

    # Retrieve detection results
    boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
    classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
    scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
    # num = interpreter.get_tensor(output_details[3]['index'])[0]    # Total number of detected objects (inaccurate and not needed)

    # Loop over all detections and draw detection box if confidence is above minimum threshold
    for i in range(len(scores)):
        if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
            # Get bounding box coordinates and draw box.
            # Interpreter can return coordinates that are outside of image
            # dimensions; force them within the image using max() and min()
            ymin = int(max(1, (boxes[i][0] * imH)))
            xmin = int(max(1, (boxes[i][1] * imW)))
            ymax = int(min(imH, (boxes[i][2] * imH)))
            xmax = int(min(imW, (boxes[i][3] * imW)))
class Gesture:
    def __init__(self, width=None, height=None, pnet=None):
        pygame.init()
        self.clock = pygame.time.Clock()
        if width and height:
            self.WIDTH = width
            self.HEIGHT = height
            self.window = pygame.display.set_mode((self.WIDTH, self.HEIGHT))
        else:
            info_object = pygame.display.Info()
            self.WIDTH = info_object.current_w
            self.HEIGHT = info_object.current_h
            flags = pygame.FULLSCREEN | pygame.DOUBLEBUF | pygame.HWSURFACE
            self.window = pygame.display.set_mode(flags=flags)
        pygame.display.set_caption('Posenet Hands')
        if pnet:
            self.pnet = pnet
        else:
            self.pnet = posenet_interface.posenetInterface(257)
        self.myfont = pygame.font.SysFont("Comic Sans MS", 40)
        self.image = None
        self.interpreter = Interpreter(model_path='hand_landmark.tflite')
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    def run(self):
        while True:
            self.draw()
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    pygame.quit()
                    raise SystemExit
                elif event.type == pygame.KEYDOWN:
                    if event.key == pygame.K_ESCAPE:
                        pygame.quit()
                        return
            pygame.display.update()
            self.clock.tick(60)

    def blit_cam_frame(self, frame, screen):
        # frame = np.fliplr(frame)
        frame = np.rot90(frame)
        frame = pygame.surfarray.make_surface(frame)
        screen.blit(pygame.transform.scale(frame, (self.WIDTH, self.HEIGHT)), (0, 0))

    def scale_keypoints(self, original_shape, keypoints):
        scale = np.array([self.HEIGHT / original_shape[0],
                          self.WIDTH / original_shape[1]])
        return keypoints * scale

    def get_adjacent_keypoints(self, keypoints):
        results = []
        for left, right in CONNECTED_POINTS:
            results.append(
                np.array([keypoints[left].pt, keypoints[right].pt]).astype(np.int32))
        return results

    def hand_landmark(self, input_image, right_hand):
        target_width = target_height = 256
        input_image = input_image[0]
        scale_x = self.image.shape[1] / 256
        scale_y = self.image.shape[0] / 256
        shift_x = 0
        shift_y = 0
        # If hand keypoint is available
        if right_hand[0] != 0.0 or right_hand[1] != 0.0:
            right_hand = np.int0(right_hand)
            # Take square region near hand keypoint, which is located at the wrist
            square_size = 150
            square_size_half = square_size // 2
            shift_x = max(right_hand[1] - square_size_half, 0)
            shift_y = max(right_hand[0] - square_size, 0)
            input_image = self.image[shift_y:right_hand[0],
                                     shift_x:right_hand[1] + square_size_half]
            scale_x = input_image.shape[1] / 256
            scale_y = input_image.shape[0] / 256
            # TODO Find hand and rotate so hand is always upright
            # input_image = find_hand(new_img)
        if input_image is None:
            return
        # cv2.imshow('hand', input_image)
        input_image = input_image * (2.0 / 255.0) - 1.0
        input_img = cv2.resize(input_image, (target_width, target_height),
                               interpolation=cv2.INTER_LINEAR).astype(np.float32)
        input_img = np.expand_dims(input_img, 0)
        self.interpreter.set_tensor(self.input_details[0]['index'], input_img)
        start = time.time()
        self.interpreter.invoke()
        print('infer time:', time.time() - start)
        hand_points = self.interpreter.get_tensor(self.output_details[0]['index'])[0]
        hand_confidence = self.interpreter.get_tensor(self.output_details[1]['index'])
        if hand_confidence[0] < 0.1:
            return
        norm_hand_points = []
        for i in range(0, len(hand_points), 2):
            x = (hand_points[i] * scale_x) + shift_x
            y = (hand_points[i + 1] * scale_y) + shift_y
            # cv2.putText(self.image, str(idx), (int(x), int(y)),
            #             cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 0, 0))
            norm_hand_points.append(cv2.KeyPoint(x, y, 10))
        return norm_hand_points

    def draw(self):
        self.image, keypoints, input_image = self.pnet.get_image(return_input_img=True)
        norm_hand_points = self.hand_landmark(input_image, keypoints[0][10])
        if norm_hand_points is not None:
            self.image = cv2.drawKeypoints(
                self.image, norm_hand_points, outImage=np.array([]),
                color=(0, 255, 255),
                flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
            adjacent_keypoints = self.get_adjacent_keypoints(norm_hand_points)
            self.image = cv2.polylines(self.image, adjacent_keypoints,
                                       isClosed=False, color=(0, 255, 255),
                                       thickness=2)
        self.blit_cam_frame(self.image, self.window)
        frames = self.myfont.render(str(int(self.clock.get_fps())) + " FPS",
                                    True, pygame.Color('green'))
        self.window.blit(frames, (self.WIDTH - 100, 20))
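# Hypothetical entry point for the Gesture demo above; it assumes
# hand_landmark.tflite and the posenet_interface module are available.
if __name__ == '__main__':
    Gesture(width=1280, height=720).run()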
with open("../example.png", "rb") as image_file:
    encoded_string = b64encode(image_file.read())

image_binary = pil_open(BytesIO(b64decode(encoded_string)))
image_binary = image_binary.resize([96, 96])
full_image_np = asarray(image_binary, dtype='float32') / 255.

BATCH_SIZE = 1
arr = []
for i in range(BATCH_SIZE):
    arr.append(full_image_np)
full_image_np = asarray(arr, dtype='float32')

start_time = int(round(time.time() * 1000))
for i in range(16384 // BATCH_SIZE):
    INTERPRETER.set_tensor(INPUT_DETAILS[0]['index'], full_image_np)
    INTERPRETER.invoke()
    output_data = []
    for od in OUTPUT_DETAILS:
        for tensor in INTERPRETER.get_tensor(od['index']).tolist():
            output_data.append(tensor)
print(f'{(int(round(time.time() * 1000)) - start_time) / 1000.0} seconds')
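# The uppercase globals used by the benchmark above are not defined in this
# excerpt; a minimal sketch of the setup it appears to assume. The model file
# name is hypothetical, and the input is taken to be a 1x96x96x3 float tensor
# to match the preprocessing above.
from tflite_runtime.interpreter import Interpreter  # or tf.lite.Interpreter

INTERPRETER = Interpreter(model_path='model.tflite')  # hypothetical path
INTERPRETER.allocate_tensors()
INPUT_DETAILS = INTERPRETER.get_input_details()
OUTPUT_DETAILS = INTERPRETER.get_output_details()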
def TFlite(frameCount):
    # grab global references to the output frame and lock variables
    global outputFrame, lock, _reset

    ct = CentroidTracker()

    # Function select
    cam_select, cam_rtsp, min_conf_threshold, notify, detect_list, resW, resH = select_option()

    # warmup
    if cam_select == 0:
        vs = cv2.VideoCapture(0)
    else:
        vs = cv2.VideoCapture(cam_rtsp)
    imW, imH = int(resW), int(resH)
    vs.set(3, imW)
    vs.set(4, imH)
    time.sleep(2.0)

    ##################
    args = argsparser()
    MODEL_NAME = args.modeldir
    GRAPH_NAME = args.graph
    LABELMAP_NAME = args.labelsTF
    #######################
    # min_conf_threshold = args.threshold
    #######################
    CWD_PATH = os.getcwd()

    # Path to .tflite file, which contains the model that is used for object detection
    PATH_TO_CKPT = os.path.join(CWD_PATH, MODEL_NAME, GRAPH_NAME)

    # Path to label map file
    PATH_TO_LABELS = os.path.join(CWD_PATH, MODEL_NAME, LABELMAP_NAME)

    # Load the label map
    with open(PATH_TO_LABELS, 'r') as f:
        labels = [line.strip() for line in f.readlines()]
    if labels[0] == '???':
        del labels[0]

    # Load the TensorFlow Lite model and get details
    interpreter = Interpreter(model_path=PATH_TO_CKPT)
    interpreter.allocate_tensors()
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()
    height = input_details[0]['shape'][1]
    width = input_details[0]['shape'][2]
    floating_model = (input_details[0]['dtype'] == np.float32)
    input_mean = 127.5
    input_std = 127.5

    # Initialize frame rate calculation
    frame_rate_calc = 1
    freq = cv2.getTickFrequency()
    font = cv2.FONT_HERSHEY_SIMPLEX

    ##########################################
    t1_image = time.time()
    t2_image = time.time()
    objects_first = 0
    ##########################################
    log.info("Detect On")

    while True:
        t1 = cv2.getTickCount()
        ret, frame_q = vs.read()
        frame_resized = cv2.resize(frame_q, (width, height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if floating_model:
            input_data = (np.float32(input_data) - input_mean) / input_std

        # Perform the actual detection by running the model with the image as input
        interpreter.set_tensor(input_details[0]['index'], input_data)
        interpreter.invoke()

        # Retrieve detection results
        boxes = interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
        scores = interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects
        # num = interpreter.get_tensor(output_details[3]['index'])[0]    # Total number of detected objects (inaccurate and not needed)

        ckname = ''
        object_name = None
        rects = []

        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                # Draw label
                object_name = labels[int(classes[i])]
                if object_name in detect_list:
                    ckname = ckname + object_name + ','
                    ymin = int(max(1, (boxes[i][0] * imH)))
                    xmin = int(max(1, (boxes[i][1] * imW)))
                    ymax = int(min(imH, (boxes[i][2] * imH)))
                    xmax = int(min(imW, (boxes[i][3] * imW)))
                    cv2.rectangle(frame_q, (xmin, ymin), (xmax, ymax), (10, 255, 0), 2)
                    label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                    # print(label)
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                    label_ymin = max(ymin, labelSize[1] + 10)
                    color = color_name(object_name)
                    cv2.rectangle(
                        frame_q, (xmin, label_ymin - labelSize[1] - 10),
                        (xmin + labelSize[0] + 45, label_ymin + baseLine - 10),
                        color, cv2.FILLED)  # Draw colored box to put label text in
                    cv2.putText(frame_q, label, (xmin + 45, label_ymin - 7),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)
                    x = np.array([xmin, ymin, xmax, ymax])
                    # print(x)
                    rects.append(x.astype("int"))

        cv2.putText(frame_q, "FPS: {0:.2f}".format(frame_rate_calc), (30, 50),
                    font, 1, (255, 255, 0), 2, cv2.LINE_AA)

        # Function display_show
        t1_image = time.time()
        frame_q, objects_first, t2_image = display_show(
            rects, t1_image, t2_image, ckname, frame_q, objects_first, notify, ct)

        with lock:
            outputFrame = frame_q.copy()
        if _reset == 1:
            sys.exit()

        t2 = cv2.getTickCount()
        time1 = (t2 - t1) / freq
        frame_rate_calc = 1 / time1

    vs.release()
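# Hypothetical consumer of the shared `outputFrame`/`lock` pair written by
# TFlite() above, in the common Flask MJPEG-streaming style; the function name
# and framing are assumptions, not part of the original code.
def generate():
    global outputFrame, lock
    while True:
        with lock:
            if outputFrame is None:
                continue
            flag, encoded = cv2.imencode('.jpg', outputFrame)
        if not flag:
            continue
        yield (b'--frame\r\n'
               b'Content-Type: image/jpeg\r\n\r\n' + bytearray(encoded) + b'\r\n')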
class PeopleDetector(Thread, FrameHandler):
    """
    An interface describing an object that can handle the result of the
    people detection process
    """
    __pkg: object
    __labels: list
    __interpreter: Interpreter
    __height: float
    __width: float
    __alive: bool
    __head: HeadController
    __detection_handler: Optional[PeopleDetectionHandler]
    __tracker: CentroidTracker
    __led_controller: LedController
    __detection_state: DetectionState  # Keep a reference to the state

    def __init__(self):
        super().__init__()
        self.__alive = True
        self.__pkg = importlib.util.find_spec("tflite_runtime")
        with open(PATH_TO_LABELS, "r") as f:
            self.__labels = [line.strip() for line in f.readlines()]
        if self.__labels[0] == '???':
            del self.__labels[0]
        # Then load the TensorFlow Lite model
        self.__interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.__interpreter.allocate_tensors()
        self.__head = HeadController()
        self.__head.start()
        self.__tracker = CentroidTracker()
        self.__detection_handler = None
        self.__led_controller = LedController()
        self.__detection_state = PersonNotPresentState(self.__detection_handler)

    def run(self) -> None:
        super().run()
        input_details = self.__interpreter.get_input_details()
        output_details = self.__interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]
        floating_model = (input_details[0]['dtype'] == np.float32)
        input_mean = 127.5
        input_std = 127.5
        counter = {}
        gone_left = False

        while self.__alive:
            ret, image = self.get_next_frame()
            if ret and image is not None:
                # image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                imH, imW, _ = image.shape
                image_resized = cv2.resize(image, (width, height))
                input_data = np.expand_dims(image_resized, axis=0)
                if floating_model:
                    input_data = (np.float32(input_data) - input_mean) / input_std

                self.__interpreter.set_tensor(input_details[0]['index'], input_data)
                self.__interpreter.invoke()

                boxes = self.__interpreter.get_tensor(output_details[0]['index'])[0]
                classes = self.__interpreter.get_tensor(output_details[1]['index'])[0]
                scores = self.__interpreter.get_tensor(output_details[2]['index'])[0]

                frame = image
                frame_w = frame.shape[1]
                frame_h = frame.shape[0]
                r = []
                track_id = 0
                for i in range(len(scores)):
                    object_name = self.__labels[int(classes[i])]
                    if (object_name == "person" and scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                        ymin = int(max(1, (boxes[i][0] * imH)))
                        xmin = int(max(1, (boxes[i][1] * imW)))
                        ymax = int(min(imH, (boxes[i][2] * imH)))
                        xmax = int(min(imW, (boxes[i][3] * imW)))
                        rects = [xmin, ymin, xmax, ymax]
                        val = np.array(rects)
                        r.append(val.astype("int"))

                # --------------------------------- Head random rotation; rotate to 90 if any people detected ---------------------------------#
                left = randint(0, 45)
                right = randint(135, 180)
                if len(r) > 0:
                    self.__head.rotate(90)
                else:
                    self.__head.rotate(right if gone_left else left)
                    # Here we tell the detection state that no person is present
                    self.__detection_state = self.__detection_state.on_detection_result(False)
                    counter = {}
                    # And then toggle the rotation
                    gone_left = not gone_left

                # --------------------------------- Choose an ID; check if present for at least 10 frames ---------------------------------#
                objects = self.__tracker.update(r)
                flag = 0
                next_id = 0
                i = 0
                new_coord = []
                next_coord = []
                coord = []
                for (objectID, centroid) in objects.items():
                    if objectID == track_id:
                        flag = 1
                        new_coord = centroid
                    if i == 0:
                        next_id = objectID
                        next_coord = centroid
                    i += 1
                    if objectID in counter:
                        counter[objectID] += 1
                    else:
                        counter[objectID] = 0

                if len(objects.items()) > 0:
                    if flag == 0:
                        track_id = next_id
                        coord = next_coord
                    else:
                        coord = new_coord

                # --------------------------------- Control LED till 10 frames ---------------------------------#
                if len(coord) > 0:
                    # If a person exists, compute the index of the LED to turn on
                    x_pos = coord[0]
                    led_index = math.floor((x_pos * 3.0) / frame_w)
                    animation: LedAnimation = LedAnimation.ANIM_EYE_0
                    if led_index == 0:
                        animation = LedAnimation.ANIM_EYE_2
                    if led_index == 1:
                        animation = LedAnimation.ANIM_EYE_1
                    if led_index == 2:
                        animation = LedAnimation.ANIM_EYE_0
                    # Ask the LED manager to play the animation
                    # self.__led_controller.play_animation(animation)

                    # --------------------------------- Set the handler as true if a person is present for more than 20 frames ---------------------------------#
                    if counter[track_id] > 20:
                        self.__detection_state = self.__detection_state.on_detection_result(True)

                cv2.imshow("Detector", frame)
                if cv2.waitKey(1) == ord('q'):
                    break

    def stop(self) -> None:
        self.__alive = False

    def set_detection_handler(self, detection_handler: PeopleDetectionHandler):
        self.__detection_handler = detection_handler
        self.__detection_state.set_detection_handler(detection_handler)
class CNNClassifier(object):
    def __init__(self, model_file, label_file):
        logger.info(model_file)
        self._interpreter = Interpreter(model_path=model_file)
        self._interpreter.set_num_threads(4)
        self._interpreter.allocate_tensors()
        self._labels = self.load_labels(label_file)
        self._input_details = self._interpreter.get_input_details()
        self._output_details = self._interpreter.get_output_details()
        self._input_height = self._input_details[0]['shape'][1]
        self._input_width = self._input_details[0]['shape'][2]
        self._floating_model = (self._input_details[0]['dtype'] == np.float32)

    def close(self):
        pass

    def read_tensor_from_image_file(self, file_name):
        image = cv2.imread(file_name)
        return self.read_tensor_from_image_mat(image)

    def read_tensor_from_image_mat(self, image_mat):
        frame_rgb = cv2.cvtColor(image_mat, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb,
                                   (self._input_width, self._input_height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Normalize pixel values if using a floating model (i.e. if model is non-quantized)
        if self._floating_model:
            input_mean = 127.5
            input_std = 127.5
            input_data = (np.float32(input_data) - input_mean) / input_std
        return input_data

    def load_labels(self, label_file):
        with open(label_file, 'r') as f:
            labels = [line.strip() for line in f.readlines()]
        return labels

    def classify_image(self, image_file_or_mat, top_results=3):
        if isinstance(image_file_or_mat, str):
            input_image = self.read_tensor_from_image_file(
                file_name=image_file_or_mat)
        else:
            input_image = self.read_tensor_from_image_mat(image_file_or_mat)

        self._interpreter.set_tensor(self._input_details[0]['index'], input_image)
        self._interpreter.invoke()
        scores = self._interpreter.get_tensor(self._output_details[0]['index'])[0]
        # print("scores: " + str(scores))

        confidence = 0.4
        base = 1
        # Normalize to int8 for quantized models
        if len(scores) > 0 and (scores[0] == int(scores[0])):
            confidence = 128
            base = 256
        pairs = []
        for i in range(0, len(scores)):
            if scores[i] > confidence:
                object_name = self._labels[i]
                pairs.append((object_name, int(100 * scores[i] / base)))
        pairs = sorted(pairs, key=lambda x: x[1], reverse=True)[:top_results]
        return pairs

    def detect_objects(self, image_file_or_mat, top_results=3):
        if isinstance(image_file_or_mat, str):
            input_image = self.read_tensor_from_image_file(
                file_name=image_file_or_mat)
        else:
            input_image = self.read_tensor_from_image_mat(image_file_or_mat)

        self._interpreter.set_tensor(self._input_details[0]['index'], input_image)
        self._interpreter.invoke()

        # Retrieve detection results
        boxes = self._interpreter.get_tensor(self._output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
        classes = self._interpreter.get_tensor(self._output_details[1]['index'])[0]  # Class index of detected objects
        scores = self._interpreter.get_tensor(self._output_details[2]['index'])[0]   # Confidence of detected objects

        # Loop over all detections and keep the detection if confidence is above minimum threshold
        min_conf_threshold = 0.1
        imH = 100
        imW = 100
        pairs = []
        for i in range(len(scores)):
            if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                # Get bounding box coordinates. Interpreter can return
                # coordinates that are outside of image dimensions; force
                # them within the image using max() and min()
                ymin = int(max(1, (boxes[i][0] * imH)))
                xmin = int(max(1, (boxes[i][1] * imW)))
                ymax = int(min(imH, (boxes[i][2] * imH)))
                xmax = int(min(imW, (boxes[i][3] * imW)))
                object_name = self._labels[int(classes[i]) + 1]
                pairs.append((object_name, int(100 * scores[i]),
                              (xmin, ymin, xmax, ymax)))

        pairs = sorted(pairs, key=lambda x: x[1], reverse=True)[:top_results]
        logger.info(str(pairs))
        return pairs
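# Note the `+ 1` offset when indexing self._labels in detect_objects above:
# SSD-style TFLite detectors emit zero-based class indices, while some label
# files reserve the first entry for a background placeholder, so whether an
# offset is needed depends on the label file. A minimal guard sketch (not part
# of the original class) that avoids an IndexError on out-of-range indices:
def lookup_label(labels, class_index, offset=1):
    # offset=1 matches the label files used above; pass 0 for files without
    # a background entry.
    idx = int(class_index) + offset
    return labels[idx] if 0 <= idx < len(labels) else 'unknown'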
class PersonDetector(object):
    def __init__(self):
        self.car_boxes = []
        os.chdir(cwd)

        # Tensorflow localization/detection model:
        # single-shot detection with MobileNet architecture trained on the COCO dataset
        # detect_model_name = 'ssd_mobilenet_v1_coco_2017_11_17'
        # PATH_TO_CKPT = detect_model_name + '/frozen_inference_graph.pb'
        # detect_model_name = 'mobilenet_ssd_v2_coco_quant_postprocess_edgetpu'
        # PATH_TO_CKPT = detect_model_name + '/tflite_graph.pb'

        # # setup tensorflow graph
        # self.detection_graph = tf.Graph()
        # # configuration for possible GPU use
        # config = tf.ConfigProto()
        # config.gpu_options.allow_growth = True
        # # load frozen tensorflow detection model and initialize the tensorflow graph
        # with self.detection_graph.as_default():
        #     od_graph_def = tf.GraphDef()
        #     with tf.gfile.GFile(PATH_TO_CKPT, 'rb') as fid:
        #         serialized_graph = fid.read()
        #         od_graph_def.ParseFromString(serialized_graph)
        #         tf.import_graph_def(od_graph_def, name='')
        #     self.sess = tf.Session(graph=self.detection_graph, config=config)
        #     self.image_tensor = self.detection_graph.get_tensor_by_name('image_tensor:0')
        #     # Each box represents a part of the image where a particular object was detected.
        #     self.boxes = self.detection_graph.get_tensor_by_name('detection_boxes:0')
        #     # Each score represents the level of confidence for each of the objects.
        #     # The score is shown on the result image, together with the class label.
        #     self.scores = self.detection_graph.get_tensor_by_name('detection_scores:0')
        #     self.classes = self.detection_graph.get_tensor_by_name('detection_classes:0')
        #     self.num_detections = self.detection_graph.get_tensor_by_name('num_detections:0')

        ## tflite
        detect_model_name = 'ssd_mobilenet_v2_coco_quantized_tflite'
        PATH_TO_CKPT = detect_model_name + '/detect.tflite'

        # Define lite graph and load the TensorFlow Lite model into memory
        self.interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.interpreter.allocate_tensors()
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()

    # Helper function to convert image into numpy array
    def load_image_into_numpy_array(self, image):
        (im_width, im_height) = image.size
        return np.array(image.getdata()).reshape(
            (im_height, im_width, 3)).astype(np.uint8)

    # Helper function to convert normalized box coordinates to pixels
    def box_normal_to_pixel(self, box, dim):
        height, width = dim[0], dim[1]
        box_pixel = [
            int(box[0] * height),
            int(box[1] * width),
            int(box[2] * height),
            int(box[3] * width)
        ]
        return np.array(box_pixel)

    def get_localization(self, image, visual=False, threshold=0.3):
        category_index = {
            1: {'id': 1, 'name': u'person'},
            2: {'id': 2, 'name': u'bicycle'},
            3: {'id': 3, 'name': u'car'},
            4: {'id': 4, 'name': u'motorcycle'},
            5: {'id': 5, 'name': u'airplane'},
            6: {'id': 6, 'name': u'bus'},
            7: {'id': 7, 'name': u'train'},
            8: {'id': 8, 'name': u'truck'},
            9: {'id': 9, 'name': u'boat'},
            10: {'id': 10, 'name': u'traffic light'},
            11: {'id': 11, 'name': u'fire hydrant'},
            13: {'id': 13, 'name': u'stop sign'},
            14: {'id': 14, 'name': u'parking meter'},
        }

        # Resize and normalize image for network input
        frame = cv2.resize(image, (300, 300))
        frame = np.expand_dims(frame, axis=0)
        frame = (2.0 / 255.0) * frame - 1.0
        frame = frame.astype('float32')

        # run model
        self.interpreter.set_tensor(self.input_details[0]['index'], frame)
        self.interpreter.invoke()

        # get results
        boxes = self.interpreter.get_tensor(self.output_details[0]['index'])
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])
        num = self.interpreter.get_tensor(self.output_details[3]['index'])

        boxes = np.squeeze(boxes)
        classes = np.squeeze(classes)
        scores = np.squeeze(scores)
        cls = classes.tolist()

        # Person has ID 1 in the COCO label map, but TFLite detection outputs
        # are zero-based, so person comes out as class 0.
        idx_vec = [
            i for i, v in enumerate(cls)
            if (v == 0) and (scores[i] > threshold)
        ]
        # Keep the score values (not the indices) so they can be compared as
        # confidences during non-maximum suppression below.
        filtered_scores = [
            v for i, v in enumerate(scores.tolist()) if i in idx_vec
        ]

        if len(idx_vec) == 0:
            print('no detection!')
        else:
            tmp_car_boxes = []
            for idx in idx_vec:
                dim = image.shape[0:2]
                box = self.box_normal_to_pixel(boxes[idx], dim)
                box_h = box[2] - box[0]  # box is [ymin, xmin, ymax, xmax]
                box_w = box[3] - box[1]
                ratio = box_h / (box_w + 0.01)
                # if ((ratio < 0.8) and (box_h > 20) and (box_w > 20)):
                tmp_car_boxes.append(box)
                logging.info('{} , confidence: {} ratio: {}'.format(
                    box, scores[idx], ratio))
                '''
                else:
                    print('wrong ratio or wrong size, ', box, ', confidence: ', scores[idx], 'ratio:', ratio)
                '''

            # Non-maximum suppression: cluster overlapping boxes first, then in
            # each cluster pick the box with the highest confidence. To cluster
            # overlapping boxes, loop through the list of boxes and create a
            # cluster if the pair is not found together yet.
            box_clusters = []
            indices_to_remove = []
            for i in range(len(tmp_car_boxes)):
                for j in range(len(tmp_car_boxes)):
                    if i == j:
                        continue
                    tmp_box_one = tmp_car_boxes[i]
                    tmp_box_two = tmp_car_boxes[j]
                    # tmp_box_one = get_iou_adapter(tmp_car_boxes[i])
                    # tmp_box_two = get_iou_adapter(tmp_car_boxes[j])
                    iou = get_iou(tmp_box_one, tmp_box_two)
                    # print(iou)
                    if iou > NON_MAXIMUM_SUPPRESSION_THRESHOLD:
                        tmp_box_one_confidence = filtered_scores[i]
                        tmp_box_two_confidence = filtered_scores[j]
                        if tmp_box_one_confidence > tmp_box_two_confidence:
                            indices_to_remove.append(j)
                        else:
                            indices_to_remove.append(i)

            indices_to_remove = set(indices_to_remove)  # remove duplicates
            filtered_boxes = [
                tmp_car_boxes[i] for i in range(len(tmp_car_boxes))
                if i not in indices_to_remove
            ]

            # both_found_in_box_clusters = False
            # one_found_in_box_cluster = False
            # for cluster in box_clusters:
            #     if tmp_box_one in cluster and tmp_box_two in cluster:
            #         both_found_in_box_clusters = True
            #         break
            #     elif tmp_box_one in cluster or tmp_box_two in cluster:
            #         one_found_in_box_cluster = True
            # if (not both_found_in_box_clusters):
            #     box_clusters.append(tmp_box_one, tmp_box_two)

            if len(filtered_boxes) != len(tmp_car_boxes):
                print('Non-maximum suppression removed {} detections'.format(
                    len(tmp_car_boxes) - len(filtered_boxes)))
                logging.info(
                    'Non-maximum suppression removed {} detections'.format(
                        len(tmp_car_boxes) - len(filtered_boxes)))

            self.car_boxes = filtered_boxes
        return self.car_boxes
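# `get_iou` and NON_MAXIMUM_SUPPRESSION_THRESHOLD are referenced above but not
# defined in this excerpt; a minimal IoU sketch for [ymin, xmin, ymax, xmax]
# pixel boxes, matching what box_normal_to_pixel() produces. The threshold
# value is hypothetical.
NON_MAXIMUM_SUPPRESSION_THRESHOLD = 0.5  # hypothetical value

def get_iou(box_a, box_b):
    # Intersection rectangle of the two boxes
    ymin = max(box_a[0], box_b[0])
    xmin = max(box_a[1], box_b[1])
    ymax = min(box_a[2], box_b[2])
    xmax = min(box_a[3], box_b[3])
    inter = max(0, ymax - ymin) * max(0, xmax - xmin)
    if inter == 0:
        return 0.0
    area_a = (box_a[2] - box_a[0]) * (box_a[3] - box_a[1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    return inter / float(area_a + area_b - inter)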
t1 = time()
ret, frame = cap.read()  # reading frames from cam

# Image optimisation
# h, w = frame.shape[:2]
height = input_details[0]['shape'][1]
width = input_details[0]['shape'][2]
frameo = cv2.resize(frame, (width, height))  # resize frame to the model's prescribed size (cv2.resize takes (width, height))
input_data = np.expand_dims(frameo, axis=0)  # Expand the shape of the array to [1, H, W, 3]
if floating_model:
    input_data = (np.float32(input_data) / 218) - 1

interpreter.set_tensor(input_details[0]['index'], input_data)   # passing data to model
interpreter.invoke()                                            # making predictions
boundbox = interpreter.get_tensor(output_details[0]['index'])   # getting output boxes
obj_class = interpreter.get_tensor(output_details[1]['index'])  # getting output classes
score = interpreter.get_tensor(output_details[2]['index'])      # getting output scores
num = interpreter.get_tensor(output_details[3]['index'])        # always equals 10 for this model

for i in range(int(num)):
    top, left, bottom, right = boundbox[0][i]  # getting the position of the detected object
    classId = int(obj_class[0][i])             # getting class of object
    scores = score[0][i]                       # getting prediction score of that object
    if scores > 0.5:  # if score > 50%
        x1 = int(left * width)
        y1 = int(bottom * height)
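# The loop above references `cap`, `interpreter`, and `floating_model`, which
# are not defined in this excerpt; a minimal sketch of the setup it assumes
# (the model path is hypothetical).
from time import time
import cv2
import numpy as np
from tflite_runtime.interpreter import Interpreter  # or tf.lite.Interpreter

cap = cv2.VideoCapture(0)
interpreter = Interpreter(model_path='detect.tflite')  # hypothetical path
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
floating_model = (input_details[0]['dtype'] == np.float32)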
class Human_Detector:
    def __init__(self):
        # Define the threshold of the classifier
        self.min_threshold = 0.65
        # Configure image width and height
        self.imW, self.imH = 300, 300
        # Load the label map from the directory
        label_file = 'models/tensorflow_models/labelmap.txt'
        self.label_list = []
        with open(label_file, 'r') as label_map:
            for i in label_map.read().split("\n"):
                self.label_list.append(i)
        # Load the TensorFlow Lite model
        self.interpreter = Interpreter(
            model_path='models/tensorflow_models/detect.tflite')
        self.interpreter.allocate_tensors()
        # Get model details
        self.input_details = self.interpreter.get_input_details()
        self.output_details = self.interpreter.get_output_details()
        self.height = self.input_details[0]['shape'][1]
        self.width = self.input_details[0]['shape'][2]

    # Human detection method
    def detect(self, frame):
        # Prepare the frame for the TensorFlow Lite model
        frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_resized = cv2.resize(frame_rgb, (self.width, self.height))
        input_data = np.expand_dims(frame_resized, axis=0)

        # Perform the actual detection by running the model with the image as input
        self.interpreter.set_tensor(self.input_details[0]['index'], input_data)
        self.interpreter.invoke()

        # Get the detection results
        rectangle = self.interpreter.get_tensor(self.output_details[0]['index'])[0]  # the box around each object
        classes = self.interpreter.get_tensor(self.output_details[1]['index'])[0]    # the index of each detected object
        scores = self.interpreter.get_tensor(self.output_details[2]['index'])[0]     # the score of each classification

        human_count = 0
        # Loop over all detections and draw detection box if confidence is above minimum threshold
        for i in range(len(scores)):
            if (scores[i] > self.min_threshold) and (scores[i] <= 1.0):
                # Look up object name from "labels" array using class index
                object_name = self.label_list[int(classes[i])]
                # Define the label for the box
                label = '%s: %d%%' % (object_name, int(scores[i] * 100))
                # If the object is a human, increment the count
                if object_name == 'person':
                    human_count += 1
                    # Define the position of the box
                    ymin = int(max(1, (rectangle[i][0] * self.imH)))
                    xmin = int(max(1, (rectangle[i][1] * self.imW)))
                    ymax = int(min(self.imH, (rectangle[i][2] * self.imH)))
                    xmax = int(min(self.imW, (rectangle[i][3] * self.imW)))
                    cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                  (10, 255, 0), 1)
                    labelSize, baseLine = cv2.getTextSize(
                        label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)
                    label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                    # Draw label text on the frame
                    cv2.putText(frame, label, (xmin, label_ymin - 5),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.4, (0, 255, 0), 1)
        return frame, human_count
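# Hypothetical single-frame usage of Human_Detector above; the frame is
# resized to 300x300 first so the drawing coordinates (self.imW/self.imH)
# line up with the displayed image.
detector = Human_Detector()
capture = cv2.VideoCapture(0)
ret, raw = capture.read()
if ret:
    annotated, count = detector.detect(cv2.resize(raw, (300, 300)))
    print('people in frame:', count)
capture.release()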
class PeopleDetector():
    __interpreter: Interpreter
    __labels: list
    __alive: bool

    def __init__(self):
        self.__alive = True
        with open(PATH_TO_LABELS, 'r') as f:
            self.__labels = [line.strip() for line in f.readlines()]
        if self.__labels[0] == '???':
            del self.__labels[0]
        self.__interpreter = Interpreter(model_path=PATH_TO_CKPT)
        self.__interpreter.allocate_tensors()
        self.__videostream = None

    def run(self) -> None:
        input_details = self.__interpreter.get_input_details()
        output_details = self.__interpreter.get_output_details()
        height = input_details[0]['shape'][1]
        width = input_details[0]['shape'][2]
        floating_model = (input_details[0]['dtype'] == np.float32)
        input_mean = 127.5
        input_std = 127.5

        # Initialize frame rate calculation
        frame_rate_calc = 1
        freq = cv2.getTickFrequency()

        # Initialize video stream (kept on self so stop() can release it)
        self.__videostream = VideoStream(resolution=(imW, imH), framerate=30).start()
        ct = CentroidTracker()
        # h = Head()
        time.sleep(1)
        frame_count = 0

        while self.__alive:
            t1 = cv2.getTickCount()
            frame1 = self.__videostream.read()
            frame_count += 1
            frame = frame1.copy()
            frame_rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frame_resized = cv2.resize(frame_rgb, (width, height))
            input_data = np.expand_dims(frame_resized, axis=0)
            if floating_model:
                input_data = (np.float32(input_data) - input_mean) / input_std

            self.__interpreter.set_tensor(input_details[0]['index'], input_data)
            self.__interpreter.invoke()

            boxes = self.__interpreter.get_tensor(output_details[0]['index'])[0]    # Bounding box coordinates of detected objects
            classes = self.__interpreter.get_tensor(output_details[1]['index'])[0]  # Class index of detected objects
            scores = self.__interpreter.get_tensor(output_details[2]['index'])[0]   # Confidence of detected objects

            r = []
            track_id = 0
            max_size = 0
            for i in range(len(scores)):
                if (scores[i] > min_conf_threshold) and (scores[i] <= 1.0):
                    # Look up object name from "labels" array using class index
                    object_name = self.__labels[int(classes[i])]
                    if object_name == "person":
                        ymin = int(max(1, (boxes[i][0] * imH)))
                        xmin = int(max(1, (boxes[i][1] * imW)))
                        ymax = int(min(imH, (boxes[i][2] * imH)))
                        xmax = int(min(imW, (boxes[i][3] * imW)))
                        rects = [xmin, ymin, xmax, ymax]
                        r.append(np.array(rects).astype("int"))
                        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax),
                                      (10, 255, 0), 2)
                        xmid = xmin + ((xmax - xmin) / 2)
                        p = 640 - xmid
                        # angle = h.find_angle(p * (1/64))
                        # rot = h.rotate(angle)
                        # label = '%s: %d - %d%%' % (object_name, xmin, xmax)
                        # labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.7, 2)  # Get font size
                        # label_ymin = max(ymin, labelSize[1] + 10)  # Make sure not to draw label too close to top of window
                        # cv2.rectangle(frame, (xmin, label_ymin - labelSize[1] - 10), (xmin + labelSize[0], label_ymin + baseLine - 10), (255, 255, 255), cv2.FILLED)  # Draw white box to put label text in
                        # cv2.putText(frame, label, (xmin, label_ymin - 7), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 2)  # Draw label text

            # Draw framerate in corner of frame
            objects = ct.update(r)
            # frame_size = output[1]
            # print(output)
            print(objects)
            cv2.putText(frame, 'FPS: {0:.2f}'.format(frame_rate_calc), (30, 50),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), 2, cv2.LINE_AA)

            # All the results have been drawn on the frame, so it's time to display it.
            flag = 0
            next_id = 0
            i = 0
            new_coord = []
            next_coord = []
            coord = []
            for (objectID, centroid) in objects.items():
                if objectID == track_id:
                    flag = 1
                    new_coord = centroid
                if i == 0:
                    next_id = objectID
                    next_coord = centroid
                i += 1
                text = "ID {}".format(objectID)
                cv2.putText(frame, text, (centroid[0] - 10, centroid[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
                cv2.circle(frame, (centroid[0], centroid[1]), 4, (0, 255, 0), -1)

            if flag == 0:
                track_id = next_id
                coord = next_coord
            else:
                coord = new_coord
            # head rotation

            cv2.imshow('Object detector', frame)

            # Calculate framerate
            t2 = cv2.getTickCount()
            time1 = (t2 - t1) / freq
            frame_rate_calc = 1 / time1

            # Press 'q' to quit
            if cv2.waitKey(1) == ord('q'):
                break

    def stop(self) -> None:
        # Clean up
        cv2.destroyAllWindows()
        if self.__videostream is not None:
            self.__videostream.stop()
        self.__alive = False