import datetime
import threading as t
import time

import cv2
import tensorflow as tf

# Project-level helpers (build_model, gen_colors, get_coco_names, int_scale_boxes,
# get_draw_fn, draw_box, print_opt) are assumed to be imported from elsewhere in
# this repository.


def video_processor(model, version, vidpath, device="/CPU:0"):
    img_array = []
    i = 0  # frames shown since the last per-second report
    t = 0  # total frames processed
    start = time.time()
    tick = 0
    # timing markers: (e, f) preprocessing, (a, b) inference, (c, d) drawing
    e, f, a, b, c, d = 0, 0, 0, 0, 0, 0

    # if a model name was passed in, build the model on the requested device
    if isinstance(model, str):
        with tf.device(device):
            model = build_model(name=model, model_version=version)
            model.make_predict_function()

    if hasattr(model, "predict"):
        predfunc = model.predict
        print("using pred function")
    else:
        predfunc = model
        print("using call function")

    colors = gen_colors(80)
    label_names = get_coco_names(path="yolo/dataloaders/dataset_specs/coco.names")
    print(label_names)

    # output_writer = cv2.VideoWriter('yolo_output.mp4',
    #                                 cv2.VideoWriter_fourcc(*'mp4v'),
    #                                 frame_count, (480, 640))  # change output file name if needed

    pred = None
    cap = cv2.VideoCapture(vidpath)
    assert cap.isOpened()

    width = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
    height = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT
    print('width, height, fps:', width, height, int(cap.get(5)))
    frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    while cap.isOpened():
        success, image = cap.read()
        if not success:
            break

        # with tf.device(device):
        e = datetime.datetime.now()
        image = tf.cast(image, dtype=tf.float32)
        image = image / 255
        f = datetime.datetime.now()

        # run inference every frame (raise the modulus to skip frames)
        if t % 1 == 0:
            a = datetime.datetime.now()
            # with tf.device(device):
            pimage = tf.expand_dims(image, axis=0)
            pimage = tf.image.resize(pimage, (416, 416))
            pred = predfunc(pimage)
            b = datetime.datetime.now()

        image = image.numpy()
        if pred is not None:
            c = datetime.datetime.now()
            boxes, classes = int_scale_boxes(pred["bbox"], pred["classes"],
                                             width, height)
            draw = get_draw_fn(colors, label_names, 'YOLO')
            draw_box(image, boxes[0].numpy(), classes[0].numpy(),
                     pred["confidence"][0], draw)
            d = datetime.datetime.now()

        cv2.imshow('frame', image)
        i += 1
        t += 1

        # once per second, report latency and the number of frames shown
        if time.time() - start - tick >= 1:
            tick += 1
            print_opt((f - e) + (b - a) + (d - c), i)
            i = 0

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break

    cap.release()
    cv2.destroyAllWindows()
    return
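# --- Hypothetical usage sketch (not part of the original source). The model name,
# version string, and video path are placeholder values that build_model() and
# cv2.VideoCapture() would need to accept:
#
#   video_processor(model="yolov3", version="regular",
#                   vidpath="test_video.mp4", device="/GPU:0")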
def run(self):
    # grab the model and device handles set up in the constructor
    model = self._model
    gpu_device = self._gpu_device

    # print processing information
    print(f"capture (width, height): ({self._width},{self._height})")
    print(f"Yolo Possible classes: {self._classes}")

    # use the user-supplied preprocess function if one was given
    if self._preprocess_function is None:
        preprocess = self._preprocess
    else:
        preprocess = self._preprocess_function

    # get one frame and put it in the processing queue to get the pipeline started
    if self._cap.isOpened():
        success, image = self._cap.read()
        if not success:
            return
        with tf.device(self._pre_process_device):
            e = datetime.datetime.now()
            image = preprocess(image)
            self._load_que.put(image)
            f = datetime.datetime.now()
        with tf.device(self._gpu_device):
            pimage = tf.image.resize(image, (self._p_width, self._p_height))
            pimage = tf.expand_dims(pimage, axis=0)
        if hasattr(model, "predict"):
            predfunc = model.predict
            print("using pred function")
        else:
            predfunc = model
            print("using call function")
    else:
        return

    # start a thread to load frames
    load_thread = t.Thread(target=self.read, args=())
    load_thread.start()

    # start a thread to display frames
    display_thread = t.Thread(target=self.display, args=())
    display_thread.start()

    try:
        # while the capture is open, keep processing frames
        while self._cap.isOpened():
            # in case the load queue has many frames in it, load one batch
            proc = []
            for i in range(self._batch_size):
                if self._load_que.empty():
                    break
                value = self._load_que.get()
                proc.append(value)

            # for debugging: watch the processor catch up to real time
            # print(len(self._load_que.queue), end=" ")

            # if the queue was empty the model is ahead, so take a break to let
            # the other threads catch up
            if len(proc) == 0:
                time.sleep(self._wait_time)
                continue

            # log the time and process the batch loaded in the loop above
            a = datetime.datetime.now()
            with tf.device(self._gpu_device):
                image = tf.convert_to_tensor(proc)
                pimage = tf.image.resize(image, (self._p_width, self._p_height))
                pred = predfunc(pimage)
                boxes, classes = int_scale_boxes(pred["bbox"], pred["classes"],
                                                 self._width, self._height)
                if image.shape[1] != self._height:
                    image = tf.image.resize(image, (self._height, self._width))
            b = datetime.datetime.now()

            # computation latency: how much delay sits between input and output;
            # reset the moving average periodically so it tracks recent behavior
            if self._frames >= 1000:
                self._frames = 0
                self._latency = -1

            # accumulate the latency
            if self._latency != -1:
                self._latency += (b - a)
            else:
                self._latency = (b - a)

            # count frames processed, used to compute the moving average of latency
            self._frames += image.shape[0]
            self._batch_proc = image.shape[0]

            timeout = 0
            # if the display queue is full, do not put anything; just wait a moment
            while self._display_que.full() and self._running:
                time.sleep(self._wait_time)

            # put the processed frames on the display queue
            self._display_que.put((image.numpy(), boxes.numpy(),
                                   classes.numpy(), pred["confidence"]))

            # print everything
            self.print_opt()

            # a bare raise jumps to the generic handler below for a clean shutdown
            if not self._running:
                raise

        # join the loading thread and display thread
        load_thread.join()
        display_thread.join()

        # close the video capture and destroy all open windows
        self._cap.release()
        cv2.destroyAllWindows()
    except KeyboardInterrupt:
        self.print_opt()
        print("\n\n\n\n\n::: Video File Stopped -> KeyBoard Interrupt :::")
        self._running = False

        # join the loading thread and display thread
        load_thread.join()
        display_thread.join()

        # close the video capture and destroy all open windows
        self._cap.release()
        cv2.destroyAllWindows()
    except Exception as e:
        # the video file finished or an error occurred while processing
        self.print_opt()
        print(f"\n\n\n\n\n::: Video File Complete ::: or error -> -> ->{e}")
        self._running = False
        time.sleep(5)

        # join the loading thread and display thread
        load_thread.join()
        display_thread.join()

        # close the video capture and destroy all open windows
        self._cap.release()
        cv2.destroyAllWindows()
    return
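# --- Minimal standalone sketch (not part of the original source) of the
# producer/consumer layout run() relies on: a reader thread fills an input queue
# and the main loop drains it in batches, sleeping briefly whenever the producer
# falls behind. Every name below is hypothetical; the doubling step stands in for
# model inference.
import queue
import threading
import time


def _pipeline_sketch(frames, batch_size=4, wait_time=0.001):
    load_q = queue.Queue()
    done = threading.Event()
    processed = []

    def reader():
        for frame in frames:              # producer: push raw frames
            load_q.put(frame)
        done.set()

    reader_thread = threading.Thread(target=reader)
    reader_thread.start()

    # consumer: drain the queue in batches, as run() does, until the reader
    # has finished and the queue is empty
    while not (done.is_set() and load_q.empty()):
        batch = []
        for _ in range(batch_size):
            if load_q.empty():
                break
            batch.append(load_q.get())
        if not batch:
            time.sleep(wait_time)         # producer is behind; yield briefly
            continue
        processed.append([f * 2 for f in batch])  # stand-in for model inference

    reader_thread.join()
    return processed

# e.g. _pipeline_sketch(range(10)) returns the "inferred" frames grouped into batches.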