def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', help='File path of Tflite model.', required=True)
    parser.add_argument('--label', help='File path of label file.', required=True)
    parser.add_argument('--top_k', help='keep top k candidates.', default=3, type=int)
    parser.add_argument('--threshold', help='threshold to filter results.', default=0.5, type=float)
    parser.add_argument('--width', help='Resolution width.', default=640, type=int)
    parser.add_argument('--height', help='Resolution height.', default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, width, height, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                    rawCapture, format='rgb', use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                start = time.time()
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k)
                # ans = engine.DetectWithInputTensor(input_buf, threshold=0.05,
                #     keep_aspect_ratio=False, relative_coord=False, top_k=10)
                elapsed = time.time() - start

                # Display result.
                if ans:
                    for obj in ans:
                        label_name = 'Unknown'
                        if labels:
                            label_name = labels[obj.label_id]
                        caption = '{0}({1:.2f})'.format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = obj.bounding_box.flatten().tolist()
                        visual.draw_rectangle(im, box, colors[obj.label_id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                fps = 1 / elapsed
                elapsed_list.append(elapsed)
                avg_text = ''
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed = np.mean(elapsed_list)
                    avg_fps = 1 / avg_elapsed
                    avg_text = ' AVG: {0:.2f}ms, {1:.2f}fps'.format(
                        avg_elapsed * 1000.0, avg_fps)

                # Display fps.
                fps_text = '{0:.2f}ms, {1:.2f}fps'.format(elapsed * 1000.0, fps)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display the frame.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord('q'):
                    break

        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--thread", help="Num threads.", default=2, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.", default="")
    parser.add_argument("--output", help="File path of result.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize TF-Lite interpreter.
    interpreter = make_interpreter(args.model, args.thread)
    interpreter.allocate_tensors()
    _, height, width, channel = interpreter.get_input_details()[0]["shape"]
    print("Interpreter(height, width, channel): ", height, width, channel)

    # Read labels and generate random colors.
    labels = read_label_file(args.label) if args.label else None
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    random.seed(42)
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = cap.get(cv2.CAP_PROP_FPS)
    print("Input(height, width, fps): ", h, w, fps)

    model_name = os.path.splitext(os.path.basename(args.model))[0]

    # Output video file.
    # Define the codec and create a VideoWriter object.
    video_writer = None
    if args.output != "":
        fourcc = cv2.VideoWriter_fourcc(*"MP4V")
        video_writer = cv2.VideoWriter(args.output, fourcc, fps, (w, h))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            print("VideoCapture read return false.")
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        resize_im = cv2.resize(im, (width, height))

        # Run inference.
        start = time.perf_counter()
        set_input_tensor(interpreter, resize_im)
        interpreter.invoke()
        objs = get_output(interpreter, args.threshold)
        inference_time = (time.perf_counter() - start) * 1000

        # Display result.
        for obj in objs:
            class_id = int(obj["class_id"])
            caption = "{0}({1:.2f})".format(labels[class_id], obj["score"])

            # Convert the bounding box figures from relative coordinates
            # to absolute coordinates based on the original resolution.
            ymin, xmin, ymax, xmax = obj["bounding_box"]
            xmin = int(xmin * w)
            xmax = int(xmax * w)
            ymin = int(ymin * h)
            ymax = int(ymax * h)

            # Draw a rectangle and caption.
            visual.draw_rectangle(frame, (xmin, ymin, xmax, ymax), colors[class_id])
            visual.draw_caption(frame, (xmin, ymin, xmax, ymax), caption)

        # Calc fps.
        elapsed_list.append(inference_time)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "Inference: {0:.2f}ms".format(inference_time)
        display_text = model_name + " " + fps_text + avg_text
        visual.draw_caption(frame, (10, 30), display_text)

        # Output video file.
        if video_writer is not None:
            video_writer.write(frame)

        # Display the frame.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture, writer, and window.
    cap.release()
    if video_writer is not None:
        video_writer.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    cap_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    cap_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Run inference.
        start = time.perf_counter()
        _, scale = common.set_resized_input(
            interpreter, (cap_width, cap_height), lambda size: cv2.resize(im, size))
        interpreter.invoke()
        elapsed_ms = (time.perf_counter() - start) * 1000

        # Display result.
        objects = detect.get_objects(interpreter, args.threshold, scale)
        if objects:
            for obj in objects:
                label_name = "Unknown"
                if labels:
                    label_name = labels.get(obj.id, "Unknown")
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                visual.draw_rectangle(frame, box, colors[obj.id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display the frame.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    elapsed_list = []
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, height, width, channels = interpreter.get_input_details()[0]["shape"]
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                    rawCapture, format="rgb", use_video_port=True):
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                start = time.perf_counter()
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size))
                interpreter.invoke()
                elapsed_ms = (time.perf_counter() - start) * 1000

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                if objects:
                    for obj in objects:
                        label_name = "Unknown"
                        if labels:
                            label_name = labels.get(obj.id, "Unknown")
                        caption = "{0}({1:.2f})".format(label_name, obj.score)

                        # Draw a rectangle and caption.
                        box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                        visual.draw_rectangle(im, box, colors[obj.id])
                        visual.draw_caption(im, box, caption)

                # Calc fps.
                elapsed_list.append(elapsed_ms)
                avg_text = ""
                if len(elapsed_list) > 100:
                    elapsed_list.pop(0)
                    avg_elapsed_ms = np.mean(elapsed_list)
                    avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

                # Display fps.
                fps_text = "{0:.2f}ms".format(elapsed_ms)
                visual.draw_caption(im, (10, 30), fps_text + avg_text)

                # Display the frame.
                cv2.imshow(WINDOW_NAME, im)
                if cv2.waitKey(10) & 0xFF == ord("q"):
                    break

        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
from tqdm import tqdm
import json
import cv2
import random

from settings import COSSY_DIR
from utils.visualization import random_colors, _draw_xywha

if __name__ == "__main__":
    ann_data = json.load(open(COSSY_DIR + '/annotations/MW-R_mot.json'))
    nColors = 12
    COLORS = random_colors(num=nColors, dtype='uint8').tolist()
    fw, fh = 768, 768
    # fourcc = cv2.VideoWriter_fourcc(*'XVID')
    # out_path = f'./videos_with_ann/Edge_test_mot.avi'
    # vout = cv2.VideoWriter(out_path, fourcc, 10, (fw, fh))

    random.shuffle(ann_data['videos'])
    for i, vidinfo in enumerate(tqdm(ann_data['videos'])):
        id2color = dict()
        vname = vidinfo['id']
        for imname, img_anns in zip(vidinfo['file_names'], vidinfo['annotations']):
            # impath = os.path.join(f'./frames/{vname}/{imname}')
            impath = f'{COSSY_DIR}/frames/{imname}'
            im = cv2.imread(impath)
            object_ids = [ann.get('person_id', None) for ann in img_anns]
            for ann in img_anns:
                assert ann['category_id'] == 1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine and load labels.
    interpreter = make_interpreter(args.model)
    interpreter.allocate_tensors()
    labels = read_label_file(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                    rawCapture, format="rgb", use_video_port=True):
                start = time.time()
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)

                # Run inference.
                _, scale = common.set_resized_input(
                    interpreter,
                    (resolution_width, resolution_height),
                    lambda size: cv2.resize(image, size))
                interpreter.invoke()

                # Display result.
                objects = detect.get_objects(interpreter, args.threshold, scale)
                if is_inpaint_mode:
                    mask = np.full((args.height, args.width), 0, dtype=np.uint8)
                    for obj in objects:
                        if labels and obj.id in labels:
                            # Draw a mask rectangle.
                            box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                            visual.draw_rectangle(mask, box, (255, 255, 255), thickness=-1)

                    # Image inpainting.
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_NS)
                else:
                    for obj in objects:
                        if labels and obj.id in labels:
                            label_name = labels[obj.id]
                            caption = "{0}({1:.2f})".format(label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = (obj.bbox.xmin, obj.bbox.ymin, obj.bbox.xmax, obj.bbox.ymax)
                            visual.draw_rectangle(im, box, colors[obj.id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed = time.time() - start
                fps = 1 / elapsed

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed * 1000.0, fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display the frame.
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("Inpaint mode changed:", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="keep top k candidates.", default=3, type=int)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    parser.add_argument("--videopath", help="File path of Videofile.", default="")
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    # Video capture.
    if args.videopath == "":
        print("Open camera.")
        cap = cv2.VideoCapture(0)
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, args.width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, args.height)
    else:
        print("Open video file: ", args.videopath)
        cap = cv2.VideoCapture(args.videopath)

    elapsed_list = []

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        im = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        input_buf = PIL.Image.fromarray(im)

        # Run inference.
        ans = engine.detect_with_image(
            input_buf,
            threshold=args.threshold,
            keep_aspect_ratio=False,
            relative_coord=False,
            top_k=args.top_k)
        elapsed_ms = engine.get_inference_time()

        # Display result.
        if ans:
            for obj in ans:
                label_name = "Unknown"
                if labels:
                    label_name = labels[obj.label_id]
                caption = "{0}({1:.2f})".format(label_name, obj.score)

                # Draw a rectangle and caption.
                box = obj.bounding_box.flatten().tolist()
                visual.draw_rectangle(frame, box, colors[obj.label_id])
                visual.draw_caption(frame, box, caption)

        # Calc fps.
        elapsed_list.append(elapsed_ms)
        avg_text = ""
        if len(elapsed_list) > 100:
            elapsed_list.pop(0)
            avg_elapsed_ms = np.mean(elapsed_list)
            avg_text = " AVG: {0:.2f}ms".format(avg_elapsed_ms)

        # Display fps.
        fps_text = "{0:.2f}ms".format(elapsed_ms)
        visual.draw_caption(frame, (10, 30), fps_text + avg_text)

        # Display the frame.
        cv2.imshow(WINDOW_NAME, frame)
        if cv2.waitKey(10) & 0xFF == ord("q"):
            break

    # When everything is done, release the capture and the window.
    cap.release()
    cv2.destroyAllWindows()
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", help="File path of Tflite model.", required=True)
    parser.add_argument("--label", help="File path of label file.", required=True)
    parser.add_argument("--top_k", help="keep top k candidates.", default=3, type=int)
    parser.add_argument("--threshold", help="threshold to filter results.", default=0.5, type=float)
    parser.add_argument("--width", help="Resolution width.", default=640, type=int)
    parser.add_argument("--height", help="Resolution height.", default=480, type=int)
    args = parser.parse_args()

    # Initialize window.
    cv2.namedWindow(
        WINDOW_NAME,
        cv2.WINDOW_GUI_NORMAL | cv2.WINDOW_AUTOSIZE | cv2.WINDOW_KEEPRATIO)
    cv2.moveWindow(WINDOW_NAME, 100, 200)

    # Initialize engine.
    engine = DetectionEngine(args.model)
    labels = ReadLabelFile(args.label) if args.label else None

    # Generate random colors.
    last_key = sorted(labels.keys())[len(labels.keys()) - 1]
    colors = visual.random_colors(last_key)

    is_inpaint_mode = False
    resolution_width = args.width
    resolution_height = args.height
    with picamera.PiCamera() as camera:
        camera.resolution = (resolution_width, resolution_height)
        camera.framerate = 30
        _, height, width, channels = engine.get_input_tensor_shape()
        rawCapture = PiRGBArray(camera)

        # Allow the camera to warm up.
        time.sleep(0.1)

        try:
            for frame in camera.capture_continuous(
                    rawCapture, format="rgb", use_video_port=True):
                start = time.time()
                rawCapture.truncate(0)

                image = frame.array
                im = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
                input_buf = PIL.Image.fromarray(image)

                # Run inference.
                ans = engine.DetectWithImage(
                    input_buf,
                    threshold=args.threshold,
                    keep_aspect_ratio=False,
                    relative_coord=False,
                    top_k=args.top_k)

                # Display result.
                if is_inpaint_mode:
                    mask = np.full((args.height, args.width), 0, dtype=np.uint8)
                    if ans:
                        for obj in ans:
                            if labels and obj.label_id in labels:
                                # Draw a mask rectangle.
                                box = obj.bounding_box.flatten().tolist()
                                visual.draw_rectangle(mask, box, (255, 255, 255), thickness=-1)

                    # Image inpainting.
                    dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_TELEA)
                    # dst = cv2.inpaint(im, mask, 3, cv2.INPAINT_NS)
                else:
                    for obj in ans:
                        if labels and obj.label_id in labels:
                            label_name = labels[obj.label_id]
                            caption = "{0}({1:.2f})".format(label_name, obj.score)

                            # Draw a rectangle and caption.
                            box = obj.bounding_box.flatten().tolist()
                            visual.draw_rectangle(im, box, colors[obj.label_id])
                            visual.draw_caption(im, box, caption)
                    dst = im

                # Calc fps.
                elapsed = time.time() - start
                fps = 1 / elapsed

                # Display fps.
                fps_text = "{0:.2f}ms, {1:.2f}fps".format(elapsed * 1000.0, fps)
                visual.draw_caption(dst, (10, 30), fps_text)

                # Display the frame.
                cv2.imshow(WINDOW_NAME, dst)
                key = cv2.waitKey(10) & 0xFF
                if key == ord("q"):
                    break
                elif key == ord(" "):
                    is_inpaint_mode = not is_inpaint_mode
                    print("Inpaint mode changed:", is_inpaint_mode)

        finally:
            camera.stop_preview()

    # When everything is done, release the window.
    cv2.destroyAllWindows()
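# ---------------------------------------------------------------------------
# Note: the detection scripts above all call into a small local helper module
# imported as `visual` (random_colors, draw_rectangle, draw_caption), which is
# not shown in this section. The sketch below is a minimal, assumed
# implementation that only matches the call sites used here; the repository's
# actual module may differ, and the `random_colors` imported from
# `utils.visualization` in the annotation script is a separate function.
# ---------------------------------------------------------------------------
import random

import cv2


def random_colors(num):
    """Return num + 1 random BGR colors so label ids 0..num are all indexable."""
    return [tuple(random.randint(0, 255) for _ in range(3)) for _ in range(num + 1)]


def draw_rectangle(image, box, color, thickness=3):
    """Draw a rectangle; box is (xmin, ymin, xmax, ymax), thickness=-1 fills it."""
    xmin, ymin, xmax, ymax = [int(v) for v in box]
    cv2.rectangle(image, (xmin, ymin), (xmax, ymax), color, thickness)


def draw_caption(image, box, caption):
    """Draw outlined text at the top-left of box (also accepts an (x, y) point)."""
    x, y = int(box[0]), int(box[1])
    cv2.putText(image, caption, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 0), 3)
    cv2.putText(image, caption, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (255, 255, 255), 1)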