def recognize_from_video(detector): capture = webcamera_utils.get_capture(args.video) # create video writer if savepath is specified as video format if args.savepath != SAVE_IMAGE_PATH: f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)) f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)) save_h, save_w = f_h, f_w writer = webcamera_utils.get_writer(args.savepath, save_h, save_w) else: writer = None if args.write_prediction: frame_count = 0 frame_digit = int( math.log10(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1) video_name = os.path.splitext(os.path.basename(args.video))[0] while (True): ret, frame = capture.read() if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret: break raw_img = frame if args.detector: detector.compute(raw_img, args.threshold, args.iou) res_img = plot_results(detector, raw_img, COCO_CATEGORY) detect_object = detector else: img, ratio = preprocess(raw_img, (HEIGHT, WIDTH)) output = detector.run(img[None, :, :, :]) predictions = postprocess(output[0], (HEIGHT, WIDTH))[0] detect_object = predictions_to_object(predictions, raw_img, ratio, args.iou, args.threshold) detect_object = reverse_letterbox( detect_object, raw_img, (raw_img.shape[0], raw_img.shape[1])) res_img = plot_results(detect_object, raw_img, COCO_CATEGORY) cv2.imshow('frame', res_img) # save results if writer is not None: writer.write(res_img) # write prediction if args.write_prediction: savepath = get_savepath( args.savepath, video_name, post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'), ext='.png') pred_file = '%s.txt' % savepath.rsplit('.', 1)[0] write_predictions(pred_file, detect_object, frame, COCO_CATEGORY) frame_count += 1 capture.release() cv2.destroyAllWindows() if writer is not None: writer.release() logger.info('Script finished successfully.')
def recognize_from_image(detector):
    """Run detection on the single input image and save the plotted result.

    Args:
        detector: ailia network used via ``predict`` + ``post_processing``.

    Reads the input path, thresholds and save path from the global ``args``.
    Fix: replaced bare ``print`` calls with ``logger`` to match every other
    entry point in this file, and removed a duplicated comment.
    """
    # prepare input data
    org_img = load_image(args.input)
    logger.info(f'input image shape: {org_img.shape}')
    org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)

    # letterbox to model size, BGR->RGB, HWC->CHW, scale to [0, 1], add batch dim
    img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, [2, 0, 1])
    img = img.astype(np.float32) / 255
    img = np.expand_dims(img, 0)

    # inference
    logger.info('Start inference...')
    if args.benchmark:
        logger.info('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            output = detector.predict([img])
            end = int(round(time.time() * 1000))
            logger.info(f'\tailia processing time {end - start} ms')
    else:
        output = detector.predict([img])

    # decode network output and map boxes back onto the original image
    detect_object = post_processing(img, args.threshold, args.iou, output)
    detect_object = reverse_letterbox(detect_object[0], org_img,
                                      (IMAGE_HEIGHT, IMAGE_WIDTH))

    # plot result
    res_img = plot_results(detect_object, org_img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    logger.info('Script finished successfully.')
def recognize_from_video(detector):
    """Run YOLO detection on a video/camera stream, display and optionally save it.

    Args:
        detector: ailia Detector (``compute``) when ``args.detector`` is set,
            otherwise an ailia network (``predict`` + ``post_processing``).

    Reads options from the global ``args``.  Press 'q' to stop.
    Fix: in detector mode ``detect_object`` was never assigned, so combining
    ``--detector`` with ``--write_prediction`` raised ``NameError``.
    """
    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    if args.write_prediction:
        frame_count = 0
        # digits needed to zero-pad per-frame file names
        frame_digit = int(math.log10(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1)
        video_name = os.path.splitext(os.path.basename(args.video))[0]

    while (True):
        ret, frame = capture.read()
        # stop on 'q' key press or end of stream
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        if args.detector:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, args.threshold, args.iou)
            res_img = plot_results(detector, frame, COCO_CATEGORY)
            # BUGFIX: the detector itself carries the detections here (same
            # convention as the sibling entry points); without this line,
            # write_predictions below hit an undefined name.
            detect_object = detector
        else:
            # letterbox -> RGB -> CHW -> [0, 1] -> batch
            img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.transpose(img, [2, 0, 1])
            img = img.astype(np.float32) / 255
            img = np.expand_dims(img, 0)
            output = detector.predict([img])
            detect_object = post_processing(img, args.threshold, args.iou, output)
            detect_object = reverse_letterbox(detect_object[0], frame,
                                              (IMAGE_HEIGHT, IMAGE_WIDTH))
            res_img = plot_results(detect_object, frame, COCO_CATEGORY)

        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

        # write prediction
        if args.write_prediction:
            savepath = get_savepath(
                args.savepath, video_name,
                post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'),
                ext='.png')
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, frame, COCO_CATEGORY)
            frame_count += 1

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_image():
    """Run YOLO detection on every image in ``args.input`` and save results.

    Builds the ailia network from the module-level MODEL_PATH/WEIGHT_PATH,
    rescales detections back to the original image size, and writes one
    annotated image per input.
    Fix: an image with zero detections previously left ``detect_object`` /
    ``res_img`` unassigned and raised ``NameError``; now an empty result is
    handled and the (unannotated) image is still saved.
    """
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data: resize to model input, HWC->NCHW, scale to [0, 1]
        logger.debug(f'input image: {image_path}')
        raw_img = cv2.imread(image_path)
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0
        logger.debug(f'input image shape: {raw_img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                pred = detector.predict(img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            pred = detector.predict(img)

        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)

        # BUGFIX: start from an empty detection list so that images with no
        # detections still produce a valid (empty) result instead of raising
        # NameError further down.
        output = []
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          raw_img.shape).round()
                img_size_h, img_size_w = raw_img.shape[:2]
                # Write results as relative-coordinate DetectorObject entries
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

        detect_object = reverse_letterbox(output, raw_img,
                                          (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def recognize_from_image(detector):
    """Run detection on every image in ``args.input`` and save plotted results.

    Args:
        detector: ailia Detector (``compute``) when ``args.detector`` is set,
            otherwise a raw network used via ``run`` with the
            preprocess/postprocess helpers.

    Supports benchmark mode (``args.benchmark``) and optional per-image
    prediction text files (``args.write_prediction``).
    """
    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.debug(f'input image: {image_path}')
        raw_img = cv2.imread(image_path, cv2.IMREAD_COLOR)
        if not args.detector:
            # manual pipeline needs the letterboxed tensor and its scale ratio
            img, ratio = preprocess(raw_img, (HEIGHT, WIDTH))
        logger.debug(f'input image shape: {raw_img.shape}')

        # single inference step, shared by benchmark and normal paths;
        # closes over raw_img/img prepared above
        def compute():
            if args.detector:
                detector.compute(raw_img, args.threshold, args.iou)
                return None
            else:
                return detector.run(img[None, :, :, :])

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                output = compute()
                end = int(round(time.time() * 1000))
                # first iteration is treated as warm-up and excluded
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            # NOTE(review): assumes args.benchmark_count > 1, otherwise this
            # divides by zero — confirm the argument parser enforces that.
            logger.info(
                f'\taverage time {total_time / (args.benchmark_count-1)} ms')
        else:
            output = compute()

        if args.detector:
            # detections are held inside the detector object itself
            res_img = plot_results(detector, raw_img, COCO_CATEGORY)
            detect_object = detector
        else:
            # decode raw output and map boxes back to the original image
            predictions = postprocess(output[0], (HEIGHT, WIDTH))[0]
            detect_object = predictions_to_object(predictions, raw_img, ratio, args.iou, args.threshold)
            detect_object = reverse_letterbox(
                detect_object, raw_img, (raw_img.shape[0], raw_img.shape[1]))
            res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)

        # plot result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, raw_img, COCO_CATEGORY)

    logger.info('Script finished successfully.')
def recognize_from_video():
    """Run YOLOv5 detection on a video/camera stream and display/save frames.

    Builds the ailia network from the module-level MODEL_PATH/WEIGHT_PATH and
    reads the video source and thresholds from the global ``args``.
    Press 'q' in the preview window to stop.
    Fix: removed the dead ``detector = None`` assignment that was immediately
    overwritten.
    """
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        # stop on 'q' key press or end of stream
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # letterbox -> RGB -> CHW -> [0, 1] -> batch
        img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, [2, 0, 1])
        img = img.astype(np.float32) / 255
        img = np.expand_dims(img, 0)

        output = detector.predict([img])
        detect_object = yolov5_utils.post_processing(
            img, args.threshold, args.iou, output)
        detect_object = reverse_letterbox(detect_object[0], frame,
                                          (IMAGE_HEIGHT, IMAGE_WIDTH))
        res_img = plot_results(detect_object, frame, COCO_CATEGORY)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
def recognize_from_image():
    """Detect objects in each input image with the YOLOv5 network and save
    the annotated copies.

    Model paths, thresholds and the save path come from the module-level
    constants and the global ``args``.
    """
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)

        # load the image and drop the alpha channel
        org_img = load_image(image_path)
        org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
        logger.info(f'input image shape: {org_img.shape}')

        # letterbox to the network size, then BGR->RGB / HWC->CHW /
        # scale to [0, 1] / add a batch dimension
        net_input = np.expand_dims(
            np.transpose(
                cv2.cvtColor(
                    letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH)),
                    cv2.COLOR_BGR2RGB),
                [2, 0, 1]).astype(np.float32) / 255,
            0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for _ in range(5):
                start = int(round(time.time() * 1000))
                output = detector.predict([net_input])
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            output = detector.predict([net_input])

        # decode, map boxes back onto the original image, and draw
        detections = yolov5_utils.post_processing(
            net_input, args.threshold, args.iou, output)
        detections = reverse_letterbox(detections[0], org_img,
                                       (IMAGE_HEIGHT, IMAGE_WIDTH))
        res_img = plot_results(detections, org_img, COCO_CATEGORY)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def recognize_from_video():
    """Run NanoDet detection on a video/camera stream and display/save frames.

    Builds the network from the module-level MODEL_PATH/WEIGHT_PATH and reads
    the video source and save path from the global ``args``.  Press 'q' in
    the preview window to stop.
    """
    # net initialize
    env_id = args.env_id
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    detector = NanoDetDetection(net, input_shape=[HEIGHT, WIDTH], reg_max=REG_MAX)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = f_h, f_w
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        # stop on 'q' key press or end of stream
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        raw_img = frame
        # detect, then map letterboxed coordinates back to the original frame
        detect_object = detector.detect(raw_img)
        detect_object = reverse_letterbox(detect_object, raw_img, (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
def recognize_from_image():
    """Run NanoDet detection on every image in ``args.input`` and save the
    annotated results.

    Model paths and input dimensions come from the module-level constants;
    benchmark mode repeats inference five times and logs per-run timings.
    """
    # model setup
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    net.set_input_shape((1, 3, HEIGHT, WIDTH))
    detector = NanoDetDetection(net, input_shape=[HEIGHT, WIDTH], reg_max=REG_MAX)

    for image_path in args.input:
        # prepare input data
        logger.debug(f'input image: {image_path}')
        raw_img = cv2.imread(image_path)
        logger.debug(f'input image shape: {raw_img.shape}')

        # inference
        logger.info('Start inference...')
        if not args.benchmark:
            detect_object = detector.detect(raw_img)
        else:
            logger.info('BENCHMARK mode')
            for _ in range(5):
                start = int(round(time.time() * 1000))
                detect_object = detector.detect(raw_img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')

        # map letterboxed coordinates back onto the original image and draw
        detect_object = reverse_letterbox(detect_object, raw_img,
                                          (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)

        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def compute_blazeface_with_keypoint(detector, frame, anchor_path='anchors.npy', back=False, min_score_thresh=DEFAULT_MIN_SCORE_THRESH):
    """Detect faces with BlazeFace and return bounding boxes plus eye keypoints.

    Args:
        detector: ailia network whose ``predict`` returns BlazeFace raw output.
        frame: input image (BGR, HWC); coordinates are mapped back onto it.
        anchor_path: path to the anchor file consumed by ``postprocess``.
        back: if True, use the 256x256 back-camera model input size instead
            of the 128x128 front-camera size.
        min_score_thresh: minimum detection score passed to ``postprocess``.

    Returns:
        tuple: (detections, keypoints) where detections is a list of
        ailia.DetectorObject face boxes and keypoints is a list of dicts with
        eye_left_x/eye_left_y/eye_right_x/eye_right_y per face.
    """
    if back:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 256
        BLAZEFACE_INPUT_IMAGE_WIDTH = 256
    else:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
        BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing: letterbox, BGR->RGB, CHW, scale to [-1, 1]
    image = letterbox_convert(
        frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    face_detections = postprocess(preds_ailia, anchor_path, back=back, min_score_thresh=min_score_thresh)
    face_detections = face_detections[0]

    detections = []
    detections_eyes = []
    # each detection row d holds box corners in d[0..3] and, presumably,
    # eye keypoint coordinates in d[4..7] — layout per BlazeFace postprocess
    for i, d in enumerate(face_detections):
        # face position (d = [ymin, xmin, ymax, xmax, ...])
        obj = ailia.DetectorObject(category=0, prob=1.0, x=d[1], y=d[0], w=d[3] - d[1], h=d[2] - d[0])
        detections.append(obj)
        # keypoints: eyes are stored as zero-sized detector objects so the
        # same letterbox reversal can be applied to them
        obj = ailia.DetectorObject(category=0, prob=1.0, x=d[4], y=d[5], w=0, h=0)
        detections_eyes.append(obj)
        obj = ailia.DetectorObject(category=0, prob=1.0, x=d[6], y=d[7], w=0, h=0)
        detections_eyes.append(obj)

    # revert square from detections
    detections = reverse_letterbox(
        detections, frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    detections_eyes = reverse_letterbox(
        detections_eyes, frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))

    # convert to keypoints: pairs of eye objects -> one dict per face
    keypoints = []
    for i in range(len(detections_eyes) // 2):
        keypoint = {
            "eye_left_x": detections_eyes[i * 2 + 0].x,
            "eye_left_y": detections_eyes[i * 2 + 0].y,
            "eye_right_x": detections_eyes[i * 2 + 1].x,
            "eye_right_y": detections_eyes[i * 2 + 1].y
        }
        keypoints.append(keypoint)

    return detections, keypoints
def recognize_from_image(detector):
    """Run detection on every image in ``args.input`` and save plotted results.

    Args:
        detector: ailia Detector (``compute``) when ``args.detector`` is set,
            otherwise an ailia network (``predict`` + ``post_processing``).

    Supports profiling, benchmark mode and per-image prediction text files.
    Fix: in detector mode ``detect_object`` was never assigned, so combining
    ``--detector`` with ``--write_prediction`` raised ``NameError``.
    """
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        org_img = load_image(image_path)
        if not args.detector:
            org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
        logger.debug(f'input image shape: {org_img.shape}')

        # letterbox -> RGB -> CHW -> [0, 1] -> batch (used by the non-detector path)
        img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, [2, 0, 1])
        img = img.astype(np.float32) / 255
        img = np.expand_dims(img, 0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                if args.detector:
                    detector.compute(org_img, args.threshold, args.iou)
                else:
                    output = detector.predict([img])
                end = int(round(time.time() * 1000))
                # first iteration is treated as warm-up and excluded
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms')
        else:
            if args.detector:
                detector.compute(org_img, args.threshold, args.iou)
            else:
                output = detector.predict([img])

        if not args.detector:
            # decode raw output and map boxes back to the original image
            detect_object = post_processing(img, args.threshold, args.iou, output)
            detect_object = reverse_letterbox(detect_object[0], org_img,
                                              (IMAGE_HEIGHT, IMAGE_WIDTH))
            res_img = plot_results(detect_object, org_img, COCO_CATEGORY)
        else:
            # BUGFIX: the detector itself carries the detections (same
            # convention as the other entry points in this file); without this
            # assignment, write_predictions below hit an undefined name.
            detect_object = detector
            res_img = plot_results(detector, org_img, COCO_CATEGORY)

        # plot result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, org_img, COCO_CATEGORY)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
def recognize_from_video():
    """Run YOLO detection on a video/camera stream and display/save frames.

    Builds the ailia network from the module-level MODEL_PATH/WEIGHT_PATH and
    reads the video source and save path from the global ``args``.  Press 'q'
    in the preview window to stop.
    Fix: a frame with zero detections previously left ``detect_object``
    unassigned and raised ``NameError``; now an empty result is handled and
    the frame is still shown/saved.
    """
    # net initialize
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = f_h, f_w
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        # stop on 'q' key press or end of stream
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        raw_img = frame
        # resize to model input, HWC->NCHW, scale to [0, 1]
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0

        pred = detector.predict(img)
        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)

        # BUGFIX: start from an empty detection list so that frames with no
        # detections still produce a valid (empty) result instead of raising
        # NameError further down.
        output = []
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          raw_img.shape).round()
                img_size_h, img_size_w = raw_img.shape[:2]
                # Write results as relative-coordinate DetectorObject entries
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

        detect_object = reverse_letterbox(output, raw_img,
                                          (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')