def recognize_from_image(detector):
    """Run detection on the image at ``args.input`` and save the plotted result.

    The image is letterboxed to ``(IMAGE_HEIGHT, IMAGE_WIDTH)``, converted to
    RGB, reordered to channel-first, scaled by 1/255 and given a leading batch
    axis before being passed to ``detector.predict``. The plotted result is
    written to ``args.savepath``.

    Args:
        detector: object exposing ``predict(list_of_inputs)``.
    """
    net_size = (IMAGE_HEIGHT, IMAGE_WIDTH)

    # load the image and convert BGRA -> BGR
    org_img = load_image(args.input)
    print(f'input image shape: {org_img.shape}')
    org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)

    # build the network input: letterbox -> RGB -> CHW -> float32 / 255 -> batch
    img = cv2.cvtColor(letterbox_convert(org_img, net_size), cv2.COLOR_BGR2RGB)
    img = np.transpose(img, [2, 0, 1]).astype(np.float32) / 255
    img = np.expand_dims(img, 0)

    # inference
    print('Start inference...')
    if not args.benchmark:
        output = detector.predict([img])
    else:
        print('BENCHMARK mode')
        for _ in range(5):
            t_begin = int(round(time.time() * 1000))
            output = detector.predict([img])
            t_end = int(round(time.time() * 1000))
            print(f'\tailia processing time {t_end - t_begin} ms')

    # turn raw output into detections, then undo the letterbox against the
    # original image
    candidates = post_processing(img, args.threshold, args.iou, output)
    detect_object = reverse_letterbox(candidates[0], org_img, net_size)

    # draw the detections and save the image
    res_img = plot_results(detect_object, org_img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
def recognize_from_video(detector):
    """Run detection frame by frame on the source given by ``args.video``.

    Each frame is processed either through ``detector.compute`` (when
    ``args.detector`` is set) or through ``detector.predict`` followed by
    ``post_processing`` / ``reverse_letterbox``. The plotted frame is shown in
    a window, optionally appended to a video writer, and optionally
    accompanied by a per-frame prediction text file. The loop stops when a
    frame cannot be read or when ``q`` is pressed.

    Args:
        detector: object exposing ``compute(img, threshold, iou)`` and/or
            ``predict(list_of_inputs)``, depending on ``args.detector``.
    """
    capture = webcamera_utils.get_capture(args.video)
    writer = None

    try:
        # create video writer if savepath is specified as video format
        if args.savepath != SAVE_IMAGE_PATH:
            f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)

        if args.write_prediction:
            frame_count = 0
            # A source may report a frame count of 0 or less (presumably live
            # cameras - verify per backend); math.log10 would raise ValueError
            # on that, so fall back to unpadded frame numbers.
            total_frames = capture.get(cv2.CAP_PROP_FRAME_COUNT)
            if total_frames >= 1:
                frame_digit = int(math.log10(total_frames) + 1)
            else:
                frame_digit = 1
            video_name = os.path.splitext(os.path.basename(args.video))[0]

        while True:
            ret, frame = capture.read()
            if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
                break

            if args.detector:
                img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
                detector.compute(img, args.threshold, args.iou)
                # Bind the detector so the prediction writer below has a
                # result to serialise; previously this name was undefined in
                # detector mode.
                # NOTE(review): assumes write_predictions accepts the detector
                # itself, the same way plot_results does - confirm.
                detect_object = detector
                res_img = plot_results(detector, frame, COCO_CATEGORY)
            else:
                # letterbox -> RGB -> CHW -> float32 / 255 -> batch
                img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
                img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
                img = np.transpose(img, [2, 0, 1])
                img = img.astype(np.float32) / 255
                img = np.expand_dims(img, 0)
                output = detector.predict([img])
                detect_object = post_processing(
                    img, args.threshold, args.iou, output
                )
                detect_object = reverse_letterbox(
                    detect_object[0], frame, (IMAGE_HEIGHT, IMAGE_WIDTH)
                )
                res_img = plot_results(detect_object, frame, COCO_CATEGORY)
            cv2.imshow('frame', res_img)

            # save results
            if writer is not None:
                writer.write(res_img)

            # write prediction
            if args.write_prediction:
                savepath = get_savepath(
                    args.savepath,
                    video_name,
                    post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'),
                    ext='.png',
                )
                pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
                write_predictions(pred_file, detect_object, frame, COCO_CATEGORY)
                frame_count += 1
    finally:
        # release the capture, windows and writer even if a frame fails
        capture.release()
        cv2.destroyAllWindows()
        if writer is not None:
            writer.release()

    logger.info('Script finished successfully.')
def recognize_from_video():
    """Run detection frame by frame on the source given by ``args.video``.

    Builds an ``ailia.Net`` from ``MODEL_PATH`` / ``WEIGHT_PATH``, then for
    each frame: letterboxes it to ``(IMAGE_HEIGHT, IMAGE_WIDTH)``, converts it
    to RGB, reorders it to channel-first, scales it by 1/255, runs the
    network, post-processes the output and shows the plotted frame. When
    ``args.savepath`` differs from ``SAVE_IMAGE_PATH`` the plotted frames are
    also written through a video writer. The loop stops when a frame cannot be
    read or when ``q`` is pressed.
    """
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)
    writer = None

    try:
        # create video writer if savepath is specified as video format
        if args.savepath != SAVE_IMAGE_PATH:
            f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
            f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
            writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)

        while True:
            ret, frame = capture.read()
            if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
                break

            # letterbox -> RGB -> CHW -> float32 / 255 -> batch
            img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.transpose(img, [2, 0, 1])
            img = img.astype(np.float32) / 255
            img = np.expand_dims(img, 0)

            output = detector.predict([img])
            detect_object = yolov5_utils.post_processing(
                img, args.threshold, args.iou, output
            )
            detect_object = reverse_letterbox(
                detect_object[0], frame, (IMAGE_HEIGHT, IMAGE_WIDTH)
            )
            res_img = plot_results(detect_object, frame, COCO_CATEGORY)
            cv2.imshow('frame', res_img)

            # save results
            if writer is not None:
                writer.write(res_img)
    finally:
        # release the capture, windows and writer even if a frame fails
        capture.release()
        cv2.destroyAllWindows()
        if writer is not None:
            writer.release()

    print('Script finished successfully.')
def recognize_from_image():
    """Run detection on every path in ``args.input`` and save each result.

    Builds an ``ailia.Net`` from ``MODEL_PATH`` / ``WEIGHT_PATH``. Each image
    is letterboxed to ``(IMAGE_HEIGHT, IMAGE_WIDTH)``, converted to RGB,
    reordered to channel-first, scaled by 1/255 and batched before inference.
    The plotted result is written to the path returned by ``get_savepath``.
    """
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    net_size = (IMAGE_HEIGHT, IMAGE_WIDTH)

    for image_path in args.input:
        logger.info(image_path)

        # load the image and convert BGRA -> BGR
        org_img = cv2.cvtColor(load_image(image_path), cv2.COLOR_BGRA2BGR)
        logger.info(f'input image shape: {org_img.shape}')

        # letterbox -> RGB -> CHW -> float32 / 255 -> batch
        img = cv2.cvtColor(letterbox_convert(org_img, net_size), cv2.COLOR_BGR2RGB)
        img = np.transpose(img, [2, 0, 1]).astype(np.float32) / 255
        img = np.expand_dims(img, 0)

        # inference
        logger.info('Start inference...')
        if not args.benchmark:
            output = detector.predict([img])
        else:
            logger.info('BENCHMARK mode')
            for _ in range(5):
                t_begin = int(round(time.time() * 1000))
                output = detector.predict([img])
                t_end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {t_end - t_begin} ms')

        # turn raw output into detections, then undo the letterbox against
        # the original image
        candidates = yolov5_utils.post_processing(
            img, args.threshold, args.iou, output
        )
        detect_object = reverse_letterbox(candidates[0], org_img, net_size)

        # draw the detections and save the image
        res_img = plot_results(detect_object, org_img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
def compute_blazeface_with_keypoint(detector, frame, anchor_path='anchors.npy', back=False, min_score_thresh=DEFAULT_MIN_SCORE_THRESH):
    """Detect faces in ``frame`` and return boxes plus two keypoints per face.

    Args:
        detector: object exposing ``predict(list_of_inputs)``.
        frame: BGR image (it is converted with ``cv2.COLOR_BGR2RGB``).
        anchor_path: forwarded to ``postprocess``.
        back: selects a 256x256 network input when truthy, 128x128 otherwise;
            also forwarded to ``postprocess``.
        min_score_thresh: forwarded to ``postprocess``.

    Returns:
        ``(detections, keypoints)``: ``detections`` is the output of
        ``reverse_letterbox`` applied to one ``ailia.DetectorObject`` per
        face; ``keypoints`` is a list with one dict per face holding
        ``eye_left_x``, ``eye_left_y``, ``eye_right_x`` and ``eye_right_y``.
    """
    side = 256 if back else 128
    input_shape = (side, side)

    # preprocessing: letterbox -> RGB -> channel first -> batch -> scale
    image = cv2.cvtColor(letterbox_convert(frame, input_shape), cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing: only the first entry of the result is used
    face_detections = postprocess(
        preds_ailia, anchor_path, back=back, min_score_thresh=min_score_thresh
    )[0]

    detections = []
    detections_eyes = []
    for d in face_detections:
        # face box: d[0], d[1] are used as top / left and d[2], d[3] as
        # bottom / right
        detections.append(
            ailia.DetectorObject(
                category=0, prob=1.0,
                x=d[1], y=d[0], w=d[3] - d[1], h=d[2] - d[0],
            )
        )
        # two keypoints per face, stored as zero-sized boxes so that they can
        # go through the same letterbox reversal as the face boxes
        for x_idx, y_idx in ((4, 5), (6, 7)):
            detections_eyes.append(
                ailia.DetectorObject(
                    category=0, prob=1.0, x=d[x_idx], y=d[y_idx], w=0, h=0,
                )
            )

    # revert square from detections
    detections = reverse_letterbox(detections, frame, input_shape)
    detections_eyes = reverse_letterbox(detections_eyes, frame, input_shape)

    # convert to keypoints: consecutive entries form one (left, right) pair
    keypoints = []
    for base in range(0, len(detections_eyes) - 1, 2):
        left_eye = detections_eyes[base]
        right_eye = detections_eyes[base + 1]
        keypoints.append({
            "eye_left_x": left_eye.x,
            "eye_left_y": left_eye.y,
            "eye_right_x": right_eye.x,
            "eye_right_y": right_eye.y,
        })

    return detections, keypoints
def recognize_from_image(detector):
    """Run detection on every path in ``args.input`` and save each result.

    With ``args.detector`` set, the loaded image is passed straight to
    ``detector.compute``. Otherwise it is converted BGRA -> BGR, letterboxed
    to ``(IMAGE_HEIGHT, IMAGE_WIDTH)``, converted to RGB, reordered to
    channel-first, scaled by 1/255, batched and passed to
    ``detector.predict``, followed by ``post_processing`` and
    ``reverse_letterbox``. The plotted image is written to the path returned
    by ``get_savepath``; with ``args.write_prediction`` a ``.txt`` file is
    written next to it.

    Args:
        detector: object exposing ``compute(img, threshold, iou)`` and/or
            ``predict(list_of_inputs)``, depending on ``args.detector``.
    """
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        org_img = load_image(image_path)
        if not args.detector:
            org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
            logger.debug(f'input image shape: {org_img.shape}')

            # letterbox -> RGB -> CHW -> float32 / 255 -> batch
            img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.transpose(img, [2, 0, 1])
            img = img.astype(np.float32) / 255
            img = np.expand_dims(img, 0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                if args.detector:
                    detector.compute(org_img, args.threshold, args.iou)
                else:
                    output = detector.predict([img])
                end = int(round(time.time() * 1000))
                # the first run is excluded from the average
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            # With a single run there is nothing to average; skip the report
            # instead of dividing by zero.
            averaged_runs = args.benchmark_count - 1
            if averaged_runs > 0:
                logger.info(f'\taverage time {total_time / averaged_runs} ms')
        else:
            if args.detector:
                detector.compute(org_img, args.threshold, args.iou)
            else:
                output = detector.predict([img])

        if args.detector:
            # Bind the detector so the prediction writer below has a result
            # to serialise; previously this name was undefined (or stale from
            # an earlier image) in detector mode.
            # NOTE(review): assumes write_predictions accepts the detector
            # itself, the same way plot_results does - confirm.
            detect_object = detector
            res_img = plot_results(detector, org_img, COCO_CATEGORY)
        else:
            detect_object = post_processing(img, args.threshold, args.iou, output)
            detect_object = reverse_letterbox(
                detect_object[0], org_img, (IMAGE_HEIGHT, IMAGE_WIDTH)
            )
            res_img = plot_results(detect_object, org_img, COCO_CATEGORY)

        # save the plotted result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, org_img, COCO_CATEGORY)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')