Code example #1
def recognize_from_image(detector):
    # prepare input data
    org_img = load_image(args.input)
    print(f'input image shape: {org_img.shape}')

    org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
    img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, [2, 0, 1])
    img = img.astype(np.float32) / 255
    img = np.expand_dims(img, 0)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            output = detector.predict([img])
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        output = detector.predict([img])
    detect_object = post_processing(img, args.threshold, args.iou, output)
    detect_object = reverse_letterbox(detect_object[0], org_img,
                                      (IMAGE_HEIGHT, IMAGE_WIDTH))

    # plot result
    res_img = plot_results(detect_object, org_img, COCO_CATEGORY)

    # save result
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
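
The function above expects an already-initialized detector. A minimal driver sketch (not from the source) that wires one up the same way code examples #3 and #4 do; MODEL_PATH, WEIGHT_PATH and the args namespace are assumed to exist as in those examples.

import ailia

# Hypothetical entry point: build the network as the later examples do,
# then hand it to recognize_from_image() defined above.
if __name__ == '__main__':
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    recognize_from_image(detector)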
Code example #2
def recognize_from_video(detector):
    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    if args.write_prediction:
        frame_count = 0
        frame_digit = int(math.log10(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1)
        video_name = os.path.splitext(os.path.basename(args.video))[0]

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        if args.detector:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, args.threshold, args.iou)
            res_img = plot_results(detector, frame, COCO_CATEGORY)
            # keep a reference for the write_prediction branch below,
            # which otherwise raises a NameError in detector mode
            detect_object = detector
        else:
            img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.transpose(img, [2, 0, 1])
            img = img.astype(np.float32) / 255
            img = np.expand_dims(img, 0)

            output = detector.predict([img])
            detect_object = post_processing(
                img, args.threshold, args.iou, output
            )
            detect_object = reverse_letterbox(detect_object[0], frame,
                                              (IMAGE_HEIGHT, IMAGE_WIDTH))
            res_img = plot_results(detect_object, frame, COCO_CATEGORY)

        cv2.imshow('frame', res_img)
        # save results
        if writer is not None:
            writer.write(res_img)

        # write prediction
        if args.write_prediction:
            savepath = get_savepath(
                args.savepath, video_name,
                post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'),
                ext='.png')
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, frame, COCO_CATEGORY)
            frame_count += 1

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
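
Every example relies on letterbox_convert() from the repository's utility module. Below is a hedged sketch of the standard technique it implements (resize to fit the model resolution while preserving aspect ratio, then pad the remainder); the function name is illustrative, not the repository's actual implementation.

import cv2
import numpy as np

def letterbox_convert_sketch(frame, det_shape):
    # det_shape is (height, width); keep aspect ratio, pad the rest with black
    det_h, det_w = det_shape
    img_h, img_w = frame.shape[:2]
    scale = min(det_h / img_h, det_w / img_w)
    new_w, new_h = int(img_w * scale), int(img_h * scale)
    resized = cv2.resize(frame, (new_w, new_h))
    canvas = np.zeros((det_h, det_w, 3), dtype=frame.dtype)
    pad_t = (det_h - new_h) // 2
    pad_l = (det_w - new_w) // 2
    canvas[pad_t:pad_t + new_h, pad_l:pad_l + new_w] = resized
    return canvas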
Code example #3
def recognize_from_video():
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = letterbox_convert(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, [2, 0, 1])
        img = img.astype(np.float32) / 255
        img = np.expand_dims(img, 0)

        output = detector.predict([img])
        detect_object = yolov5_utils.post_processing(img, args.threshold,
                                                     args.iou, output)
        detect_object = reverse_letterbox(detect_object[0], frame,
                                          (IMAGE_HEIGHT, IMAGE_WIDTH))

        res_img = plot_results(detect_object, frame, COCO_CATEGORY)

        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
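
The same four-line preprocessing block (letterbox, BGR to RGB, HWC to CHW, scale to [0, 1], batch axis) recurs in every example. A small helper, given the hypothetical name preprocess_frame, factors it out; it assumes letterbox_convert from the repository is importable.

import cv2
import numpy as np

def preprocess_frame(frame, height, width):
    # letterbox to the model resolution, then match the layout the network expects
    img = letterbox_convert(frame, (height, width))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = np.transpose(img, [2, 0, 1])   # HWC -> CHW
    img = img.astype(np.float32) / 255   # [0, 255] -> [0, 1]
    return np.expand_dims(img, 0)        # add batch axis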
Code example #4
def recognize_from_image():
    # net initialize
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        logger.info(image_path)

        # prepare input data
        org_img = load_image(image_path)
        org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
        logger.info(f'input image shape: {org_img.shape}')

        img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))

        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = np.transpose(img, [2, 0, 1])
        img = img.astype(np.float32) / 255
        img = np.expand_dims(img, 0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                output = detector.predict([img])
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            output = detector.predict([img])
        detect_object = yolov5_utils.post_processing(img, args.threshold,
                                                     args.iou, output)
        detect_object = reverse_letterbox(detect_object[0], org_img,
                                          (IMAGE_HEIGHT, IMAGE_WIDTH))

        # plot result
        res_img = plot_results(detect_object, org_img, COCO_CATEGORY)

        # save result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)
    logger.info('Script finished successfully.')
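
The benchmark loops above time detector.predict() with time.time(); here is a hedged alternative sketch using time.perf_counter(), which offers finer resolution, and dropping the first warm-up run the way code example #6 does. The helper name is illustrative.

import time

def benchmark_predict(detector, img, runs=5):
    times = []
    for _ in range(runs):
        start = time.perf_counter()
        detector.predict([img])
        times.append((time.perf_counter() - start) * 1000.0)  # ms
    # discard the warm-up run before averaging
    return sum(times[1:]) / max(len(times) - 1, 1)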
Code example #5
def compute_blazeface_with_keypoint(detector,
                                    frame,
                                    anchor_path='anchors.npy',
                                    back=False,
                                    min_score_thresh=DEFAULT_MIN_SCORE_THRESH):
    if back:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 256
        BLAZEFACE_INPUT_IMAGE_WIDTH = 256
    else:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
        BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    image = letterbox_convert(
        frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    face_detections = postprocess(preds_ailia,
                                  anchor_path,
                                  back=back,
                                  min_score_thresh=min_score_thresh)
    face_detections = face_detections[0]

    detections = []
    detections_eyes = []
    for d in face_detections:
        # face position
        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[1],
                                   y=d[0],
                                   w=d[3] - d[1],
                                   h=d[2] - d[0])
        detections.append(obj)

        # keypoints
        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[4],
                                   y=d[5],
                                   w=0,
                                   h=0)
        detections_eyes.append(obj)

        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[6],
                                   y=d[7],
                                   w=0,
                                   h=0)
        detections_eyes.append(obj)

    # revert square from detections
    detections = reverse_letterbox(
        detections, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    detections_eyes = reverse_letterbox(
        detections_eyes, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))

    # convert to keypoints
    keypoints = []
    for i in range(len(detections_eyes) // 2):
        keypoint = {
            "eye_left_x": detections_eyes[i * 2 + 0].x,
            "eye_left_y": detections_eyes[i * 2 + 0].y,
            "eye_right_x": detections_eyes[i * 2 + 1].x,
            "eye_right_y": detections_eyes[i * 2 + 1].y
        }
        keypoints.append(keypoint)

    return detections, keypoints
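
A hypothetical usage sketch: run the front-camera model on a single frame and draw the recovered eye keypoints. BLAZEFACE_MODEL_PATH and BLAZEFACE_WEIGHT_PATH are placeholder names, and the keypoint coordinates are assumed to be normalized to [0, 1] in frame space, matching the DetectorObject convention that plot_results relies on.

import ailia
import cv2

blazeface = ailia.Net(BLAZEFACE_MODEL_PATH, BLAZEFACE_WEIGHT_PATH, env_id=args.env_id)
frame = cv2.imread('input.png')
detections, keypoints = compute_blazeface_with_keypoint(blazeface, frame)

h, w = frame.shape[:2]
for kp in keypoints:
    # scale the assumed-normalized coordinates to pixels before drawing
    cv2.circle(frame, (int(kp['eye_left_x'] * w), int(kp['eye_left_y'] * h)),
               3, (0, 255, 0), -1)
    cv2.circle(frame, (int(kp['eye_right_x'] * w), int(kp['eye_right_y'] * h)),
               3, (0, 255, 0), -1)
cv2.imwrite('output.png', frame)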
Code example #6
def recognize_from_image(detector):
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        org_img = load_image(image_path)

        if not args.detector:
            org_img = cv2.cvtColor(org_img, cv2.COLOR_BGRA2BGR)
            logger.debug(f'input image shape: {org_img.shape}')

            img = letterbox_convert(org_img, (IMAGE_HEIGHT, IMAGE_WIDTH))

            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = np.transpose(img, [2, 0, 1])
            img = img.astype(np.float32) / 255
            img = np.expand_dims(img, 0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                if args.detector:
                    detector.compute(org_img, args.threshold, args.iou)
                else:
                    output = detector.predict([img])
                end = int(round(time.time() * 1000))
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            logger.info(f'\taverage time {total_time / (args.benchmark_count - 1)} ms')
        else:
            if args.detector:
                detector.compute(org_img, args.threshold, args.iou)
            else:
                output = detector.predict([img])

        if not args.detector:
            detect_object = post_processing(img, args.threshold, args.iou, output)
            detect_object = reverse_letterbox(detect_object[0], org_img,
                                              (IMAGE_HEIGHT, IMAGE_WIDTH))
            res_img = plot_results(detect_object, org_img, COCO_CATEGORY)
        else:
            res_img = plot_results(detector, org_img, COCO_CATEGORY)
            # keep a reference so the write_prediction branch below
            # does not hit a NameError in detector mode
            detect_object = detector

        # save result
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detect_object, org_img, COCO_CATEGORY)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
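
This example switches between a raw ailia.Net and the SDK's built-in detector path on args.detector. A hedged sketch (not from the source) of how that flag typically selects the object to pass in; the ailia.Detector constructor and constant names follow the SDK's documented convention but should be verified against the installed SDK version.

import ailia

if args.detector:
    # assumed constructor arguments, per the ailia SDK convention
    detector = ailia.Detector(
        MODEL_PATH, WEIGHT_PATH, len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_BGRA,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV5,  # assumed constant name
        env_id=args.env_id,
    )
else:
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
recognize_from_image(detector)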