Example #1
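An end-to-end webcam/video loop: each frame is letterboxed to the detector's 128x128 input, detected people are cropped for the pose estimator, and the annotated frame is displayed and optionally written to a video file.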
def recognize_from_video():
    # net initialize
    detector = ailia.Net(
        DETECTOR_MODEL_PATH, DETECTOR_WEIGHT_PATH, env_id=args.env_id
    )
    estimator = ailia.Net(
        ESTIMATOR_MODEL_PATH, ESTIMATOR_WEIGHT_PATH, env_id=args.env_id
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # BGR -> RGB, letterbox to 128x128, then normalize to an NCHW float tensor
        _, img128, scale, pad = but.resize_pad(frame[:, :, ::-1])
        input_data = img128.astype('float32') / 255.
        input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

        # inference
        # Person detection
        detector_out = detector.predict([input_data])
        detections = but.detector_postprocess(detector_out)
        count = len(detections) if detections[0].size > 0 else 0

        # Pose estimation
        landmarks = []
        flags = []
        if count > 0:
            img, affine, _ = but.estimator_preprocess(
                frame, detections, scale, pad
            )
            flags, normalized_landmarks, _ = estimator.predict([img])
            landmarks = but.denormalize_landmarks(normalized_landmarks, affine)

        # postprocessing
        display_result(frame, count, landmarks, flags)
        cv2.imshow('frame', frame)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #2
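A reusable per-frame variant of the same pipeline: it runs the estimator on each detected person individually and can optionally draw the detection boxes when `args.bbox` is set.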
def recognize_pose(frame, detector, estimator, out_frame=None):
    _, img128, scale, pad = bput.resize_pad(frame[:, :, ::-1])
    input_data = img128.astype('float32') / 255.
    input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

    # inference
    # Person detection
    detector_out = detector.predict([input_data])
    detections = bput.detector_postprocess(
        detector_out,
        anchor_path="../../pose_estimation/blazepose/anchors.npy",
        min_score_thresh=0.5)
    count = len(detections) if detections[0].size > 0 else 0

    # display bbox
    if args.bbox:
        detections2 = bput.denormalize_detections(detections[0].copy(), scale,
                                                  pad)
        display_hand_box(out_frame, detections2)

    # Pose estimation
    landmarks = []
    flags = []
    if count > 0:
        imgs, affine, _ = bput.estimator_preprocess(frame, detections, scale,
                                                    pad)

        # Run the estimator on each person crop individually
        flags = np.zeros(imgs.shape[0])
        normalized_landmarks = np.zeros((imgs.shape[0], 31, 4))
        for i in range(imgs.shape[0]):
            flag, normalized_landmark, _ = estimator.predict(
                [imgs[i:i + 1, :, :, :]])
            flags[i] = flag
            normalized_landmarks[i] = normalized_landmark

        landmarks = bput.denormalize_landmarks(normalized_landmarks, affine)

    # postprocessing
    display_result_pose(out_frame, count, landmarks, flags)
Example #3
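A function that takes pre-initialized detector and estimator networks and returns the flags and landmarks rather than drawing them itself; note that this detector is fed a 224x224, channel-last input.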
def pose_estimate(net, det_net, img):
    h, w = img.shape[:2]
    src_img = img

    logger.debug(f'input image shape: {img.shape}')

    _, img224, scale, pad = but.resize_pad(img)
    img224 = img224.astype('float32') / 255.
    img224 = np.expand_dims(img224, axis=0)

    detector_out = det_net.predict([img224])
    detections = but.detector_postprocess(detector_out)
    count = len(detections) if detections[0].size != 0 else 0

    # Pose estimation
    imgs = []
    if count > 0:
        imgs, affine, _ = but.estimator_preprocess(
            src_img, detections, scale, pad
        )

    flags = []
    landmarks = []
    for crop_img in imgs:
        crop_img = np.expand_dims(crop_img, axis=0)
        output = net.predict([crop_img])

        normalized_landmarks, f, _, _, _ = output
        normalized_landmarks = postprocess(normalized_landmarks)

        flags.append(f[0])
        landmarks.append(normalized_landmarks[0])

    if len(imgs) > 0:
        landmarks = np.stack(landmarks)
        landmarks = but.denormalize_landmarks(landmarks, affine)

    return flags, landmarks
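For reference, a minimal sketch of driving `pose_estimate` with pre-initialized networks; the model filenames below are placeholders, since the snippet does not show the real path constants.

import ailia
import cv2

# Hypothetical model paths; the real constants live in the surrounding script.
DETECTOR_MODEL_PATH = 'pose_detection.onnx.prototxt'
DETECTOR_WEIGHT_PATH = 'pose_detection.onnx'
ESTIMATOR_MODEL_PATH = 'pose_landmark.onnx.prototxt'
ESTIMATOR_WEIGHT_PATH = 'pose_landmark.onnx'

det_net = ailia.Net(DETECTOR_MODEL_PATH, DETECTOR_WEIGHT_PATH, env_id=0)
net = ailia.Net(ESTIMATOR_MODEL_PATH, ESTIMATOR_WEIGHT_PATH, env_id=0)

# Returns one presence flag and one landmark array per detected person.
flags, landmarks = pose_estimate(net, det_net, cv2.imread('input.jpg'))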
Example #4
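The still-image counterpart of Example #1: the same two-stage pipeline is run over each input path, with an optional benchmark mode that times five inference passes.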
def recognize_from_image():
    # net initialize
    detector = ailia.Net(DETECTOR_MODEL_PATH,
                         DETECTOR_WEIGHT_PATH,
                         env_id=args.env_id)
    estimator = ailia.Net(ESTIMATOR_MODEL_PATH,
                          ESTIMATOR_WEIGHT_PATH,
                          env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        src_img = cv2.imread(image_path)
        _, img128, scale, pad = but.resize_pad(src_img[:, :, ::-1])
        input_data = img128.astype('float32') / 255.
        input_data = np.expand_dims(np.moveaxis(input_data, -1, 0), 0)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for _ in range(5):
                start = int(round(time.time() * 1000))
                # Person detection
                detector_out = detector.predict([input_data])
                detections = but.detector_postprocess(detector_out)
                count = len(detections) if detections[0].size > 0 else 0

                # Pose estimation
                landmarks = []
                flags = []
                if count > 0:
                    img, affine, _ = but.estimator_preprocess(
                        src_img, detections, scale, pad)
                    flags, normalized_landmarks, _ = estimator.predict([img])
                    landmarks = but.denormalize_landmarks(
                        normalized_landmarks, affine)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            # Person detection
            detector_out = detector.predict([input_data])
            detections = but.detector_postprocess(detector_out)
            count = len(detections) if detections[0].size != 0 else 0

            # Pose estimation
            landmarks = []
            flags = []
            if count > 0:
                img, affine, _ = but.estimator_preprocess(
                    src_img, detections, scale, pad)
                flags, normalized_landmarks, _ = estimator.predict([img])
                landmarks = but.denormalize_landmarks(normalized_landmarks,
                                                      affine)

        # postprocessing
        logger.info(f'person_count={count}')
        display_result(src_img, count, landmarks, flags)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, src_img)
    logger.info('Script finished successfully.')
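All four snippets read a module-level `args`; a minimal stand-in covering the attributes they touch might look like the sketch below (the real scripts build `args` with their own shared parser helpers, so every flag here is an assumption).

import argparse

# Hypothetical parser; the real scripts define these flags elsewhere.
parser = argparse.ArgumentParser()
parser.add_argument('--input', nargs='+', default=['input.jpg'])
parser.add_argument('--video', default=None)
parser.add_argument('--savepath', default='output.png')
parser.add_argument('--env_id', type=int, default=0)
parser.add_argument('--benchmark', action='store_true')
parser.add_argument('--bbox', action='store_true')
args = parser.parse_args()

if args.video is not None:
    recognize_from_video()
else:
    recognize_from_image()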