Code example #1
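This first version targets the legacy IECore API. The input is opened either as a still image (ImageReader, waitKey delay 0, i.e. wait for a key press) or as a video stream (VideoReader, delay 1). For every frame, the person detector produces bounding boxes, each box is passed to the single-pose estimator, the keypoints are printed and drawn, and an FPS summary computed from the raw inference times is overlaid before the frame is displayed.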
def run_demo(args):
    ie = IECore()
    detector_person = Detector(ie,
                               path_to_model_xml=args.model_od,
                               device=args.device,
                               label_class=args.person_label)

    single_human_pose_estimator = HumanPoseEstimator(
        ie, path_to_model_xml=args.model_hpe, device=args.device)
    if args.input != '':
        img = cv2.imread(args.input[0], cv2.IMREAD_COLOR)
        frames_reader, delay = (VideoReader(
            args.input), 1) if img is None else (ImageReader(args.input), 0)
    else:
        raise ValueError('--input has to be set')

    for frame in frames_reader:
        bboxes = detector_person.detect(frame)
        human_poses = [
            single_human_pose_estimator.estimate(frame, bbox)
            for bbox in bboxes
        ]

        colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0),
                          2)
            print("{} keypoints".format(len(pose)))
            for id_kpt, kpt in enumerate(pose):
                print("Position: {} id:{}".format((int(kpt[0]), int(kpt[1])),
                                                  id_kpt))
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 6,
                           colors[id_kpt], -1)

        cv2.putText(
            frame,
            'summary: {:.1f} FPS (estimation: {:.1f} FPS / detection: {:.1f} FPS)'
            .format(
                float(1 / (detector_person.infer_time +
                           single_human_pose_estimator.infer_time *
                           len(human_poses))),
                float(1 / single_human_pose_estimator.infer_time),
                float(1 / detector_person.infer_time)), (5, 15),
            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200))
        if args.no_show:
            continue
        cv2.imshow('Human Pose Estimation Demo', frame)
        key = cv2.waitKey(delay)
        if key == 27:
            return
Code example #2
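The second version replaces the hand-rolled readers with open_images_capture, adds an optional MJPG video writer (--output, capped by --output_limit), overlays utilization graphs via monitors.Presenter, and drops the per-keypoint console output; the presenter's averaged readings are printed once the stream ends.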
def run_demo(args):
    ie = IECore()
    detector_person = Detector(ie,
                               path_to_model_xml=args.model_od,
                               device=args.device,
                               label_class=args.person_label)

    single_human_pose_estimator = HumanPoseEstimator(
        ie, path_to_model_xml=args.model_hpe, device=args.device)
    cap = open_images_capture(args.input, args.loop)
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")
    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))

    video_writer = cv2.VideoWriter()
    if args.output and not video_writer.open(
            args.output, cv2.VideoWriter_fourcc(*'MJPG'), cap.fps(),
        (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [
            single_human_pose_estimator.estimate(frame, bbox)
            for bbox in bboxes
        ]

        presenter.drawGraphs(frame)

        colors = [(0, 0, 255), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0), (255, 0, 0), (0, 255, 0), (255, 0, 0),
                  (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]),
                          (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0),
                          2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3,
                           colors[id_kpt], -1)

        cv2.putText(
            frame,
            'summary: {:.1f} FPS (estimation: {:.1f} FPS / detection: {:.1f} FPS)'
            .format(
                float(1 / (detector_person.infer_time +
                           single_human_pose_estimator.infer_time *
                           len(human_poses))),
                float(1 / single_human_pose_estimator.infer_time),
                float(1 / detector_person.infer_time)), (5, 15),
            cv2.FONT_HERSHEY_COMPLEX, 0.5, (0, 0, 200))

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or
                                        frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)
        frame = cap.read()
    print(presenter.reportMeans())
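Code example #3
The third version migrates from IECore to the OpenVINO Runtime 2.0 Core API, logs the runtime build and the model loading steps, and replaces the hand-computed FPS overlay with PerformanceMetrics, which tracks end-to-end latency per frame and reports totals after the loop.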
def run_demo(args):
    cap = open_images_capture(args.input, args.loop)

    log.info('OpenVINO Runtime')
    log.info('\tbuild: {}'.format(get_version()))
    core = Core()

    log.info('Reading Object Detection model {}'.format(args.model_od))
    detector_person = Detector(core, args.model_od,
                               device=args.device,
                               label_class=args.person_label)
    log.info('The Object Detection model {} is loaded to {}'.format(args.model_od, args.device))

    log.info('Reading Human Pose Estimation model {}'.format(args.model_hpe))
    single_human_pose_estimator = HumanPoseEstimator(core, args.model_hpe,
                                                     device=args.device)
    log.info('The Human Pose Estimation model {} is loaded to {}'.format(args.model_hpe, args.device))

    delay = int(cap.get_type() in ('VIDEO', 'CAMERA'))
    video_writer = cv2.VideoWriter()

    frames_processed = 0
    presenter = monitors.Presenter(args.utilization_monitors, 25)
    metrics = PerformanceMetrics()

    start_time = perf_counter()
    frame = cap.read()
    if frame is None:
        raise RuntimeError("Can't read an image from the input")

    if args.output and not video_writer.open(args.output, cv2.VideoWriter_fourcc(*'MJPG'),
                                             cap.fps(), (frame.shape[1], frame.shape[0])):
        raise RuntimeError("Can't open video writer")

    while frame is not None:
        bboxes = detector_person.detect(frame)
        human_poses = [single_human_pose_estimator.estimate(frame, bbox) for bbox in bboxes]

        presenter.drawGraphs(frame)

        colors = [(0, 0, 255),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0),
                  (255, 0, 0), (0, 255, 0), (255, 0, 0), (0, 255, 0)]

        for pose, bbox in zip(human_poses, bboxes):
            cv2.rectangle(frame, (bbox[0], bbox[1]), (bbox[0] + bbox[2], bbox[1] + bbox[3]), (255, 0, 0), 2)
            for id_kpt, kpt in enumerate(pose):
                cv2.circle(frame, (int(kpt[0]), int(kpt[1])), 3, colors[id_kpt], -1)

        metrics.update(start_time, frame)

        frames_processed += 1
        if video_writer.isOpened() and (args.output_limit <= 0 or frames_processed <= args.output_limit):
            video_writer.write(frame)

        if not args.no_show:
            cv2.imshow('Human Pose Estimation Demo', frame)
            key = cv2.waitKey(delay)
            if key == 27:
                break
            presenter.handleKey(key)

        start_time = perf_counter()
        frame = cap.read()

    metrics.log_total()
    for rep in presenter.reportMeans():
        log.info(rep)
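All three listings assume an argparse namespace with the same attribute names. As a rough guide, here is a minimal sketch of a compatible parser; the option names mirror the attributes accessed above, while the flags, defaults, and help strings are assumptions rather than the demo's actual CLI. Note that the first listing indexes args.input[0], so it expected a list of inputs; this sketch matches the later listings, where the input is a single string.

from argparse import ArgumentParser

def build_argparser():
    # Names follow the attributes used in run_demo(); defaults are assumptions.
    parser = ArgumentParser()
    parser.add_argument('--model_od', required=True,
                        help='Path to the object detection model (.xml)')
    parser.add_argument('--model_hpe', required=True,
                        help='Path to the single human pose estimation model (.xml)')
    parser.add_argument('-i', '--input', required=True,
                        help='Image, video file, camera id, or directory of images')
    parser.add_argument('-d', '--device', default='CPU',
                        help='Inference device, e.g. CPU or GPU')
    parser.add_argument('--person_label', type=int, default=1,
                        help='Label of the person class in the detection model')
    parser.add_argument('--loop', action='store_true',
                        help='Loop the input stream')
    parser.add_argument('-o', '--output', default='',
                        help='Optional path for saving the rendered video')
    parser.add_argument('--output_limit', type=int, default=1000,
                        help='Max number of frames to write (<=0 means unlimited)')
    parser.add_argument('--no_show', action='store_true',
                        help='Do not open the preview window')
    parser.add_argument('-u', '--utilization_monitors', default='',
                        help='Monitors string passed to monitors.Presenter')
    return parser

if __name__ == '__main__':
    run_demo(build_argparser().parse_args())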