def recognize_from_video():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(args.savepath, IMAGE_HEIGHT,
                                            IMAGE_WIDTH)
    else:
        writer = None

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        # get detected face
        if len(detections) == 0:
            crop_img = frame
        else:
            crop_img, top_left, bottom_right = crop_blazeface(
                detections[0], FACE_MARGIN, frame)
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                crop_img = frame

        # preprocess
        input_image, input_data = webcamera_utils.preprocess_frame(
            crop_img, IMAGE_HEIGHT, IMAGE_WIDTH, data_rgb=False)

        # inference
        preds_ailia = net.predict(input_data)[0]

        # postprocessing: render predictions with matplotlib, then round-trip
        # the figure through a temporary PNG to get a BGR array for OpenCV
        fig = gen_img_from_predsailia(input_data, preds_ailia)
        fig.savefig('tmp.png')
        img = cv2.imread('tmp.png')
        cv2.imshow('frame', img)

        # save results
        if writer is not None:
            img = cv2.resize(img, (IMAGE_WIDTH, IMAGE_HEIGHT))
            writer.write(img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    if os.path.exists('tmp.png'):
        os.remove('tmp.png')
    print('Script finished successfully.')
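# A minimal sketch (not part of the original sample) of rendering the
# matplotlib figure to a BGR array in memory instead of round-tripping it
# through 'tmp.png'; it assumes the Agg backend, where buffer_rgba() is
# available on the canvas after draw().
def figure_to_bgr(fig):
    fig.canvas.draw()
    rgba = np.asarray(fig.canvas.buffer_rgba())  # H x W x 4, uint8
    return cv2.cvtColor(rgba, cv2.COLOR_RGBA2BGR)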
def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
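    # (each detection is enlarged by 50% and shifted slightly upward so the
    #  crop covers the whole face, forehead included)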
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame
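# 'softmax' above is assumed to come from the sample's utility imports; a
# minimal NumPy equivalent, for reference only:
def softmax_ref(x, axis=None):
    e = np.exp(x - np.max(x, axis=axis, keepdims=True))  # shift by the max for stability
    return e / np.sum(e, axis=axis, keepdims=True)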
def recognize_from_video():
    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
    if args.active_3d:
        logger.info('>>> 3D mode is activated!')
        depth_net = ailia.Net(DEPTH_MODEL_PATH,
                              DEPTH_WEIGHT_PATH,
                              env_id=args.env_id)
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning('Video results output is not currently supported '
                       'by this model!')
        # TODO: shape should be debugged!
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    fig, axs = create_figure(active_3d=args.active_3d)

    while True:
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        # get detected face
        if len(detections) == 0:
            crop_img = frame
        else:
            crop_img, top_left, bottom_right = crop_blazeface(
                detections[0], FACE_MARGIN, frame)
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                crop_img = frame

        # preprocess
        input_image, input_data = webcamera_utils.preprocess_frame(
            crop_img, IMAGE_HEIGHT, IMAGE_WIDTH, normalize_type='255')

        # inference
        preds_ailia = net.predict(input_data)

        pts, pts_img = get_preds_from_hm(preds_ailia)
        pts, pts_img = pts.reshape(68, 2) * 4, pts_img.reshape(68, 2)

        if args.active_3d:
            # 3D mode
            heatmaps = np.zeros((68, IMAGE_HEIGHT, IMAGE_WIDTH),
                                dtype=np.float32)
            for i in range(68):
                if pts[i, 0] > 0:
                    heatmaps[i] = draw_gaussian(heatmaps[i], pts[i], 2)
            heatmaps = heatmaps[np.newaxis, :, :, :]
            depth_pred = depth_net.predict(
                np.concatenate((input_data, heatmaps), 1))
            depth_pred = depth_pred.reshape(68, 1)
            pts_img = np.concatenate((pts_img, depth_pred * 2), 1)

        resized_img = cv2.resize(cv2.cvtColor(input_image, cv2.COLOR_BGR2RGB),
                                 (IMAGE_WIDTH, IMAGE_HEIGHT))

        # visualize results (axes are cleared inside visualize_results)
        axs = visualize_results(axs,
                                resized_img,
                                pts_img,
                                active_3d=args.active_3d)
        plt.pause(0.01)
        if not plt.get_fignums():
            break

        # save results
        # FIXME: How to save plt --> cv2.VideoWriter()
        # if writer is not None:
        #     # put pixel buffer in numpy array
        #     canvas = FigureCanvas(fig)
        #     canvas.draw()
        #     mat = np.array(canvas.renderer._renderer)
        #     res_img = cv2.cvtColor(mat, cv2.COLOR_RGB2BGR)
        #     writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
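# 'draw_gaussian' above rasterises a 2D Gaussian blob centred on a landmark
# into its heatmap channel; a minimal sketch of the idea (the sample's own
# helper may differ in windowing and normalisation):
def draw_gaussian_sketch(heatmap, center, sigma):
    h, w = heatmap.shape
    ys, xs = np.mgrid[0:h, 0:w]
    g = np.exp(-((xs - center[0]) ** 2 + (ys - center[1]) ** 2) / (2.0 * sigma ** 2))
    return np.maximum(heatmap, g.astype(heatmap.dtype))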
def recognize_from_video():
    # net initialize
    emotion_classifier = ailia.Classifier(
        EMOTION_MODEL_PATH,
        EMOTION_WEIGHT_PATH,
        env_id=args.env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_GRAY,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
    )
    gender_classifier = ailia.Classifier(
        GENDER_MODEL_PATH,
        GENDER_WEIGHT_PATH,
        env_id=args.env_id,
        format=ailia.NETWORK_IMAGE_FORMAT_GRAY,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
    )
    detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath is not None:
        print('[WARNING] video results output is not currently supported '
              'by this model!')
        # TODO: shape should be debugged!
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while True:
        ret, frame = capture.read()

        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # detect face
        # WIP: FIXME: AiliaInvalidArgumentException error
        detections = compute_blazeface(
            detector,
            frame,
            anchor_path='../../face_detection/blazeface/anchors.npy',
        )

        for obj in detections:
            # get detected face
            crop_img, top_left, bottom_right = crop_blazeface(
                obj, FACE_MARGIN, frame
            )
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue
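            # the ailia Classifier API takes a BGRA image, hence the conversion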
            crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2BGRA)

            # emotion inference
            emotion_classifier.compute(crop_img, EMOTION_MAX_CLASS_COUNT)
            count = emotion_classifier.get_class_count()
            print('=' * 80)
            print(f'emotion_class_count={count}')

            # print result
            emotion_text = ""
            for idx in range(count):
                print(f'+ idx={idx}')
                info = emotion_classifier.get_class(idx)
                print(
                    f'  category={info.category} ' +
                    f'[ {EMOTION_CATEGORY[info.category]} ]'
                )
                print(f'  prob={info.prob}')
                if idx == 0:
                    emotion_text = (f'[ {EMOTION_CATEGORY[info.category]} ] '
                                    f'prob={info.prob:.3f}')
            print('')

            # gender inference
            gender_text = ""
            gender_classifier.compute(crop_img, GENDER_MAX_CLASS_COUNT)
            count = gender_classifier.get_class_count()
            # print result
            for idx in range(count):
                print(f'+ idx={idx}')
                info = gender_classifier.get_class(idx)
                print(
                    f'  category={info.category} ' +
                    f'[ {GENDER_CATEGORY[info.category]} ]'
                )
                print(f'  prob={info.prob}')
                if idx == 0:
                    gender_text = (f'[ {GENDER_CATEGORY[info.category]} ] '
                                   f'prob={info.prob:.3f}')
            print('')

            # display label
            LABEL_WIDTH = 400
            LABEL_HEIGHT = 20
            color = (255, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
            cv2.rectangle(
                frame,
                top_left,
                (top_left[0]+LABEL_WIDTH, top_left[1]+LABEL_HEIGHT),
                color,
                thickness=-1,
            )

            text_position = (top_left[0], top_left[1]+LABEL_HEIGHT//2)
            color = (0, 0, 0)
            fontScale = 0.5
            cv2.putText(
                frame,
                emotion_text + " " + gender_text,
                text_position,
                cv2.FONT_HERSHEY_SIMPLEX,
                fontScale,
                color,
                1,
            )

            # show result
            cv2.imshow('frame', frame)
            time.sleep(SLEEP_TIME)

            # save results
            if writer is not None:
                writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
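# The rectangle-plus-putText label pattern above recurs in the examples below;
# a small hypothetical helper capturing it (not part of the original samples):
def draw_label(frame, top_left, text, box_color, width=400, height=20):
    cv2.rectangle(frame, top_left,
                  (top_left[0] + width, top_left[1] + height),
                  box_color, thickness=-1)
    cv2.putText(frame, text, (top_left[0], top_left[1] + height // 2),
                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 1)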
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate emotion
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        emotion = predict(net, crop_img)
        idx = np.argmax(emotion)
        emotion = emotion_table[idx]

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]  # label spans the face box width
        LABEL_HEIGHT = 20
        color = (255, 128, 128)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            emotion,
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )
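# 'predict' above is defined elsewhere in the sample; a minimal sketch of a
# typical FER-style pipeline under assumed settings (64x64 grayscale input,
# softmax over the raw scores):
def predict_sketch(net, gray_img, input_size=64):
    img = cv2.resize(gray_img, (input_size, input_size)).astype(np.float32)
    img = img[np.newaxis, np.newaxis, :, :]  # add batch and channel dims
    scores = net.predict([img])[0][0]
    e = np.exp(scores - scores.max())
    return e / e.sum()  # softmax over the emotion classes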
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate age and gender
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = cv2.resize(crop_img, (IMAGE_SIZE, IMAGE_SIZE))
        img = np.expand_dims(img, axis=0)  # add batch dimension

        # inference
        output = net.predict([img])
        prob, age_conv3 = output
        prob = prob[0][0][0]
        age_conv3 = age_conv3[0][0][0][0]
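        # prob holds the gender scores ([female, male]); age_conv3 is the
        # predicted age scaled to 0-1, hence the *100 below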

        i = np.argmax(prob)
        gender = 'Female' if i == 0 else 'Male'
        age = round(age_conv3 * 100)

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]  # label spans the face box width
        LABEL_HEIGHT = 20
        if gender == "Male":
            color = (255, 128, 128)
        else:
            color = (128, 128, 255)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            "{} {}".format(gender, age),
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )