Example #1
def predictions_to_object(predictions, raw_img, ratio, nms_thr, score_thr):
    boxes = predictions[:, :4]
    scores = predictions[:, 4:5] * predictions[:, 5:]

    boxes_xyxy = np.ones_like(boxes)
    boxes_xyxy[:, 0] = boxes[:, 0] - boxes[:, 2] / 2.
    boxes_xyxy[:, 1] = boxes[:, 1] - boxes[:, 3] / 2.
    boxes_xyxy[:, 2] = boxes[:, 0] + boxes[:, 2] / 2.
    boxes_xyxy[:, 3] = boxes[:, 1] + boxes[:, 3] / 2.
    boxes_xyxy /= ratio
    dets = multiclass_nms(boxes_xyxy, scores, nms_thr, score_thr)

    detect_object = []
    if dets is not None:
        img_size_h, img_size_w = raw_img.shape[:2]
        final_boxes = dets[:, :4]
        final_scores = dets[:, 4]
        final_cls_inds = dets[:, 5]
        for i, box in enumerate(final_boxes):
            x1, y1, x2, y2 = box
            c = int(final_cls_inds[i])
            r = ailia.DetectorObject(
                category=c,
                prob=final_scores[i],
                x=x1 / img_size_w,
                y=y1 / img_size_h,
                w=(x2 - x1) / img_size_w,
                h=(y2 - y1) / img_size_h,
            )
            detect_object.append(r)

    return detect_object
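
The conversion above turns center-format (cx, cy, w, h) boxes into corner-format (x1, y1, x2, y2) and rescales them by the preprocessing ratio before NMS. A minimal NumPy sketch of that step in isolation (the prediction values and ratio below are made up for illustration):

import numpy as np

# one dummy prediction in (cx, cy, w, h) at network-input scale
pred = np.array([[320.0, 240.0, 100.0, 60.0]])
ratio = 0.5  # assumed resize ratio: network input size / original image size

xyxy = np.empty_like(pred)
xyxy[:, 0] = pred[:, 0] - pred[:, 2] / 2.0  # x1
xyxy[:, 1] = pred[:, 1] - pred[:, 3] / 2.0  # y1
xyxy[:, 2] = pred[:, 0] + pred[:, 2] / 2.0  # x2
xyxy[:, 3] = pred[:, 1] + pred[:, 3] / 2.0  # y2
xyxy /= ratio  # undo the preprocessing resize

print(xyxy)  # [[540. 420. 740. 540.]]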
Example #2
def compute_blazeface(detector, frame):
    BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
    BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(img, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    org_detections = []
    blaze_face_detections = postprocess(preds_ailia)
    for idx in range(len(blaze_face_detections)):
        obj = blaze_face_detections[idx]
        if len(obj) == 0:
            continue
        d = obj[0]
        obj = ailia.DetectorObject(
            category=0,
            prob=1.0,
            x=d[1],
            y=d[0],
            w=d[3] - d[1],
            h=d[2] - d[0],
        )
        org_detections.append(obj)

    return org_detections
Example #3
def post_processing(img_shape, all_boxes, all_scores, indices):
    indices = indices.astype(np.int64)

    bboxes = []
    for idx_ in indices[0]:
        cls_ind = idx_[1]
        score = all_scores[tuple(idx_)]

        idx_1 = (idx_[0], idx_[2])
        box = all_boxes[idx_1]
        y, x, y2, x2 = box
        w = (x2 - x) / img_shape[1]
        h = (y2 - y) / img_shape[0]
        x /= img_shape[1]
        y /= img_shape[0]

        r = ailia.DetectorObject(
            category=cls_ind,
            prob=score,
            x=x,
            y=y,
            w=w,
            h=h,
        )
        bboxes.append(r)

    return bboxes
Example #4
def recognize_from_image():
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.debug(f'input image: {image_path}')
        raw_img = cv2.imread(image_path)
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0
        logger.debug(f'input image shape: {raw_img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                pred = detector.predict(img)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            pred = detector.predict(img)

        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)
        output = []
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          raw_img.shape).round()
                img_size_h, img_size_w = raw_img.shape[:2]
                # Write results
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

        detect_object = reverse_letterbox(output, raw_img,
                                          (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    logger.info('Script finished successfully.')
Example #5
def recognize_from_frame(net, detector, frame):
    spoof_thresh = args.spoof_thresh

    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)

    # crop, preprocess
    images = []
    detections = []
    for obj in new_detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = preprocess(crop_img)
        images.append(img)
        detections.append(
            (top_left[0], top_left[1], bottom_right[0], bottom_right[1]))

    if not images:
        return frame

    images = np.concatenate(images)

    # feedforward
    output = net.predict([images])
    logits = output[0]
    preds = softmax(logits, axis=1)

    frame = draw_detections(frame, detections, preds, spoof_thresh)

    return frame
Example #6
def convert_to_ailia_detector_object(bboxes, scores, cls_inds, w, h):
    detector_object = []
    for i, box in enumerate(bboxes):
        cls_indx = int(cls_inds[i])

        r = ailia.DetectorObject(
            category=cls_indx,
            prob=scores[i],
            x=box[0] / w,
            y=box[1] / h,
            w=(box[2] - box[0]) / w,
            h=(box[3] - box[1]) / h,
        )

        detector_object.append(r)

    return detector_object
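
Because this converter has no dependency beyond ailia itself, it is easy to exercise with dummy data. A small usage sketch (the box, score, class index and image size below are invented; it assumes `ailia` is importable and the function above is in scope):

import numpy as np

bboxes = np.array([[100.0, 50.0, 300.0, 250.0]])  # pixel-space xyxy box
scores = np.array([0.9])
cls_inds = np.array([2])

objs = convert_to_ailia_detector_object(bboxes, scores, cls_inds, 640, 480)
# objs[0]: x=0.15625, y=0.104..., w=0.3125, h=0.416... (all relative to image size)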
Example #7
def compute_blazeface_with_keypoint(detector, frame, anchor_path='anchors.npy'):
    BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
    BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    image = cv2.resize(img, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    detections = []
    keypoints = []
    blaze_face_detections = postprocess(preds_ailia, anchor_path)
    for idx in range(len(blaze_face_detections)):
        obj = blaze_face_detections[idx]
        if len(obj) == 0:
            continue
        d = obj[0]

        # face position
        obj = ailia.DetectorObject(
            category=0,
            prob=1.0,
            x=d[1],
            y=d[0],
            w=d[3] - d[1],
            h=d[2] - d[0],
        )
        detections.append(obj)

        # keypoint position
        keypoint = {
            "eye_left_x": d[4], "eye_left_y": d[5],
            "eye_right_x": d[6], "eye_right_y": d[7],
        }
        keypoints.append(keypoint)

    return detections, keypoints
Example #8
def convert_to_ailia_detector_object(preds, w, h):
    i = 0
    detector_object = []
    for j in range(len(preds[i]['rois'])):
        (x1, y1, x2, y2) = preds[i]['rois'][j].astype(np.int64)
        obj = preds[i]['class_ids'][j]
        score = float(preds[i]['scores'][j])

        r = ailia.DetectorObject(
            category=obj,
            prob=score,
            x=x1 / w,
            y=y1 / h,
            w=(x2 - x1) / w,
            h=(y2 - y1) / h,
        )

        detector_object.append(r)

    return detector_object
Example #9
    def detect(self, img):
        raw_shape = img.shape
        img_input, ResizeM = self.preprocess(img)
        scores, raw_boxes = self.infer_image(img_input)
        bboxs, labels, confs = self.postprocess(scores, raw_boxes, ResizeM, raw_shape)

        img_size_h, img_size_w = img.shape[:2]
        output = []
        for i, box in enumerate(bboxs):
            x1, y1, x2, y2 = box
            c = int(labels[i])
            r = ailia.DetectorObject(
                category=c,
                prob=confs[i],
                x=x1 / img_size_w,
                y=y1 / img_size_h,
                w=(x2 - x1) / img_size_w,
                h=(y2 - y1) / img_size_h,
            )
            output.append(r)

        return output
Example #10
def reverse_letterbox(detections, img, det_shape):
    h, w = img.shape[0], img.shape[1]

    pad_x = pad_y = 0
    if det_shape is not None:
        scale = np.max((h / det_shape[0], w / det_shape[1]))
        start = (det_shape[0:2] - np.array(img.shape[0:2]) / scale) // 2
        pad_x = start[1] * scale
        pad_y = start[0] * scale

    new_detections = []
    for detection in detections:
        logger.debug(detection)
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=(detection.x * (w + pad_x * 2) - pad_x) / w,
            y=(detection.y * (h + pad_y * 2) - pad_y) / h,
            w=(detection.w * (w + pad_x * 2)) / w,
            h=(detection.h * (h + pad_y * 2)) / h,
        )
        new_detections.append(r)

    return new_detections
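
The padding terms compensate for the letterbox bars added during preprocessing, so the relative coordinates end up referring to the original frame rather than the padded detector input. A worked sketch with assumed sizes (a 1280x720 frame letterboxed into a 1280x896 input):

import numpy as np

h, w = 720, 1280
det_shape = (896, 1280)

scale = np.max((h / det_shape[0], w / det_shape[1]))             # 1.0
start = (np.array(det_shape[:2]) - np.array((h, w)) / scale) // 2
pad_x, pad_y = start[1] * scale, start[0] * scale                # 0.0, 88.0

# a detection at y=0.2 of the padded input maps back into the frame at:
y = (0.2 * (h + pad_y * 2) - pad_y) / h                          # ~0.127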
Example #11
def post_processing(data, boxes, labels, masks):
    bbox_list = [boxes[labels == i, :] for i in range(len(CATEGORY))]
    mask_list = [masks[labels == i, :] for i in range(len(CATEGORY))]

    ###########################################
    # remove duplicate
    new_bbox_list = []
    new_mask_list = []
    for idx, (bbox, mask) in enumerate(zip(bbox_list, mask_list)):
        if len(bbox) < 1:
            new_bbox_list.append(None)
            new_mask_list.append(None)
            continue

        i = np.argmax(bbox[:, -1])
        new_bbox_list.append(bbox[i, :])
        new_mask_list.append(mask[i, :])

    bbox_list = new_bbox_list
    mask_list = new_mask_list
    #########################################

    ori_shape = data['ori_shape'][:2]
    img_shape = data['img_shape'][:2]
    scale_factor = data['scale_factor']

    ret_boxes = []
    segm_masks = []
    for cls_ind, (box, mask) in enumerate(zip(bbox_list, mask_list)):
        if box is None:
            continue

        score = box[-1]
        x, y, x2, y2 = box[:4]

        if score < args.threshold:
            continue

        w = (x2 - x)
        h = (y2 - y)
        ori_x = int(x / scale_factor[1])
        ori_y = int(y / scale_factor[0])
        ori_x2 = int(x2 / scale_factor[1])
        ori_y2 = int(y2 / scale_factor[0])
        ori_w = int(w / scale_factor[1])
        ori_h = int(h / scale_factor[0])

        # segment mask
        mask = cv2.resize(mask, (ori_w, ori_h), interpolation=cv2.INTER_LINEAR)
        segm_mask = np.zeros((max(ori_shape[0],
                                  ori_y2), max(ori_shape[1], ori_x2)))
        segm_mask[ori_y:ori_y + ori_h, ori_x:ori_x + ori_w] = mask
        segm_mask = segm_mask[:ori_shape[0], :ori_shape[1]]
        segm_mask = (segm_mask > RCNN_MASK_THRE).astype(np.uint8)

        # bbox
        w = w / img_shape[1]
        h = h / img_shape[0]
        x = x / img_shape[1]
        y = y / img_shape[0]
        r = ailia.DetectorObject(
            category=cls_ind,
            prob=score,
            x=x,
            y=y,
            w=w,
            h=h,
        )
        ret_boxes.append(r)
        segm_masks.append(segm_mask)

    return ret_boxes, segm_masks
Example #12
def post_processing(img, conf_thresh, nms_thresh, output):
    # [batch, num, 1, 4]
    box_array = output[0]
    # [batch, num, num_classes]
    confs = output[1]

    t1 = time.time()

    if type(box_array).__name__ != 'ndarray':
        box_array = box_array.cpu().detach().numpy()
        confs = confs.cpu().detach().numpy()

    num_classes = confs.shape[2]

    # [batch, num, 4]
    box_array = box_array[:, :, 0]

    # [batch, num, num_classes] --> [batch, num]
    max_conf = np.max(confs, axis=2)
    max_id = np.argmax(confs, axis=2)

    t2 = time.time()

    bboxes_batch = []
    for i in range(box_array.shape[0]):

        argwhere = max_conf[i] > conf_thresh
        l_box_array = box_array[i, argwhere, :]
        l_max_conf = max_conf[i, argwhere]
        l_max_id = max_id[i, argwhere]

        bboxes = []
        # nms for each class
        for j in range(num_classes):

            cls_argwhere = l_max_id == j
            ll_box_array = l_box_array[cls_argwhere, :]
            ll_max_conf = l_max_conf[cls_argwhere]
            ll_max_id = l_max_id[cls_argwhere]

            keep = nms_cpu(ll_box_array, ll_max_conf, nms_thresh)

            if (keep.size > 0):
                ll_box_array = ll_box_array[keep, :]
                ll_max_conf = ll_max_conf[keep]
                ll_max_id = ll_max_id[keep]

                for k in range(ll_box_array.shape[0]):
                    r = ailia.DetectorObject(
                        category=ll_max_id[k],
                        prob=ll_max_conf[k],
                        x=ll_box_array[k, 0],
                        y=ll_box_array[k, 1],
                        w=ll_box_array[k, 2] - ll_box_array[k, 0],
                        h=ll_box_array[k, 3] - ll_box_array[k, 1],
                    )
                    bboxes.append(r)

        bboxes_batch.append(bboxes)

    t3 = time.time()

    print('-----------------------------------')
    print('       max and argmax : %f' % (t2 - t1))
    print('                  nms : %f' % (t3 - t2))
    print('Post processing total : %f' % (t3 - t1))
    print('-----------------------------------')

    return bboxes_batch
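
`nms_cpu` is an external helper in this example. As a rough, stand-in illustration of what a greedy IoU-based NMS over xyxy boxes typically looks like (not the repository's actual implementation), a self-contained sketch:

import numpy as np

def iou_nms(boxes, scores, thresh):
    # boxes: (N, 4) in xyxy order; returns the indices kept by greedy NMS
    x1, y1, x2, y2 = boxes[:, 0], boxes[:, 1], boxes[:, 2], boxes[:, 3]
    areas = (x2 - x1) * (y2 - y1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1) * np.maximum(0.0, yy2 - yy1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]
    return np.array(keep, dtype=np.int64)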
Example #13
def recognize_from_video():
    # net initialize
    env_id = args.env_id
    detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        logger.warning(
            'currently, video results cannot be output correctly...')
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = f_h, f_w
        writer = webcamera_utils.get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        raw_img = frame
        img = cv2.resize(raw_img, dsize=(1280, 896))
        img = np.transpose(img, (2, 0, 1))
        img = np.expand_dims(img, 0)
        img = img / 255.0

        pred = detector.predict(img)
        pred = non_max_suppression_numpy(pred, THRESHOLD, IOU)
        output = []
        for i, det in enumerate(pred):
            if det is not None and len(det):
                # Rescale boxes from img_size to im0 size
                det[:, :4] = scale_coords(img.shape[2:], det[:, :4],
                                          raw_img.shape).round()
                img_size_h, img_size_w = raw_img.shape[:2]
                # Write results
                for *xyxy, conf, cls in det:
                    xyxy = [int(v) for v in xyxy]
                    x1, y1, x2, y2 = xyxy
                    r = ailia.DetectorObject(
                        category=int(cls),
                        prob=conf,
                        x=x1 / img_size_w,
                        y=y1 / img_size_h,
                        w=(x2 - x1) / img_size_w,
                        h=(y2 - y1) / img_size_h,
                    )
                    output.append(r)

        detect_object = reverse_letterbox(output, raw_img,
                                          (raw_img.shape[0], raw_img.shape[1]))
        res_img = plot_results(detect_object, raw_img, COCO_CATEGORY)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #14
def post_processing(img, conf_thres, nms_thres, outputs):
    batch_detections = []

    img_size_w = img.shape[3]
    img_size_h = img.shape[2]

    batch_size = 1
    num_classes = 80

    anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]

    boxs = []
    a = np.array(anchors).reshape(3, -1, 2)
    anchor_grid = a.copy().reshape(3, 1, -1, 1, 1, 2)

    #onnx output
    #(1, 3, 80, 80, 85) # anchor 0
    #(1, 3, 40, 40, 85) # anchor 1
    #(1, 3, 20, 20, 85) # anchor 2

    #[cx,cy,w,h,conf,pred_cls(80)]

    for index, out in enumerate(outputs):
        batch = out.shape[1]
        feature_h = out.shape[2]
        feature_w = out.shape[3]

        # Feature map corresponds to the original image zoom factor
        stride_w = int(img_size_w / feature_w)
        stride_h = int(img_size_h / feature_h)

        grid_x, grid_y = np.meshgrid(np.arange(feature_w), np.arange(feature_h))

        # cx, cy, w, h
        pred_boxes = np.zeros(out[..., :4].shape)
        pred_boxes[..., 0] = (sigmoid(out[..., 0]) * 2.0 - 0.5 + grid_x) * stride_w  # cx
        pred_boxes[..., 1] = (sigmoid(out[..., 1]) * 2.0 - 0.5 + grid_y) * stride_h  # cy
        pred_boxes[..., 2:4] = (sigmoid(out[..., 2:4]) * 2) ** 2 * anchor_grid[index]  # wh

        conf = sigmoid(out[..., 4])
        pred_cls = sigmoid(out[..., 5:])

        output = np.concatenate((pred_boxes.reshape(batch_size, -1, 4),
                            conf.reshape(batch_size, -1, 1),
                            pred_cls.reshape(batch_size, -1, num_classes)),
                            -1)
        boxs.append(output)

    outputx = np.concatenate(boxs, 1)

    # NMS
    batch_detections = non_max_suppression(outputx, num_classes, conf_thres=conf_thres, nms_thres=nms_thres)

    # output ailia format
    detections = batch_detections[0]
    if detections is None:
        return [[]]

    labels = detections[..., -1]
    boxs = detections[..., :4]
    confs = detections[..., 4]

    bboxes = []

    bboxes_batch = []
    for i, box in enumerate(boxs):
        x1, y1, x2, y2 = box
        c = int(labels[i])
        r = ailia.DetectorObject(
            category=c,
            prob=confs[i],
            x=x1/img_size_w,
            y=y1/img_size_h,
            w=(x2 - x1)/img_size_w,
            h=(y2 - y1)/img_size_h,
        )
        bboxes.append(r)
    bboxes_batch.append(bboxes)
    
    return bboxes_batch
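
The decoding above follows the YOLOv5 convention: the center offsets come from a sigmoid rescaled to (-0.5, 1.5) around each grid cell, and width/height from a squared, doubled sigmoid times the anchor size. A minimal sketch for a single cell (the raw values, grid position and anchor below are made up):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

tx, ty, tw, th = 0.2, -0.1, 0.3, 0.4   # assumed raw network outputs for one cell
grid_x, grid_y = 10, 5                 # cell position on the feature map
stride = 8                             # zoom factor of this feature map
anchor_w, anchor_h = 10, 13            # first anchor of the stride-8 head

cx = (sigmoid(tx) * 2.0 - 0.5 + grid_x) * stride
cy = (sigmoid(ty) * 2.0 - 0.5 + grid_y) * stride
w = (sigmoid(tw) * 2.0) ** 2 * anchor_w
h = (sigmoid(th) * 2.0) ** 2 * anchor_h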
Example #15
def recognize_from_image(detector, dst_path, src_dir, file_):
    # prepare input data
    #img = load_image(src_dir+"/"+file_)

    img = cv2.imread(src_dir + "/" + file_)
    h, w = img.shape[0], img.shape[1]

    if args.arch == "yolov3":
        img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, YOLOV3_THRESHOLD, YOLOV3_IOU)
        count = detector.get_object_count()
    else:
        image = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        image = cv2.resize(
            image, (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
        image = image.transpose((2, 0, 1))  # channel first
        image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
        input_data = image / 127.5 - 1.0

        # inference
        preds_ailia = detector.predict([input_data])

        # postprocessing
        detections = postprocess(preds_ailia)
        count = len(detections)

    texts = []
    written = False
    for idx in range(count):
        if args.arch == "yolov3":
            # get detected face
            obj = detector.get_object(idx)
            margin = 1.0
        else:
            # get detected face
            obj = detections[idx]
            d = obj[0]
            obj = ailia.DetectorObject(category=0,
                                       prob=1.0,
                                       x=d[1],
                                       y=d[0],
                                       w=d[3] - d[1],
                                       h=d[2] - d[0])
            margin = 1.4

        cx = (obj.x + obj.w / 2) * w
        cy = (obj.y + obj.h / 2) * h
        cw = max(obj.w * w * margin, obj.h * h * margin)
        fx = max(cx - cw / 2, 0)
        fy = max(cy - cw / 2, 0)
        fw = min(cw, w - fx)
        fh = min(cw, h - fy)
        top_left = (int(fx), int(fy))
        bottom_right = (int((fx + fw)), int(fy + fh))

        print("face detected " + str(top_left) + "-" + str(bottom_right))

        # get detected face
        crop_img = img[top_left[1]:bottom_right[1],
                       top_left[0]:bottom_right[0], 0:3]
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue
        cv2.imwrite(dst_path, crop_img)
        written = True

    if not written:
        print("face not found")
Example #16
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate emotion
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        crop_img = cv2.cvtColor(crop_img, cv2.COLOR_BGR2GRAY)
        emotion = predict(net, crop_img)
        idx = np.argmax(emotion)
        emotion = emotion_table[idx]

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]
        LABEL_HEIGHT = 20
        color = (255, 128, 128)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            emotion,
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )
Example #17
def recognize_from_frame(net, detector, frame):
    # detect face
    detections = compute_blazeface(
        detector,
        frame,
        anchor_path='../../face_detection/blazeface/anchorsback.npy',
        back=True,
        min_score_thresh=FACE_MIN_SCORE_THRESH)

    # adjust face rectangle
    new_detections = []
    for detection in detections:
        margin = 1.5
        r = ailia.DetectorObject(
            category=detection.category,
            prob=detection.prob,
            x=detection.x - detection.w * (margin - 1.0) / 2,
            y=detection.y - detection.h * (margin - 1.0) / 2 -
            detection.h * margin / 8,
            w=detection.w * margin,
            h=detection.h * margin,
        )
        new_detections.append(r)
    detections = new_detections

    # estimate age and gender
    for obj in detections:
        # get detected face
        margin = 1.0
        crop_img, top_left, bottom_right = crop_blazeface(obj, margin, frame)
        if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
            continue

        img = cv2.resize(crop_img, (IMAGE_SIZE, IMAGE_SIZE))
        img = np.expand_dims(img, axis=0)  # add batch dimension

        # inference
        output = net.predict([img])
        prob, age_conv3 = output
        prob = prob[0][0][0]
        age_conv3 = age_conv3[0][0][0][0]

        i = np.argmax(prob)
        gender = 'Female' if i == 0 else 'Male'
        age = round(age_conv3 * 100)

        # display label
        LABEL_WIDTH = bottom_right[0] - top_left[0]
        LABEL_HEIGHT = 20
        if gender == "Male":
            color = (255, 128, 128)
        else:
            color = (128, 128, 255)
        cv2.rectangle(frame, top_left, bottom_right, color, thickness=2)
        cv2.rectangle(
            frame,
            top_left,
            (top_left[0] + LABEL_WIDTH, top_left[1] + LABEL_HEIGHT),
            color,
            thickness=-1,
        )

        text_position = (top_left[0], top_left[1] + LABEL_HEIGHT // 2)
        color = (0, 0, 0)
        fontScale = 0.5
        cv2.putText(
            frame,
            "{} {}".format(gender, age),
            text_position,
            cv2.FONT_HERSHEY_SIMPLEX,
            fontScale,
            color,
            1,
        )
Example #18
def compute_blazeface_with_keypoint(detector,
                                    frame,
                                    anchor_path='anchors.npy',
                                    back=False,
                                    min_score_thresh=DEFAULT_MIN_SCORE_THRESH):
    if back:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 256
        BLAZEFACE_INPUT_IMAGE_WIDTH = 256
    else:
        BLAZEFACE_INPUT_IMAGE_HEIGHT = 128
        BLAZEFACE_INPUT_IMAGE_WIDTH = 128

    # preprocessing
    image = letterbox_convert(
        frame, (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = image.transpose((2, 0, 1))  # channel first
    image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
    input_data = image / 127.5 - 1.0

    # inference
    preds_ailia = detector.predict([input_data])

    # postprocessing
    face_detections = postprocess(preds_ailia,
                                  anchor_path,
                                  back=back,
                                  min_score_thresh=min_score_thresh)
    face_detections = face_detections[0]

    detections = []
    detections_eyes = []
    for i, d in enumerate(face_detections):
        # face position
        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[1],
                                   y=d[0],
                                   w=d[3] - d[1],
                                   h=d[2] - d[0])
        detections.append(obj)

        # keypoints
        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[4],
                                   y=d[5],
                                   w=0,
                                   h=0)
        detections_eyes.append(obj)

        obj = ailia.DetectorObject(category=0,
                                   prob=1.0,
                                   x=d[6],
                                   y=d[7],
                                   w=0,
                                   h=0)
        detections_eyes.append(obj)

    # revert square from detections
    detections = reverse_letterbox(
        detections, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))
    detections_eyes = reverse_letterbox(
        detections_eyes, frame,
        (BLAZEFACE_INPUT_IMAGE_HEIGHT, BLAZEFACE_INPUT_IMAGE_WIDTH))

    # convert to keypoints
    keypoints = []
    for i in range(len(detections_eyes) // 2):
        keypoint = {
            "eye_left_x": detections_eyes[i * 2 + 0].x,
            "eye_left_y": detections_eyes[i * 2 + 0].y,
            "eye_right_x": detections_eyes[i * 2 + 1].x,
            "eye_right_y": detections_eyes[i * 2 + 1].y
        }
        keypoints.append(keypoint)

    return detections, keypoints
Example #19
def compare_video():
    # prepare base image
    fe_list = []

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)

    # detector initialize
    if args.face == "yolov3":
        detector = ailia.Detector(FACE_MODEL_PATH,
                                  FACE_WEIGHT_PATH,
                                  1,
                                  format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                                  algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                                  env_id=env_id)
    else:
        detector = ailia.Net(FACE_MODEL_PATH, FACE_WEIGHT_PATH, env_id=env_id)

    # web camera
    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[Error] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    # inference loop
    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        h, w = frame.shape[0], frame.shape[1]

        # detect face
        if args.face == "yolov3":
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, YOLOV3_FACE_THRESHOLD, YOLOV3_FACE_IOU)
            count = detector.get_object_count()
        else:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            image = cv2.resize(
                img,
                (BLAZEFACE_INPUT_IMAGE_WIDTH, BLAZEFACE_INPUT_IMAGE_HEIGHT))
            image = image.transpose((2, 0, 1))  # channel first
            image = image[np.newaxis, :, :, :]  # (batch_size, channel, h, w)
            input_data = image / 127.5 - 1.0

            # inference
            preds_ailia = detector.predict([input_data])

            # postprocessing
            detections = postprocess(preds_ailia)
            count = len(detections)

        texts = []
        for idx in range(count):
            # get detected face
            if args.face == "yolov3":
                obj = detector.get_object(idx)
                margin = 1.0
            else:
                obj = detections[idx]
                if len(obj) == 0:
                    continue
                d = obj[0]
                obj = ailia.DetectorObject(category=0,
                                           prob=1.0,
                                           x=d[1],
                                           y=d[0],
                                           w=d[3] - d[1],
                                           h=d[2] - d[0])
                margin = 1.4

            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w * margin, obj.h * h * margin)
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int((fx + fw)), int(fy + fh))

            # get detected face
            crop_img = img[top_left[1]:bottom_right[1],
                           top_left[0]:bottom_right[0], 0:3]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue
            crop_img, resized_frame = adjust_frame_size(
                crop_img, IMAGE_HEIGHT, IMAGE_WIDTH)

            # get matched face
            id_sim, score_sim = face_identification(fe_list, net,
                                                    resized_frame)

            # display result
            fontScale = w / 512.0
            thickness = 2
            color = hsv_to_rgb(256 * id_sim / 16, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 2)

            text_position = (int(fx) + 4, int((fy + fh) - 8))

            cv2.putText(frame, f"{id_sim} : {score_sim:5.3f}", text_position,
                        cv2.FONT_HERSHEY_SIMPLEX, fontScale, color, thickness)

        cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')