Example #1
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # net initialize
    if args.detector:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV4,
            env_id=args.env_id,
        )
    else:
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)
        detector.set_input_shape((1, 3, IMAGE_HEIGHT, IMAGE_WIDTH))

    if args.video is not None:
        # video mode
        recognize_from_video(detector)
    else:
        # image mode
        recognize_from_image(detector)
Example #2
def main():
    # model files check and download
    logger.info('Check vehicle-attributes-recognition model...')
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)
    if args.video or args.detection:
        logger.info('Check object detection model...')
        check_and_download_models(DT_WEIGHT_PATH, DT_MODEL_PATH,
                                  DT_REMOTE_PATH)

    env_id = args.env_id

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    if args.video or args.detection:
        detector = ailia.Detector(
            DT_MODEL_PATH,
            DT_WEIGHT_PATH,
            len(COCO_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )
    else:
        detector = None

    if args.video:
        # video mode
        recognize_from_video(net, detector)
    else:
        # image mode
        recognize_from_image(net, detector)
Example #3
def main():
    # model files check and download
    logger.info('=== GMM model ===')
    check_and_download_models(WEIGHT_GMM_PATH, MODEL_GMM_PATH, REMOTE_PATH)
    logger.info('=== TOM model ===')
    check_and_download_models(WEIGHT_TOM_PATH, MODEL_TOM_PATH, REMOTE_PATH)
    if args.video or not args.keypoints:
        logger.info('=== detector model ===')
        check_and_download_models(WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH,
                                  REMOTE_YOLOV3_PATH)
        logger.info('=== pose model ===')
        check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH,
                                  REMOTE_POSE_PATH)
    if args.video or not args.parse:
        logger.info('=== human segmentation model ===')
        check_and_download_models(WEIGHT_SEG_PATH, MODEL_SEG_PATH,
                                  REMOTE_SEG_PATH)

    # initialize
    if args.onnx:
        import onnxruntime
        GMM_net = onnxruntime.InferenceSession(WEIGHT_GMM_PATH)
        TOM_net = onnxruntime.InferenceSession(WEIGHT_TOM_PATH)
    else:
        GMM_net = ailia.Net(MODEL_GMM_PATH,
                            WEIGHT_GMM_PATH,
                            env_id=args.env_id)
        TOM_net = ailia.Net(MODEL_TOM_PATH,
                            WEIGHT_TOM_PATH,
                            env_id=args.env_id)

    if args.video or not args.keypoints:
        det_net = ailia.Detector(
            MODEL_YOLOV3_PATH,
            WEIGHT_YOLOV3_PATH,
            80,
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=args.env_id,
        )
        pose_net = ailia.Net(MODEL_POSE_PATH,
                             WEIGHT_POSE_PATH,
                             env_id=args.env_id)
    else:
        det_net = pose_net = None
    if args.video or not args.parse:
        seg_net = ailia.Net(MODEL_SEG_PATH,
                            WEIGHT_SEG_PATH,
                            env_id=args.env_id)
    else:
        seg_net = None

    if args.video is not None:
        # video mode
        recognize_from_video(GMM_net, TOM_net, det_net, pose_net, seg_net)
    else:
        # image mode
        recognize_from_image(GMM_net, TOM_net, det_net, pose_net, seg_net)
Example #4
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=env_id
    )
    detector.set_anchors(ANCHORS)

    # compute execution time
    for i in range(5):
        start = int(round(time.time() * 1000))
        detector.compute(img, THRESHOLD, IOU)
        end = int(round(time.time() * 1000))
        print(f'ailia processing time {end - start} ms')

    # plot result
    res_img = plot_results(detector, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
Example #5
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              len(FACE_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                              algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                              env_id=env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(img, THRESHOLD, IOU)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(img, THRESHOLD, IOU)

    # plot result
    res_img = plot_results(detector, img, FACE_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
Example #6
def recognize_from_image():
    # prepare input data
    img = load_image(args.input)
    print(f'input image shape: {img.shape}')

    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(args.detection_width, args.detection_height)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(img, args.threshold, args.iou)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(img, args.threshold, args.iou)

    # plot result
    res_img = plot_results(detector, img, COCO_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
Example #7
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    env_id = args.env_id
    if args.detector:
        detector = ailia.Detector(MODEL_PATH,
                                  WEIGHT_PATH,
                                  len(COCO_CATEGORY),
                                  format=ailia.NETWORK_IMAGE_FORMAT_BGR,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=ailia.NETWORK_IMAGE_RANGE_U_INT8,
                                  algorithm=ailia.DETECTOR_ALGORITHM_YOLOX,
                                  env_id=env_id)
        if args.detection_width != -1 or args.detection_height != -1:
            detector.set_input_shape(args.detection_width,
                                     args.detection_height)
    else:
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
        if args.detection_width != -1 or args.detection_height != -1:
            global WIDTH, HEIGHT
            WIDTH = args.detection_width
            HEIGHT = args.detection_height
            detector.set_input_shape((1, 3, HEIGHT, WIDTH))

    if args.video is not None:
        # video mode
        recognize_from_video(detector)
    else:
        # image mode
        recognize_from_image(detector)
Example #8
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(
            args.detection_width, args.detection_height
        )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    if args.write_prediction:
        frame_count = 0
        frame_digit = int(math.log10(capture.get(cv2.CAP_PROP_FRAME_COUNT)) + 1)
        video_name = os.path.splitext(os.path.basename(args.video))[0]

    while(True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, args.threshold, args.iou)
        res_img = plot_results(detector, frame, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

        # write prediction
        if args.write_prediction:
            savepath = get_savepath(
                args.savepath, video_name,
                post_fix='_%s' % (str(frame_count).zfill(frame_digit) + '_res'),
                ext='.png')
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detector, frame, COCO_CATEGORY)
            frame_count += 1

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #9
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(
            args.detection_width, args.detection_height
        )
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                detector.compute(img, args.threshold, args.iou)
                end = int(round(time.time() * 1000))
                if i != 0:
                    total_time = total_time + (end - start)
                logger.info(f'\tailia processing time {end - start} ms')
            logger.info(f'\taverage time {total_time / (args.benchmark_count-1)} ms')
        else:
            detector.compute(img, args.threshold, args.iou)

        # plot result
        res_img = plot_results(detector, img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

        # write prediction
        if args.write_prediction:
            pred_file = '%s.txt' % savepath.rsplit('.', 1)[0]
            write_predictions(pred_file, detector, img, COCO_CATEGORY)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
Example #10
def recognize_from_video():
    # net initialize
    if args.detector:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
            env_id=args.env_id,
        )
        detector.set_anchors(ANCHORS)
    else:
        net = ailia.Net(None, WEIGHT_PATH)

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break
        if args.detector:
            img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
            detector.compute(img, THRESHOLD, IOU)
            res_img = plot_results(detector, frame, CATEGORY, False)
        else:
            img_PIL = Image.fromarray(frame)
            img = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            img = cv2.resize(img, (416, 416))
            img = img.transpose((2, 0, 1)) / 255
            img = img[np.newaxis, :, :, :].astype(np.float32)
            results = net.run([img])
            results = torch.FloatTensor(results[0])
            output_img = detect(img_PIL, results, video=True)
            res_img = np.array(output_img, dtype=np.uint8)

        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #11
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              len(FACE_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=RANGE,
                              algorithm=ALGORITHM,
                              env_id=args.env_id)

    capture = webcamera_utils.get_capture(args.video)

    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(
            args.savepath,
            IMAGE_HEIGHT,
            IMAGE_WIDTH,
            fps=capture.get(cv2.CAP_PROP_FPS),
        )
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_img = webcamera_utils.adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH)

        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(detections,
                                            frame.shape[1],
                                            frame.shape[0],
                                            categories=[0, 1],
                                            iou_threshold=IOU)

        res_img = plot_results(detections, resized_img, FACE_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #12
def init_detector(env_id):
    detector = ailia.Detector(DT_MODEL_PATH,
                              DT_WEIGHT_PATH,
                              len(COCO_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                              algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                              env_id=env_id)
    return detector
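A minimal usage sketch for init_detector (hedged: the input array name and the threshold/IoU values below are assumptions; compute, get_object_count and get_object are the standard ailia.Detector calls used throughout these examples):

detector = init_detector(env_id=0)
# img_bgra: a BGRA numpy array, e.g. from cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
detector.compute(img_bgra, 0.4, 0.45)
for idx in range(detector.get_object_count()):
    obj = detector.get_object(idx)
    # obj.x/y/w/h are normalized to [0, 1] relative to the input image
    print(COCO_CATEGORY[obj.category], obj.prob)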
Example #13
def recognize_from_image():
    # net initialize
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        categories,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_SSD,
        env_id=args.env_id,
    )
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        org_img = load_image(
            image_path,
            (IMAGE_HEIGHT, IMAGE_WIDTH),
            normalize_type='None',
        )
        if org_img.shape[2] == 3:
            org_img = cv2.cvtColor(org_img, cv2.COLOR_RGB2BGRA)

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(org_img, threshold, iou)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(org_img, threshold, iou)

        # postprocessing
        res_img = plot_results(detector, org_img, VOC_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
Example #14
def main():
    # model files check and download
    check_and_download_models(WEIGHT_PATH, MODEL_PATH, REMOTE_PATH)

    # check folder existing
    if not os.path.exists(args.input):
        print("error : directory not found "+args.input)
        sys.exit(1)
    if not os.path.exists(args.output):
        os.mkdir(args.output)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == 'blazeface':
        detector = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=env_id)
    else:
        detector = ailia.Detector(
            MODEL_PATH,
            WEIGHT_PATH,
            len(FACE_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id
        )

    # process images
    no = 0
    for src_dir, dirs, files in os.walk(args.input):
        files = sorted(files)
        for file_ in files:
            root, ext = os.path.splitext(file_)

            if file_ in ('.DS_Store', 'Thumbs.db'):
                continue
            if ext not in ('.jpg', '.png', '.bmp'):
                continue

            print(os.path.join(src_dir, file_))
            folder = os.path.basename(src_dir)
            dst_dir = os.path.join(args.output, folder)
            if not os.path.exists(dst_dir):
                os.mkdir(dst_dir)
            dst_path = os.path.join(dst_dir, str(no) + '.jpg')
            recognize_from_image(detector, dst_path, src_dir, file_)
            no += 1
Example #15
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )

    pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        detector.compute(img, THRESHOLD, IOU)

        # pose estimation
        if args.benchmark:
            logger.info('BENCHMARK mode')
            total_time = 0
            for i in range(args.benchmark_count):
                start = int(round(time.time() * 1000))
                pose_detections = pose_estimation(detector, pose, img)
                end = int(round(time.time() * 1000))
                logger.info(
                    f'\tailia processing detection time {end - start} ms')
                if i != 0:
                    total_time = total_time + (end - start)
            logger.info(
                f'\taverage detection time {total_time / (args.benchmark_count-1)} ms'
            )
        else:
            pose_detections = pose_estimation(detector, pose, img)

        # plot result
        res_img = plot_results(detector, pose, img, COCO_CATEGORY,
                               pose_detections)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)
    logger.info('Script finished successfully.')
Example #16
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(FACE_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=RANGE,
        algorithm=ALGORITHM,
        env_id=env_id
    )

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while(True):
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        _, resized_img = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)

        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(detections,
                                            frame.shape[1],
                                            frame.shape[0],
                                            categories=[0, 1],
                                            iou_threshold=IOU)

        res_img = plot_results(detections, resized_img, FACE_CATEGORY, False)
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
Example #17
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              len(FACE_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=RANGE,
                              algorithm=ALGORITHM,
                              env_id=args.env_id)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(img, THRESHOLD, IOU)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(img, THRESHOLD, IOU)

        # nms
        detections = []
        for idx in range(detector.get_object_count()):
            obj = detector.get_object(idx)
            detections.append(obj)
        detections = nms_between_categories(
            detections,
            img.shape[1],
            img.shape[0],
            categories=[0, 1],
            iou_threshold=IOU,
        )

        # plot result
        res_img = plot_results(detections, img, FACE_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)
    logger.info('Script finished successfully.')
Example #18
    def __init__(self):
        check_and_download_models(WEIGHT_PATH_YOLO, MODEL_PATH_YOLO,
                                  REMOTE_PATH_YOLO)

        # net initialize
        env_id = ailia.get_gpu_environment_id()
        self.detector = detector = ailia.Detector(
            MODEL_PATH_YOLO,
            WEIGHT_PATH_YOLO,
            len(FACE_CATEGORY),
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id)
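The fragment above only constructs the detector; a companion method sketch (hedged: the method name and default thresholds are assumptions, not part of the original class, and the calls mirror the other examples):

    def detect(self, img_bgra, threshold=0.4, iou=0.45):
        # run the YOLOv3 face detector and return the raw detection objects
        self.detector.compute(img_bgra, threshold, iou)
        return [self.detector.get_object(i)
                for i in range(self.detector.get_object_count())]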
Example #19
def recognize_from_video():
    # net initialize
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        categories,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_SSD,
        env_id=args.env_id,
    )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        writer = webcamera_utils.get_writer(
            args.savepath, IMAGE_HEIGHT, IMAGE_WIDTH
        )
    else:
        writer = None

    while(True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        _, resized_img = webcamera_utils.adjust_frame_size(
            frame, IMAGE_HEIGHT, IMAGE_WIDTH
        )
        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, threshold, iou)
        res_img = plot_results(detector, resized_img, VOC_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #20
def recognize_from_video():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    if args.detection_width != DETECTION_SIZE or args.detection_height != DETECTION_SIZE:
        detector.set_input_shape(
            args.detection_width, args.detection_height
        )

    capture = webcamera_utils.get_capture(args.video)

    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_IMAGE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        writer = webcamera_utils.get_writer(args.savepath, f_h, f_w)
    else:
        writer = None

    while(True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, args.threshold, args.iou)
        res_img = plot_results(detector, frame, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

        # save results
        if writer is not None:
            writer.write(res_img)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    logger.info('Script finished successfully.')
Example #21
def recognize_from_image():
    # net initialize
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=args.env_id,
    )
    detector.set_anchors(ANCHORS)
    if args.profile:
        detector.set_profile_mode(True)

    # input image loop
    for image_path in args.input:
        # prepare input data
        logger.info(image_path)
        img = load_image(image_path)
        logger.debug(f'input image shape: {img.shape}')

        # inference
        logger.info('Start inference...')
        if args.benchmark:
            logger.info('BENCHMARK mode')
            for i in range(5):
                start = int(round(time.time() * 1000))
                detector.compute(img, THRESHOLD, IOU)
                end = int(round(time.time() * 1000))
                logger.info(f'\tailia processing time {end - start} ms')
        else:
            detector.compute(img, THRESHOLD, IOU)

        # plot result
        res_img = plot_results(detector, img, COCO_CATEGORY)
        savepath = get_savepath(args.savepath, image_path)
        logger.info(f'saved at : {savepath}')
        cv2.imwrite(savepath, res_img)

    if args.profile:
        print(detector.get_summary())

    logger.info('Script finished successfully.')
Example #22
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(
        MODEL_PATH,
        WEIGHT_PATH,
        len(COCO_CATEGORY),
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_S_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV2,
        env_id=env_id
    )
    detector.set_anchors(ANCHORS)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while(True):
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        _, resized_img = adjust_frame_size(frame, IMAGE_HEIGHT, IMAGE_WIDTH)

        img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)
        res_img = plot_results(detector, resized_img, COCO_CATEGORY, False)
        cv2.imshow('frame', res_img)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
Example #23
def main():
    # model files check and download
    detector = True

    if detector:
        logger.info('=== detector model ===')
        check_and_download_models(WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH,
                                  REMOTE_YOLOV3_PATH)
    logger.info('=== animalpose model ===')
    info = {
        'hrnet32': (WEIGHT_HRNET_W32_PATH, MODEL_HRNET_W32_PATH),
        'hrnet48': (WEIGHT_HRNET_W48_PATH, MODEL_HRNET_W48_PATH),
        'res50': (WEIGHT_RESNET_50_PATH, MODEL_RESNET_50_PATH),
        'res101': (WEIGHT_RESNET_101_PATH, MODEL_RESNET_101_PATH),
        'res152': (WEIGHT_RESNET_152_PATH, MODEL_RESNET_152_PATH),
    }
    weight_path, model_path = info[args.model]
    check_and_download_models(weight_path, model_path, REMOTE_PATH)

    env_id = args.env_id

    # initialize
    if detector:
        det_net = ailia.Detector(
            MODEL_YOLOV3_PATH,
            WEIGHT_YOLOV3_PATH,
            80,
            format=ailia.NETWORK_IMAGE_FORMAT_RGB,
            channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
            range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
            algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
            env_id=env_id,
        )
    else:
        det_net = None
    net = ailia.Net(model_path, weight_path, env_id=env_id)

    if args.video is not None:
        # video mode
        recognize_from_video(net, det_net)
    else:
        # image mode
        recognize_from_image(net, det_net)
Example #24
def recognize_from_image():
    # prepare input data
    org_img = load_image(
        args.input,
        (IMAGE_HEIGHT, IMAGE_WIDTH),
        normalize_type='None',
    )
    if org_img.shape[2] == 3:
        org_img = cv2.cvtColor(org_img, cv2.COLOR_BGR2BGRA)

    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    categories = 80
    threshold = 0.4
    iou = 0.45
    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              categories,
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                              algorithm=ailia.DETECTOR_ALGORITHM_SSD,
                              env_id=env_id)

    # inference
    print('Start inference...')
    if args.benchmark:
        print('BENCHMARK mode')
        for i in range(5):
            start = int(round(time.time() * 1000))
            detector.compute(org_img, threshold, iou)
            end = int(round(time.time() * 1000))
            print(f'\tailia processing time {end - start} ms')
    else:
        detector.compute(org_img, threshold, iou)

    # postprocessing
    res_img = plot_results(detector, org_img, VOC_CATEGORY)
    cv2.imwrite(args.savepath, res_img)
    print('Script finished successfully.')
Example #25
def main():
    # model files check and download
    logger.info("=== YOLOv3 model ===")
    check_and_download_models(WEIGHT_YOLOV3_PATH, MODEL_YOLOV3_PATH,
                              REMOTE_YOLOV3_PATH)
    logger.info("=== HRNet model ===")
    check_and_download_models(WEIGHT_POSE_PATH, MODEL_POSE_PATH, REMOTE_PATH)
    logger.info("=== GAST model ===")
    check_and_download_models(WEIGHT_27FRAME_17JOINT_PATH,
                              MODEL_27FRAME_17JOINT_PATH, REMOTE_PATH)

    num_person = args.num_person

    # net initialize
    detector = ailia.Detector(
        MODEL_YOLOV3_PATH,
        WEIGHT_YOLOV3_PATH,
        80,
        format=ailia.NETWORK_IMAGE_FORMAT_RGB,
        channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
        range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
        algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
        env_id=args.env_id,
    )
    pose_net = ailia.Net(MODEL_POSE_PATH, WEIGHT_POSE_PATH, env_id=args.env_id)

    if not args.onnx:
        net = ailia.Net(MODEL_27FRAME_17JOINT_PATH,
                        WEIGHT_27FRAME_17JOINT_PATH,
                        env_id=args.env_id)
    else:
        import onnxruntime
        net = onnxruntime.InferenceSession(WEIGHT_27FRAME_17JOINT_PATH)

    info = {
        "yolo_model": detector,
        "pose_model": pose_net,
        "num_person": num_person,
    }
    recognize_from_video(net, info)
Example #26
def compare_video():
    # prepare base image
    tracks = []

    # net initialize
    net = ailia.Net(MODEL_PATH, WEIGHT_PATH, env_id=args.env_id)

    # detector initialize
    if args.face == "blazeface":
        detector = ailia.Net(FACE_MODEL_PATH,
                             FACE_WEIGHT_PATH,
                             env_id=args.env_id)
    else:
        detector = ailia.Detector(FACE_MODEL_PATH,
                                  FACE_WEIGHT_PATH,
                                  len(FACE_CATEGORY),
                                  format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=FACE_RANGE,
                                  algorithm=FACE_ALGORITHM,
                                  env_id=args.env_id)

    # web camera
    capture = webcamera_utils.get_capture(args.video)

    # ui buffer
    ui_width = capture.get(
        cv2.CAP_PROP_FRAME_WIDTH) + IMAGE_WIDTH / 4 * FACE_TRACK_T
    ui_height = max(capture.get(cv2.CAP_PROP_FRAME_HEIGHT),
                    IMAGE_HEIGHT / 4 * 2 * 8)
    ui = np.zeros((int(ui_height), int(ui_width), 3), np.uint8)
    frame_no = 0

    # writer
    writer = None
    if args.savepath is not None:
        writer = webcamera_utils.get_writer(
            args.savepath,
            ui.shape[0],
            ui.shape[1],
            fps=capture.get(cv2.CAP_PROP_FPS),
        )

    # inference loop
    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        # get frame size
        h, w = frame.shape[0], frame.shape[1]

        # get faces from image
        detections = get_faces(detector, frame, w, h)

        # track face
        face_identification(tracks, net, detections, frame_no)
        frame_no = frame_no + 1

        # display result
        ui[:, :, :] = 0
        ui[0:h, 0:w, :] = frame[:, :, :]
        display_detections(ui, w, h, detections)
        display_tracks(ui, w, h, tracks)

        # show
        cv2.imshow('arcface', ui)

        if writer is not None:
            writer.write(ui)

    if writer is not None:
        writer.release()

    capture.release()
    cv2.destroyAllWindows()
    logger.info('Script finished successfully.')
Example #27
def recognize_from_video():
    try:
        print('[INFO] Webcam mode is activated')
        RECORD_TIME = 80
        capture = cv2.VideoCapture(int(args.video))
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    except ValueError:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    frame_rate = capture.get(cv2.CAP_PROP_FPS)
    if FRAME_SKIP:
        action_recognize_fps = int(args.fps)
    else:
        action_recognize_fps = frame_rate

    if args.savepath != "":
        size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)),
                int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        fmt = cv2.VideoWriter_fourcc('m', 'p', '4', 'v')
        writer = cv2.VideoWriter(args.savepath, fmt, action_recognize_fps,
                                 size)
    else:
        writer = None

    # pose estimation
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    if args.arch == "lw_human_pose":
        pose = ailia.PoseEstimator(MODEL_PATH,
                                   WEIGHT_PATH,
                                   env_id=env_id,
                                   algorithm=ALGORITHM)

        detector = None
    else:
        detector = ailia.Detector(DETECTOR_MODEL_PATH,
                                  DETECTOR_WEIGHT_PATH,
                                  len(COCO_CATEGORY),
                                  format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                                  channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                                  range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                                  algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                                  env_id=env_id)

        pose = ailia.Net(POSE_MODEL_PATH, POSE_WEIGHT_PATH, env_id=env_id)

    # tracker class instance
    extractor = ailia.Net(EX_MODEL_PATH, EX_WEIGHT_PATH, env_id=env_id)
    metric = NearestNeighborDistanceMetric("cosine", MAX_COSINE_DISTANCE,
                                           NN_BUDGET)
    tracker = Tracker(metric, max_iou_distance=0.7, max_age=70, n_init=3)

    # action recognition
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    model = ailia.Net(ACTION_MODEL_PATH, ACTION_WEIGHT_PATH, env_id=env_id)

    action_data = {}

    frame_nb = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))
    idx_frame = 0

    time_start = time.time()
    while (True):
        time_curr = time.time()
        if args.video == '0' and time_curr - time_start > RECORD_TIME:
            break
        ret, frame = capture.read()

        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if (not ret) or (frame_nb >= 1 and idx_frame >= frame_nb):
            break

        if FRAME_SKIP:
            mod = round(frame_rate / action_recognize_fps)
            if mod >= 1:
                if idx_frame % mod != 0:
                    idx_frame = idx_frame + 1
                    continue

        input_image, input_data = adjust_frame_size(
            frame,
            frame.shape[0],
            frame.shape[1],
        )
        input_data = cv2.cvtColor(input_data, cv2.COLOR_BGR2BGRA)

        # inference
        if args.arch == "lw_human_pose":
            _ = pose.compute(input_data)
        else:
            detector.compute(input_data, THRESHOLD, IOU)

        # deepsort format
        h, w = input_image.shape[0], input_image.shape[1]
        if args.arch == "lw_human_pose":
            bbox_xywh, cls_conf, cls_ids = get_detector_result_lw_human_pose(
                pose, h, w)
        else:
            bbox_xywh, cls_conf, cls_ids = get_detector_result(detector, h, w)

        mask = cls_ids == 0
        bbox_xywh = bbox_xywh[mask]

        # bbox dilation just in case bbox too small,
        # delete this line if using a better pedestrian detector
        if args.arch == "pose_resnet":
            # bbox_xywh[:, 3:] *= 1.2   #May need to be removed in the future
            cls_conf = cls_conf[mask]

        # do tracking
        img_crops = []
        for box in bbox_xywh:
            x1, y1, x2, y2 = xywh_to_xyxy(box, h, w)
            img_crops.append(input_image[y1:y2, x1:x2])

        if img_crops:
            # preprocess
            img_batch = np.concatenate([
                normalize_image(resize(img), 'ImageNet')[np.newaxis, :, :, :]
                for img in img_crops
            ], axis=0).transpose(0, 3, 1, 2)

            # TODO better to pass a batch at once
            # features = extractor.predict(img_batch)
            features = []
            for img in img_batch:
                features.append(extractor.predict(img[np.newaxis, :, :, :])[0])
            features = np.array(features)
        else:
            features = np.array([])

        bbox_tlwh = xywh_to_tlwh(bbox_xywh)
        detections = [
            Detection(bbox_tlwh[i], conf, features[i])
            for i, conf in enumerate(cls_conf) if conf > MIN_CONFIDENCE
        ]

        # run non-maximum suppression
        boxes = np.array([d.tlwh for d in detections])
        scores = np.array([d.confidence for d in detections])
        nms_max_overlap = 1.0
        indices = non_max_suppression(boxes, nms_max_overlap, scores)
        detections = [detections[i] for i in indices]

        # update tracker
        tracker.predict()
        tracker.update(detections)

        # update bbox identities
        outputs = []
        for track in tracker.tracks:
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            box = track.to_tlwh()
            x1, y1, x2, y2 = tlwh_to_xyxy(box, h, w)
            track_id = track.track_id
            outputs.append(np.array([x1, y1, x2, y2, track_id], dtype=int))
        if len(outputs) > 0:
            outputs = np.stack(outputs, axis=0)

        # action detection
        actions = []
        persons = []
        if len(outputs) > 0:
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            for i, box in enumerate(bbox_xyxy):
                id = identities[i]

                if id not in action_data:
                    action_data[id] = np.zeros(
                        (ailia.POSE_KEYPOINT_CNT - 1, TIME_RANGE, 3))

                # action recognition
                action, person = action_recognition(box, input_image, pose,
                                                    detector, model,
                                                    action_data[id])
                actions.append(action)
                persons.append(person)

        # draw box for visualization
        if len(outputs) > 0:
            bbox_tlwh = []
            bbox_xyxy = outputs[:, :4]
            identities = outputs[:, -1]
            frame = draw_boxes(input_image, bbox_xyxy, identities, actions,
                               action_data, (0, 0))

            for bb_xyxy in bbox_xyxy:
                bbox_tlwh.append(xyxy_to_tlwh(bb_xyxy))

        # draw skeleton
        for person in persons:
            if person is not None:
                display_result(input_image, person)

        if writer is not None:
            writer.write(input_image)

            # show progress
            if idx_frame == "0":
                print()
            print("\r" + str(idx_frame + 1) + " / " + str(frame_nb), end="")
            if idx_frame == frame_nb - 1:
                print()

        cv2.imshow('frame', input_image)

        idx_frame = idx_frame + 1

    if writer is not None:
        writer.release()

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
Example #28
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    if args.env_id is not None:
        count = ailia.get_environment_count()
        if count > args.env_id:
            env_id = args.env_id
        else:
            print(f'specified env_id: {args.env_id} cannot be found')
    print(f'env_id: {env_id}')

    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              len(HAND_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                              algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                              env_id=env_id)

    hand = ailia.PoseEstimator(HAND_MODEL_PATH,
                               HAND_WEIGHT_PATH,
                               env_id=env_id,
                               algorithm=HAND_ALGORITHM)
    hand.set_threshold(0.1)

    ailia_input_w = detector.get_input_shape()[3]
    ailia_input_h = detector.get_input_shape()[2]

    capture = get_capture(args.video)
    # create video writer if savepath is specified as video format
    if args.savepath != SAVE_PATH:
        f_h = int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT))
        f_w = int(capture.get(cv2.CAP_PROP_FRAME_WIDTH))
        save_h, save_w = calc_adjust_fsize(f_h, f_w, ailia_input_h,
                                           ailia_input_w)
        writer = get_writer(args.savepath, save_h, save_w)
    else:
        writer = None

    while (True):
        ret, frame = capture.read()
        if (cv2.waitKey(1) & 0xFF == ord('q')) or not ret:
            break

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        h, w = img.shape[0], img.shape[1]
        count = detector.get_object_count()
        for idx in range(count):
            # get detected hand
            obj = detector.get_object(idx)
            margin = 1.0
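            # obj.x/y/w/h are normalized to [0, 1]; scale to pixels and build
            # a square crop whose side is the larger box edge times the margin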
            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w, obj.h * h) * margin
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int(fx + fw), int(fy + fh))

            # display detected hand
            color = hsv_to_rgb(0, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 4)

            # crop detected hand region
            crop_img = img[top_left[1]:bottom_right[1],
                           top_left[0]:bottom_right[0], 0:4]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue

            # inference
            _ = hand.compute(crop_img.astype(np.uint8, order='C'))

            # postprocessing
            display_result(frame, hand, top_left, bottom_right)

        cv2.imshow('frame', frame)

        # save results
        if writer is not None:
            writer.write(frame)

    capture.release()
    cv2.destroyAllWindows()
    if writer is not None:
        writer.release()
    print('Script finished successfully.')
Example #29
def recognize_from_video():
    # net initialize
    env_id = ailia.get_gpu_environment_id()
    print(f'env_id: {env_id}')
    detector = ailia.Detector(MODEL_PATH,
                              WEIGHT_PATH,
                              len(HAND_CATEGORY),
                              format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                              channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                              range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                              algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                              env_id=env_id)

    hand = ailia.PoseEstimator(HAND_MODEL_PATH,
                               HAND_WEIGHT_PATH,
                               env_id=env_id,
                               algorithm=HAND_ALGORITHM)
    hand.set_threshold(0.1)

    if args.video == '0':
        print('[INFO] Webcam mode is activated')
        capture = cv2.VideoCapture(0)
        if not capture.isOpened():
            print("[ERROR] webcamera not found")
            sys.exit(1)
    else:
        if check_file_existance(args.video):
            capture = cv2.VideoCapture(args.video)

    while (True):
        ret, frame = capture.read()
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
        if not ret:
            continue

        img = cv2.cvtColor(frame, cv2.COLOR_BGR2BGRA)
        detector.compute(img, THRESHOLD, IOU)

        h, w = img.shape[0], img.shape[1]
        count = detector.get_object_count()
        for idx in range(count):
            # get detected hand
            obj = detector.get_object(idx)
            margin = 1.0
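            # obj.x/y/w/h are normalized to [0, 1]; scale to pixels and build
            # a square crop whose side is the larger box edge times the margin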
            cx = (obj.x + obj.w / 2) * w
            cy = (obj.y + obj.h / 2) * h
            cw = max(obj.w * w, obj.h * h) * margin
            fx = max(cx - cw / 2, 0)
            fy = max(cy - cw / 2, 0)
            fw = min(cw, w - fx)
            fh = min(cw, h - fy)
            top_left = (int(fx), int(fy))
            bottom_right = (int(fx + fw), int(fy + fh))

            # display detected hand
            color = hsv_to_rgb(0, 255, 255)
            cv2.rectangle(frame, top_left, bottom_right, color, 4)

            # crop detected hand region
            crop_img = img[top_left[1]:bottom_right[1],
                           top_left[0]:bottom_right[0], 0:4]
            if crop_img.shape[0] <= 0 or crop_img.shape[1] <= 0:
                continue

            # inference
            _ = hand.compute(crop_img.astype(np.uint8, order='C'))

            # postprocessing
            display_result(frame, hand, top_left, bottom_right)

        cv2.imshow('frame', frame)

    capture.release()
    cv2.destroyAllWindows()
    print('Script finished successfully.')
Example #30
import sys

import cv2

import ailia

model_path = sys.argv[1] + ".prototxt"
weight_path = sys.argv[1]
classes_path = sys.argv[2]
img_path = sys.argv[3]

with open(classes_path) as f:
    class_names = f.readlines()
class_names = [c.strip() for c in class_names]

print("loading ...");

# detector initialize
env_id = ailia.get_gpu_environment_id()
categories = len(class_names)
detector = ailia.Detector(model_path,
                          weight_path,
                          categories,
                          format=ailia.NETWORK_IMAGE_FORMAT_RGB,
                          channel=ailia.NETWORK_IMAGE_CHANNEL_FIRST,
                          range=ailia.NETWORK_IMAGE_RANGE_U_FP32,
                          algorithm=ailia.DETECTOR_ALGORITHM_YOLOV3,
                          env_id=env_id)

# load input image and convert to BGRA
img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
# a single-channel image loaded with IMREAD_UNCHANGED is 2-D, so check ndim
# before indexing shape[2]
if img.ndim == 2:
    img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGRA)
elif img.shape[2] == 3:
    img = cv2.cvtColor(img, cv2.COLOR_BGR2BGRA)

print( "img.shape=" + str(img.shape) )

work = img
w = img.shape[1]
h = img.shape[0]

print("inferencing ...");