Esempio n. 1
0
def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages,
                      straighten_pages):
    """Check an OCRPredictor assembled from non-pretrained models.

    The predictor is created with orientation and language detection
    enabled, run on the document loaded from ``mock_pdf``, and its output is
    checked for type, page count and the orientation value of the first page.

    Args:
        mock_pdf: input handed to ``DocumentFile.from_pdf``.
        mock_vocab: vocabulary given to the recognition model.
        assume_straight_pages: forwarded to both the detection model and
            ``OCRPredictor``.
        straighten_pages: forwarded to ``OCRPredictor``.
    """
    det_bsize = 4
    det_predictor = DetectionPredictor(
        PreProcessor(output_size=(512, 512), batch_size=det_bsize),
        detection.db_mobilenet_v3_large(
            pretrained=False,
            pretrained_backbone=False,
            assume_straight_pages=assume_straight_pages,
        ),
    )

    # The wrapped detection model must not be left in training mode.
    assert not det_predictor.model.training

    reco_bsize = 32
    reco_predictor = RecognitionPredictor(
        PreProcessor(output_size=(32, 128),
                     batch_size=reco_bsize,
                     preserve_aspect_ratio=True),
        recognition.crnn_vgg16_bn(pretrained=False,
                                  pretrained_backbone=False,
                                  vocab=mock_vocab),
    )

    # Same expectation for the recognition model.
    assert not reco_predictor.model.training

    doc = DocumentFile.from_pdf(mock_pdf)

    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
        detect_orientation=True,
        detect_language=True,
    )

    # A crop orientation predictor is only expected when pages are not
    # assumed to be straight.
    if assume_straight_pages:
        assert predictor.crop_orientation_predictor is None
    else:
        assert isinstance(predictor.crop_orientation_predictor, nn.Module)

    out = predictor(doc)
    assert isinstance(out, Document)
    assert len(out.pages) == 2

    # Dimension check: a 4D array wrapped in a list must be rejected.
    # The array is built outside the ``raises`` block so that only the
    # predictor call itself can satisfy the expected ValueError.
    input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
    with pytest.raises(ValueError):
        predictor([input_page])

    assert out.pages[0].orientation["value"] == 0
def test_preprocessor(batch_size, output_size, input_tensor, expected_batches,
                      expected_value):
    """Check input validation and batching of ``PreProcessor``.

    Args:
        batch_size: batch size given to the preprocessor.
        output_size: target size given to the preprocessor.
        input_tensor: valid input fed to the preprocessor.
        expected_batches: number of batches the call must return.
        expected_value: value every element of every batch must equal.
    """
    preproc = PreProcessor(output_size, batch_size)

    # Each entry pairs the exception the preprocessor must raise with the
    # invalid input that triggers it.
    rejected_inputs = [
        # Unsupported input type
        (TypeError, 42),
        # Bare array input: 3D shape, then 4D with an int32 dtype
        (AssertionError, np.full((256, 128, 3), 255, dtype=np.uint8)),
        (TypeError, np.full((1, 256, 128, 3), 255, dtype=np.int32)),
        # List input: 4D element, then 3D element with an int32 dtype
        (AssertionError, [np.full((3, 256, 128, 3), 255, dtype=np.uint8)]),
        (TypeError, [np.full((256, 128, 3), 255, dtype=np.int32)]),
    ]
    for expected_error, bad_input in rejected_inputs:
        with pytest.raises(expected_error):
            preproc(bad_input)

    batches = preproc(input_tensor)
    assert isinstance(batches, list)
    assert len(batches) == expected_batches
    assert all(isinstance(batch, tf.Tensor) for batch in batches)
    assert all(batch.dtype == tf.float32 for batch in batches)
    assert all(batch.shape[1:3] == output_size for batch in batches)
    assert all(
        tf.math.reduce_all(batch == expected_value) for batch in batches)
    # The representation is expected to span exactly four lines.
    assert repr(preproc).count("\n") == 3
Esempio n. 3
0
def test_recognitionpredictor(mock_pdf, mock_vocab):  # noqa: F811
    """Check a RecognitionPredictor on crops taken from the first PDF page.

    Args:
        mock_pdf: input handed to ``DocumentFile.from_pdf``.
        mock_vocab: vocabulary given to the recognition model.

    Returns:
        The ``RecognitionPredictor`` built for the test.
    """
    batch_size = 4
    predictor = RecognitionPredictor(
        PreProcessor(output_size=(32, 128),
                     batch_size=batch_size,
                     preserve_aspect_ratio=True),
        recognition.crnn_vgg16_bn(vocab=mock_vocab, input_shape=(32, 128, 3)),
    )

    pages = DocumentFile.from_pdf(mock_pdf).as_images()
    # Create bounding boxes
    boxes = np.array([[0.5, 0.5, 0.75, 0.75], [0.5, 0.5, 1.0, 1.0]],
                     dtype=np.float32)
    crops = extract_crops(pages[0], boxes)

    out = predictor(crops)

    # One (text, confidence) prediction per crop
    assert len(out) == boxes.shape[0]
    assert all(
        isinstance(val, str) and isinstance(conf, float) for val, conf in out)

    # Dimension check: a 4D array wrapped in a list must be rejected.
    # The array is built outside the ``raises`` block so that only the
    # predictor call itself can satisfy the expected ValueError.
    input_crop = (255 * np.random.rand(1, 128, 64, 3)).astype(np.uint8)
    with pytest.raises(ValueError):
        predictor([input_crop])

    return predictor
Esempio n. 4
0
def test_ocrpredictor(mock_pdf, mock_vocab, assume_straight_pages,
                      straighten_pages):
    """Check an OCRPredictor on the page images of a PDF.

    The detection model is created with ``pretrained=True`` and the
    recognition model with ``pretrained=False``.

    Args:
        mock_pdf: input handed to ``DocumentFile.from_pdf``.
        mock_vocab: vocabulary given to the recognition model.
        assume_straight_pages: forwarded to both the detection model and
            ``OCRPredictor``.
        straighten_pages: forwarded to ``OCRPredictor``.
    """
    det_bsize = 4
    det_predictor = DetectionPredictor(
        PreProcessor(output_size=(512, 512), batch_size=det_bsize),
        detection.db_mobilenet_v3_large(
            pretrained=True,
            pretrained_backbone=False,
            input_shape=(512, 512, 3),
            assume_straight_pages=assume_straight_pages,
        ))

    reco_bsize = 16
    reco_predictor = RecognitionPredictor(
        PreProcessor(output_size=(32, 128),
                     batch_size=reco_bsize,
                     preserve_aspect_ratio=True),
        recognition.crnn_vgg16_bn(pretrained=False,
                                  pretrained_backbone=False,
                                  vocab=mock_vocab))

    doc = DocumentFile.from_pdf(mock_pdf).as_images()

    predictor = OCRPredictor(
        det_predictor,
        reco_predictor,
        assume_straight_pages=assume_straight_pages,
        straighten_pages=straighten_pages,
    )

    # A crop orientation predictor is only expected when pages are not
    # assumed to be straight.
    if assume_straight_pages:
        assert predictor.crop_orientation_predictor is None
    else:
        assert isinstance(predictor.crop_orientation_predictor, NestedObject)

    out = predictor(doc)
    assert isinstance(out, Document)
    assert len(out.pages) == 2

    # Dimension check: a 4D array wrapped in a list must be rejected.
    # The array is built outside the ``raises`` block so that only the
    # predictor call itself can satisfy the expected ValueError.
    input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
    with pytest.raises(ValueError):
        predictor([input_page])
Esempio n. 5
0
def _predictor(arch: str, pretrained: bool,
               **kwargs: Any) -> RecognitionPredictor:
    """Build a ``RecognitionPredictor`` for a named architecture.

    Args:
        arch: architecture name; must be listed in ``ARCHS`` and is used as
            the key to fetch the model constructor from ``recognition``.
        pretrained: forwarded to the model constructor.
        **kwargs: extra ``PreProcessor`` arguments. ``mean`` and ``std``
            fall back to the model configuration and ``batch_size`` to 32
            when not supplied.

    Returns:
        A predictor pairing the preprocessor with the instantiated model.

    Raises:
        ValueError: if ``arch`` is not in ``ARCHS``.
    """
    if arch not in ARCHS:
        raise ValueError(f"unknown architecture '{arch}'")

    model = vars(recognition)[arch](pretrained=pretrained)

    # Caller-provided preprocessing options take precedence over defaults.
    kwargs.setdefault('mean', model.cfg['mean'])
    kwargs.setdefault('std', model.cfg['std'])
    kwargs.setdefault('batch_size', 32)

    # The first two entries of the configured shape are used under
    # TensorFlow, the last two otherwise.
    # NOTE(review): presumably channels-last vs channels-first - confirm.
    size_slice = slice(None, 2) if is_tf_available() else slice(-2, None)
    target_size = model.cfg['input_shape'][size_slice]

    pre_processor = PreProcessor(target_size,
                                 preserve_aspect_ratio=True,
                                 **kwargs)
    return RecognitionPredictor(pre_processor, model)
Esempio n. 6
0
def test_detectionpredictor(mock_pdf):  # noqa: F811
    """Check a DetectionPredictor on the page images of a PDF.

    Args:
        mock_pdf: input handed to ``DocumentFile.from_pdf``.

    Returns:
        The ``DetectionPredictor`` built for the test.
    """
    batch_size = 4
    predictor = DetectionPredictor(
        PreProcessor(output_size=(512, 512), batch_size=batch_size),
        detection.db_resnet50(input_shape=(512, 512, 3)))

    pages = DocumentFile.from_pdf(mock_pdf).as_images()
    out = predictor(pages)
    # The input PDF has 2 pages
    assert len(out) == 2

    # Dimension check: a 4D array wrapped in a list must be rejected.
    # The array is built outside the ``raises`` block so that only the
    # predictor call itself can satisfy the expected ValueError.
    input_page = (255 * np.random.rand(1, 256, 512, 3)).astype(np.uint8)
    with pytest.raises(ValueError):
        predictor([input_page])

    return predictor
Esempio n. 7
0
def _predictor(arch: Any, pretrained: bool,
               **kwargs: Any) -> RecognitionPredictor:
    """Build a ``RecognitionPredictor`` from a name or a model instance.

    Args:
        arch: either an architecture name listed in ``ARCHS`` or an instance
            of ``recognition.CRNN``, ``recognition.SAR`` or
            ``recognition.MASTER``, which is used as is.
        pretrained: forwarded to the model constructor when ``arch`` is a
            name; not used when a model instance is given.
        **kwargs: extra ``PreProcessor`` arguments. ``mean`` and ``std``
            fall back to the model configuration and ``batch_size`` to 32
            when not supplied.

    Returns:
        A predictor pairing the preprocessor with the resolved model.

    Raises:
        ValueError: if ``arch`` is an unknown name or an unsupported object.
    """
    supported_models = (recognition.CRNN, recognition.SAR, recognition.MASTER)

    if isinstance(arch, str):
        if arch not in ARCHS:
            raise ValueError(f"unknown architecture '{arch}'")
        model = vars(recognition)[arch](pretrained=pretrained)
    elif isinstance(arch, supported_models):
        model = arch
    else:
        raise ValueError(f"unknown architecture: {type(arch)}")

    # Caller-provided preprocessing options take precedence over defaults.
    kwargs.setdefault("mean", model.cfg["mean"])
    kwargs.setdefault("std", model.cfg["std"])
    kwargs.setdefault("batch_size", 32)

    # The first two entries of the configured shape are used under
    # TensorFlow, the last two otherwise.
    # NOTE(review): presumably channels-last vs channels-first - confirm.
    size_slice = slice(None, 2) if is_tf_available() else slice(-2, None)
    target_size = model.cfg["input_shape"][size_slice]

    pre_processor = PreProcessor(target_size,
                                 preserve_aspect_ratio=True,
                                 **kwargs)
    return RecognitionPredictor(pre_processor, model)