Example #1
0
def test_build_string():
    """Check ``String.from_xml`` on a ``<String>`` element with and without alternatives."""
    # Attribute values shared by both expected objects below.
    common = dict(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="Liberté",
    )

    # Case 1: self-closing element, so an empty alternatives list is expected.
    bare_xml = """<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" WC="0.92" CONTENT="Liberté"/>"""
    parsed = String.from_xml(_build_xml(bare_xml))
    assert parsed == String(alternatives=[], **common)

    # Case 2: a nested <Alternative> child is expected to show up in ``alternatives``.
    nested_xml = """
        <String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" WC="0.92" CONTENT="Liberté">
            <Alternative>alt</Alternative>
        </String>
    """
    parsed = String.from_xml(_build_xml(nested_xml))
    assert parsed == String(alternatives=[Alternative("alt")], **common)
Example #2
0
def test_build_text_block():
    """Check that ``TextBlock.from_xml`` rebuilds a block with its line, string and space."""
    xml_str = """
        <TextBlock ID="block_1" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="53">
            <TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21">
                <String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" WC="0.92" CONTENT="abc"/>
                <SP WIDTH="9" VPOS="133" HPOS="767"/>
            </TextLine>
        </TextBlock>
    """
    element = _build_xml(xml_str)

    # Expected tree, assembled from the innermost tokens outwards.
    word = String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="abc",
        alternatives=[],
    )
    space = SP(hpos=767, vpos=133, width=9)
    line = TextLine(
        id="line_2",
        hpos=712,
        vpos=129,
        width=235,
        height=21,
        strings=[word, space],
    )

    parsed = TextBlock.from_xml(element)
    # TextBlock is built positionally here, exactly as in the XML-independent tests.
    assert parsed == TextBlock("block_1", 53, 235, 712, 129, [line])
Example #3
0
def test_page_extract_strings():
    """Exercise ``Page.extract_strings`` on progressively deeper page trees."""

    def page_with(print_spaces):
        # Every page in this test shares the same leading arguments.
        return Page("page_0", 2339, 1654, 0, None, print_spaces)

    def print_space_with(blocks):
        return PrintSpace(1, 1, 1, 1, 1, blocks)

    def composed(children):
        return ComposedBlock("", 1, 1, 1, 1, children)

    def blank_string():
        # A fresh instance per call, so the final check compares by equality.
        return String("", 1, 1, 1, 1, "", 0, [])

    # Page without any print space.
    assert page_with([]).extract_strings() == []

    # Print space without blocks.
    assert page_with([print_space_with([])]).extract_strings() == []

    # Composed block without children.
    assert page_with([print_space_with([composed([])])
                      ]).extract_strings() == []

    # Text block without lines.
    empty_text_block = TextBlock("", 1, 1, 1, 1, [])
    assert page_with([print_space_with([composed([empty_text_block])])
                      ]).extract_strings() == []

    # Fully populated tree: the single nested string is expected back.
    line = TextLine("", 1, 1, 1, 1, [blank_string()])
    text_block = TextBlock("", 1, 1, 1, 1, [line])
    page = page_with([print_space_with([composed([text_block])])])
    assert page.extract_strings() == [blank_string()]
Example #4
0
def _string(word: str = 'abc') -> String:
    """Build a ``String`` fixture whose ``content`` is *word*; other fields are fixed."""
    fields = {
        "id": "string_3",
        "hpos": 712,
        "vpos": 133,
        "width": 55,
        "height": 13,
        "confidence": 0.92,
        "content": word,
        # A new list on every call, so fixtures never share state.
        "alternatives": [],
    }
    return String(**fields)
Example #5
0
def test_build_text_line():
    """Check ``TextLine.from_xml`` on a populated line and on an empty one."""

    def expected_line(tokens):
        # Both XML snippets below carry the same line-level attributes.
        return TextLine(
            id="line_2",
            hpos=712,
            vpos=129,
            width=235,
            height=21,
            strings=tokens,
        )

    # Case 1: the line holds one String followed by one SP.
    populated_xml = """
    <TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21">
        <String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" WC="0.92" CONTENT="abc"/>
        <SP WIDTH="9" VPOS="133" HPOS="767"/>
    </TextLine>
    """
    element = _build_xml(populated_xml)
    word = String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="abc",
        alternatives=[],
    )
    space = SP(hpos=767, vpos=133, width=9)
    parsed = TextLine.from_xml(element)
    assert parsed == expected_line([word, space])

    # Case 2: a line without children is expected to yield an empty ``strings`` list.
    empty_xml = """
    <TextLine ID="line_2" HPOS="712" VPOS="129" WIDTH="235" HEIGHT="21">
    </TextLine>
    """
    element = _build_xml(empty_xml)
    parsed = TextLine.from_xml(element)
    assert parsed == expected_line([])
Example #6
0
def test_load_string_or_sp():
    """Check ``_load_string_or_sp`` on ``<SP>``, ``<String>`` and an unsupported element.

    The first two cases compare the returned object against the expected
    ``SP`` / ``String``; the last one expects a ``ValueError`` for an
    ``<Alternative>`` element.
    """
    # An <SP> element is expected to come back as an SP.
    xml_str = """<SP WIDTH="9" VPOS="133" HPOS="767"/>"""
    element = _build_xml(xml_str)
    assert _load_string_or_sp(element) == SP(hpos=767, vpos=133, width=9)

    # A <String> element is expected to come back as a String.
    xml_str = """<String ID="string_3" HPOS="712" VPOS="133" WIDTH="55" HEIGHT="13" WC="0.92" CONTENT="abc"/>"""
    element = _build_xml(xml_str)
    assert _load_string_or_sp(element) == String(
        id="string_3",
        hpos=712,
        vpos=133,
        width=55,
        height=13,
        confidence=0.92,
        content="abc",
        alternatives=[],
    )

    # Build the element outside the ``raises`` block: only the call under test
    # may satisfy the expected exception, so an error raised while preparing
    # the fixture cannot make this test pass by accident.
    xml_str = """<Alternative>test</Alternative>"""
    element = _build_xml(xml_str)
    with pytest.raises(ValueError):
        _load_string_or_sp(element)