Esempio n. 1
0
def test_get_form(src, expected, expected_get_fields):
    """Check if we can read out form data.

    Args:
        src: Path of the PDF, relative to ``RESOURCE_ROOT``.
        expected: Value that ``reader.get_form_text_fields()`` must equal.
        expected_get_fields: Value that ``reader.get_fields(fileobj=...)``
            must equal.
    """
    src = os.path.join(RESOURCE_ROOT, src)
    reader = PdfReader(src)
    fields = reader.get_form_text_fields()
    assert fields == expected

    report_path = "tmp-fields-report.txt"
    try:
        with open(report_path, "w") as f:
            fields = reader.get_fields(fileobj=f)
        assert fields == expected_get_fields
        if fields:
            for field in fields.values():
                # Just access the attributes; the values are discarded, the
                # point is only that reading each one does not raise.
                [
                    field.field_type,
                    field.parent,
                    field.kids,
                    field.name,
                    field.alternate_name,
                    field.mapping_name,
                    field.flags,
                    field.value,
                    field.default_value,
                    field.additional_actions,
                ]
    finally:
        # cleanup: runs even when an assertion above fails, so a failing test
        # does not leave the report file behind in the working directory.
        if os.path.exists(report_path):
            os.remove(report_path)
Esempio n. 2
0
def test_get_fields():
    """Check that get_fields() exposes the "c1-1" field of the sample PDF."""
    pdf_bytes = get_pdf_from_url(
        "https://corpora.tika.apache.org/base/docs/govdocs1/972/972486.pdf",
        name="tika-972486.pdf",
    )
    stream = BytesIO(pdf_bytes)
    reader = PdfReader(stream)

    fields = reader.get_fields()
    assert fields is not None
    assert "c1-1" in fields

    # Compare as a plain dict so only the key/value content is checked.
    expected_entry = {"/FT": "/Btn", "/T": "c1-1"}
    actual_entry = dict(fields["c1-1"])
    assert actual_entry == expected_entry
Esempio n. 3
0
def test_get_fields_read_write_report():
    """Check that get_fields() returns fields when given a writable file object."""
    url = "https://corpora.tika.apache.org/base/docs/govdocs1/909/909655.pdf"
    name = "tika-909655.pdf"
    reader = PdfReader(BytesIO(get_pdf_from_url(url, name=name)))

    report_path = "tmp-fields-report.txt"
    try:
        with open(report_path, "w") as fp:
            fields = reader.get_fields(fileobj=fp)
        assert fields
    finally:
        # cleanup: runs even if get_fields() raises or the assertion fails,
        # so the temporary file never outlives the test.
        if os.path.exists(report_path):
            os.remove(report_path)
Esempio n. 4
0
def test_get_fields():
    """Check that get_fields() warns about the undefined object and returns {}."""
    url = "https://corpora.tika.apache.org/base/docs/govdocs1/961/961883.pdf"
    name = "tika-961883.pdf"
    data = BytesIO(get_pdf_from_url(url, name=name))
    reader = PdfReader(data)

    report_path = "tmp.txt"
    try:
        with open(report_path, "w") as fp:
            with pytest.warns(PdfReadWarning, match="Object 2 0 not defined."):
                retrieved_fields = reader.get_fields(fileobj=fp)

        assert retrieved_fields == {}
    finally:
        # Cleanup: runs even when the expected warning is missing or the
        # assertion fails, so the temporary file is always removed.
        if os.path.exists(report_path):
            os.remove(report_path)
Esempio n. 5
0
def test_get_fields_read_else_block2():
    """Check that get_fields() returns None for this sample PDF."""
    pdf_bytes = get_pdf_from_url(
        "https://corpora.tika.apache.org/base/docs/govdocs1/914/914902.pdf",
        name="tika-914902.pdf",
    )
    reader = PdfReader(BytesIO(pdf_bytes))
    result = reader.get_fields()
    assert result is None