예제 #1
0
def test_line_break_tags():
    """Check that every line-breaking tag starts a new text paragraph.

    The markup places one letter before each tag; the expected document
    holds each of those letters in a paragraph of its own.
    """
    markup = """a<p>b<br>c<h1>d<h2>e<h3>f<h4>g<h5>h<h6>i<pre>j<address>k<blockquote>l
    <dl>m<div>n<fieldset>o<form>p<hr>q<ol>r<ul>s<li>t
    """
    actual = process_html(markup)

    # One single-letter paragraph per letter in the markup, "a" through "t".
    letters = "abcdefghijklmnopqrst"
    paragraphs = [
        TextParagraph().append_text_segment(TextSegment(letter))
        for letter in letters
    ]
    expected = Doc().append_blocks(paragraphs)

    assert actual.to_dict() == expected.to_dict()
예제 #2
0
def _process_text(text: str) -> Doc:
    """Create Dolphin Doc from plain text.

    Every line of ``text`` is stripped of surrounding whitespace; each line
    that is still non-empty becomes one TextParagraph appended to the Doc.
    Lines that are empty after stripping are skipped.
    """
    result = Doc()
    stripped_lines = (raw.strip() for raw in text.splitlines())
    for content in stripped_lines:
        if not content:
            continue  # blank or whitespace-only line: no paragraph
        segment_holder = TextParagraph()
        result.append_block(
            segment_holder.append_text_segment(TextSegment(content)))
    return result
예제 #3
0
def test_ignore_tags():
    """Check that <style>, <script> and <noscript> contents are dropped.

    Only the text outside those tags ("a" and "e") is expected to remain,
    joined into a single paragraph.
    """
    markup = """a<style>b</style><script>c</script><noscript>d</noscript>e"""
    actual = process_html(markup)

    surviving_text = TextParagraph().append_text_segment(TextSegment("ae"))
    expected = Doc().append_blocks([surviving_text])

    assert actual.to_dict() == expected.to_dict()
예제 #4
0
def test_plain_text():
    """Check that in-memory plain text yields one paragraph per non-blank line.

    The input contains empty and whitespace-only lines between paragraphs;
    none of them is expected to produce a paragraph.
    """
    raw_text = "paragraph 1\nparagraph 2\n\n  \n  \nparagraph 3\n"
    actual = process(Content(data=raw_text))

    expected_lines = ("paragraph 1", "paragraph 2", "paragraph 3")
    expected = Doc().append_blocks([
        TextParagraph().append_text_segment(TextSegment(line))
        for line in expected_lines
    ])

    assert actual.to_dict() == expected.to_dict()
예제 #5
0
def test_plain_text_from_file():
    """Check that a plain-text file source yields the four expected paragraphs."""
    file_content = Content(source=ContentSource.FILE,
                           path="dolphin_doc_lib/testdata/plain_text.txt")
    actual = process(file_content)

    expected_lines = ("paragraph 1", "paragraph 2", "paragraph 3",
                      "paragraph 4")
    expected = Doc().append_blocks([
        TextParagraph().append_text_segment(TextSegment(line))
        for line in expected_lines
    ])

    assert actual.to_dict() == expected.to_dict()
예제 #6
0
def test_standard_table():
    """Check that a table with thead/tbody/tfoot becomes a single Table block.

    The markup has four rows of two cells each; the expected document holds
    one Table(4, 2, ...) with one single-paragraph cell per table cell.
    """
    markup = """
        <table>
        <thead>
            <tr>
            <th>Month</th>
            <th>Savings</th>
            </tr>
        </thead>
        <tbody>
            <tr>
            <td>January</td>
            <td>$100</td>
            </tr>
            <tr>
            <td>February</td>
            <td>$80</td>
            </tr>
        </tbody>
        <tfoot>
            <tr>
            <td>Sum</td>
            <td>$180</td>
            </tr>
        </tfoot>
        </table>"""
    actual = process_html(markup)

    # Cell texts in document order, one tuple per <tr>.
    grid = (
        ("Month", "Savings"),
        ("January", "$100"),
        ("February", "$80"),
        ("Sum", "$180"),
    )
    # The first Rect argument advances within a row and the second across
    # rows (presumably x/column and y/row -- confirm against Rect); the
    # remaining two arguments are always 1.
    cells = [
        Cell(Rect[int](first, second, 1, 1)).append_paragraph(
            TextParagraph().append_text_segment(TextSegment(text)))
        for second, row_texts in enumerate(grid)
        for first, text in enumerate(row_texts)
    ]
    expected = Doc().append_block(Table(4, 2, cells))

    assert actual.to_dict() == expected.to_dict()
예제 #7
0
def process_html(html: str) -> Doc:
    """Create Dolphin Doc from html.

    The markup is parsed with BeautifulSoup using the 'html5lib' parser, the
    resulting <body> element is handed to ``_process``, and the blocks it
    reports are appended to a fresh Doc.
    """
    soup = BeautifulSoup(html, 'html5lib')
    # _process's result is treated as a BlocksInfo for type-checking purposes.
    body_info = cast(BlocksInfo, _process(soup.body))
    return Doc().append_blocks(body_info.blocks)