def test_plain_text():
    """Process inline plain text and compare against the expected document.

    The input mixes text lines with empty and whitespace-only lines; the
    expected document holds one paragraph per non-blank line.
    """
    raw = "paragraph 1\nparagraph 2\n\n \n \nparagraph 3\n"
    actual = process(Content(data=raw))

    expected_blocks = [
        TextParagraph().append_text_segment(TextSegment(line))
        for line in ("paragraph 1", "paragraph 2", "paragraph 3")
    ]
    expected = Doc().append_blocks(expected_blocks)

    assert actual.to_dict() == expected.to_dict()
def test_move_cell():
    """Check cell ordering inside a table and neighbour lookup via ``move``.

    Layout under test (2 rows, 3 cols; C5 occupies the last column of
    both rows):

        C1, C3, C5
        C2, C4, C5
    """

    def make_cell(x, y, width, height, label):
        # Argument order of Rect is taken from the layout above
        # (column, row, column span, row span) -- confirm against Rect.
        cell = Cell(Rect[int](x, y, width, height))
        paragraph = TextParagraph().append_text_segment(TextSegment(label))
        return cell.append_paragraph(paragraph)

    def assert_cell_order(table, expected_cells):
        # Only the listed positions are checked, one lookup per position.
        for position, expected in enumerate(expected_cells):
            assert table.cells()[position] == expected

    table = Table(2, 3)  # 2 rows, 3 cols
    cell1 = make_cell(0, 0, 1, 1, "C1")
    cell2 = make_cell(0, 1, 1, 1, "C2")
    cell3 = make_cell(1, 0, 1, 1, "C3")
    cell4 = make_cell(1, 1, 1, 1, "C4")
    cell5 = make_cell(2, 0, 1, 2, "C5")

    # Add four of the five cells, deliberately out of layout order.
    (table
     .add_cell(cell1)
     .add_cell(cell5)
     .add_cell(cell4)
     .add_cell(cell3))
    assert_cell_order(table, [cell1, cell5, cell4, cell3])

    # With the last cell added, the expected order is row by row,
    # left to right.
    table.add_cell(cell2)
    assert_cell_order(table, [cell1, cell3, cell5, cell2, cell4])

    assert table.ready_to_move()

    directions = (
        Direction.LEFT,
        Direction.UP,
        Direction.RIGHT,
        Direction.DOWN,
    )
    # Expected neighbour per cell, listed in the same order as `directions`.
    expected_neighbours = [
        (cell1, (None, None, cell3, cell2)),
        (cell2, (None, cell1, cell4, None)),
        (cell3, (cell1, None, cell5, cell4)),
        (cell4, (cell2, cell3, cell5, None)),
        (cell5, (cell3, None, None, None)),
    ]
    for cell, neighbours in expected_neighbours:
        for direction, neighbour in zip(directions, neighbours):
            assert cell.move(direction) is neighbour
def test_merge_segments_with_link():
    """A linked segment between two plain segments must stay separate.

    Equivalent markup:
        This is a link: <a href="http://www.example.com">example</a>.
    """
    url = "http://www.example.com"
    pieces = [
        TextSegment("This is a link: "),
        TextSegment("example", url),
        TextSegment("."),
    ]
    par = TextParagraph()
    for piece in pieces:
        par = par.append_text_segment(piece)

    assert len(par.segments()) == 3
    expected_texts = ["This is a link: ", "example", "."]
    for position, expected in enumerate(expected_texts):
        assert par.segments()[position].text() == expected
    assert par.segments()[1].link() == url
def test_plain_text_from_file():
    """Process the plain-text fixture file and expect four paragraphs."""
    content = Content(
        source=ContentSource.FILE,
        path="dolphin_doc_lib/testdata/plain_text.txt",
    )
    actual = process(content)

    expected_lines = ("paragraph 1", "paragraph 2", "paragraph 3", "paragraph 4")
    expected = Doc().append_blocks([
        TextParagraph().append_text_segment(TextSegment(line))
        for line in expected_lines
    ])

    assert actual.to_dict() == expected.to_dict()
def _process_string_node(node) -> BlocksInfo:
    """Convert a string node into a ``BlocksInfo``.

    Returns a ``BlocksInfo`` holding one text paragraph with the node's
    stripped text, or an empty ``BlocksInfo`` when nothing is left after
    stripping.
    """
    # Stripping mimics browser behavior; it is not 100% accurate.
    stripped: str = node.strip()
    if stripped:
        paragraph = TextParagraph().append_text_segment(TextSegment(stripped))
        return BlocksInfo(blocks=[paragraph])
    return BlocksInfo()
def _process_text(text: str) -> Doc:
    """Create Dolphin Doc from plain text.

    Each line of ``text`` is stripped; every line that is non-empty after
    stripping is appended to the document as its own text paragraph.
    """
    doc = Doc()
    stripped_lines = (raw_line.strip() for raw_line in text.splitlines())
    # filter(None, ...) drops the lines that are empty after stripping.
    for content in filter(None, stripped_lines):
        doc.append_block(
            TextParagraph().append_text_segment(TextSegment(content)))
    return doc
def test_ignore_tags():
    """Content of <style>, <script> and <noscript> must not reach the doc."""
    html = """a<style>b</style><script>c</script><noscript>d</noscript>e"""
    actual = process_html(html)

    # Only the text outside the ignored tags is expected, as one paragraph.
    surviving_text = TextParagraph().append_text_segment(TextSegment("ae"))
    expected = Doc().append_blocks([surviving_text])

    assert actual.to_dict() == expected.to_dict()
def test_standard_table():
    """A table with thead/tbody/tfoot becomes one 4-row, 2-column Table."""
    html = """ <table> <thead> <tr> <th>Month</th> <th>Savings</th> </tr> </thead> <tbody> <tr> <td>January</td> <td>$100</td> </tr> <tr> <td>February</td> <td>$80</td> </tr> </tbody> <tfoot> <tr> <td>Sum</td> <td>$180</td> </tr> </tfoot> </table>"""
    actual = process_html(html)

    # Expected cell texts, laid out the way the table reads.
    grid = [
        ["Month", "Savings"],
        ["January", "$100"],
        ["February", "$80"],
        ["Sum", "$180"],
    ]
    expected_cells = []
    for row_index, row in enumerate(grid):
        for col_index, label in enumerate(row):
            cell = Cell(Rect[int](col_index, row_index, 1, 1))
            paragraph = TextParagraph().append_text_segment(TextSegment(label))
            expected_cells.append(cell.append_paragraph(paragraph))
    expected = Doc().append_block(Table(4, 2, expected_cells))

    assert actual.to_dict() == expected.to_dict()
def test_line_break_tags():
    """Each listed tag must start a new paragraph.

    The input puts one letter before each tag, so the expected document
    is one single-letter paragraph per letter, "a" through "t".
    """
    html = """a<p>b<br>c<h1>d<h2>e<h3>f<h4>g<h5>h<h6>i<pre>j<address>k<blockquote>l <dl>m<div>n<fieldset>o<form>p<hr>q<ol>r<ul>s<li>t """
    actual = process_html(html)

    expected_letters = "abcdefghijklmnopqrst"
    expected = Doc().append_blocks([
        TextParagraph().append_text_segment(TextSegment(letter))
        for letter in expected_letters
    ])

    assert actual.to_dict() == expected.to_dict()
def test_merge_segments():
    """Two consecutive segments without links are expected to merge."""
    par = TextParagraph()
    for piece in ("Hello ", "World!"):
        par = par.append_text_segment(TextSegment(piece))

    assert len(par.segments()) == 1
    merged = par.segments()[0]
    assert merged.text() == "Hello World!"