def test_heading(self):
    """Convert ``heading.html`` with the cover page rendered and check the topics.

    Expects no resources and two topics: the cover page followed by the
    "My First Heading" topic.
    """
    fixture = Path(FIXTURE_DIR) / "heading.html"
    converter = HtmlToTopics(HtmlSplitter(path=fixture), render_cover_page=True)

    cover = NeoTopic(title="Cover Page", content="\n\nIntroduction\n\n")
    # NOTE(review): whitespace inside this literal is reproduced exactly as
    # seen here -- confirm against the fixture before reformatting it.
    first_heading = NeoTopic(
        title="My First Heading",
        content=""" <p>My first paragraph.</p> """,
    )

    self.assertEqual(len(converter.resources), 0)
    self.assertEqual(converter.topics, [cover, first_heading])
def test_empty_title(self):
    """Convert ``empty_title.html`` and check titles, origin ids and contents.

    Expects no resources and three topics: a cover page without origin id,
    then "Installation" and "Removal", each with its own origin id.
    """
    fixture = Path(FIXTURE_DIR) / "empty_title.html"
    converter = HtmlToTopics(HtmlSplitter(path=fixture), render_cover_page=True)

    # NOTE(review): whitespace inside the content literals is reproduced
    # exactly as seen here -- confirm against the fixture before touching it.
    cover = NeoTopic(
        title="Cover Page",
        origin_id=None,
        content="\n\nIntroduction\n\n \nText that should be in the introduction.\n\n ",
    )
    installation = NeoTopic(
        title="Installation",
        origin_id="_Ref2A4E1AB689A0D2EE52FF15610E2D8283",
        content="\n a\n \n b\n\n ",
    )
    removal = NeoTopic(
        title="Removal",
        origin_id="_Re2D8283",
        content="\n c\n \n d\n ",
    )

    self.assertEqual(len(converter.resources), 0)
    self.assertEqual(converter.topics, [cover, installation, removal])
def test_heading_three_level(self):
    """Convert ``heading_three_levels.html`` and check the nested topic tree.

    Expects no resources, a cover page, and a single "Heading 1" topic that
    nests "Heading 1-2", which in turn nests "Heading 1-2-3".
    """
    fixture = Path(FIXTURE_DIR) / "heading_three_levels.html"
    converter = HtmlToTopics(HtmlSplitter(path=fixture), render_cover_page=True)

    cover = NeoTopic(title="Cover Page", content="\n\nIntroduction\n\n")
    # Build the expected tree from the innermost heading outwards.
    level_three = NeoTopic(title="Heading 1-2-3", content="\nc\n")
    level_two = NeoTopic(title="Heading 1-2", content="\nb\n", children=[level_three])
    level_one = NeoTopic(title="Heading 1", content="\na\n", children=[level_two])
    expected_topics = [cover, level_one]

    self.assertEqual(len(converter.resources), 0)
    self.assertEqual(expected_topics, converter.topics)
def test_cover_page(self):
    """Check the topics of an inline document with and without the cover page.

    With ``render_cover_page=True`` a "Cover Page" topic precedes the "b"
    topic; with ``render_cover_page=False`` only the "b" topic is expected.
    Neither run is expected to produce resources.
    """
    # NOTE(review): whitespace inside this literal is reproduced exactly as
    # seen here -- confirm against the original file before reformatting it.
    html = """<!DOCTYPE html> <html> <body> a <h1>b</h1> c </body> </html> """
    source = HtmlSplitter(content=html)

    # First pass: cover page rendered.
    converter = HtmlToTopics(source, render_cover_page=True)
    self.assertEqual(converter.resources, [])
    with_cover = [
        NeoTopic(title="Cover Page", content="a\n"),
        NeoTopic(title="b", content="c\n"),
    ]
    self.assertEqual(converter.topics, with_cover)

    # Second pass on the same splitter: the cover page topic is dropped.
    converter = HtmlToTopics(source, render_cover_page=False)
    without_cover = with_cover[1:]
    self.assertEqual(converter.resources, [])
    self.assertEqual(converter.topics, without_cover)
def test_empty_without_cover_page(self):
    """Check a heading-less inline document with and without the cover page.

    The same content is expected in a single topic in both runs; the topic
    is titled "Cover Page" when ``render_cover_page=True`` and
    "Flat document" when ``render_cover_page=False``.
    """
    # NOTE(review): whitespace inside this literal is reproduced exactly as
    # seen here -- confirm against the original file before reformatting it.
    html = """<!DOCTYPE html> <html> <body> a b c </body> </html> """
    source = HtmlSplitter(content=html)

    # First pass: cover page rendered.
    converter = HtmlToTopics(source, render_cover_page=True)
    self.assertEqual(converter.resources, [])
    body_text = "a\nb\nc\n"
    as_cover_page = [NeoTopic(title="Cover Page", content=body_text)]
    self.assertEqual(converter.topics, as_cover_page)

    # Second pass on the same splitter: cover page disabled.
    converter = HtmlToTopics(source, render_cover_page=False)
    as_flat_document = [NeoTopic(title="Flat document", content=body_text)]
    self.assertEqual(converter.resources, [])
    self.assertEqual(converter.topics, as_flat_document)
def test_anchor(self):
    """Convert ``anchor.html`` and check the anchors kept in each topic.

    Expects no resources, a cover page without origin id, and the
    "Heading 1" / "Heading 2" topics with origin ids ``heading1`` and
    ``heading2`` and the ``<a href=...>`` markup listed below.
    """
    fixture = Path(FIXTURE_DIR) / "anchor.html"
    converter = HtmlToTopics(HtmlSplitter(path=fixture), render_cover_page=True)

    cover = NeoTopic(title="Cover Page", origin_id=None, content="\n\nIntroduction\n\n")
    heading_one = NeoTopic(
        title="Heading 1",
        origin_id="heading1",
        content='\n\nHeading 1\n\n<a href="heading2"></a>\n\n',
    )
    # NOTE(review): whitespace inside this literal is reproduced exactly as
    # seen here -- confirm against the fixture before reformatting it.
    heading_two = NeoTopic(
        title="Heading 2",
        origin_id="heading2",
        content=""" Heading 2 <a href="heading1"></a> <a href="https://google.com/#Heading2">Clique</a> """,
    )

    self.assertEqual(len(converter.resources), 0)
    self.assertEqual(converter.topics, [cover, heading_one, heading_two])
def test_headings(self):
    """Convert ``headings_simple.html`` and compare every top-level topic.

    Expects no resources and four top-level topics: the cover page,
    "Heading 1" (nesting "Heading 1-2" -> "Heading 1-2-3"), "Heading 1.2",
    and "Heading 1.3" (nesting "Heading 1.3-2").

    Topics are compared one by one so a failure message shows the single
    mismatching topic. The number of topics is asserted first: without it,
    a converter that produced fewer topics than expected would pass
    silently, and one that produced more would fail with an ``IndexError``
    instead of an assertion failure.
    """
    splitter = HtmlSplitter(path=Path(FIXTURE_DIR).joinpath("headings_simple.html"))
    html2topics = HtmlToTopics(splitter, render_cover_page=True)
    expected = [
        NeoTopic(title="Cover Page", content="z"),
        NeoTopic(
            title="Heading 1",
            content="\na\n",
            children=[
                NeoTopic(
                    title="Heading 1-2",
                    content="\nb\n",
                    children=[NeoTopic(title="Heading 1-2-3", content="c")],
                )
            ],
        ),
        NeoTopic(title="Heading 1.2", content="\nd\n"),
        NeoTopic(
            title="Heading 1.3",
            content="\ne\n",
            children=[NeoTopic(title="Heading 1.3-2", content="\nf\n")],
        ),
    ]
    self.assertEqual(len(html2topics.resources), 0)

    topics = html2topics.topics
    # Guard the pairwise loop below: zip() would otherwise stop at the
    # shorter sequence and hide missing or extra topics.
    self.assertEqual(len(expected), len(topics))

    separator = "-" * 80
    for expected_topic, part in zip(expected, topics):
        self.assertEqual(
            expected_topic,
            part,
            "What we have:\n{}\n\n{}\nWhat we expect:\n{}\n\n".format(part, separator, expected_topic),
        )