Python make_soup_from_html Examples, helper_functions.make_soup_from_html Python Examples

Example #1

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_head_tag_empty_head(self, processing_options):
        """An empty ``<head>`` element produces no extracted item (``None``)."""
        parsed = helper_functions.make_soup_from_html("<head></head>")
        head_tag = parsed.find('head')
        # Sanity check: the parser really handed back the <head> element.
        assert head_tag.name == 'head'

        extracted = html_data_extractors.extract_from_head_tag(
            head_tag, processing_options)
        assert extracted is None

Example #2

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_div_incorrect_tag(self, processing_options):
        """Handing a ``<title>`` tag to ``extract_from_div`` must give ``None``."""
        html = "<title>My Title</title>"
        soup = helper_functions.make_soup_from_html(html)
        tag = soup.find('title')
        # Sanity check: we are deliberately passing a non-div element.
        assert tag.name == 'title'

        result = html_data_extractors.extract_from_div(tag, processing_options)
        # Identity check: ``None`` is a singleton, and ``==`` could be
        # satisfied by an object with a permissive ``__eq__``.
        assert result is None

Example #3

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_title(self, processing_options):
        """A ``<title>`` tag is extracted as a ``Title`` holding its text."""
        parsed = helper_functions.make_soup_from_html('<title>My Title</title>')
        title_tag = parsed.find('title')
        # Sanity check: the element under test is the <title> tag.
        assert title_tag.name == 'title'

        extracted = html_data_extractors.extract_from_title(
            title_tag, processing_options)
        assert isinstance(extracted, Title)
        assert extracted.contents == 'My Title'

Example #4

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_tag_with_coloured_text_span_no_color_in_style(
            self, processing_options):
        """A ``font-color`` span with an empty ``style`` still yields its text.

        ``extract_from_tag`` is expected to return a list whose first item
        carries the span's text.
        """
        markup = '<span class="font-color" style="">This is coloured.</span>'
        span_tag = helper_functions.make_soup_from_html(markup).find('span')
        assert span_tag.name == 'span'

        items = html_data_extractors.extract_from_tag(
            span_tag, processing_options)
        assert isinstance(items, list)
        first_item = items[0]
        assert first_item.contents == 'This is coloured.'

Example #5

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_iframe(self, processing_options):
        """An ``<iframe>`` is kept verbatim as the contents of a ``TextItem``."""
        iframe_markup = '<iframe>My iframe</iframe>'
        parsed = helper_functions.make_soup_from_html(iframe_markup)
        iframe_tag = parsed.find('iframe')
        assert iframe_tag.name == 'iframe'

        extracted = html_data_extractors.extract_from_iframe(
            iframe_tag, processing_options)
        assert isinstance(extracted, TextItem)
        # The whole element, tags included, is expected back as text.
        assert extracted.contents == '<iframe>My iframe</iframe>'

Example #6

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_heading_incorrect_tag(self, html, tag_name,
                                                processing_options):
        """``extract_from_heading`` must give ``None`` for the supplied tag.

        ``html`` is the markup to parse and ``tag_name`` the element to look
        up in it; both are supplied from outside this method (presumably by a
        parametrize decorator that is not visible here).
        """
        soup = helper_functions.make_soup_from_html(html)
        tag = soup.find(tag_name)
        # Sanity check: the parsed markup really contains the requested tag.
        assert tag.name == tag_name

        result = html_data_extractors.extract_from_heading(
            tag, processing_options, None)
        # Identity check: ``None`` is a singleton, and ``==`` could be
        # satisfied by an object with a permissive ``__eq__``.
        assert result is None

Example #7

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_unknown_span_incorrect_tag(self, processing_options):
        """Handing a ``<body>`` tag to ``extract_from_unknown_span`` gives ``None``."""
        html = "<body>My Body</body>"
        soup = helper_functions.make_soup_from_html(html)
        tag = soup.find('body')
        # Sanity check: we are deliberately passing a non-span element.
        assert tag.name == 'body'

        result = html_data_extractors.extract_from_unknown_span(
            tag, processing_options, None)
        # Identity check: ``None`` is a singleton, and ``==`` could be
        # satisfied by an object with a permissive ``__eq__``.
        assert result is None

Example #8

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_div(self, processing_options):
        """A ``<div>`` wrapping a ``<title>`` becomes a ``Paragraph`` with one ``Title``."""
        markup = "<div><title>My Title</title></div>"
        div_tag = helper_functions.make_soup_from_html(markup).find('div')
        assert div_tag.name == 'div'

        paragraph = html_data_extractors.extract_from_div(
            div_tag, processing_options)
        assert isinstance(paragraph, Paragraph)
        assert len(paragraph.contents) == 1
        # The single child should be the title nested inside the div.
        only_child = paragraph.contents[0]
        assert isinstance(only_child, Title)
        assert only_child.contents == 'My Title'

Example #9

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_unknown_span(self, processing_options):
        """A plain ``<span>`` is extracted as a list led by a ``TextItem``."""
        parsed = helper_functions.make_soup_from_html('<span>a span</span>')
        span_tag = parsed.find('span')
        assert span_tag.name == 'span'

        items = html_data_extractors.extract_from_unknown_span(
            span_tag, processing_options, None)
        assert isinstance(items, list)
        leading_item = items[0]
        assert isinstance(leading_item, TextItem)
        assert leading_item.contents == 'a span'

Example #10

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_hyperlink(self, html, href, display_text,
                                    processing_options):
        """An ``<a>`` tag is extracted as a ``Hyperlink``.

        ``html`` is the markup to parse; ``href`` and ``display_text`` are the
        values expected on the result.  All three are supplied from outside
        this method (presumably by a parametrize decorator not visible here).
        """
        anchor_tag = helper_functions.make_soup_from_html(html).find('a')
        assert anchor_tag.name == 'a'

        link = html_data_extractors.extract_from_hyperlink(
            anchor_tag, processing_options)
        assert isinstance(link, Hyperlink)
        assert link.href == href
        assert link.contents == display_text

Example #11

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_div_two_child_divs(self, processing_options):
        """Three nested ``<div>`` elements around a ``<br>`` yield a list of items.

        The first item of the returned list is expected to be a ``Paragraph``
        holding exactly one child.
        """
        html = "<div><div><div><br></div></div></div>"
        soup = helper_functions.make_soup_from_html(html)
        # ``find`` returns the first match, i.e. the outermost div.
        tag = soup.find('div')
        assert tag.name == 'div'

        result = html_data_extractors.extract_from_div(tag, processing_options)
        assert isinstance(result, list)
        # Check the type before reading ``.contents`` so a wrong type fails
        # with a clear assertion rather than an attribute error.
        assert isinstance(result[0], Paragraph)
        assert len(result[0].contents) == 1

Example #12

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_coloured_text_span_no_style(self,
                                                      processing_options):
        """A ``<span>`` without any attributes is not a coloured span: ``None``."""
        html = '<span>This is coloured.</span>'
        soup = helper_functions.make_soup_from_html(html)
        tag = soup.find('span')
        assert tag.name == 'span'

        result = html_data_extractors.extract_from_coloured_text_span(
            tag, processing_options)
        # Identity check: ``None`` is a singleton, and ``==`` could be
        # satisfied by an object with a permissive ``__eq__``.
        assert result is None

Example #13

0

Show file

    def test_nimbus_outline_html_output(self, processing_options):
        """A Nimbus outline block renders via ``html()`` as a heading plus nested lists.

        The input is a Nimbus ``outline`` div whose list items carry
        ``level-N`` classes; the expected output is an ``<h2>`` title followed
        by nested ``<ol>`` lists of the same links.
        """
        # The markup is kept on one physical line: a single-quoted literal
        # cannot contain a raw line break.
        html = '<div class="outline" id="b406348235_764"><div contenteditable="false" class="outline-container"><div class="outline-content-wrapper "><div class="outline-header "><div class="outline-left"><div class="outline-expand-icon "> </div></div><div class="outline-name">Outline</div></div><div class="outline-body"><ul class="outline-list outline-numbered"><li class="outline-list-item level-0"><a href="#b1023299123_950">A test note of page content</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1009">Testing lists</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1042">Testing inserted files</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1086">Testing a table</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1130">There are only 3 levels of heading in nimbus</a></li><li class="outline-list-item level-0"><a href="#b788977277_831">heading 1</a></li><li class="outline-list-item level-1"><a href="#b788977277_860">heading 2</a></li><li class="outline-list-item level-2"><a href="#b788977277_889">heading 3</a></li><li class="outline-list-item level-0"><a href="#b1023299123_1757">heading with italic text</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1218">Testing the horizontal line</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1266">Link and embeds</a></li><li class="outline-list-item level-1"><a href="#b992245780_93">Code Blocks</a></li><li class="outline-list-item level-1"><a href="#b992245780_132">Nimbus mentions</a></li><li class="outline-list-item level-1"><a href="#b992245780_175">Quoted text</a></li><li class="outline-list-item level-1"><a href="#b992245780_196">Hints</a></li><li class="outline-list-item level-1"><a href="#b992245780_220">Toggle block</a></li><li class="outline-list-item level-1"><a href="#b2183561539_350">Outline (effectively a linked TOC)</a></li><li class="outline-list-item level-1"><a href="#b992245780_450">Nimbus button</a></li><li class="outline-list-item level-1"><a href="#b992245780_478">Text formatting</a></li><li class="outline-list-item level-1"><a href="#b942953620_901">Testing inserted mp3</a></li><li class="outline-list-item level-1"><a href="#b942953620_1059">Test block sections - may or may not export!</a></li><li class="outline-list-item level-1"><a href="#b216345050_62">Adventures in Exporting from Nimbus Notes...</a></li><li class="outline-list-item level-0"><a href="#b942953620_969">This is the end of the file</a></li></ul></div></div></div></div>'
        soup = helper_functions.make_soup_from_html(html)
        # ``find`` returns the first match, i.e. the outer ``outline`` div.
        tag = soup.find('div')
        assert tag.name == 'div'

        expected = """<h2>Outline</h2><h4><ol><li><a href="#b1023299123_950">A test note of page content</a></li><ol><li><a href="#b1023299123_1009">Testing lists</a></li><li><a href="#b1023299123_1042">Testing inserted files</a></li><li><a href="#b1023299123_1086">Testing a table</a></li><li><a href="#b1023299123_1130">There are only 3 levels of heading in nimbus</a></li></ol><li><a href="#b788977277_831">heading 1</a></li><ol><li><a href="#b788977277_860">heading 2</a></li><ol><li><a href="#b788977277_889">heading 3</a></li></ol></ol><li><a href="#b1023299123_1757">heading with italic text</a></li><ol><li><a href="#b1023299123_1218">Testing the horizontal line</a></li><li><a href="#b1023299123_1266">Link and embeds</a></li><li><a href="#b992245780_93">Code Blocks</a></li><li><a href="#b992245780_132">Nimbus mentions</a></li><li><a href="#b992245780_175">Quoted text</a></li><li><a href="#b992245780_196">Hints</a></li><li><a href="#b992245780_220">Toggle block</a></li><li><a href="#b2183561539_350">Outline (effectively a linked TOC)</a></li><li><a href="#b992245780_450">Nimbus button</a></li><li><a href="#b992245780_478">Text formatting</a></li><li><a href="#b942953620_901">Testing inserted mp3</a></li><li><a href="#b942953620_1059">Test block sections - may or may not export!</a></li><li><a href="#b216345050_62">Adventures in Exporting from Nimbus Notes...</a></li></ol><li><a href="#b942953620_969">This is the end of the file</a></li></ol></h4>"""

        result = html_nimbus_extractors.extract_from_nimbus_outline(tag, processing_options)

        assert isinstance(result, Outline)
        assert result.html() == expected

Example #14

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_p_or_i_tag(self, html, tag_name, expected,
                                     processing_options):
        """The supplied tag is extracted as a one-element list of type ``expected``.

        ``html`` is the markup to parse, ``tag_name`` the element to look up
        and ``expected`` the class the single extracted item must be an
        instance of.  All three are supplied from outside this method
        (presumably by a parametrize decorator not visible here).
        """
        element = helper_functions.make_soup_from_html(html).find(tag_name)
        assert element.name == tag_name

        items = html_data_extractors.extract_from_p_or_i_tag(
            element, processing_options)
        assert isinstance(items, list)
        assert len(items) == 1
        sole_item = items[0]
        assert isinstance(sole_item, expected)
        assert sole_item.contents == 'Some Text'

Example #15

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_head_tag(self, processing_options):
        """A ``<head>`` containing a ``<title>`` becomes a ``Head`` with one ``Title``."""
        markup = "<head><title>My Title</title></head>"
        head_tag = helper_functions.make_soup_from_html(markup).find('head')
        assert head_tag.name == 'head'

        head_item = html_data_extractors.extract_from_head_tag(
            head_tag, processing_options)
        assert isinstance(head_item, Head)
        assert len(head_item.contents) == 1
        # The single child should be the title nested inside the head.
        title_item = head_item.contents[0]
        assert isinstance(title_item, Title)
        assert title_item.contents == 'My Title'

Example #16

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_blockquote(self, processing_options):
        """A ``<blockquote>`` becomes a ``BlockQuote`` wrapping one ``TextItem``."""
        markup = '<blockquote cite="my-citation">My Quote</blockquote>'
        parsed = helper_functions.make_soup_from_html(markup)
        quote_tag = parsed.find('blockquote')
        assert quote_tag.name == 'blockquote'

        quote_item = html_data_extractors.extract_from_blockquote(
            quote_tag, processing_options)
        assert isinstance(quote_item, BlockQuote)
        assert len(quote_item.contents) == 1
        quoted_text = quote_item.contents[0]
        assert isinstance(quoted_text, TextItem)
        assert quoted_text.contents == 'My Quote'

Example #17

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_body(self, processing_options):
        """A ``<section>`` containing a ``<title>`` becomes a ``SectionContent``.

        NOTE(review): despite the method name, this exercises
        ``extract_from_section`` on a ``<section>`` element, not a body tag.
        """
        markup = "<section><title>My Title</title></section>"
        section_tag = helper_functions.make_soup_from_html(markup).find('section')
        assert section_tag.name == 'section'

        section_item = html_data_extractors.extract_from_section(
            section_tag, processing_options)
        assert isinstance(section_item, SectionContent)
        assert len(section_item.contents) == 1
        title_item = section_item.contents[0]
        assert isinstance(title_item, Title)
        assert title_item.contents == 'My Title'

Example #18

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_coloured_text_span(self, processing_options):
        """A ``font-color`` span with an rgb colour becomes a ``TextColorItem``.

        The item's ``contents`` is expected to be the span re-emitted with
        only its ``style`` attribute, ``plain_text`` the bare text, and
        ``processing_options`` the options passed in.
        """
        markup = '<span class="font-color" style="color: rgb(237, 84, 84);">This is coloured.</span>'
        span_tag = helper_functions.make_soup_from_html(markup).find('span')
        assert span_tag.name == 'span'

        coloured = html_data_extractors.extract_from_coloured_text_span(
            span_tag, processing_options)
        assert isinstance(coloured, TextColorItem)
        assert coloured.contents == '<span style="color: rgb(237, 84, 84);">This is coloured.</span>'
        assert coloured.plain_text == 'This is coloured.'
        assert coloured.processing_options == processing_options

Example #19

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_text_formatting(self, processing_options):
        """A ``<strong>`` tag becomes a ``TextFormatItem`` with format ``'strong'``."""
        parsed = helper_functions.make_soup_from_html("<strong>bold text</strong>")
        strong_tag = parsed.find('strong')
        assert strong_tag.name == 'strong'

        styling = markdown_format_styling.format_styling
        formatted = html_data_extractors.extract_text_formatting(
            strong_tag, styling, processing_options)

        assert isinstance(formatted, TextFormatItem)
        assert formatted.format == 'strong'
        # The text itself sits one level down, on the first child item.
        assert formatted.contents[0].contents == 'bold text'
        assert formatted.processing_options == processing_options

Example #20

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_heading(self, html, tag_name, expected_level,
                                  expected_id, processing_options):
        """A heading tag becomes a ``HeadingItem`` with the expected level and id.

        ``html`` is the markup to parse, ``tag_name`` the element to look up,
        and ``expected_level`` / ``expected_id`` the values the result must
        carry.  All are supplied from outside this method (presumably by a
        parametrize decorator not visible here); per the original docstring
        the cases are meant to confirm heading levels are restricted to 1-6.
        """
        heading_tag = helper_functions.make_soup_from_html(html).find(tag_name)
        assert heading_tag.name == tag_name

        heading = html_data_extractors.extract_from_heading(
            heading_tag, processing_options, None)
        assert isinstance(heading, HeadingItem)
        assert len(heading.contents) == 1
        assert heading.level == expected_level
        assert heading.id == expected_id
        heading_text = heading.contents[0]
        assert isinstance(heading_text, TextItem)
        assert heading_text.contents == 'My heading'

Example #21

0

Show file

File: test_html_data_extractors.py Project: kevindurston21/YANOM-Note-O-Matic

    def test_extract_from_image_tag(self, html, src, alt, width, height,
                                    processing_options):
        """An ``<img>`` tag becomes an ``ImageEmbed`` mirroring its attributes.

        ``html`` is the markup to parse; ``src``, ``alt``, ``width`` and
        ``height`` are the values expected on the result.  All are supplied
        from outside this method (presumably by a parametrize decorator not
        visible here).
        """
        img_tag = helper_functions.make_soup_from_html(html).find('img')
        assert img_tag.name == 'img'

        image = html_data_extractors.extract_from_img_tag(
            img_tag, processing_options)
        assert isinstance(image, ImageEmbed)

        assert image.href == src
        assert image.contents == alt
        assert image.width == width
        assert image.height == height
        # Path-derived attributes are compared against the same ``src``.
        expected_path = Path(src)
        assert image.source_path == expected_path
        assert image.filename == expected_path.name

Example #22

0

Show file

    def test_nimbus_outline_markdown_output(self, processing_options):
        """Test passing correct tag"""
        html = '<div class="outline" id="b406348235_764"><div contenteditable="false" class="outline-container"><div class="outline-content-wrapper "><div class="outline-header "><div class="outline-left"><div class="outline-expand-icon "> </div></div><div class="outline-name">Outline</div></div><div class="outline-body"><ul class="outline-list outline-numbered"><li class="outline-list-item level-0"><a href="#b1023299123_950">A test note of page content</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1009">Testing lists</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1042">Testing inserted files</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1086">Testing a table</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1130">There are only 3 levels of heading in nimbus</a></li><li class="outline-list-item level-0"><a href="#b788977277_831">heading 1</a></li><li class="outline-list-item level-1"><a href="#b788977277_860">heading 2</a></li><li class="outline-list-item level-2"><a href="#b788977277_889">heading 3</a></li><li class="outline-list-item level-0"><a href="#b1023299123_1757">heading with italic text</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1218">Testing the horizontal line</a></li><li class="outline-list-item level-1"><a href="#b1023299123_1266">Link and embeds</a></li><li class="outline-list-item level-1"><a href="#b992245780_93">Code Blocks</a></li><li class="outline-list-item level-1"><a href="#b992245780_132">Nimbus mentions</a></li><li class="outline-list-item level-1"><a href="#b992245780_175">Quoted text</a></li><li class="outline-list-item level-1"><a href="#b992245780_196">Hints</a></li><li class="outline-list-item level-1"><a href="#b992245780_220">Toggle block</a></li><li class="outline-list-item level-1"><a href="#b2183561539_350">Outline (effectively a linked TOC)</a></li><li class="outline-list-item level-1"><a href="#b992245780_450">Nimbus 
button</a></li><li class="outline-list-item level-1"><a href="#b992245780_478">Text formatting</a></li><li class="outline-list-item level-1"><a href="#b942953620_901">Testing inserted mp3</a></li><li class="outline-list-item level-1"><a href="#b942953620_1059">Test block sections - may or may not export!</a></li><li class="outline-list-item level-1"><a href="#b216345050_62">Adventures in Exporting from Nimbus Notes...</a></li><li class="outline-list-item level-0"><a href="#b942953620_969">This is the end of the file</a></li></ul></div></div></div></div>'
        soup = helper_functions.make_soup_from_html(html)
        tag = soup.find('div')
        assert tag.name == 'div'

        expected = """## Outline
1. [A test note of page content](#a-test-note-of-page-content)
	1. [Testing lists](#testing-lists)
	2. [Testing inserted files](#testing-inserted-files)
	3. [Testing a table](#testing-a-table)
	4. [There are only 3 levels of heading in nimbus](#there-are-only-3-levels-of-heading-in-nimbus)
2. [heading 1](#heading-1)
	1. [heading 2](#heading-2)
		1. [heading 3](#heading-3)
3. [heading with italic text](#heading-with-italic-text)
	1. [Testing the horizontal line](#testing-the-horizontal-line)
	2. [Link and embeds](#link-and-embeds)
	3. [Code Blocks](#code-blocks)
	4. [Nimbus mentions](#nimbus-mentions)
	5. [Quoted text](#quoted-text)
	6. [Hints](#hints)
	7. [Toggle block](#toggle-block)
	8. [Outline (effectively a linked TOC)](#outline--effectively-a-linked-toc-)
	9. [Nimbus button](#nimbus-button)
	10. [Text formatting](#text-formatting)
	11. [Testing inserted mp3](#testing-inserted-mp3)
	12. [Test block sections - may or may not export!](#test-block-sections---may-or-may-not-export-)
	13. [Adventures in Exporting from Nimbus Notes...](#adventures-in-exporting-from-nimbus-notes...)
4. [This is the end of the file](#this-is-the-end-of-the-file)


"""
        processing_options.export_format = 'gfm'
        result = html_nimbus_extractors.extract_from_nimbus_outline(tag, processing_options)

        assert isinstance(result, Outline)
        assert result.markdown() == expected

Example #23

0

Show file

File: test_helper_functions.py Project: kevindurston21/YANOM-Note-O-Matic

def test_make_soup():
    """``make_soup_from_html`` returns a ``BeautifulSoup`` object for simple markup."""
    soup = helper_functions.make_soup_from_html('<p>hello</p>')

    assert isinstance(soup, BeautifulSoup)