Esempio n. 1
0
 def test_flatten_respects_hidden_n2(self):
     # A hidden child contributes its data but not its own '*' markers,
     # while the visible parent is still wrapped in its '`' markers.
     hidden_child = MarkdownElement('*', ['text2'], hidden=True)
     parent = MarkdownElement('`', ['text ', hidden_child])
     self.assertEqual('`text text2`', parent.flatten())
Esempio n. 2
0
    def test_add_data(self):
        # add_data() is called once with a plain string and once with a
        # nested element; flatten() must render both in insertion order.
        container = MarkdownElement('`', [])
        quoted = MarkdownElement('"', ['text2'])
        emphasised = MarkdownElement('*', ['text', quoted])

        container.add_data('text3')
        container.add_data(emphasised)

        self.assertEqual('`text3*text"text2"*`', container.flatten())
Esempio n. 3
0
class HtmlToMarkdown(HTMLParser):
    """Convert HTML into a tree of ``MarkdownElement`` nodes.

    The parser callbacks build the tree incrementally: every start tag
    opens a child node under ``current_element`` and descends into it,
    every end tag climbs back to the parent, and text is appended to the
    node that is currently open.  ``convert()`` drives the parse and
    returns either the tree or its flattened form.
    """

    def __init__(self):
        super().__init__()
        # Node that currently receives text and child elements; it is only
        # set once convert() has created a root.
        self.current_element = None

    def convert(self, html, flatten=True):
        """Parse *html* and return the resulting Markdown.

        Args:
            html: A single string, or an iterable of strings that are fed
                to the parser one after another.
            flatten: When true (default) return ``root.flatten()``;
                otherwise return the root ``MarkdownElement`` itself.

        Returns:
            The flattened result, or the root element when *flatten* is
            false.
        """
        if isinstance(html, str):
            html = [html]

        # Discard anything a previous convert() left in the parser buffer
        # so that separate conversions cannot contaminate each other.
        self.reset()

        # Hold on to the root explicitly: with unclosed tags
        # ``current_element`` ends up pointing at a nested node, and
        # returning that node would drop everything parsed before it.
        root = MarkdownElement()
        self.current_element = root

        for line in html:
            self.feed(line)

        # feed() may hold back trailing input (e.g. an unfinished character
        # reference) while waiting for more data; close() flushes it.
        self.close()

        return root.flatten() if flatten else root

    def handle_starttag(self, tag, attrs):  # feed() callback
        """Open a new element for *tag* and make it the current element."""
        new_element = MarkdownElement()

        try:
            start_marker = MAP_TAG_START[tag]
        except KeyError:
            # Unmapped HTML tag: keep the tag MarkdownElement() assigns by
            # default instead of aborting the whole parse with a KeyError.
            # NOTE(review): assumes the default tag renders as a neutral
            # wrapper, as it must for the root element - confirm.
            pass
        else:
            try:
                # Distinct opening and closing markers.
                new_element.tag = [start_marker, MAP_TAG_END[tag]]
            except KeyError:
                # No dedicated end marker: a single marker is used.
                new_element.tag = start_marker

        self.set_visibility(new_element)

        # NOTE(review): void tags such as <br> never trigger
        # handle_endtag(), so later content nests beneath them - confirm
        # whether that is intended.
        self.current_element.add_data(new_element)
        self.current_element = new_element

    def set_visibility(self, element):
        """Hide *element* when the current element is preformatted or hidden."""
        parent = self.current_element
        element.hidden = parent.tag in PREFMT_TAGS or parent.hidden

    def handle_endtag(self, tag):  # feed() callback
        """Climb back to the parent of the current element."""
        parent = getattr(self.current_element, 'parent', None)
        # A stray closing tag at the top level must not move the cursor
        # above the root; otherwise the next callback would fail on None.
        # NOTE(review): assumes the root's ``parent`` is None - confirm.
        if parent is not None:
            self.current_element = parent

    def handle_data(self, data):  # feed() callback
        """Append sanitized text to the current element."""
        self.current_element.data.append(self.sanitize(data))

    def sanitize(self, data):
        """Return *data* with non-breaking spaces replaced by plain spaces."""
        return data.replace('\xa0', ' ')
Esempio n. 4
0
 def test_tag_supports_list(self):
     # A two-item tag supplies a separate opening and closing marker.
     bullet = MarkdownElement(['- ', '\n'], ['text'])
     self.assertEqual('- text\n', bullet.flatten())
Esempio n. 5
0
 def test_flatten_respects_hidden_n1(self):
     # A hidden element flattens to its data alone, without its '`' markers.
     hidden = MarkdownElement('`', ['text'], hidden=True)
     self.assertEqual('text', hidden.flatten())
Esempio n. 6
0
 def test_flatten_n2(self):
     # A nested element is rendered in place, wrapped in its own markers.
     inner = MarkdownElement('"', ['text2'])
     outer = MarkdownElement('`', ['text ', inner])
     self.assertEqual('`text "text2"`', outer.flatten())
Esempio n. 7
0
 def test_flatten_n1(self):
     # A string tag is emitted on both sides of the element's data.
     element = MarkdownElement('`', ['text'])
     self.assertEqual('`text`', element.flatten())