Example #1
0
 def test_flatten_respects_hidden_n2(self):
     # The nested '*' element is hidden, so the expected output carries its
     # text but not its tag, while the visible outer '`' tag is kept.
     hidden_child = MarkdownElement('*', ['text2'], hidden=True)
     element = MarkdownElement('`', ['text ', hidden_child])
     self.assertEqual('`text text2`', element.flatten())
Example #2
0
 def test_comparison_equal(self):
     # Two trees built independently from the same arguments are expected
     # to compare equal.
     def build():
         return MarkdownElement(
             '`', ['text ', MarkdownElement('*', ['text2'], hidden=True)])

     first, second = build(), build()
     self.assertEqual(first, second)
Example #3
0
 def test_comparison_inequal_data_element(self):
     # Start from two trees built from the same arguments, then change the
     # tag of one nested child; the parents must no longer compare equal.
     def build():
         return MarkdownElement(
             '`', ['text ', MarkdownElement('*', ['text2'], hidden=True)])

     untouched, altered = build(), build()
     altered.data[1].tag = ''
     self.assertNotEqual(untouched, altered)
Example #4
0
 def test_format_asserts_no_more_than_one_empty_line(self):
     # Runs of consecutive '\n' entries in the input are expected to come
     # back from format() reduced to a single '\n' entry.
     #
     # Every line is its own list item: adjacent string literals without a
     # separating comma are silently concatenated by Python, which would
     # turn '\n' 'a\n' into the single element '\na\n'.
     md = MarkdownElement(data=[
         'This\n', '\n', 'is\n', '\n', '\n',
         'a\n', '\n', '\n', '\n', 'test',
     ])
     expected = MarkdownElement(
         data=['This\n', '\n', 'is\n', '\n', 'a\n', '\n', 'test'])
     actual = self.mdf.format(md)
     self.assertEqual(expected, actual)
Example #5
0
 def test_comparison_inequal_nested_parent(self):
     # Clearing the parent link of one nested child must be enough to make
     # two otherwise identically built trees compare unequal.
     def build():
         return MarkdownElement(
             '`', ['text ', MarkdownElement('*', ['text2'], hidden=True)])

     untouched, altered = build(), build()
     altered.data[1].parent = None
     self.assertNotEqual(untouched, altered)
Example #6
0
    def handle_starttag(self, tag, attrs):  # feed() callback
        """Open a new child element for ``tag`` and descend into it.

        The element's tag is the ``[start, end]`` pair when ``tag`` has an
        entry in both MAP_TAG_START and MAP_TAG_END, otherwise just the
        MAP_TAG_START entry. ``attrs`` is not used.
        """
        child = MarkdownElement()

        try:
            markdown_tag = [MAP_TAG_START[tag], MAP_TAG_END[tag]]
        except KeyError:
            # No start/end pair available: fall back to the start marker.
            markdown_tag = MAP_TAG_START[tag]
        child.tag = markdown_tag

        # Visibility is derived from the element we are still inside of, so
        # it has to be set before the cursor moves to the child.
        self.set_visibility(child)

        enclosing = self.current_element
        enclosing.add_data(child)
        self.current_element = child
Example #7
0
    def convert(self, html, flatten=True):
        """Convert HTML into Markdown.

        Args:
            html: A single HTML string, or an iterable of HTML strings that
                are fed to the parser in order.
            flatten: When true, return the result of calling ``flatten()``
                on the root element; otherwise return the root element.

        Returns:
            The flattened root element, or the root element itself when
            ``flatten`` is false.
        """
        if isinstance(html, str):
            html = [html]

        # Discard anything an earlier call left in the parser's buffer so
        # one conversion can never leak text into the next one.
        self.reset()

        # Keep our own reference to the root: the parser callbacks move
        # ``current_element`` around, and it only ends up back at the root
        # when every opened tag was also closed.
        root = MarkdownElement()
        self.current_element = root

        for line in html:
            self.feed(line)

        # feed() may hold back trailing text (e.g. text ending in an
        # unterminated '&') while waiting for more input; close() forces
        # that remainder through the callbacks.
        self.close()

        return root.flatten() if flatten else root
Example #8
0
    def test_add_data(self):
        # add_data() is called once with plain text and once with a nested
        # element; the flattened output must show both in insertion order.
        container = MarkdownElement('`', [])
        quoted = MarkdownElement('"', ['text2'])
        starred = MarkdownElement('*', ['text', quoted])

        for item in ('text3', starred):
            container.add_data(item)

        self.assertEqual('`text3*text"text2"*`', container.flatten())
Example #9
0
class HtmlToMarkdown(HTMLParser):
    """Build a MarkdownElement tree from HTML fed through HTMLParser.

    ``current_element`` is the element that incoming tags and text are
    attached to: a start tag descends into a newly created child and an end
    tag climbs back to the parent.
    """

    def __init__(self):
        super().__init__()
        self.current_element = None

    def convert(self, html, flatten=True):
        """Convert HTML into Markdown.

        Args:
            html: A single HTML string, or an iterable of HTML strings that
                are fed to the parser in order.
            flatten: When true, return the result of calling ``flatten()``
                on the root element; otherwise return the root element.

        Returns:
            The flattened root element, or the root element itself when
            ``flatten`` is false.
        """
        if isinstance(html, str):
            html = [html]

        # Discard anything an earlier call left in the parser's buffer so
        # one conversion can never leak text into the next one.
        self.reset()

        # Keep our own reference to the root: the callbacks below move
        # ``current_element`` around, and it only ends up back at the root
        # when every opened tag was also closed.
        root = MarkdownElement()
        self.current_element = root

        for line in html:
            self.feed(line)

        # feed() may hold back trailing text (e.g. text ending in an
        # unterminated '&') while waiting for more input; close() forces
        # that remainder through the callbacks.
        self.close()

        return root.flatten() if flatten else root

    def handle_starttag(self, tag, attrs):  # feed() callback
        """Open a new child element for ``tag`` and descend into it.

        The element's tag is the ``[start, end]`` pair when ``tag`` has an
        entry in both MAP_TAG_START and MAP_TAG_END, otherwise just the
        MAP_TAG_START entry. ``attrs`` is not used.

        Raises:
            KeyError: If ``tag`` has no entry in MAP_TAG_START.
        """
        new_element = MarkdownElement()

        # Tags without a start mapping are not supported; let the KeyError
        # propagate from this single lookup.
        start = MAP_TAG_START[tag]
        try:
            new_element.tag = [start, MAP_TAG_END[tag]]
        except KeyError:
            # No dedicated end marker: use the start marker alone.
            new_element.tag = start

        # Visibility is derived from the element we are still inside of, so
        # it has to be set before the cursor moves to the child.
        self.set_visibility(new_element)

        self.current_element.add_data(new_element)
        self.current_element = new_element

    def set_visibility(self, element):
        """Mark ``element`` hidden when its enclosing element requires it.

        An element is hidden when the current element's tag is one of
        PREFMT_TAGS or the current element is itself hidden.
        """
        should_hide = self.current_element.tag in PREFMT_TAGS \
            or self.current_element.hidden
        element.hidden = should_hide

    def handle_endtag(self, tag):  # feed() callback
        """Climb back to the parent of the current element.

        ``tag`` is not inspected. An end tag that arrives while the current
        element has no parent is ignored, so ``current_element`` never
        becomes None while parsing.
        """
        parent = self.current_element.parent
        if parent is not None:
            self.current_element = parent

    def handle_data(self, data):  # feed() callback
        """Append the sanitized text to the current element's data."""
        data = self.sanitize(data)
        self.current_element.data.append(data)

    def sanitize(self, data):
        """Return ``data`` with every U+00A0 replaced by a plain space."""
        return data.replace(u'\xa0', ' ')
Example #10
0
 def test_preserve_tree(self):
     # With flatten=False convert() must hand back the element tree. In the
     # expected tree everything nested inside the outer <code> element is
     # marked hidden, while the outer element itself is not.
     html = [
         '<em>This is</em> ', '<code><em>some code</em> ',
         '<code><em>nested inside</em></code> ',
         '<em>other code</em></code>. ', '<em>whoa</em>'
     ]

     def hidden_em(text):
         return MarkdownElement('*', [text], hidden=True)

     leading_em = MarkdownElement('*', ['This is'])
     first_hidden = hidden_em('some code')
     nested_code = MarkdownElement(
         '`', [hidden_em('nested inside')], hidden=True)
     outer_code = MarkdownElement(
         '`',
         [first_hidden, ' ', nested_code, ' ', hidden_em('other code')])
     trailing_em = MarkdownElement('*', ['whoa'])

     expected = MarkdownElement(
         data=[leading_em, ' ', outer_code, '. ', trailing_em])
     actual = self.md.convert(html, flatten=False)
     self.assertEqual(expected, actual)
Example #11
0
 def test_is_text_only_empty(self):
     # An element built with no arguments must not report as text-only.
     empty_element = MarkdownElement()
     is_text_only = empty_element.is_text_only
     self.assertFalse(is_text_only)
Example #12
0
 def test_default_instance(self):
     # An element built with no arguments must have an empty tag, no data,
     # no parent and must not be hidden.
     me = MarkdownElement()
     self.assertEqual('', me.tag)
     self.assertEqual([], me.data)
     # assertIsNone(obj, msg=None) treats a second positional argument as
     # the failure message, so the attribute itself has to be passed as
     # the object under test.
     self.assertIsNone(me.parent)
     self.assertFalse(me.hidden)
Example #13
0
 def test_nondefault_instance(self):
     # Constructor arguments must show up on the matching attributes.
     tag, data = 'tag', ['data']
     element = MarkdownElement(tag, data, hidden=True)
     self.assertEqual('tag', element.tag)
     self.assertEqual(['data'], element.data)
     self.assertTrue(element.hidden)
Example #14
0
 def test_parent_add_data(self):
     # add_data() must record the receiving element as the child's parent.
     nested = MarkdownElement('"', ['text2'])
     owner = MarkdownElement('`', ['text '])
     owner.add_data(nested)
     recorded_parent = nested.parent
     self.assertEqual(owner, recorded_parent)
Example #15
0
 def test_tag_supports_list(self):
     # A two-item list tag is expected to flatten with its first item
     # before the data and its second item after it.
     list_tag = ['- ', '\n']
     element = MarkdownElement(list_tag, ['text'])
     self.assertEqual('- text\n', element.flatten())
Example #16
0
 def test_comparison_different_types(self):
     # Comparing an element with a non-element value (here the int 1) must
     # report inequality.
     hidden_child = MarkdownElement('*', ['text2'], hidden=True)
     element = MarkdownElement('`', ['text ', hidden_child])
     self.assertNotEqual(element, 1)
Example #17
0
 def test_parent_constructor(self):
     # Passing an element inside the constructor's data list must record
     # the new element as that child's parent.
     nested = MarkdownElement('"', ['text2'])
     owner = MarkdownElement('`', ['text ', nested])
     recorded_parent = nested.parent
     self.assertEqual(owner, recorded_parent)
Example #18
0
 def test_is_text_only_has_tag(self):
     # An element that carries a tag must not report as text-only.
     tagged = MarkdownElement('tag')
     is_text_only = tagged.is_text_only
     self.assertFalse(is_text_only)
Example #19
0
 def test_is_text_only_has_tag_and_text(self):
     # Having text does not make an element text-only while it has a tag.
     tagged_with_text = MarkdownElement('tag', ['stuff'])
     is_text_only = tagged_with_text.is_text_only
     self.assertFalse(is_text_only)
Example #20
0
 def test_is_text_only_text_only(self):
     # Data without a tag is the case that must report as text-only.
     untagged = MarkdownElement(data=['stuff'])
     is_text_only = untagged.is_text_only
     self.assertTrue(is_text_only)
Example #21
0
 def test_flatten_n1(self):
     # A single-level element is expected to flatten to its text wrapped
     # in its tag on both sides.
     element = MarkdownElement('`', ['text'])
     self.assertEqual('`text`', element.flatten())
Example #22
0
 def test_flatten_n2(self):
     # A nested element is expected to be flattened in place, inside the
     # tag of the element that contains it.
     quoted = MarkdownElement('"', ['text2'])
     element = MarkdownElement('`', ['text ', quoted])
     self.assertEqual('`text "text2"`', element.flatten())
Example #23
0
 def test_flatten_respects_hidden_n1(self):
     # A hidden element is expected to flatten to its text alone, without
     # its tag.
     hidden_element = MarkdownElement('`', ['text'], hidden=True)
     self.assertEqual('text', hidden_element.flatten())
Example #24
0
 def test_str_calls_flatten(self):
     # str() on an element is expected to give the flattened text.
     quoted = MarkdownElement('"', ['text2'])
     element = MarkdownElement('`', ['text ', quoted])
     self.assertEqual('`text "text2"`', str(element))