def test_flatten_respects_hidden_n2(self):
    """A hidden child contributes its text, but not its tag, when flattened."""
    hidden_child = MarkdownElement('*', ['text2'], hidden=True)
    element = MarkdownElement('`', ['text ', hidden_child])
    self.assertEqual('`text text2`', element.flatten())
def test_comparison_equal(self):
    """Two independently built trees with identical content compare equal."""
    def build_tree():
        nested = MarkdownElement('*', ['text2'], hidden=True)
        return MarkdownElement('`', ['text ', nested])

    first = build_tree()
    second = build_tree()
    self.assertEqual(first, second)
def test_comparison_inequal_data_element(self):
    """Changing the tag of a nested element makes the trees compare unequal."""
    def build_tree():
        nested = MarkdownElement('*', ['text2'], hidden=True)
        return MarkdownElement('`', ['text ', nested])

    untouched = build_tree()
    altered = build_tree()
    # Only the nested child's tag differs between the two trees.
    altered.data[1].tag = ''
    self.assertNotEqual(untouched, altered)
def test_format_asserts_no_more_than_one_empty_line(self):
    """Runs of consecutive blank lines are reduced to one by the formatter."""
    # One list item per line. The original omitted the comma before 'a\n',
    # which silently merged two items into the single string '\na\n'.
    md = MarkdownElement(data=[
        'This\n', '\n',
        'is\n', '\n', '\n',
        'a\n', '\n', '\n', '\n',
        'test'
    ])
    expected = MarkdownElement(
        data=['This\n', '\n', 'is\n', '\n', 'a\n', '\n', 'test'])
    actual = self.mdf.format(md)
    self.assertEqual(expected, actual)
def test_comparison_inequal_nested_parent(self):
    """Clearing the parent link of a nested element breaks equality."""
    def build_tree():
        nested = MarkdownElement('*', ['text2'], hidden=True)
        return MarkdownElement('`', ['text ', nested])

    untouched = build_tree()
    altered = build_tree()
    # Only the nested child's parent reference differs between the trees.
    altered.data[1].parent = None
    self.assertNotEqual(untouched, altered)
def handle_starttag(self, tag, attrs):  # feed() callback
    """Create an element for ``tag``, attach it to the current element and
    make it the new current element.

    ``attrs`` is accepted for the parser callback signature but not used.
    """
    child = MarkdownElement()
    try:
        markers = [MAP_TAG_START[tag], MAP_TAG_END[tag]]
    except KeyError:
        # Reached when one of the lookups above failed; only the start
        # marker is used. This lookup may itself raise KeyError when the
        # tag has no start marker either.
        markers = MAP_TAG_START[tag]
    child.tag = markers
    # Visibility is derived from the parent, so set it before descending.
    self.set_visibility(child)
    parent = self.current_element
    parent.add_data(child)
    self.current_element = child
def convert(self, html, flatten=True):
    """Parse HTML and return the resulting Markdown tree or its flat text.

    Args:
        html: a single string, or an iterable of strings fed to the parser
            one after another.
        flatten: when true, return ``root.flatten()``; otherwise return the
            root ``MarkdownElement`` itself.

    Returns:
        The flattened result, or the root element when ``flatten`` is false.
    """
    if isinstance(html, str):
        html = [html]
    # Discard parser state (buffered text, open CDATA mode) left behind by
    # a previous call so conversions do not bleed into each other.
    self.reset()
    # Keep our own reference to the root: ``self.current_element`` moves
    # while parsing and only ends up back at the root for balanced markup.
    root = MarkdownElement()
    self.current_element = root
    for line in html:
        self.feed(line)
    # Force the parser to process anything it is still buffering, e.g.
    # trailing text that ends in an unterminated '&' reference.
    self.close()
    if flatten:
        return root.flatten()
    return root
def test_add_data(self):
    """add_data accepts both plain text and nested elements, in call order."""
    container = MarkdownElement('`', [])
    quoted = MarkdownElement('"', ['text2'])
    emphasised = MarkdownElement('*', ['text', quoted])
    for item in ('text3', emphasised):
        container.add_data(item)
    self.assertEqual('`text3*text"text2"*`', container.flatten())
class HtmlToMarkdown(HTMLParser):
    """Build a ``MarkdownElement`` tree from HTML using HTMLParser callbacks.

    ``convert()`` is the entry point; the ``handle_*`` methods are invoked by
    ``feed()`` and grow the tree below ``self.current_element``.
    """

    def __init__(self):
        super().__init__()
        # Element that currently receives new children and text.
        # Assigned by convert() before any parsing happens.
        self.current_element = None

    def convert(self, html, flatten=True):
        """Parse HTML and return the resulting Markdown tree or its flat text.

        Args:
            html: a single string, or an iterable of strings fed to the
                parser one after another.
            flatten: when true, return ``root.flatten()``; otherwise return
                the root ``MarkdownElement`` itself.

        Returns:
            The flattened result, or the root element when ``flatten`` is
            false.
        """
        if isinstance(html, str):
            html = [html]
        # Discard parser state (buffered text, open CDATA mode) left behind
        # by a previous call so conversions do not bleed into each other.
        self.reset()
        # Keep our own reference to the root: ``self.current_element`` moves
        # while parsing and only ends up back at the root for balanced markup.
        root = MarkdownElement()
        self.current_element = root
        for line in html:
            self.feed(line)
        # Force the parser to process anything it is still buffering, e.g.
        # trailing text that ends in an unterminated '&' reference.
        self.close()
        if flatten:
            return root.flatten()
        return root

    def handle_starttag(self, tag, attrs):  # feed() callback
        """Create an element for ``tag`` and descend into it.

        ``attrs`` is accepted for the callback signature but not used.
        """
        new_element = MarkdownElement()
        try:
            new_element.tag = [MAP_TAG_START[tag], MAP_TAG_END[tag]]
        except KeyError:
            # One of the lookups failed; use the start marker alone. This
            # lookup may itself raise KeyError for a tag with no start marker.
            new_element.tag = MAP_TAG_START[tag]
        # Visibility depends on the parent, so compute it before descending.
        self.set_visibility(new_element)
        self.current_element.add_data(new_element)
        self.current_element = new_element

    def set_visibility(self, element):
        """Mark ``element`` hidden when the current element has a tag listed
        in ``PREFMT_TAGS`` or is itself hidden."""
        should_hide = self.current_element.tag in PREFMT_TAGS \
            or self.current_element.hidden
        element.hidden = should_hide

    def handle_endtag(self, tag):  # feed() callback
        """Step back up to the parent of the current element.

        A closing tag seen while already at the root (parent is ``None``) is
        ignored, so later text still has an element to land in.
        """
        parent = self.current_element.parent
        if parent is not None:
            self.current_element = parent

    def handle_data(self, data):  # feed() callback
        """Append sanitized text to the current element's data."""
        data = self.sanitize(data)
        self.current_element.data.append(data)

    def sanitize(self, data):
        """Return ``data`` with non-breaking spaces replaced by plain spaces."""
        return data.replace(u'\xa0', ' ')
def test_preserve_tree(self):
    """convert(flatten=False) returns the nested tree, with everything
    inside a code element marked hidden."""
    html = [
        '<em>This is</em> ',
        '<code><em>some code</em> ',
        '<code><em>nested inside</em></code> ',
        '<em>other code</em></code>. ',
        '<em>whoa</em>'
    ]

    # Build the expected tree piece by piece, in document order.
    intro = MarkdownElement('*', ['This is'])
    some_code = MarkdownElement('*', ['some code'], hidden=True)
    nested_text = MarkdownElement('*', ['nested inside'], hidden=True)
    nested_code = MarkdownElement('`', [nested_text], hidden=True)
    other_code = MarkdownElement('*', ['other code'], hidden=True)
    outer_code = MarkdownElement(
        '`', [some_code, ' ', nested_code, ' ', other_code])
    outro = MarkdownElement('*', ['whoa'])
    expected = MarkdownElement(data=[intro, ' ', outer_code, '. ', outro])

    self.assertEqual(expected, self.md.convert(html, flatten=False))
def test_is_text_only_empty(self):
    """An element with no tag and no data is not text-only."""
    empty = MarkdownElement()
    text_only = empty.is_text_only
    self.assertFalse(text_only)
def test_default_instance(self):
    """A MarkdownElement built without arguments has empty defaults."""
    me = MarkdownElement()
    self.assertEqual('', me.tag)
    self.assertEqual([], me.data)
    # assertIsNone(value, msg): the value under test goes first. The former
    # call passed the literal None there, so it could never fail.
    self.assertIsNone(me.parent)
    self.assertFalse(me.hidden)
def test_nondefault_instance(self):
    """Constructor arguments are exposed as tag, data and hidden."""
    element = MarkdownElement('tag', ['data'], hidden=True)
    tag, data = element.tag, element.data
    self.assertEqual('tag', tag)
    self.assertEqual(['data'], data)
    self.assertTrue(element.hidden)
def test_parent_add_data(self):
    """add_data records the receiving element as the child's parent."""
    nested = MarkdownElement('"', ['text2'])
    owner = MarkdownElement('`', ['text '])
    owner.add_data(nested)
    self.assertEqual(owner, nested.parent)
def test_tag_supports_list(self):
    """A two-item tag list is rendered as text before and after the data."""
    list_item = MarkdownElement(['- ', '\n'], ['text'])
    self.assertEqual('- text\n', list_item.flatten())
def test_comparison_different_types(self):
    """An element never compares equal to a non-element value."""
    nested = MarkdownElement('*', ['text2'], hidden=True)
    element = MarkdownElement('`', ['text ', nested])
    self.assertNotEqual(element, 1)
def test_parent_constructor(self):
    """Elements passed in the constructor's data get the new element as parent."""
    nested = MarkdownElement('"', ['text2'])
    owner = MarkdownElement('`', ['text ', nested])
    self.assertEqual(owner, nested.parent)
def test_is_text_only_has_tag(self):
    """An element that carries a tag is not text-only."""
    tagged = MarkdownElement('tag')
    text_only = tagged.is_text_only
    self.assertFalse(text_only)
def test_is_text_only_has_tag_and_text(self):
    """A tag disqualifies an element from being text-only even with text data."""
    tagged = MarkdownElement('tag', ['stuff'])
    text_only = tagged.is_text_only
    self.assertFalse(text_only)
def test_is_text_only_text_only(self):
    """An untagged element holding only text is text-only."""
    plain = MarkdownElement(data=['stuff'])
    text_only = plain.is_text_only
    self.assertTrue(text_only)
def test_flatten_n1(self):
    """Flattening a single-level element wraps its text in the tag."""
    element = MarkdownElement('`', ['text'])
    self.assertEqual('`text`', element.flatten())
def test_flatten_n2(self):
    """Flattening includes nested elements, each wrapped in its own tag."""
    quoted = MarkdownElement('"', ['text2'])
    element = MarkdownElement('`', ['text ', quoted])
    self.assertEqual('`text "text2"`', element.flatten())
def test_flatten_respects_hidden_n1(self):
    """A hidden element flattens to its text without the surrounding tag."""
    hidden = MarkdownElement('`', ['text'], hidden=True)
    self.assertEqual('text', hidden.flatten())
def test_str_calls_flatten(self):
    """str() renders an element as its flattened Markdown text."""
    quoted = MarkdownElement('"', ['text2'])
    element = MarkdownElement('`', ['text ', quoted])
    rendered = str(element)
    self.assertEqual('`text "text2"`', rendered)