def test_headers(self):
    """Headings h1 through h6 become lines with one to six leading '#'."""
    markup = '\n'.join(
        '<h{0}>H{0}</h{0}>'.format(level) for level in range(1, 7)
    )
    # Every heading line is preceded by an empty line; one more closes the text.
    wanted_lines = (
        '',
        '# H1',
        '',
        '## H2',
        '',
        '### H3',
        '',
        '#### H4',
        '',
        '##### H5',
        '',
        '###### H6',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_lists_nested(self):
    """An ordered list holding two levels of unordered lists is converted."""
    markup = '''Nested list<ol> <li>item 1</li> <li>item 2 <ul> <li>item 2.1</li> <li>item 2.2 <ul> <li>item 2.2.1</li> </ul> </li> </ul> </li> </ol>'''
    wanted_lines = (
        'Nested list',
        '',
        '1. item 1',
        '2. item 2',
        '',
        ' * item 2.1',
        ' * item 2.2',
        '',
        ' * item 2.2.1',
        '',
        '',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def main(argv=None):
    """Convert an HTML fragment to Markdown and print the result.

    Reads the file named by the optional positional argument (default
    ``-``), passes its whole content to ``h2md.convert`` and prints the
    returned text.

    Args:
        argv: Argument list handed to ``ArgumentParser.parse_args``;
            defaults to ``None``.
    """
    parser = argparse.ArgumentParser()
    # The help text says roughly "convert an HTML body fragment to Markdown".
    parser.add_argument('file', default='-', nargs='?',
                        help='HTML 正文部分转 Markdown')
    args = parser.parse_args(argv)
    # The context manager closes the input even when reading fails part-way,
    # and joining the stream directly avoids building a throwaway list.
    with fileinput.input(args.file) as stream:
        html = ''.join(stream)
    print(h2md.convert(html))
def test_quotes(self):
    """A blockquote is emitted as a '> ' prefixed line after an empty line."""
    markup = 'Quotes<blockquote>blockquote</blockquote>'
    wanted_lines = (
        'Quotes',
        '',
        '> blockquote',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_horizontal_ruler(self):
    """An <hr> element is emitted as a '---' line after an empty line."""
    markup = 'Horizontal ruler<hr>'
    wanted_lines = (
        'Horizontal ruler',
        '',
        '---',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_code_block_with_span(self):
    """Span tags inside <pre><code> are dropped from the fenced block."""
    markup = "Code block<pre><code><span>print('hello markdown')<span></code></pre>"
    wanted_lines = (
        'Code block',
        '```',
        "print('hello markdown')",
        '```',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_paragraph(self):
    """Each <p> element is emitted on its own line after an empty line."""
    markup = 'Paragraph<p>a</p><p>b</p>'
    wanted_lines = (
        'Paragraph',
        '',
        'a',
        '',
        'b',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_code_block_with_lang(self):
    """A <code class="hljs bash"> block yields a fence tagged 'bash'.

    The highlighting span around ``exec`` must not appear in the output.
    """
    markup = '''Code block:<pre><code class="hljs bash">kubectl <span class="hljs-built_in">exec</span> productpage-v1-54b8b9f55-bx2dq -c istio-proxy -- cat /etc/istio/proxy/envoy-rev0.json > envoy-rev0.json </code></pre>'''
    wanted_lines = (
        'Code block:',
        '```bash',
        "kubectl exec productpage-v1-54b8b9f55-bx2dq -c istio-proxy -- cat /etc/istio/proxy/envoy-rev0.json > envoy-rev0.json",
        "```",
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_lists_ordered(self):
    """Items of an <ol> are numbered '1.', '2.' in document order."""
    markup = '''Ordered list<ol> <li>item 1</li> <li>item 2</li> </ol>'''
    wanted_lines = (
        'Ordered list',
        '',
        '1. item 1',
        '2. item 2',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_lists_unordered(self):
    """Items of a <ul> are emitted as '* ' bullet lines."""
    markup = '''Unordered list<ul> <li>item 1</li> <li>item 2</li> </ul>'''
    wanted_lines = (
        'Unordered list',
        '',
        '* item 1',
        '* item 2',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def get_html(url, name):
    """Download an imooc page and save its content element as Markdown.

    Requests ``http://www.imooc.com/`` + *url*, converts the first element
    matching the ``.content`` selector with ``h2md.convert`` and writes the
    result to ``imooc/<name>.md``, leaving out every line that contains one
    of the marker strings listed below.

    Args:
        url: Path appended to the imooc base URL.
        name: Output file name without the ``.md`` extension.
    """
    import io  # local import: only needed for line-wise iteration below

    session = HTMLSession()
    r = session.get('http://www.imooc.com/' + url)
    content = r.html.find('.content', first=True)
    path = "imooc/" + name + '.md'
    text = h2md.convert(content.html)

    # Lines containing any of these markers are dropped. The strings mean
    # roughly "run example", "live demo" and "copy"; presumably they are
    # leftover page widget labels -- confirm against the site.
    markers = ("运行案例", "实例演示", "复制")

    # Filter in memory instead of writing the file and reading it back: the
    # read-back used the platform default encoding although the file had been
    # written as UTF-8, and its handle was never closed.
    kept = [
        line for line in io.StringIO(text)
        if not any(marker in line for marker in markers)
    ]

    # Write the processed content.
    with open(path, 'w', encoding="utf-8") as f:
        f.writelines(kept)
def test_quotes_nested(self):
    """Nested blockquotes gain one extra '> ' prefix per nesting level."""
    markup = '''Nested quotes<blockquote> blockquote <blockquote>nested</blockquote> <blockquote> nested <blockquote>nested double</blockquote> </blockquote> </blockquote>'''
    wanted_lines = (
        'Nested quotes',
        '',
        '> blockquote',
        '> ',
        '> > nested',
        '> ',
        '> > nested',
        '> > ',
        '> > > nested double',
        '',
        '',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_hyperlinks(self):
    """An anchor with an href becomes an inline '[text](href)' link."""
    markup = 'Hyperlink <a href="href">text</a>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Hyperlink [text](href)')
def test_image(self):
    """An <img> with alt and src becomes '![alt](src)'."""
    markup = 'Image <img alt="alt" src="src" />'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Image ![alt](src)')
def test_bold(self):
    """Both <b> and <strong> are wrapped in double asterisks."""
    markup = 'Bold <b>b</b> <strong>strong</strong>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Bold **b** **strong**')
def test_deleted(self):
    """A <del> element is wrapped in double tildes."""
    markup = 'Deleted <del>del</del>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Deleted ~~del~~')
def test_other_tags(self):
    """<center> and <sup> pass through as HTML while nested links convert.

    The whitespace surrounding the input element is absent from the
    expected output.
    """
    markup = """ <center>Pilot Architecture(来自<a href="https://istio.io/docs/concepts/traffic-management/">Isio官网文档</a><sup><a href="#ref01">[1]</a></sup>)</center> """
    wanted = '<center>Pilot Architecture(来自[Isio官网文档](https://istio.io/docs/concepts/traffic-management/)<sup>[[1]](#ref01)</sup>)</center>'
    rendered = convert(markup)
    self.assertEqual(rendered, wanted)
def test_block_normal(self):
    """Pin the output for text followed by nested article/section elements.

    Only the text found inside the <article> appears in the expected result.
    """
    markup = 'ABCD<article>EFG<section>HIJ</section></article>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'EFGHIJ')
def test_image_no_alt_src(self):
    """An <img> without alt or src still yields the empty form '![]()'."""
    markup = 'Image <img />'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Image ![]()')
def test_lists_unordered_empty(self):
    """A <ul> without items adds nothing to the output."""
    markup = 'Unordered list<ul></ul>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Unordered list')
def test_lists_ordered_empty(self):
    """An <ol> without items adds nothing to the output."""
    markup = 'Ordered list<ol></ol>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Ordered list')
def test_nested(self):
    """Inline markers stack when code, del, b and i elements are nested."""
    markup = 'Nested <code><del><b><i>nested</i></b></del></code>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Nested `~~***nested***~~`')
def test_hyperlinks_no_content(self):
    """An anchor with attributes but no text produces no output."""
    markup = '<a href="#href" class="class" title="title"></a>Hyperlink'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Hyperlink')
def test_code(self):
    """An inline <code> element is wrapped in single backticks."""
    markup = 'Code <code>code</code>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Code `code`')
def test_hyperlinks_no_href(self):
    """An anchor without href reuses its text as the link target."""
    markup = 'Hyperlink <a>text</a>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Hyperlink [text](text)')
def test_nested_tags_in_lists(self):
    """A list item holding block and inline elements converts as a whole.

    The single item contains a code block, a link, an image, a heading, a
    paragraph of inline styles, nested blockquotes and both kinds of
    sub-list.
    """
    markup = '''Nested tags in lists <ul> <li>item <pre><code>b</code></pre> <a href="href">text</a> <img alt="alt" src="src" /> <h1>H1</h1> <p> <i>i</i> <em>em</em> <b>b</b> <strong>strong</strong> <del>del</del> <code>code</code> </p> <blockquote> blockquote <blockquote>nested</blockquote> </blockquote> <ol> <li>ordered list item</li> <li> <p> <i>i</i> <em>em</em> <b>b</b> <strong>strong</strong> <del>del</del> <code>code</code> </p> </li> </ol> <ul> <li>unordered list item</li> <li> <p> <i>i</i> <em>em</em> <b>b</b> <strong>strong</strong> <del>del</del> <code>code</code> </p> </li> </ul> </li> </ul> '''
    wanted_lines = (
        'Nested tags in lists',
        '',
        '* item',
        '```',
        'b',
        '```',
        '[text](href) ![alt](src)',
        '# H1',
        '',
        '*i* *em* **b** **strong** ~~del~~ `code`',
        '',
        '> blockquote',
        '> ',
        '> > nested',
        '',
        '',
        ' 1. ordered list item',
        ' 2. *i* *em* **b** **strong** ~~del~~ `code`',
        '',
        '',
        ' * unordered list item',
        ' * *i* *em* **b** **strong** ~~del~~ `code`',
        '',
        '',
        '',
    )
    rendered = convert(markup)
    self.assertEqual(rendered, '\n'.join(wanted_lines))
def test_italics(self):
    """Both <i> and <em> are wrapped in single asterisks."""
    markup = 'Italics <i>i</i> <em>em</em>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Italics *i* *em*')
def test_inline_empty(self):
    """Empty inline elements emit no markers; the trailing space remains."""
    markup = 'Inline empty <code></code><del></del><b></b><i></i>'
    rendered = convert(markup)
    self.assertEqual(rendered, 'Inline empty ')