def parsing_html(page, page_title=None):
    '''
    Parse HTML and put it in an envelope.

    ``page`` is first tried as a path to a file. If opening or reading it
    raises ``OSError``, ``page`` itself is used as the HTML markup.

    Args:
        page: Path to an HTML file, or a string of HTML markup.
        page_title: Optional title used instead of the one found in the
            markup. Intended for unittests only.

    Returns:
        The result of ``Envelope_RAML.make_an_envelope()`` for this page.
    '''
    try:
        # Read the file once; the same text feeds both parsers below.
        with open(page, 'r') as contents:
            markup = contents.read()
    except OSError:
        # Not a readable path, so treat the argument as the markup itself.
        markup = page
    soupit = BeautifulSoup(markup, 'html.parser')
    that_page = tocbuilder.parse_it(markup)
    toc_html = tocbuilder.htmlify(that_page)
    if page_title is None:
        # NOTE(review): presumably ``soupit.title`` is None when the markup
        # has no <title>, which would raise AttributeError here - confirm.
        the_title = soupit.title.string
    else:
        the_title = page_title
    set_up_class = Envelope_RAML(page,
                                 soupit.body,
                                 originalFile=page,
                                 title=the_title,
                                 toc=toc_html)
    return set_up_class.make_an_envelope()
Ejemplo n.º 2
0
 def test_parse_it_full_pass(self):
     '''
     Check parse_it against a sample that mixes h2, h3 and h4 headings.
     '''
     # Do not truncate the diff when the comparison fails.
     self.maxDiff = None
     markup = ''.join([
         '<body><h2>Heading 1 h4</h2><p>Random text</p>',
         '<h3>Heading 1.1 h4</h3><p>Random text</p><p>Random text</p>',
         '<h4>Heading 1.1.1 h4</h4><p>Random text</p><p>Random text</p>',
         '<h4>Heading 1.1.2 h4</h4><p>Random text</p>',
         '<h3>Heading 1.2 h4</h3><p>Random text</p><p>Random text</p>',
         '<h4>Heading 1.2.1 h4</h4><p>Random text</p>',
         '<h2>Heading 2 h4</h2><p>Random text</p><p>Random text</p>',
         '<h3>Heading 2.1 h4</h3><p>Random text</p>',
         '<h3>Heading 2.2 h4</h3><p>Random text</p><p>Random text</p>',
         '<p>Random text</p></body>',
     ])
     # Build the expected nesting from the innermost lists outwards.
     under_1_1 = [
         '<li><a href="#Heading1.1.1h4">Heading 1.1.1 h4</a></li>',
         '<li><a href="#Heading1.1.2h4">Heading 1.1.2 h4</a></li>',
     ]
     under_1_2 = [
         '<li><a href="#Heading1.2.1h4">Heading 1.2.1 h4</a></li>',
     ]
     under_1 = [
         '<li><a href="#Heading1.1h4">Heading 1.1 h4</a></li>',
         under_1_1,
         '<li><a href="#Heading1.2h4">Heading 1.2 h4</a></li>',
         under_1_2,
     ]
     under_2 = [
         '<li><a href="#Heading2.1h4">Heading 2.1 h4</a></li>',
         '<li><a href="#Heading2.2h4">Heading 2.2 h4</a></li>',
     ]
     expected = [
         '<li><a href="#Heading1h4">Heading 1 h4</a></li>',
         under_1,
         '<li><a href="#Heading2h4">Heading 2 h4</a></li>',
         under_2,
     ]
     self.assertEqual(parse_it(markup), expected)
Ejemplo n.º 3
0
 def test_parse_it_h2_only(self):
     '''
     Check parse_it against a sample that contains only h2 headings.
     '''
     # Do not truncate the diff when the comparison fails.
     self.maxDiff = None
     markup = ''.join([
         '<body>',
         '<h2>Heading 1 h2</h2>',
         '<h2>Heading 2 h2</h2>',
         '</body>',
     ])
     first_item = '<li><a href="#Heading1h2">Heading 1 h2</a></li>'
     second_item = '<li><a href="#Heading2h2">Heading 2 h2</a></li>'
     expected = [first_item, second_item]
     self.assertEqual(parse_it(markup), expected)
Ejemplo n.º 4
0
 def test_parse_it_h2_and_h3(self):
     '''
     Check parse_it against a sample that contains h2 and h3 headings.
     '''
     # Do not truncate the diff when the comparison fails.
     self.maxDiff = None
     markup = ''.join([
         '<body><h2>Heading 1 h3</h2><h3>Heading 1.1 h3</h3>',
         '<h3>Heading 1.2 h3</h3><h2>Heading 2 h3</h2>',
         '<h3>Heading 2.1 h3</h3><h3>Heading 2.2 h3</h3></body>',
     ])
     under_1 = [
         '<li><a href="#Heading1.1h3">Heading 1.1 h3</a></li>',
         '<li><a href="#Heading1.2h3">Heading 1.2 h3</a></li>',
     ]
     under_2 = [
         '<li><a href="#Heading2.1h3">Heading 2.1 h3</a></li>',
         '<li><a href="#Heading2.2h3">Heading 2.2 h3</a></li>',
     ]
     # BUG: Need to figure out why the first second-level list comes back
     # wrapped in an extra list (double square bracket); the expectation
     # below mirrors that on purpose.
     expected = [
         '<li><a href="#Heading1h3">Heading 1 h3</a></li>',
         [under_1],
         '<li><a href="#Heading2h3">Heading 2 h3</a></li>',
         under_2,
     ]
     self.assertEqual(parse_it(markup), expected)