Ejemplo n.º 1
0
    def execute(cls, entrypoint, dest, ext=config.DOCEXTS[0], debug=False):
        """Run the parse / extract / render / write pipeline for one tutorial.

        Args:
            entrypoint: Value handed to ``PyTTP.parse`` to obtain the tutorial.
            dest: Destination passed through to ``PyTTP.write``.
            ext: Document extension passed to ``PyTTP.write``; defaults to
                the first entry of ``config.DOCEXTS``.
            debug: Accepted for interface compatibility; not used here.

        Returns:
            0 when every step completed, 1 when a handled error
            (HostNameError, EntryPointError, NotADirectoryError or
            FileTypeError) was reported on stdout.

        Raises:
            Any exception other than the four handled ones propagates to the
            caller. The previous ``return`` inside ``finally`` silently
            discarded such exceptions (including KeyboardInterrupt) and
            reported success.
        """
        try:
            ttp = PyTTP()
            print(f'- Parsing the entry point: {entrypoint}')
            tutorial = ttp.parse(entrypoint)

            print(f'- Extracting content from host for {tutorial}')
            urls = Parser.extract_href(tutorial.table_contents)
            # NOTE(review): only the first two URLs are extracted; this looks
            # like a development limit (possibly meant to depend on `debug`)
            # -- confirm before removing.
            ttp.extract(tutorial, urls[:2])

            print('- Rendering html')
            html = ttp.render(tutorial)

            print(f'- Writting ({ext}) document on disk')
            ttp.write(filename=tutorial.name, data=html, dest=dest, ext=ext)
        except HostNameError as e:
            print('error:', e)
            return 1
        except EntryPointError:
            print(f'error:{entrypoint} is not a valid entry point')
            return 1
        except (NotADirectoryError, FileTypeError) as e:
            print('error:', e)
            return 1
        return 0
Ejemplo n.º 2
0
 def test_extractHref_anchorTagHrefAttrMissing_returnEmptyList(self):
     """Anchors without an href attribute must produce an empty list."""
     # Same markup as a multi-line literal, spelled out line by line so the
     # embedded whitespace is explicit.
     markup = (
         '<html>\n'
         '                 <a>link1</a>\n'
         '                 <a>link2</a>\n'
         '             </html>'
     )
     hrefs = Parser.extract_href(markup)
     print(hrefs)
     self.assertEqual(hrefs, [])
Ejemplo n.º 3
0
 def test_extractHref_anchorTag_returnList(self):
     """Markup with href-bearing anchors must yield a non-None result."""
     markup = '<html><a href="/link1"></a><a href="/link 2"></a></html>'
     hrefs = Parser.extract_href(markup)
     print(hrefs)
     self.assertIsNotNone(hrefs)
Ejemplo n.º 4
0
 def test_extractHref_anchorTagMissing_returnEmptyList(self):
     """Markup that contains no anchor tag must produce an empty list."""
     html = '<html></html>'
     # Pass the anchor-free markup itself; the earlier version built `html`
     # but called extract_href('') and so never exercised this case.
     res = Parser.extract_href(html)
     print(res)
     self.assertEqual(res, [])