def execute(cls, entrypoint, dest, ext=config.DOCEXTS[0], debug=False):
    """Parse a tutorial entry point, fetch its content, render it and write it.

    The steps are run in order on a fresh ``PyTTP`` instance: ``parse``,
    ``extract``, ``render`` and ``write``. Progress is reported on stdout.

    Args:
        entrypoint: Entry point handed to ``PyTTP.parse``.
        dest: Destination forwarded to ``PyTTP.write``.
        ext: Document extension forwarded to ``PyTTP.write``; defaults to
            the first entry of ``config.DOCEXTS``.
        debug: Accepted for interface compatibility; not used by this
            function.

    Returns:
        ``0`` when the document was written, ``1`` when a
        ``HostNameError``, ``EntryPointError``, ``NotADirectoryError`` or
        ``FileTypeError`` was caught and reported on stdout.

    Raises:
        Any exception other than the four listed above propagates to the
        caller. (Returning from a ``finally`` clause would silently discard
        such exceptions and report success, so the return statements live
        outside of it.)
    """
    try:
        ttp = PyTTP()

        print(f'- Parsing the entry point: {entrypoint}')
        tutorial = ttp.parse(entrypoint)

        print(f'- Extracting content from host for {tutorial}')
        urls = Parser.extract_href(tutorial.table_contents)
        # NOTE(review): only the first two URLs are extracted. This looks
        # like a debugging limit -- confirm whether it should depend on
        # `debug` or be removed.
        ttp.extract(tutorial, urls[:2])

        print('- Rendering html')
        html = ttp.render(tutorial)

        print(f'- Writting ({ext}) document on disk')
        ttp.write(filename=tutorial.name, data=html, dest=dest, ext=ext)
    except HostNameError as e:
        print('error:', e)
        return 1
    except EntryPointError:
        print(f'error:{entrypoint} is not a valid entry point')
        return 1
    except (NotADirectoryError, FileTypeError) as e:
        print('error:', e)
        return 1
    return 0
def test_extractHref_anchorTagHrefAttrMissing_returnEmptyList(self):
    """Anchors without an ``href`` attribute must yield an empty list."""
    markup = '''<html> <a>link1</a> <a>link2</a> </html>'''
    expected = []

    hrefs = Parser.extract_href(markup)
    print(hrefs)

    self.assertEqual(hrefs, expected)
def test_extractHref_anchorTag_returnList(self):
    """Markup containing anchors with ``href`` attributes yields a result."""
    markup = '<html><a href="/link1"></a><a href="/link 2"></a></html>'

    hrefs = Parser.extract_href(markup)
    print(hrefs)

    # NOTE(review): this only checks that the result is not None; it would
    # also pass for an empty list. Consider asserting the expected hrefs
    # once the exact output of Parser.extract_href is confirmed.
    self.assertIsNotNone(hrefs)
def test_extractHref_anchorTagMissing_returnEmptyList(self):
    """Input without any anchor tag must yield an empty list.

    Both a document with no ``<a>`` element and the empty string are
    checked. Previously the markup was built but never passed to the
    parser, so only the empty-string case was actually exercised.
    """
    for html in ('<html></html>', ''):
        with self.subTest(html=html):
            res = Parser.extract_href(html)
            print(res)
            self.assertEqual(res, [])