Ejemplo n.º 1
0
 def gatherLinks(pageURL):
     """Fetch ``pageURL`` and return the links its HTML parser collects.

     The page is downloaded with ``urlopen``. If the response is served as
     ``text/html`` its body is decoded and fed to a ``Finder`` built from
     ``Spider.baseURL`` and ``pageURL``; for any other content type the
     finder is fed an empty string.

     Args:
         pageURL: URL of the page to download and scan.

     Returns:
         Whatever ``Finder.pageLink()`` returns (presumably a set of URLs --
         confirm against ``Finder``), or an empty ``set`` if fetching,
         decoding or parsing raised.
     """
     html_string = ''
     try:
         # The context manager closes the connection even when reading or
         # decoding fails; the original left the response open.
         with urlopen(pageURL) as response:
             # getheader() returns None when the header is absent; treat a
             # missing Content-Type like any other non-HTML response.
             content_type = response.getheader('Content-Type') or ''
             if 'text/html' in content_type:
                 html_bytes = response.read()
                 # Honour the charset the server declared; fall back to the
                 # previous hard-coded UTF-8 when none is given.
                 charset = response.headers.get_content_charset() or 'utf-8'
                 try:
                     html_string = html_bytes.decode(charset)
                 except LookupError:
                     # Declared encoding is unknown to Python: keep the
                     # original behaviour and try UTF-8.
                     html_string = html_bytes.decode('utf-8')
         finderObject = Finder(Spider.baseURL, pageURL)
         finderObject.feed(html_string)
     except Exception as e:
         # Deliberately broad: a single bad page must not stop the crawl.
         print(str(e))
         return set()
     return finderObject.pageLink()