Example #1
0
    def crawl(url):
        """Fetch ``url`` once and record its links and data on ``spider``.

        Does nothing if ``url`` is already listed in ``spider.links_crawled``.
        Otherwise the page is downloaded with a browser-like User-Agent,
        decoded, passed through ``html.unescape`` and fed to a ``Finder``
        whose ``baseurl`` is set to ``spider.website_url``.

        Side effects on ``spider``:
            * ``url`` is removed from ``links_website`` (if it is queued there);
            * every item of ``Finder.return_links()`` is added to
              ``links_website``;
            * ``url`` is appended to ``links_crawled``;
            * ``data_dict[url]`` is set to ``Finder.return_data()``.

        Args:
            url: Address of the page to fetch.

        Raises:
            urllib.error.URLError: Propagated from ``urlopen`` when the
                request fails.
            UnicodeDecodeError: If the body is not valid in the charset used.
            LookupError: If the response declares a charset Python does not
                know.
        """
        if url in spider.links_crawled:
            return

        req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
        with urlopen(req) as urldata:
            raw = urldata.read()
            # Use the charset declared in Content-Type when there is one;
            # fall back to UTF-8 (the previous hard-coded choice) otherwise.
            charset = urldata.headers.get_content_charset() or 'utf-8'

        f = Finder()
        f.baseurl = spider.website_url
        # NOTE(review): unescaping before feeding turns escaped markup such
        # as "&lt;a&gt;" into literal "<a>" in the text handed to Finder.
        # Kept as-is to preserve existing behavior; confirm this is intended.
        f.feed(html.unescape(raw.decode(charset)))
        f.close()

        # The URL may not be queued when crawl() is called directly (e.g. for
        # a starting page), so a missing entry is not treated as an error.
        try:
            spider.links_website.remove(url)
        except ValueError:
            pass
        spider.links_website.extend(f.return_links())

        spider.links_crawled.append(url)
        spider.data_dict[url] = f.return_data()