def crawl(url):
    """Fetch *url*, extract links and data, and record the results on ``spider``.

    Skips URLs already present in ``spider.links_crawled``. Otherwise the page
    is downloaded, decoded, and fed to a ``Finder``; the URL is moved from
    ``spider.links_website`` to ``spider.links_crawled``, newly discovered
    links are appended to ``spider.links_website``, and the page's extracted
    data is stored in ``spider.data_dict[url]``.

    NOTE(review): assumes ``url`` is currently in ``spider.links_website`` —
    ``list.remove`` raises ValueError otherwise; confirm callers guarantee this.
    """
    # Guard clause: never re-crawl a URL we've already processed.
    if url in spider.links_crawled:
        return

    # Some sites reject the default urllib User-Agent, so spoof a browser.
    req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
    with urlopen(req) as urldata:
        data = urldata.read()

    # Decode and unescape HTML entities before feeding the parser.
    # Assumes the page is UTF-8 — TODO: honor the response's charset header.
    text = html.unescape(data.decode('utf-8'))

    f = Finder()
    f.baseurl = spider.website_url
    f.feed(text)
    f.close()

    # Move url from the pending queue to the crawled list, enqueue the
    # links discovered on this page, and store the extracted page data.
    spider.links_website.remove(url)
    spider.links_website.extend(f.return_links())
    spider.links_crawled.append(url)
    spider.data_dict[url] = f.return_data()