def crawler():
    """Crawl the Amazon site-directory (categories) page and collect links.

    Fetches the categories landing page, extracts every anchor tag that
    carries the ``nav_a`` navigation class, and returns the list of
    absolute URLs discovered (start URL first).

    Returns:
        list[str]: URLs queued for crawling, start page included.

    NOTE(review): relies on module-level ``urllib.request`` and
    ``BeautifulSoup`` (bs4) imports — confirm they exist at file top.
    """
    # Amazon category page (crawl entry point).
    start_url = 'http://www.amazon.in/gp/site-directory/ref=nav_shopall_btn'
    to_crawl = [start_url]
    crawled = []

    # Fetch and parse the start page. An explicit parser avoids bs4's
    # "no parser was explicitly specified" warning.
    html = urllib.request.urlopen(start_url)
    crawled.append(start_url)
    soup = BeautifulSoup(html, 'html.parser')

    for anchor in soup.find_all('a', href=True):
        # bs4 returns the 'class' attribute as a list of class names;
        # membership is the correct test for the nav_a navigation links.
        if 'nav_a' in anchor.get('class', []):
            # hrefs on this page are site-relative; build an absolute URL
            # on the same domain as the start page (original mixed .in/.com).
            absolute = 'http://www.amazon.in' + anchor['href']
            # Skip anything already crawled or already queued.
            if absolute not in crawled and absolute not in to_crawl:
                to_crawl.append(absolute)
    return to_crawl