import heapq
import itertools
from collections import namedtuple

# Queue entries sort by priority (depth), then by a unique, increasing id,
# so heapq never has to compare the sets of links themselves.
Links = namedtuple('Links', ['priority', 'id', 'links'])


def crawl_web(seed, max_depth=10, max_pages=1000):
	crawled = set()
	crawl_queue = []  # priority queue ensures that "shallower" links are handled first
	index = {}
	graph = {}
	counter = itertools.count()

	"""
	Add set of links to queue of sets, crawled_queue.
	Makes sure links is not in the set of already crawled urls.
	"""
	def add_links(links, depth = 0):
		count = next(counter)
		new_links = links.difference(crawled)
		entry = Links(priority = depth, id = count, links = new_links)
		heapq.heappush(crawl_queue, entry)

	"""
	Adds all of the words in page.content to the index of words
	to sets of urls
	"""
	def index_page(page):
		words = page.content.split()
		for word in words:
			if word in index:
				index[word.lower()].add(page.url)
			else:
				index[word.lower()] = {page.url}

	add_links({seed}, 0)
	pages = 0
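	# Pop batches of links in order of depth; each batch is crawled until it
	# is exhausted or the overall page budget is reached.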
	while crawl_queue and pages < max_pages:
		entry = heapq.heappop(crawl_queue)
		to_crawl = entry.links
		depth = entry.priority
		while to_crawl and pages < max_pages:
			url = to_crawl.pop()
			page = Page(url)
			if page.is_valid() and url not in crawled:
				print(url, depth)
				pages += 1
				crawled.add(url)
				index_page(page)
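				# Record this url's outgoing links so the link graph is
				# available alongside the word index.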
				graph[url] = page.outgoing_links
				if depth < max_depth:
					add_links(page.outgoing_links, depth + 1)
	return index, graph
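

# The Page class is not defined above; the sketch below is an assumption about
# the minimal interface the crawler relies on (url, content, is_valid(),
# outgoing_links). The fetching and link extraction here are illustrative
# placeholders, not the original implementation.
import re
import urllib.request


class Page:
	def __init__(self, url):
		self.url = url
		self.content = ''
		self.outgoing_links = set()
		try:
			with urllib.request.urlopen(url) as response:
				self.content = response.read().decode('utf-8', errors='ignore')
			# Crude href extraction; a real crawler would use an HTML parser.
			self.outgoing_links = set(re.findall(r'href="(http[^"]+)"', self.content))
		except Exception:
			self.content = ''

	def is_valid(self):
		return bool(self.content)


# Example usage (assumes network access and a reachable seed url):
# index, graph = crawl_web('http://example.com', max_depth=2, max_pages=50)
# print(index.get('example', set()))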