import urllib2

def crawl(start_url):
    cdb = CrawlDB(settings.DB_FILE)
    cdb.connect()
    cdb.enqueue([start_url])
    while True:
        url = cdb.dequeue()
        # dequeue() returns False once the queue is empty
        if url is False:
            break
        # skip URLs we have already crawled
        if cdb.hasCrawled(url):
            continue
        print url
        status = 0
        req = urllib2.Request(str(url))
        req.add_header("User-Agent", "couchmap 0.1")
        request = None
        try:
            request = urllib2.urlopen(req)
        # HTTPError is a subclass of URLError, so it has to be caught first;
        # otherwise the URLError handler would swallow HTTP error responses
        except urllib2.HTTPError, e:
            status = e.code
        except urllib2.URLError, e:
            continue
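To kick the crawler off, a minimal driver along these lines would do; the command-line handling and the fallback URL are purely illustrative, and crawl() plus CrawlDB/settings are assumed to be defined as above.

import sys

if __name__ == "__main__":
    # hypothetical entry point: take the seed URL from the command line,
    # falling back to an illustrative default
    start = sys.argv[1] if len(sys.argv) > 1 else "http://example.com/"
    crawl(start)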