import sys

# webinfo and httputil are module-level helpers initialised elsewhere in this module.


def main(argv=sys.argv):
    # Crawl every site that has not been crawled yet: fetch its stored
    # links, crawl each link, then mark the site as crawled.
    sites = webinfo.getuncrawled()
    for i in sites:
        links = webinfo.getweblinks(i)
        for j in links:
            crawl(j)
        webinfo.markcrawled(i)
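# main() depends on a crawl() helper that is not part of this section.
# A minimal sketch of what it presumably does, assuming it performs the same
# fetch/save/mark steps as crawlfromdb() below; the actual definition
# elsewhere in the project may differ.
def crawl(url):
    metainfo = httputil.getmeta(url)       # fetch the page's meta data
    links = httputil.getlinks(url)         # extract its outgoing links
    webinfo.savemetainfo(url, metainfo)    # persist the meta data
    webinfo.saveweblinks(url, links)       # persist the discovered links
    webinfo.markcrawled(url)               # record the URL as crawled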
def crawlfromdb():
    # Pull the uncrawled URLs stored in the database and crawl each one.
    global webinfo, httputil
    sites = webinfo.getuncrawled()
    if sites == False:
        # Nothing left to crawl.
        return
    for url in sites:
        metainfo = httputil.getmeta(url)
        links = httputil.getlinks(url)
        print "meta data size : " + str(len(metainfo)) + " links size : " + str(len(links))
        webinfo.savemetainfo(url, metainfo)
        webinfo.saveweblinks(url, links)
        webinfo.markcrawled(url)
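# Hypothetical entry point (not in the original code): run the crawler when
# the module is executed directly, assuming webinfo and httputil have been
# initialised at module level.
if __name__ == "__main__":
    main()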