def callbackfunc(request, result):
    """Handle one finished download: queue newly seen links, then process it.

    ``result`` is unpacked as ``(res, resource, pagebuf)``. When ``pagebuf``
    is ``None`` the function returns immediately and nothing is queued or
    processed. Otherwise every link returned by ``fetchPage.parsePage`` that
    is not yet recorded in ``PAGESCache`` is recorded and submitted to
    ``main`` as a new ``threadpool.WorkRequest`` that reports back to
    ``callbackfunc``. Finally ``fetchPage.dealwithResult`` is called with
    ``res`` and ``resource``.

    ``request`` is accepted but not used.

    NOTE(review): another ``callbackfunc`` is defined later in this file; if
    both live at module level the later one replaces this one, including for
    the ``callback=callbackfunc`` lookup below -- confirm which is intended.
    """
    res, resource, pagebuf = result
    if pagebuf is None:
        return

    for href in fetchPage.parsePage(pagebuf, resource):
        # PAGESCache marks links that were already queued; skip those so the
        # same link is not submitted twice.
        if PAGESCache.get(href) is not None:
            continue
        PAGESCache[href] = True

        hostname, filename = fetchPage.parse(href)
        main.putRequest(
            threadpool.WorkRequest(
                fetchPage.downPage,
                args=[hostname, filename],
                kwds={},
                callback=callbackfunc,
            )
        )

    fetchPage.dealwithResult(res, resource)
def usingOneThread(limit): urlset = open("input.txt","r") start = datetime.datetime.now() for u in urlset: if limit <= 0 : break limit-=1 hostname , filename = fetchPage.parse(u) res= fetchPage.downPage(hostname,filename,0) fetchPage.dealwithResult(res) end = datetime.datetime.now() print "Start at :\t" , start print "End at :\t" , end print "Total Cost :\t" , end - start print 'Total fetched :', statistics.fetched_url
def callbackfunc(request, result):
    """Forward the first two items of ``result`` to ``fetchPage.dealwithResult``.

    ``request`` is accepted but not used. Only ``result[0]`` and
    ``result[1]`` are read; any further items are ignored.

    NOTE(review): a function with the same name is defined earlier in this
    file; if both are at module level, this later definition is the one that
    stays bound to the name -- confirm that is intended.
    """
    handle = fetchPage.dealwithResult
    res = result[0]
    resource = result[1]
    handle(res, resource)