def main(client):
    """Exercise the seeds service through ``client``, then stop the reactor.

    This is a generator: each ``yield`` hands the result of a client call to
    whatever drives the function and resumes with the resolved value.
    NOTE(review): presumably driven by Twisted's ``inlineCallbacks``; the
    decorator, if any, sits above the visible chunk -- confirm.

    Steps: ping the service, request a seed package for a new ``JobReport``
    with spider id ``'spider001'``, then ask for and print the latency of
    every seed URL in the package.

    Args:
        client: object exposing ``ping()``, ``get_seeds(spiderid, report)``
            and ``get_latency_time(spiderid, url)``.

    Any exception raised by a client call propagates to the driver after the
    reactor has been asked to stop.
    """
    # Single-argument print(...) produces the same output as the print
    # statement under Python 2 and also parses under Python 3.
    print('client run')
    try:
        yield client.ping()

        jobreport = JobReport()
        jobreport.spiderid = 'spider001'
        pkg = yield client.get_seeds(jobreport.spiderid, jobreport)
        print(pkg)

        for seed in pkg.seeds:
            # Reuse the report's id instead of repeating the literal.
            wait = yield client.get_latency_time(jobreport.spiderid, seed.url)
            print('%s waits %s seconds' % (seed.url, wait))
    finally:
        # Stop the reactor even when a call above fails; otherwise the
        # process would keep running with nothing left to do.
        reactor.stop()
def get_latency_time(self, url):
    """Return how long the caller must wait before crawling ``url``.

    NOTE(review): takes ``self`` -- presumably a method of the seeds service
    class whose header lies outside this chunk.

    The host record is looked up in ``self.hosts`` under the domain of
    ``url`` and printed.  If the host has no ``last_crawl_time`` yet, or more
    than ``crawl_interval`` has passed since it, ``last_crawl_time`` is set to
    the current time and 0 is returned.  Otherwise the record is left
    untouched and the remaining part of the interval is returned (in the
    units of ``time.time()``, i.e. seconds, assuming ``crawl_interval`` is
    expressed in seconds as well).
    """
    host = self.hosts[get_domain(url)]
    print(host)

    if host.last_crawl_time is None:
        # Nothing recorded for this host yet: no wait is required.
        host.last_crawl_time = time.time()
        return 0

    # Positive once more than crawl_interval has elapsed since the last crawl.
    overdue = time.time() - host.last_crawl_time - host.crawl_interval
    # Emits the same text as a two-item print statement: label, space, value.
    print("%s %s" % ("left: ", overdue))

    if overdue > 0:
        host.last_crawl_time = time.time()
        return 0
    return -overdue


def status(self):
    """Return the status text; currently a fixed placeholder string."""
    return "unsupported yet"


if __name__ == '__main__':
    # Manual run: issue a single seeds request; the result is discarded.
    service = MemoryBasedSeedsService()
    report = JobReport()
    report.spiderid = 'test001'
    service.get_seeds(report.spiderid, report)
# NOTE(review): this physical line is whitespace-collapsed; it holds, in order:
#
#   * a bare `return` -- the tail of a definition that starts above this
#     chunk; its header and nesting are not visible here.
#
#   * get_latency_time(self, url): looks up the host record in `self.hosts`
#     under `get_domain(url)` and prints it.  If `last_crawl_time` is None it
#     is set to `time.time()` and 0 is returned.  Otherwise it computes
#     `time.time() - last_crawl_time - crawl_interval` and prints the value
#     after the label "left: ".  A positive value means the interval has
#     elapsed: `last_crawl_time` is refreshed and 0 is returned.  Any other
#     value is negated and returned as the remaining wait, with the record
#     left unchanged.
#     Presumably the wait is in seconds (the unit of `time.time()`) --
#     confirm that `crawl_interval` uses the same unit.
#
#   * status(self): returns the fixed string "unsupported yet".
#
#   * the `__main__` guard: builds a MemoryBasedSeedsService and a JobReport
#     with spiderid 'test001', then calls `get_seeds` once and discards the
#     result.
#
# The print statements use Python 2 syntax.
return def get_latency_time(self, url): domain = get_domain(url) hostinfo = self.hosts[domain] print hostinfo if hostinfo.last_crawl_time is None: hostinfo.last_crawl_time = time.time() return 0 else: left_time = time.time() - hostinfo.last_crawl_time - \ hostinfo.crawl_interval print "left: ", left_time if left_time > 0: hostinfo.last_crawl_time = time.time() return 0 else: return -left_time def status(self): return "unsupported yet" if __name__ == '__main__': service = MemoryBasedSeedsService() report = JobReport() report.spiderid = 'test001' service.get_seeds(report.spiderid, report)