예제 #1
0
def main(client):
    """Ping the service, fetch seeds for one spider and print each seed's wait.

    Written as a generator: each remote call on ``client`` is yielded, and
    the value sent back into the generator is used as that call's result.
    NOTE(review): presumably driven by Twisted's ``inlineCallbacks`` -- the
    decorator is not visible in this excerpt; confirm.

    Args:
        client: object exposing ``ping()``, ``get_seeds(spiderid, report)``
            and ``get_latency_time(spiderid, url)``.
    """
    print('client run')
    yield client.ping()

    jobreport = JobReport()
    jobreport.spiderid = 'spider001'
    # Single source of truth for the spider id, so the seed request and the
    # latency queries below always target the same spider.
    spider_id = jobreport.spiderid

    pkg = yield client.get_seeds(spider_id, jobreport)
    print(pkg)

    for seed in pkg.seeds:
        wait = yield client.get_latency_time(spider_id, seed.url)
        print('%s waits %s seconds' % (seed.url, wait))

    # NOTE(review): this line is only reached when every call above
    # succeeds; an exception raised at a yield leaves the reactor running.
    reactor.stop()
예제 #2
0
def main(client):
    """Ping the service, fetch seeds for one spider and print each seed's wait.

    Written as a generator: each remote call on ``client`` is yielded, and
    the value sent back into the generator is used as that call's result.
    NOTE(review): presumably driven by Twisted's ``inlineCallbacks`` -- the
    decorator is not visible in this excerpt; confirm.

    Args:
        client: object exposing ``ping()``, ``get_seeds(spiderid, report)``
            and ``get_latency_time(spiderid, url)``.
    """
    print('client run')
    yield client.ping()

    jobreport = JobReport()
    jobreport.spiderid = 'spider001'
    # Single source of truth for the spider id, so the seed request and the
    # latency queries below always target the same spider.
    spider_id = jobreport.spiderid

    pkg = yield client.get_seeds(spider_id, jobreport)
    print(pkg)

    for seed in pkg.seeds:
        wait = yield client.get_latency_time(spider_id, seed.url)
        print('%s waits %s seconds' % (seed.url, wait))

    # NOTE(review): this line is only reached when every call above
    # succeeds; an exception raised at a yield leaves the reactor running.
    reactor.stop()
예제 #3
0
    def get_latency_time(self, url):
        """Return how many seconds the caller must wait before crawling ``url``.

        Looks up the host record for the URL's domain in ``self.hosts`` and
        compares the time elapsed since its ``last_crawl_time`` with its
        ``crawl_interval``. Returning 0 means "crawl now" and records the
        current time as the host's ``last_crawl_time``; a positive return is
        the remaining wait and leaves the record untouched.

        NOTE(review): ``self.hosts`` is indexed directly; what happens for a
        domain with no record depends on the container type, which is not
        visible here.

        Args:
            url: URL whose domain (via ``get_domain``) selects the host record.

        Returns:
            0 when the host may be crawled immediately, otherwise the number
            of seconds still to wait.
        """
        hostinfo = self.hosts[get_domain(url)]
        print(hostinfo)

        # Sample the clock once so the value used for the decision is the
        # same one that gets recorded as the crawl time.
        now = time.time()

        if hostinfo.last_crawl_time is None:
            # First request for this host: nothing to wait for.
            hostinfo.last_crawl_time = now
            return 0

        # Non-negative once at least crawl_interval has passed since the
        # last recorded crawl; negative while the caller still has to wait.
        overdue = now - hostinfo.last_crawl_time - hostinfo.crawl_interval
        # Same text as the two-item print statement ("left: " + space + value).
        print("left:  %s" % (overdue,))

        if overdue >= 0:
            # ">=" so that a wait which has exactly run out is also recorded
            # as a crawl instead of returning a zero wait without bookkeeping.
            hostinfo.last_crawl_time = now
            return 0
        return -overdue

    def status(self):
        """Return a fixed placeholder string; no real status is computed."""
        placeholder = "unsupported yet"
        return placeholder


if __name__ == '__main__':
    # Manual smoke run: build a MemoryBasedSeedsService and request seeds
    # once for a test spider id. The return value of get_seeds is discarded.
    seeds_service = MemoryBasedSeedsService()
    job_report = JobReport()
    job_report.spiderid = 'test001'

    seeds_service.get_seeds(job_report.spiderid, job_report)
예제 #4
0
        return

    def get_latency_time(self, url):
        """Return how many seconds the caller must wait before crawling ``url``.

        Looks up the host record for the URL's domain in ``self.hosts`` and
        compares the time elapsed since its ``last_crawl_time`` with its
        ``crawl_interval``. Returning 0 means "crawl now" and records the
        current time as the host's ``last_crawl_time``; a positive return is
        the remaining wait and leaves the record untouched.

        NOTE(review): ``self.hosts`` is indexed directly; what happens for a
        domain with no record depends on the container type, which is not
        visible here.

        Args:
            url: URL whose domain (via ``get_domain``) selects the host record.

        Returns:
            0 when the host may be crawled immediately, otherwise the number
            of seconds still to wait.
        """
        hostinfo = self.hosts[get_domain(url)]
        print(hostinfo)

        # Sample the clock once so the value used for the decision is the
        # same one that gets recorded as the crawl time.
        now = time.time()

        if hostinfo.last_crawl_time is None:
            # First request for this host: nothing to wait for.
            hostinfo.last_crawl_time = now
            return 0

        # Non-negative once at least crawl_interval has passed since the
        # last recorded crawl; negative while the caller still has to wait.
        overdue = now - hostinfo.last_crawl_time - hostinfo.crawl_interval
        # Same text as the two-item print statement ("left: " + space + value).
        print("left:  %s" % (overdue,))

        if overdue >= 0:
            # ">=" so that a wait which has exactly run out is also recorded
            # as a crawl instead of returning a zero wait without bookkeeping.
            hostinfo.last_crawl_time = now
            return 0
        return -overdue

    def status(self):
        """Return a fixed placeholder string; no real status is computed."""
        placeholder = "unsupported yet"
        return placeholder

if __name__ == '__main__':
    # Manual smoke run: build a MemoryBasedSeedsService and request seeds
    # once for a test spider id. The return value of get_seeds is discarded.
    seeds_service = MemoryBasedSeedsService()
    job_report = JobReport()
    job_report.spiderid = 'test001'

    seeds_service.get_seeds(job_report.spiderid, job_report)