def from_seed(cls, seed):
    """Build an instance of ``cls`` from a single seed.

    The ``name`` is whatever ``get_domain`` returns for ``seed.url``;
    ``priority`` and ``crawl_interval`` are copied from the seed, and
    ``total_url_num`` starts at 1 because the seed itself is the first
    URL counted for the host.
    """
    return cls(
        name=get_domain(seed.url),
        priority=seed.priority,
        crawl_interval=seed.crawl_interval,
        total_url_num=1,
    )
def add_seed(self, seed):
    """Account for ``seed`` in ``self.hosts``.

    The seed's URL is reduced to a domain with ``get_domain``. If no
    (truthy) entry exists for that domain, a new one is created with
    ``HostInfo.from_seed`` and the addition is logged; otherwise the
    existing entry's ``total_url_num`` is incremented.
    """
    host_key = get_domain(seed.url)
    known = self.hosts.get(host_key)

    if not known:
        # First seed seen for this domain: create its entry and log it.
        self.hosts[host_key] = HostInfo.from_seed(seed)
        log.msg('add new host:%s' % host_key)
        return

    known.total_url_num += 1
def get_latency_time(self, url):
    """Return how many seconds to wait before ``url`` may be crawled.

    The host entry for the URL's domain (as computed by ``get_domain``)
    is looked up in ``self.hosts``.

    * If the host has no recorded ``last_crawl_time``, or at least
      ``crawl_interval`` seconds have elapsed since it, the current time
      is stored as the new ``last_crawl_time`` and 0 is returned.
    * Otherwise the remaining wait (a positive number of seconds) is
      returned and the host entry is left unchanged.

    Raises:
        KeyError: if the domain has no entry in ``self.hosts``.
    """
    hostinfo = self.hosts[get_domain(url)]

    # Read the clock once so the comparison and the stored timestamp
    # refer to the same instant.
    now = time.time()

    if hostinfo.last_crawl_time is not None:
        elapsed = now - hostinfo.last_crawl_time
        remaining = hostinfo.crawl_interval - elapsed
        if remaining > 0:
            # Still inside the politeness window: report the wait and do
            # not touch last_crawl_time.
            return remaining

    # Never crawled, or the interval has fully elapsed: crawl now.
    hostinfo.last_crawl_time = now
    return 0