Example #1
import sys

# RedisClient, Crawler, and POOL_UPPER_THRESHOLD are assumed to be provided
# by the surrounding proxy-pool project (e.g. its database, crawler, and
# settings modules).

class Getter(object):
    def __init__(self):
        """
        Initialize the database client and create the crawler
        """
        self.redis = RedisClient()
        self.crawler = Crawler()

    def is_over_threshold(self):
        """
        判断代理池是否达到上限
        """
        if self.redis.count() >= POOL_UPPER_THRESHOLD:
            return True
        else:
            return False

    def run(self):
        print('Getter is starting...')
        # Only fetch new proxies if the pool is not already full
        if not self.is_over_threshold():
            # Iterate over the crawl functions registered for each proxy site
            for crawler_index in range(self.crawler.__CrawlerCount__):
                # Look up the callback registered at this index
                callback = self.crawler.__CrawlerFunc__[crawler_index]
                proxies = self.crawler.get_proxies(callback)
                sys.stdout.flush()
                for proxy in proxies:
                    # print(proxy)
                    self.redis.add(proxy)
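
Neither example defines __CrawlerCount__/__CrawlerFunc__ (or __CrawlFuncCount__/__CrawlFunc__ in Example #2); in proxy-pool projects of this shape they are typically filled in by a metaclass that registers every crawl_* method on the crawler class. A minimal sketch of that idea, using the attribute names from Example #1 (the CrawlerMeta name and the crawl_example_site method are illustrative assumptions, not part of the examples):

class CrawlerMeta(type):
    def __new__(mcs, name, bases, attrs):
        # Record the name of every method that starts with 'crawl_'
        # so the Getter can iterate over them by index.
        crawl_funcs = [key for key, value in attrs.items()
                       if key.startswith('crawl_') and callable(value)]
        attrs['__CrawlerFunc__'] = crawl_funcs
        attrs['__CrawlerCount__'] = len(crawl_funcs)
        return type.__new__(mcs, name, bases, attrs)

class Crawler(metaclass=CrawlerMeta):
    def get_proxies(self, callback):
        # Run one registered crawl function by name and collect its output
        return list(getattr(self, callback)())

    def crawl_example_site(self):
        # Hypothetical site-specific crawl function; a real one would
        # download and parse a proxy-list page.
        yield '127.0.0.1:8080'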
Example #2
import sys

# RedisClient, Spider, and POOL_UPPER_THRESHOLD are assumed to be provided
# by the surrounding project, as in Example #1.

class Spider_getter():
    def __init__(self):
        self.redis = RedisClient()
        self.crawler = Spider()

    def is_over_threshold(self):
        """
        判断是否达到了代理池限制
        """
        if self.redis.count() >= POOL_UPPER_THRESHOLD:
            return True
        else:
            return False

    """
        调用爬虫类里面的爬取网站函数,进行爬取,将爬取的结果放入redis中
    """

    def begin(self):
        print('获取器开始执行')
        if not self.is_over_threshold():
            for callback_label in range(self.crawler.__CrawlFuncCount__):
                callback = self.crawler.__CrawlFunc__[callback_label]
                # Fetch proxies from this crawl function
                proxies = self.crawler.get_proxies(callback)
                sys.stdout.flush()
                for proxy in proxies:
                    self.redis.add(proxy)
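
In projects like these, the getter is usually driven by a scheduler that reruns it on a fixed interval so the pool is periodically topped up. A minimal sketch of that loop, using the Getter class from Example #1 (the GETTER_CYCLE value is an illustrative assumption):

import time

GETTER_CYCLE = 20  # seconds between runs; an assumed value

def run_getter_forever():
    getter = Getter()
    while True:
        # run() only fetches new proxies when the pool is below its limit
        getter.run()
        time.sleep(GETTER_CYCLE)

if __name__ == '__main__':
    run_getter_forever()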