Example #1
0
def worker(queue_verification):
    """Crawl proxies from XiCi and feed them into the verification queue.

    For each scraped proxy, block (sleeping) while the proxy store already
    holds more than ``config.PROXY_STORE_NUM`` entries or while the
    verification queue is full; only then enqueue the proxy.

    :param queue_verification: bounded queue consumed by the verifier.
    :return: never returns under normal operation (generator is drained).
    """
    spider = XiCi()
    for proxy in spider.generator():
        enqueued = False
        while not enqueued:
            store_count = persister.handler().zcount('index_speed', '-inf',
                                                     '+inf')
            if store_count > config.PROXY_STORE_NUM:
                # Store is saturated; back off before re-checking.
                time.sleep(config.PROXY_FULL_SLEEP_SEC)
            elif queue_verification.full():
                # Verifier is behind; short pause, then retry this proxy.
                time.sleep(0.5)
            else:
                queue_verification.put(proxy)
                enqueued = True
Example #2
0
def worker(queue_verification):
    """Crawl proxies from XiCi and NianShao, pages 1-15 in a cycle, and feed
    them into the verification queue.

    Fix: the original slept once when the store/queue was full and then
    silently dropped the proxy. Like the single-source variant, each proxy is
    now retried until it is actually enqueued, so no scraped proxy is lost.

    :param queue_verification: bounded queue consumed by the verifier.
    :return: never returns (pages cycle 1..15 forever).
    """
    spiders = [XiCi(), NianShao()]
    page = 1
    while page < 16:
        for spider in spiders:
            for proxy in spider.generator(page):
                # Hold this proxy until there is room for it.
                while True:
                    if persister.handler().zcount('index_speed', '-inf',
                                                  '+inf') > config.PROXY_STORE_NUM:
                        # Store is saturated; back off before re-checking.
                        time.sleep(config.PROXY_FULL_SLEEP_SEC)
                    elif queue_verification.full():
                        # Verifier is behind; short pause, then retry.
                        time.sleep(0.5)
                    else:
                        queue_verification.put(proxy)
                        break
        # Wrap back to the first page after the last one.
        page = 1 if page == 15 else page + 1
Example #3
0
def worker(queue_verification):
    """Worker process: crawl proxies from each source and feed them into the
    verification queue.

    NianShao (www.nianshao.me) was removed from the sources — the site is
    apparently unmaintained and is down.

    Fix: the original slept once when the store/queue was full and then
    silently dropped the proxy; each proxy is now retried until it is
    actually enqueued, so no scraped proxy is lost.

    :param queue_verification: bounded queue consumed by the verifier.
    :return: never returns (pages cycle 1..config.PAGE forever).
    """
    # Better sources first, so they are crawled with priority.
    spiders = [XiCi(), Kuai()]
    page = 1
    while page <= config.PAGE:
        for spider in spiders:
            for proxy in spider.generator(page):
                # Hold this proxy until there is room for it.
                while True:
                    if persister.handler().zcount('index_speed', '-inf', '+inf') > config.PROXY_STORE_NUM:
                        # Store is saturated; back off before re-checking.
                        time.sleep(config.PROXY_FULL_SLEEP_SEC)
                    elif queue_verification.full():
                        # Verifier is behind; short pause, then retry.
                        time.sleep(1)
                    else:
                        queue_verification.put(proxy)
                        break
        # Wrap back to the first page after the last one.
        page = 1 if page == config.PAGE else page + 1