Esempio n. 1
0
def fetch_ips(q: Queue, validator_queue: Queue):
    """Scrape proxies from queued providers and hand them to the validator.

    Runs until interrupted. Each item taken from ``q`` is called with no
    arguments and the result is used as the provider. Every URL returned by
    ``provider.urls()`` is downloaded through one shared ``Worker``; pages
    that come back non-empty are parsed and each parsed proxy is put on
    ``validator_queue``.

    Args:
        q: Queue yielding zero-argument callables that produce a provider
            (presumably provider classes -- confirm against the producer).
        validator_queue: Queue that receives every parsed proxy.

    A failure inside ``worker.get_html`` (or while evaluating its
    arguments) is logged and that URL is skipped. ``KeyboardInterrupt``,
    ``InterruptedError`` and ``SystemExit`` stop the worker and end the
    function; any other exception propagates to the caller.
    """
    logger.debug('fetch_ips...')
    fetcher = Worker()

    # The handler always terminates the loop, so it wraps the loop as a whole.
    try:
        while True:
            source: BaseProvider = q.get()()
            source_name = source.__class__.__name__

            logger.debug('Get a provider from the provider queue: ' +
                         source_name)

            for page_url in source.urls():
                try:
                    page = fetcher.get_html(
                        page_url, render_js=source.should_render_js())
                except Exception as err:
                    # One bad URL must not abort the rest of this provider.
                    logger.error("worker.get_html failed: %s", err)
                    continue

                if not page:
                    continue

                candidates = source.parse(page)
                for candidate in candidates:
                    validator_queue.put(candidate)

                summary = (
                    ' {}: feed {} potential proxies into the validator queue'
                    .format(source_name, len(candidates)))
                logger.info(summary)
    except (KeyboardInterrupt, InterruptedError, SystemExit):
        fetcher.stop()
        logger.info('worker_process exited.')
Esempio n. 2
0
def fetch_ips(q: Queue, validator_queue: Queue, run_once=False):
    """Consume providers from ``q`` and feed their proxies to the validator.

    Each item taken from ``q`` is used directly as a provider: every URL from
    ``provider.urls()`` is fetched through a single ``Worker`` and any
    non-empty HTML is passed to ``provider.parse``. Each parsed proxy is put
    on ``validator_queue``.

    Args:
        q: Queue of provider instances to process.
        validator_queue: Queue that receives every parsed proxy.
        run_once: When true, finish as soon as ``q.empty()`` reports true
            instead of calling ``q.get()`` again.

    The loop ends, and the worker is stopped, on ``KeyboardInterrupt``,
    ``InterruptedError``, ``SystemExit`` or ``pyppeteer.errors.PyppeteerError``
    (the latter is also logged as an error). Any other exception propagates
    to the caller.
    """
    logger.debug('worker_process started.')
    logger.info('fetching ips...')
    worker = Worker()

    while True:
        try:
            if run_once and q.empty():
                # Caught by the handler below, which stops the worker and
                # leaves the loop.
                raise SystemExit

            provider: BaseProvider = q.get()
            provider_name = provider.__class__.__name__

            logger.info('Get a provider from the provider queue: %s',
                        provider_name)

            for url in provider.urls():
                html = worker.get_html(url,
                                       render_js=provider.should_render_js())
                if not html:
                    continue

                proxies = provider.parse(html)
                for p in proxies:
                    validator_queue.put(p)

                logger.info(
                    ' %s: feed %s potential proxies into the validator queue',
                    provider_name, len(proxies))
        except (KeyboardInterrupt, InterruptedError, SystemExit):
            worker.stop()
            break
        except pyppeteer.errors.PyppeteerError as e:
            logger.error(
                'pyppeteer.errors.PyppeteerError detected: %s\n'
                'Please make sure you have installed all the dependencies '
                'for chromium correctly',
                e)
            # Stop the worker on this exit path as well, so it is not left
            # running once the loop is abandoned.
            worker.stop()
            break

    logger.debug('worker_process exited.')