def check_proxy(proxy: str, should_be_valid=True):
    """Assert that ``proxy_validator.retrieve`` accepts or rejects ``proxy``.

    :param proxy: raw proxy string handed to ``proxy_validator.retrieve``
    :param should_be_valid: expected verdict; ``True`` means the validator
        must accept the string, ``False`` means it must raise
        ``proxy_validator.ValidationError``
    :raises AssertionError: when the validator's verdict differs from the
        expected one
    """
    try:
        proxy_validator.retrieve(proxy)
    except proxy_validator.ValidationError as ex:
        if not should_be_valid:
            # Rejection is exactly what the caller expected.
            return
        raise AssertionError(
            "Proxy should be considered as valid. Message: {}".format(ex))

    # Reaching this point means the validator accepted the string.
    if should_be_valid:
        return
    raise AssertionError("Proxy shouldn't be considered as valid")
Ejemplo n.º 2
0
    async def process_raw_proxy(self, proxy, collector_id):
        """Validate a raw proxy string and queue it once per protocol index.

        The string is parsed with ``proxy_validator.retrieve``. A string that
        fails validation is reported on ``self.collectors_logger`` and
        dropped. When a stored ``Proxy`` with the same auth data, domain and
        port exists and its ``last_check_time`` plus
        ``settings.PROXY_NOT_CHECKING_PERIOD`` is not yet in the past, the
        proxy is skipped. Otherwise a new ``Proxy`` is created for every
        index of ``Proxy.PROTOCOLS`` and passed to
        ``self.add_proxy_to_queue``.

        :param proxy: raw proxy string returned by a collector
        :param collector_id: identifier of that collector; used in log
            messages and forwarded to ``self.add_proxy_to_queue``
        """
        self.logger.debug("processing raw proxy \"{}\"".format(proxy))

        try:
            _, auth_data, domain, port = proxy_validator.retrieve(proxy)
        except proxy_validator.ValidationError as ex:
            complaint = (
                "Collector with id \"{}\" returned bad raw proxy \"{}\". "
                "Message: {}"
            ).format(collector_id, proxy, ex)
            self.collectors_logger.error(complaint)
            return

        # don't care about protocol
        try:
            same_address_query = Proxy.select().where(
                Proxy.auth_data == auth_data,
                Proxy.domain == domain,
                Proxy.port == port,
            )
            stored_proxy = await db.get(same_address_query)

            recheck_allowed_at = (
                stored_proxy.last_check_time
                + settings.PROXY_NOT_CHECKING_PERIOD
            )
            if recheck_allowed_at >= time.time():
                # Address without the protocol part, for the log line only.
                credentials = auth_data + "@" if auth_data else ""
                short_address = credentials + "{}:{}".format(domain, port)

                self.logger.debug(
                    "skipping proxy \"{}\" from collector \"{}\"".format(
                        short_address, collector_id)
                )
                return
        except Proxy.DoesNotExist:
            # Nothing stored for this address yet, so it has to be queued.
            pass

        protocol_count = len(Proxy.PROTOCOLS)
        for raw_protocol in range(protocol_count):
            # TODO: find a better way
            # Poll until ``self.good_proxies_are_processed`` becomes truthy.
            while not self.good_proxies_are_processed:
                await asyncio.sleep(0.1)

            candidate = Proxy()
            candidate.raw_protocol = raw_protocol
            candidate.auth_data = auth_data
            candidate.domain = domain
            candidate.port = port

            await self.add_proxy_to_queue(candidate, collector_id)
Ejemplo n.º 3
0
async def process_proxy(proxy_url: str):
    """Check one proxy against every protocol and print a colored cell.

    The proxy string is parsed with ``proxy_validator.retrieve``. Each entry
    of ``Proxy.PROTOCOLS`` is then tried in order through
    ``proxy_utils.check_proxy`` until one check succeeds. Finally a single
    space with a colored background is printed without a newline: red when
    no protocol worked, otherwise cyan / green / yellow / magenta for a
    response time below 1 s / below 5 s / below 10 s / anything slower.

    The response time is the duration of the last check that was attempted.

    :param proxy_url: raw proxy string returned by a collector
    :raises ValueError: if ``proxy_validator.retrieve`` rejects the string;
        the original ``ValidationError`` is attached as ``__cause__``
    """
    async with proxies_semaphore:
        try:
            _, auth_data, domain, port = proxy_validator.retrieve(proxy_url)
        except proxy_validator.ValidationError as ex:
            raise ValueError(
                "Your collector returned bad proxy \"{}\". Message: \"{}\"".
                format(proxy_url, ex)) from ex

        # Protocol-independent part of the URL, built once outside the loop.
        address = "{}:{}".format(domain, port)
        if auth_data:
            address = auth_data + "@" + address

        is_working = False
        # Pre-set so an empty ``Proxy.PROTOCOLS`` yields a red cell instead of
        # an UnboundLocalError further down.
        response_time = 0.0
        for protocol in Proxy.PROTOCOLS:
            candidate_url = "{}://{}".format(protocol, address)

            # A monotonic clock is not affected by system clock adjustments
            # while the check is running.
            started_at = time.monotonic()
            check_result, _ = await proxy_utils.check_proxy(candidate_url)
            response_time = time.monotonic() - started_at

            if check_result:
                is_working = True
                break

        if not is_working:
            color = 'red'
        elif response_time < 1:
            color = 'cyan'
        elif response_time < 5:
            color = 'green'
        elif response_time < 10:
            color = 'yellow'
        else:
            color = 'magenta'

        # Flush right away so the cell shows up even without a newline.
        print(colored(' ', on_color='on_' + color), end='', flush=True)