コード例 #1
0
ファイル: app.py プロジェクト: troeshust96/proxy_py
        async def wrap(self, *args, **kwargs):
            """Prefix the wrapped handler's dict with per-category proxy counts.

            Four counts are taken, bucketed by ``Proxy.number_of_bad_checks``
            against ``settings.DEAD_PROXY_THRESHOLD`` and
            ``settings.DO_NOT_CHECK_ON_N_BAD_CHECKS``. The dict returned by the
            wrapped ``func`` is merged on top, so its keys win on collision.
            """

            async def tally(*conditions):
                # One COUNT query over the proxies matching all conditions.
                return await db.count(Proxy.select().where(*conditions))

            # No failed checks at all.
            good = await tally(Proxy.number_of_bad_checks == 0)

            # At least one failed check, still below the "dead" threshold.
            bad = await tally(
                Proxy.number_of_bad_checks > 0,
                Proxy.number_of_bad_checks < settings.DEAD_PROXY_THRESHOLD,
            )

            # At or past the "dead" threshold, below the "stop checking" limit.
            dead = await tally(
                Proxy.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD,
                Proxy.number_of_bad_checks <
                settings.DO_NOT_CHECK_ON_N_BAD_CHECKS,
            )

            # At or past the "stop checking" limit.
            not_checked = await tally(
                Proxy.number_of_bad_checks >=
                settings.DO_NOT_CHECK_ON_N_BAD_CHECKS,
            )

            response = dict(
                bad_proxies_count=bad,
                good_proxies_count=good,
                dead_proxies_count=dead,
                not_checked_proxies_count=not_checked,
            )

            handler_payload = await func(self, *args, **kwargs)
            response.update(handler_payload)

            return response
コード例 #2
0
async def number_of_proxies_to_process(timestamp):
    """Store a ``NumberOfProxiesToProcess`` row for the given ``timestamp``.

    Counts the proxies whose ``next_check_time`` is earlier than ``timestamp``
    in three buckets of ``number_of_bad_checks`` (zero; above zero but below
    ``settings.DEAD_PROXY_THRESHOLD``; from that threshold up to, but not
    including, ``settings.DO_NOT_CHECK_ON_N_BAD_CHECKS``).
    """

    async def count_due(*bucket_conditions):
        # Every bucket is additionally restricted to proxies already due.
        due_query = Proxy.select().where(
            *bucket_conditions,
            Proxy.next_check_time < timestamp,
        )
        return await db.count(due_query)

    good = await count_due(Proxy.number_of_bad_checks == 0)

    bad = await count_due(
        Proxy.number_of_bad_checks > 0,
        Proxy.number_of_bad_checks < settings.DEAD_PROXY_THRESHOLD,
    )

    dead = await count_due(
        Proxy.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD,
        Proxy.number_of_bad_checks < settings.DO_NOT_CHECK_ON_N_BAD_CHECKS,
    )

    await db.create(
        NumberOfProxiesToProcess,
        timestamp=timestamp,
        good_proxies=good,
        bad_proxies=bad,
        dead_proxies=dead,
    )
コード例 #3
0
ファイル: processor.py プロジェクト: troeshust96/proxy_py
    async def process_proxies(self):
        """Loop forever, queueing proxies whose next check time has passed.

        Each iteration tries three tiers in order: good proxies (no bad
        checks), bad proxies (below ``settings.DEAD_PROXY_THRESHOLD``) and dead
        proxies (below ``settings.DO_NOT_CHECK_ON_N_BAD_CHECKS``). As soon as a
        tier yields rows they are queued and the loop restarts, so a lower
        tier is only reached when the tiers above it had nothing due.

        ``self.good_proxies_are_processed`` is set to ``False`` when the good
        tier returned rows and to ``True`` when it came back empty.

        Raises:
            KeyboardInterrupt, asyncio.CancelledError: always propagated, so
                the task can be interrupted or cancelled.
            BaseException: any other error is logged; it is re-raised only
                when ``settings.DEBUG`` is truthy, otherwise the loop sleeps
                ``settings.SLEEP_AFTER_ERROR_PERIOD`` seconds and continues.
        """
        while True:
            # Brief pause on every iteration before querying again.
            await asyncio.sleep(0.01)
            try:
                # check good proxies
                proxies = await db.execute(
                    Proxy.select().where(
                        Proxy.number_of_bad_checks == 0,
                        Proxy.next_check_time < time.time(),
                    ).order_by(Proxy.next_check_time).limit(settings.NUMBER_OF_CONCURRENT_TASKS)
                )
                if proxies:
                    # Flag is lowered before queueing the batch.
                    self.good_proxies_are_processed = False

                await self.add_proxies_to_queue(proxies)

                if proxies:
                    continue

                self.good_proxies_are_processed = True

                # check bad proxies
                proxies = await db.execute(
                    Proxy.select().where(
                        Proxy.number_of_bad_checks > 0,
                        Proxy.number_of_bad_checks < settings.DEAD_PROXY_THRESHOLD,
                        Proxy.next_check_time < time.time(),
                    ).order_by(Proxy.next_check_time).limit(settings.NUMBER_OF_CONCURRENT_TASKS)
                )

                await self.add_proxies_to_queue(proxies)

                if proxies:
                    continue

                # check dead proxies
                proxies = await db.execute(
                    Proxy.select().where(
                        Proxy.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD,
                        Proxy.number_of_bad_checks < settings.DO_NOT_CHECK_ON_N_BAD_CHECKS,
                        Proxy.next_check_time < time.time(),
                    ).order_by(Proxy.next_check_time).limit(settings.NUMBER_OF_CONCURRENT_TASKS)
                )

                await self.add_proxies_to_queue(proxies)
            except (KeyboardInterrupt, asyncio.CancelledError):
                # CancelledError derives from BaseException on Python 3.8+;
                # without this clause the handler below would log and swallow
                # it, making this task impossible to cancel outside DEBUG.
                raise
            except BaseException as ex:
                self.logger.exception(ex)
                if settings.DEBUG:
                    raise

                await asyncio.sleep(settings.SLEEP_AFTER_ERROR_PERIOD)
コード例 #4
0
ファイル: app.py プロジェクト: troeshust96/proxy_py
    async def get_best_http_proxy(self, request):
        """Respond with the address of an HTTP proxy that has no bad checks.

        Candidates are ordered by ``response_time`` and the row returned by
        ``db.get`` for that query is used; its ``address`` becomes the
        plain-text response body.
        """
        # NOTE(review): nothing here handles the case where no row matches;
        # confirm what db.get does then and whether the caller copes with it.
        http_protocol = Proxy.PROTOCOLS.index("http")
        candidates = Proxy.select().where(
            Proxy.number_of_bad_checks == 0,
            Proxy.raw_protocol == http_protocol,
        ).order_by(Proxy.response_time)

        best = await db.get(candidates)

        return web.Response(text=best.address)
コード例 #5
0
    async def process_proxy(self, raw_protocol: int, auth_data: str,
                            domain: str, port: int, collector_id):
        """Check one proxy and record the outcome.

        The whole operation runs while holding ``self.proxies_semaphore``.
        A URL is built from the protocol name at index ``raw_protocol`` in
        ``Proxy.PROTOCOLS``, the optional ``auth_data``, ``domain`` and
        ``port``, and handed to ``proxy_utils.check_proxy``.

        * On success the result is passed to ``self.create_or_update_proxy``
          together with the check's start and end timestamps.
        * On failure an existing matching ``Proxy`` row (if any) gets its
          check times refreshed, its bad-check counter incremented and its
          ``uptime`` set to the current time; ``bad_uptime`` is also set once
          the counter reaches ``settings.DEAD_PROXY_THRESHOLD``. A
          ``Proxy.DoesNotExist`` raised in this branch is ignored.
        """
        async with self.proxies_semaphore:
            self.logger.debug(
                "start processing proxy {}://{}@{}:{} with collector id {}".format(
                    raw_protocol, auth_data, domain, port, collector_id))

            # The log line above shows auth_data as received; from here on a
            # missing value is treated as an empty string.
            auth_data = "" if auth_data is None else auth_data

            scheme = "{}://".format(Proxy.PROTOCOLS[raw_protocol])
            credentials = auth_data + "@" if auth_data else ""
            proxy_url = scheme + credentials + domain + ":" + str(port)

            start_checking_time = time.time()
            outcome = await proxy_utils.check_proxy(proxy_url)
            check_result, checker_additional_info = outcome
            end_checking_time = time.time()

            if check_result:
                self.logger.debug("proxy {0} works".format(proxy_url))
                await self.create_or_update_proxy(
                    raw_protocol,
                    auth_data,
                    domain,
                    port,
                    start_checking_time,
                    end_checking_time,
                    checker_additional_info,
                )
                return

            self.logger.debug("proxy {0} doesn't work".format(proxy_url))
            try:
                lookup = Proxy.select().where(
                    Proxy.raw_protocol == raw_protocol,
                    Proxy.auth_data == auth_data,
                    Proxy.domain == domain,
                    Proxy.port == port,
                )
                failed = await db.get(lookup)

                failed.last_check_time = int(time.time())
                failed.next_check_time = (
                    failed.last_check_time + failed.checking_period
                )
                failed.number_of_bad_checks += 1
                failed.uptime = int(time.time())

                if failed.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD:
                    failed.bad_uptime = int(time.time())

                reached_limit = (
                    failed.number_of_bad_checks
                    == settings.DO_NOT_CHECK_ON_N_BAD_CHECKS
                )
                if reached_limit:
                    self.logger.debug(
                        "proxy {} isn't checked anymore".format(failed.to_url()))

                await db.update(failed)
            except Proxy.DoesNotExist:
                # Nothing stored for this proxy, so there is nothing to update.
                pass
コード例 #6
0
ファイル: app.py プロジェクト: zyabik-007/proxy_py
    async def get_proxies_html(self, request):
        """Build the context dict listing every proxy with no bad checks.

        Proxies are ordered by ``response_time``. ``response_time`` is divided
        by 1000, and ``uptime`` / ``bad_uptime`` are turned into
        ``datetime.timedelta`` values measured from the current time; each of
        the three stays ``None`` when the stored value is ``None``.

        Returns:
            ``{"proxies": [...]}`` with one dict per proxy.
        """
        query = Proxy.select().where(
            Proxy.number_of_bad_checks == 0).order_by(Proxy.response_time)
        found = list(await db.execute(query))
        current_timestamp = time.time()

        def elapsed_since(moment):
            # Whole seconds between `moment` and now, as a timedelta.
            if moment is None:
                return None
            return datetime.timedelta(seconds=int(current_timestamp - moment))

        rows = []
        for proxy in found:
            response_time = proxy.response_time
            if response_time is not None:
                response_time = response_time / 1000

            rows.append({
                "address": proxy.address,
                "response_time": response_time,
                "uptime": elapsed_since(proxy.uptime),
                "bad_uptime": elapsed_since(proxy.bad_uptime),
                "last_check_time": proxy.last_check_time,
                "checking_period": proxy.checking_period,
                "number_of_bad_checks": proxy.number_of_bad_checks,
                "bad_proxy": proxy.bad_proxy,
                "white_ipv4": proxy.white_ipv4,
                "location": proxy.location,
            })

        return {"proxies": rows}
コード例 #7
0
ファイル: statistics.py プロジェクト: le-fantosme/proxy_py
async def create_proxy_count_item(timestamp):
    """Store a ``ProxyCountItem`` row with the current proxy counts.

    Proxies are split by ``number_of_bad_checks`` into good (zero), bad
    (above zero, below ``settings.DEAD_PROXY_THRESHOLD``) and dead (at or
    above that threshold); the three counts are saved with ``timestamp``.
    """

    async def tally(*conditions):
        # One COUNT query over the proxies matching all conditions.
        return await db.count(Proxy.select().where(*conditions))

    good = await tally(Proxy.number_of_bad_checks == 0)
    bad = await tally(
        Proxy.number_of_bad_checks > 0,
        Proxy.number_of_bad_checks < settings.DEAD_PROXY_THRESHOLD,
    )
    dead = await tally(
        Proxy.number_of_bad_checks >= settings.DEAD_PROXY_THRESHOLD)

    await db.create(
        ProxyCountItem,
        timestamp=timestamp,
        good_proxies_count=good,
        bad_proxies_count=bad,
        dead_proxies_count=dead,
    )
コード例 #8
0
ファイル: processor.py プロジェクト: troeshust96/proxy_py
    async def process_raw_proxy(self, proxy, collector_id):
        """Validate a raw proxy string and queue it once per known protocol.

        Steps:
          1. Parse ``proxy`` with ``proxy_validator.retrieve``; on a
             validation error, log it to ``self.collectors_logger`` and stop.
          2. Look up a stored ``Proxy`` with the same auth data, domain and
             port (any protocol). If it was checked within the last
             ``settings.PROXY_NOT_CHECKING_PERIOD`` seconds, log and stop.
          3. Otherwise, for every index of ``Proxy.PROTOCOLS``, wait until
             ``self.good_proxies_are_processed`` is truthy and pass a fresh
             ``Proxy`` object to ``self.add_proxy_to_queue``.
        """
        self.logger.debug("processing raw proxy \"{}\"".format(proxy))

        try:
            parsed = proxy_validator.retrieve(proxy)
            _, auth_data, domain, port = parsed
        except proxy_validator.ValidationError as ex:
            message = (
                "Collector with id \"{}\" returned bad raw proxy \"{}\". "
                "Message: {}"
            ).format(collector_id, proxy, ex)
            self.collectors_logger.error(message)
            return

        # don't care about protocol
        try:
            same_endpoint = Proxy.select().where(
                Proxy.auth_data == auth_data,
                Proxy.domain == domain,
                Proxy.port == port,
            )
            proxy = await db.get(same_endpoint)

            recheck_allowed_at = (
                proxy.last_check_time + settings.PROXY_NOT_CHECKING_PERIOD
            )
            if recheck_allowed_at >= time.time():
                credentials = auth_data + "@" if auth_data else ""
                proxy_short_address = credentials + "{}:{}".format(domain, port)

                self.logger.debug(
                    "skipping proxy \"{}\" from collector \"{}\"".format(
                        proxy_short_address, collector_id)
                )
                return
        except Proxy.DoesNotExist:
            # No stored proxy for this endpoint: fall through and queue it.
            pass

        for raw_protocol, _ in enumerate(Proxy.PROTOCOLS):
            # TODO: find a better way
            while not self.good_proxies_are_processed:
                await asyncio.sleep(0.1)

            candidate = Proxy()
            candidate.raw_protocol = raw_protocol
            candidate.auth_data = auth_data
            candidate.domain = domain
            candidate.port = port

            await self.add_proxy_to_queue(candidate, collector_id)
コード例 #9
0
def proxy_generator():
    """Yield ``host_port`` of each unused proxy, marking it used first.

    Every row selected with ``used == False`` has ``used`` set to ``True``
    and is saved before its ``host_port`` is yielded.
    """
    # `== False` is intentional: it builds the query expression.
    unused_proxies = Proxy.select().where(Proxy.used == False)  # noqa: E712
    for record in unused_proxies:
        record.used = True
        record.save()
        yield record.host_port