Example #1
def loop_exception_hook(self, loop, context):
    """
    We are having trouble getting log.exception() with exc_info=True to show
    the stack trace, so we have a custom log.traceback() method for now.

    >>> self.log.exception(exc, exc_info=True, extra=extra)

    Not sure if we will keep it, since it might add some extra customization
    options, but it works for now.
    """
    log = logger.get('handle_exception', sync=True)
    log.debug('Handling Exception')

    # The only benefit of including the frame is that the reported filename
    # will point to the call site rather than to the logger module itself.
    try:
        log.traceback(
            context['exception'].__class__,
            context['exception'],
            context['exception'].__traceback__,
        )
    except BlockingIOError:
        log.warning('Could Not Output Traceback due to Blocking IO')

    log.debug('Shutting Down in Exception Handler')
    # The loop is already running when the exception handler fires, so schedule
    # shutdown as a task instead of calling run_until_complete().
    loop.create_task(shutdown(loop))
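
For context, a handler with this (self, loop, context) signature is typically
registered via asyncio's loop.set_exception_handler(); a minimal sketch, assuming
the hook lives on some runner class with access to the loop (the Runner name and
run() method are hypothetical, not part of the original source):

import asyncio

class Runner:

    def loop_exception_hook(self, loop, context):
        ...  # body as defined above

    def run(self):
        loop = asyncio.new_event_loop()
        # A bound method matches the (loop, context) signature asyncio expects
        # for unhandled exceptions raised in tasks and callbacks.
        loop.set_exception_handler(self.loop_exception_hook)
        try:
            loop.run_forever()
        finally:
            loop.close()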
class ManagedProxyPool(SimpleProxyPool):

    __NAME__ = "Managed Proxy Pool"
    log = logger.get(__name__, __NAME__)

    async def put(self, proxy, evaluate=True, prepopulation=False):
        """
        [x] NOTE:
        ---------
        We do not want to check confirmation based on the threshold and horizon,
        because that is the definition for confirmed proxies, which should not
        be put in the general pool.
        """
        if evaluate:
            evaluation = proxy.evaluate_for_pool()
            if evaluation.passed:
                await super(ManagedProxyPool, self).put(proxy)
            else:
                # Do not log during prepopulation.
                if not prepopulation:
                    if settings.logging.log_proxy_queue:
                        self.log.debug(f'Cannot Add Proxy to {self.__NAME__}', extra={
                            'other': str(evaluation)
                        })

                    if proxy.confirmed():
                        self.log.warning('Removing Proxy That Was Confirmed from Pool', extra={
                            'proxy': proxy,
                        })
Example #3
import os
from plumbum.path import LocalPath

from tortoise import fields
from tortoise.models import Model
from tortoise.exceptions import OperationalError, DoesNotExist

from instattack import settings

from instattack.lib import logger
from instattack.lib.utils import stream_raw_data, read_raw_data

from instattack.core.exceptions import DirExists, UserFileExists
from instattack.core.passwords import password_gen

log = logger.get(__name__, subname='User')


class UserAttempt(Model):

    id = fields.IntField(pk=True)
    password = fields.CharField(max_length=100)
    user = fields.ForeignKeyField('models.User', related_name='attempts')
    last_attempt = fields.DatetimeField(null=True, auto_now=True)
    success = fields.BooleanField(default=False)
    num_attempts = fields.IntField(default=1)

    class Meta:
        unique_together = ('password', 'user')
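
A minimal usage sketch (a hypothetical helper, not part of the original file)
showing how the unique_together constraint above would typically be handled with
Tortoise ORM:

async def record_attempt(user, password, success=False):
    # get_or_create respects the ('password', 'user') unique constraint; on a
    # repeat attempt we bump the counter instead of inserting a duplicate row.
    attempt, created = await UserAttempt.get_or_create(
        user=user,
        password=password,
        defaults={'success': success},
    )
    if not created:
        attempt.num_attempts += 1
        attempt.success = attempt.success or success
        await attempt.save()
    return attempt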

import asyncio
import contextlib
from proxybroker import Broker

from instattack import settings

from instattack.lib import logger
from instattack.core.models import Proxy


log = logger.get(__name__, subname='Proxy Broker')


class ProxyBroker(Broker):

    __name__ = 'Proxy Broker'

    def __init__(self, loop, limit=None):
        """
        If we want the broker to run until we manually stop it, we still need
        to set a limit on initialization for the Proxy Broker package, so we
        set that arbitrarily high.

        [x] TODO:
        ---------
        The limit of the broker (i.e. the collect limit) eventually needs to
        be tied to the pool in some way, and collection needs to be triggered
        when we start running low on prepopulated proxies.
        """
        self.loop = loop
        self._stopped = False
class SimpleProxyPool(AbstractProxyPool):

    __NAME__ = "Simple Proxy Pool"
    log = logger.get(__name__, __NAME__)
Example #6
import asyncio
import re

from instattack import settings

from instattack.lib import logger

from instattack.core.models import InstagramResult
from instattack.core.exceptions import (InstagramResultError, HTTP_RESPONSE_ERRORS,
    HTTP_REQUEST_ERRORS)


log = logger.get(__name__)


class client:

    def __init__(self, loop, on_error, on_success):
        self.loop = loop
        self.on_error = on_error
        self.on_success = on_success

    def post(self, session, url, proxy, headers=None, data=None):
        return session.post(
            url,
            headers=headers,
            data=data,
            ssl=False,
            proxy=proxy.url  # Only Http Proxies Are Supported by AioHTTP
        )
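
Since post() hands back the context manager from session.post(), a caller would
typically enter the returned object rather than await it directly. A rough
call-site sketch with assumed placeholder names (attempt, url, data):

import aiohttp

async def attempt(api, url, proxy, headers=None, data=None):
    async with aiohttp.ClientSession() as session:
        # Entering the context manager sends the request through the proxy
        # and yields the aiohttp response.
        async with api.post(session, url, proxy, headers=headers, data=data) as resp:
            resp.raise_for_status()
            return await resp.json()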
import asyncio

from termx.ext.utils import progress

from instattack import settings

from instattack.lib import logger
from instattack.lib.utils import limit_as_completed

from instattack.core.proxies import SimpleProxyPool, SimpleProxyManager

from .base import AbstractRequestHandler
from .client import train_client

__all__ = ('TrainHandler', )

log = logger.get(__name__, 'Train Handler')


class TrainHandler(AbstractRequestHandler):

    __name__ = 'Train Handler'
    __proxy_manager__ = SimpleProxyManager
    __proxy_pool__ = SimpleProxyPool
    __client__ = train_client

    async def train(self, limit=None, confirmed=False):
        try:
            results = await asyncio.gather(
                self._train(limit=limit),
                self.proxy_manager.start(limit=limit, confirmed=confirmed),
            )
class BrokeredProxyManager(SimpleProxyManager):

    __NAME__ = "Brokered Proxy Manager"
    log = logger.get(__name__, __NAME__)

    def __init__(self, *args, start_event):
        super(BrokeredProxyManager, self).__init__(*args)
        self.broker = ProxyBroker(self.loop)
        self.start_event = start_event

    async def stop(self):
        if self.broker._started:
            self.broker.stop()

    async def collect(self):
        """
        Retrieves proxies from the broker and converts them to our Proxy model.
        The proxy is then evaluated as to whether or not it meets the standards
        specified and conditionally added to the pool.

        [x] TODO:
        ---------
        We need to move this to the appropriate place, most likely in the manager
        class.

        [!] IMPORTANT:
        -------------
        We should maybe make this more dynamic, and add proxies from the broker
        when the pool drops below the limit.

        Figure out how to make the collect limit more dynamic, or at least calculated
        based on the prepopulated limit.
        >>> collect_limit = max(self._maxsize - self.qsize(), 0)
        """
        self.log.debug('Collecting Proxies')

        count = 0
        collect_limit = 10000  # Arbitrarily high for now.

        async for proxy, created in self.broker.collect(save=True):
            evaluation = proxy.evaluate_for_pool()
            if evaluation.passed:
                await self.put(proxy)

                # Set Start Event on First Proxy Retrieved from Broker
                if self.start_event and not self.start_event.is_set():
                    self.start_event.set()
                    self.log.debug('Setting Start Event', extra={
                        'other': 'Broker Started Sending Proxies'
                    })

            if collect_limit and count == collect_limit:
                break
            count += 1

    async def start(self, limit=None, confirmed=False):
        """
        Retrieves proxies from the queue that is populated from the Broker and
        then puts these proxies in the prioritized heapq pool.

        If the prepopulation flag is set, proxies that we previously saved to
        the database are put into the pool first.

        [x] TODO:
        ---------
        We are eventually going to need the relationship between prepopulation
        and collection to be more dynamic and adjust, and collection to trigger
        if we are running low on proxies.
        """
        await super(BrokeredProxyManager, self).start(limit=limit, confirmed=confirmed)

        if settings.proxies.pool.collect:
            # collect() sets the start event once the broker returns its first
            # usable proxy.
            await self.collect()
        else:
            if self.start_event.is_set():
                raise ProxyPoolError('Start Event Already Set')

            self.start_event.set()
            self.log.debug('Setting Start Event', extra={
                'other': 'Proxy Pool Prepopulated'
            })
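
The TODO above asks for collection to kick in when prepopulated proxies run low.
One rough way to sketch that, assuming it lives on this manager (the monitor()
method, low_water_mark, and interval are hypothetical, not part of the project):

    async def monitor(self, low_water_mark=50, interval=5):
        # Hypothetical helper: periodically re-trigger broker collection when
        # the general pool drains below a threshold.  A real implementation
        # would also need a stop condition tied to handler shutdown.
        while True:
            if settings.proxies.pool.collect and self.pool.qsize() < low_water_mark:
                await self.collect()
            await asyncio.sleep(interval)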
class SmartProxyManager(BrokeredProxyManager):

    __NAME__ = "Smart Proxy Manager"
    log = logger.get(__name__, __NAME__)

    def __init__(self, loop, pool_cls, start_event=None):
        super(SmartProxyManager, self).__init__(loop, pool_cls, start_event=start_event)
        self.lock = asyncio.Lock()

        self.confirmed = ConfirmedQueue(self.pool, self.lock)
        self.hold = HoldQueue(self.pool, self.lock)

        self.confirmed.hold = self.hold
        self.hold.confirmed = self.confirmed

    async def start(self, **kwargs):
        await super(SmartProxyManager, self).start(**kwargs)
        self.log.info(f"Prepopulated {self.confirmed.num_proxies} Confirmed Proxies")

    @property
    def num_proxies(self):
        return (super(SmartProxyManager, self).num_proxies +
            self.confirmed.num_proxies + self.hold.num_proxies)

    async def on_proxy_error(self, proxy, err):
        """
        Callback for the case where a request results in a response error.
        """
        await super(SmartProxyManager, self).on_proxy_error(proxy, err)
        await self.put(proxy)

    async def on_proxy_success(self, proxy):
        """
        There is a chance that the proxy is already in the Confirmed Queue;
        our overridden put method handles that.
        """
        await super(SmartProxyManager, self).on_proxy_success(proxy)
        await self.put(proxy)

    async def get(self):
        """
        [x] NOTE:
        --------
        Logging for pulling proxies out of queues is done in the individual
        queues themselves.
        """
        proxy = await self.confirmed.get()
        if proxy:
            return proxy

        proxy = await self.hold.get()
        if proxy:
            return proxy

        return await super(SmartProxyManager, self).get()

    async def put(self, proxy):
        """
        Determines whether or not proxy should be put in the Confirmed Queue,
        the Hold Queue or back in the general pool.  If the proxy is not supposed
        to be held or confirmed, evaluates whether or not the proxy meets the
        specified standards before putting in the general pool.

        -  If `last_request` is None, the put method is being called from
           prepopulation and the proxy should be put in either the General
           Pool or the Confirmed Queue.

        -  If `last_request` is not None, and the proxy queue_id designates it is
           in Confirmed Queue or Hold Queue, have Sub Queue handle the rest of the
           put method.

        - If `last_request` is not None and the queue_id does not designate the
          Hold Queue or the Confirmed Queue, put in General Pool.

        [x] NOTE:
        --------
        There may be an edge case where the last request error is a timeout
        error but enough time has passed for it to be immediately usable
        again -> this just means that it will be pulled from Hold Queue faster
        than it otherwise would though.
        """
        last_request = proxy.requests(-1, active=True)
        if last_request is None:
            assert proxy.queue_id is None

            if proxy.confirmed():
                await self.confirmed.raise_if_present(proxy)
                await self.confirmed.put(proxy, prepopulation=True)
            else:
                # Maybe Limit Evaluation if Proxy was Ever Confirmed?
                await super(SmartProxyManager, self).put(proxy, prepopulation=True)

        else:
            assert proxy.queue_id is not None
            if proxy.queue_id == 'confirmed':
                await self.put_from_confirmed(proxy)

            elif proxy.queue_id == 'hold':
                await self.put_from_hold(proxy)

            else:
                await self.put_from_pool(proxy)

    async def put_from_confirmed(self, proxy):
        """
        Takes a proxy that was taken from the Confirmed Queue and determines how to
        handle it based on the new request appended to the proxy's history.

        Since we do not remove proxies from the Confirmed Queue until they fail,
        we do not need to call `self.confirmed.put(proxy)` to keep it there; we
        simply never take it out.

        [x] NOTE:
        ---------
        Proxies in the manager only move up, and not down (with the exception of
        a proxy moving from confirmed to held).  Once a proxy leaves the pool, to
        either go into the Hold Queue or the Confirmed Queue, leaving either one
        of those queues is an indication that we don't want to use it again, so
        we do not put it back in the General Pool.

        [x] NOTE:
        ---------
        Because proxies in the ConfirmedQueue are shared by multiple threads
        simultaneously, and a proxy from the Confirmed Queue is likely to
        produce subsequent successful responses, it is likely that the proxy is
        already in the ConfirmedQueue.

        There seems to be more problems with the Hold Queue and threading race
        conditions than with the Confirmed Queue.

        [x] TODO:
        --------
        Remove sanity checks `raise_if_` once we are more confident in operation
        of the manager.
        """

        # We are temporarily removing proxies from the Confirmed Queue to test out
        # the timeout issues.
        # await self.confirmed.raise_if_missing(proxy)
        await self.hold.warn_if_present(proxy)

        last_request = proxy.requests(-1, active=True)

        # [x] TODO: Figure out a better way of handling this situation.
        # This check has to be done first: proxy can be confirmed over a horizon
        # but still have a more recent timeout error.
        if last_request.was_timeout_error:

            # Since we are removing from Confirmed right now, we don't have to
            # move it, just to put it in.
            await self.hold.put(proxy)
            # await self.confirmed.move_to_hold(proxy)

        # Proxy Still Confirmed Over Horizon - Don't Move Out Yet
        elif proxy.confirmed():
            # Temporary
            if not last_request.confirmed:
                pass
                # errs = proxy.errors_in_horizon()
                # if len(errs) != 0:
                #     if config['instattack']['log.logging']['log_proxy_queue']:
                #         self.log.debug(f'Maintaining Proxy in {self.__NAME__}', extra={
                #             'proxy': proxy,
                #             'data': {
                #                 'Num Errors': len(errs),
                #             }
                #         })

        else:
            # Proxy No Longer Confirmed -> Discard by Removing
            # Temporarily Removing from Confirmed on Get
            # await self.confirmed.remove(proxy)
            pass

    async def put_from_hold(self, proxy):
        """
        Takes a proxy that was taken from the Hold Queue and determines how to
        handle it based on the new request appended to the proxy's history.

        [x] NOTE:
        ---------
        Since the proxy is already in the Hold Queue, the second to last request
        should be a timeout error, otherwise it would not have been sent to the
        Hold Queue to begin with.

        [x] NOTE:
        ---------
        Proxies in the manager only move up, and not down (with the exception of
        a proxy moving from confirmed to held).
            - If a proxy is in the Hold Queue and times out, but then returns a
              confirmed request, we move back up to the Confirmed Queue.
            - If a proxy is in the Hold Queue and returns an error, or times out,
              we discard, not move back to the General Pool.

        [x] TODO:
        --------
        Remove sanity checks `raise_if_` once we are more confident in operation
        of the manager.
        """
        # await self.hold.raise_if_present(proxy)  # Race Condition - Another thread might beat you to it.
        #
        # Don't know why this is failing.
        # await self.confirmed.raise_if_present(proxy)

        # [x] TODO:
        # This Keeps Failing - Only thing I can think of is a Race Condition?
        # We will log warning for now, hopefully find bug.
        last_last_request = proxy.requests(-2, active=True)
        if not last_last_request.was_timeout_error:
            e = ProxyPoolError(
                f"Second to Last Request Should be Timeout Error, "
                f"Not {last_last_request.error}"
            )
            self.log.warning(e)

        last_request = proxy.requests(-1, active=True)

        # Request Confirmed - Move from Hold to Confirmed ^
        if last_request.confirmed:
            # Why were we moving it?  Proxy was removed from hold, it's not
            # in there anymore...
            await self.confirmed.put(proxy)
            # await self.hold.move_to_confirmed(proxy)

        # Another Timeout Error - Increment Timeout and Check Max
        elif last_request.was_timeout_error:
            if last_request.error == last_last_request.error:
                try:
                    proxy.increment_timeout(last_request.error)

                # Proxy Maxes Out -> Discard
                # Should we maybe limit this to discarding only proxies that don't
                # have any recent confirmations?
                except ProxyMaxTimeoutError as e:
                    self.log.info(e)
                    proxy.reset_timeout(last_request.error)
                else:
                    # Typical Race Conditions w Hold Queue
                    await self.hold.safe_put(proxy)
            else:
                # Typical Race Conditions w Hold Queue
                await self.hold.safe_put(proxy)
        else:
            # Proxy No Longer Holdable -> Discard
            pass

    async def put_from_pool(self, proxy):
        """
        Takes a proxy that is currently in the General Pool and determines how to
        handle it based on the new request appended to the proxy's history.

        This involves either:
            (1) Putting proxy in Confirmed Queue if it resulted in a successful
                response.
            (2) Putting proxy in Hold Queue if it resulted in a timeout error.
            (3) Putting back in pool.

        [x] TODO:
        --------
        Since we do not return proxies from the Confirmed Queue or Hold Queue
        back to the General Pool, should we discard proxies that have errors
        after being taken out of General Pool?

        For the above, the evaluation will determine whether or not the proxy
        should stay in the General Pool, but this is a little counter-intuitive,
        since we don't apply that same evaluation logic to determine whether or
        not to keep the proxy after it fails in the Confirmed Queue or Hold Queue.

        [x] TODO: Race Condition
        --------
        This is unusual:
        >>>  await self.hold.warn_if_present(proxy)

        This means that a proxy in the pool is already in the Hold Queue - did
        some other thread already put it in there?  Was it not fully removed from
        the Hold Queue?
        """
        await self.hold.warn_if_present(proxy)
        await self.confirmed.raise_if_present(proxy)

        last_request = proxy.requests(-1, active=True)

        if last_request.confirmed:
            await self.confirmed.put(proxy)
        else:
            # [x] NOTE: There really shouldn't be any confirmed proxies in the
            # general pool unless the immediate last request was confirmed.  Once
            # confirmed proxies leave the general pool, they stay out.
            if proxy.confirmed():
                raise ProxyPoolError(
                    f"Should Not be Confirmed Proxy in {self.pool.__NAME__}")

            if last_request.was_timeout_error:
                # Typical Race Conditions w Hold Queue
                await self.hold.safe_put(proxy)
            else:
                await super(SmartProxyManager, self).put(proxy)
class SimpleProxyManager(ProxyManagerInterface):

    __NAME__ = "Simple Proxy Manager"
    log = logger.get(__name__, __NAME__)

    def __init__(self, loop, pool_cls):

        self.loop = loop
        self.pool = pool_cls(loop)
        self.original_num_proxies = 0

    async def start(self, limit=None, confirmed=False):
        """
        Prepopulates the prioritized heapq pool with proxies that were
        previously saved to the database, optionally limited to confirmed
        proxies.

        [x] TODO:
        ---------
        We are eventually going to need the relationship between prepopulation
        and collection to be more dynamic and adjust, and collection to trigger
        if we are running low on proxies.
        """
        self.log.debug('Prepopulating Proxies')

        filters = {}
        if confirmed:
            filters = {'confirmed': True}

        for proxy in await Proxy.filter(**filters).all():
            if limit and self.num_proxies == limit:
                break

            await self.put(proxy)
            self.original_num_proxies += 1

        if self.num_proxies == 0:
            self.log.error('No Proxies to Prepopulate')
            return

        self.log.info(f"Prepopulated {self.num_proxies} Proxies")

    async def stop(self):
        pass

    async def get(self):
        return await self.pool.get()

    async def put(self, proxy, **kwargs):
        await self.pool.put(proxy, **kwargs)

    async def on_proxy_error(self, proxy, exc):
        """
        For training proxies, we only care about storing the state variables
        on the proxy model, and we do not need to put the proxy back in the
        pool, or a designated pool.
        """
        if settings.logging.log_request_errors:
            self.log.error(exc, extra={'proxy': proxy})

        req = ProxyRequest(
            error=exc.__subtype__,
            status_code=exc.status_code,
        )
        proxy.add_failed_request(req)

    async def on_proxy_success(self, proxy):
        """
        For training proxies, we only care about storing the state variables
        on the proxy model, and we do not need to put the proxy back in the
        pool, or a designated pool.
        """
        req = ProxyRequest()
        proxy.add_successful_request(req)
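
For reference, a minimal wiring sketch based on the constructor and start() shown
above (run_training and the limit value are placeholders, not part of the project):

async def run_training(loop):
    # The manager builds its pool instance from the pool class it is given.
    manager = SimpleProxyManager(loop, SimpleProxyPool)
    await manager.start(limit=100, confirmed=False)

    proxy = await manager.get()
    # ... issue a request with the proxy, then record the outcome:
    await manager.on_proxy_success(proxy)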
Example #11
import asyncio
import aiohttp

from instattack import settings
from instattack.lib import logger
from instattack.lib.utils import limit_as_completed, cancel_remaining_tasks

from instattack.core.exceptions import TokenNotFound, PoolNoProxyError
from instattack.core.proxies import SmartProxyManager, ManagedProxyPool

from .base import AbstractRequestHandler
from .client import instagram_client

log = logger.get(__name__, 'Login Handler')


class AbstractLoginHandler(AbstractRequestHandler):

    __client__ = instagram_client

    def __init__(self, *args, **kwargs):
        super(AbstractLoginHandler, self).__init__(*args, **kwargs)

        self.user = self.loop.user
        self.attempts_to_save = []

        self._cookies = None
        self._token = None

    @property
    def token(self):
Example #12
The above is problematic if there is a client disconnect.

To remedy:

(1)  For client disconnections / fighting against Task cancellation, I would
     recommend asyncio.shield; that is why it exists (see the sketch after
     this docstring).
(2)  If the user wants a way to control tasks in a more granular way, I would
     recommend aiojobs.
(3)  Of course, if the user wants to execute background tasks (inside the same
     loop), I would also recommend aiojobs.
"""

__all__ = ('AttackHandler', )

log = logger.get(__name__, 'Attack Handler')


class AttackHandler(AbstractLoginHandler):

    __name__ = 'Attack Handler'
    __proxy_manager__ = SmartProxyManager
    __proxy_pool__ = ManagedProxyPool

    def __init__(self, *args, **kwargs):
        super(AttackHandler, self).__init__(*args, **kwargs)

        self.passwords = asyncio.Queue()
        self.num_passwords = 0
        self.stop_event = asyncio.Event()
Example #13
class ConfirmedQueue(ProxyQueue):

    __NAME__ = 'Confirmed Queue'
    log = logger.get(__name__, __NAME__)

    __queueid__ = 'confirmed'

    def __init__(self, pool, lock):
        super(ConfirmedQueue, self).__init__(lock)
        self.pool = pool
        self.rotating_index = 0
        self.hold = None

        self.times_used = collections.Counter()
        self.mapped = {}

    def validate_for_queue(self, proxy):
        return True

    def raise_for_queue(self, proxy):
        """
        Validates whether or not the proxy is allowed to be in the queue.

        For the Confirmed Queue, we do have to do this on get() and put(),
        since proxies are not completely removed from the queue in the
        get() method.
        """
        if not proxy.confirmed():
            raise ProxyPoolError(f"Found Unconfirmed Proxy in {self.__NAME__}")

        # THIS CAN HAPPEN
        # If one thread times out with a proxy, it will set its value to a
        # timeout value before the proxy is necessarily removed from the
        # confirmed queue.
        # last_request = proxy.last_request(active=True)
        # if last_request and last_request.was_timeout_error:
        #     raise ProxyPoolError(f"Found Holdable Proxy in {self.__NAME__}")

    async def get(self):
        """
        Retrieves a proxy from the queue and removes it from the queue after
        a certain number of threads have retrieved it.

        This allows multiple threads access to the same proxy, but does not get
        stuck maxing out with timeout errors due to a large number of threads
        simultaneously using the same proxy.
        """
        try:
            proxy = await self._get_proxy()
        except QueueEmpty as e:
            self.log.warning(e)
            return None
        else:
            self.raise_for_queue(proxy)

            # Temporarily Removing Proxies from Confirmed Queue:
            # Issue is that if a proxy from the confirmed queue raises a too many
            # requests exception, it is likely being used at the same time for
            # several requests, which causes it to timeout immediately.
            # await self.remove(proxy)

            return proxy

    def _delete_nth(self, n):
        # Rotate the deque so the nth element sits at the front, drop it, then
        # rotate back to restore the original ordering.
        self._queue.rotate(-n)
        self._queue.popleft()
        self._queue.rotate(n)

    def _get_nth(self, n):
        # Same rotation trick, but only peek at the nth element; popping it
        # here would remove it from the queue (that is what _delete_nth is for).
        self._queue.rotate(-n)
        proxy = self._queue[0]
        self._queue.rotate(n)
        return proxy

    async def remove(self, proxy):
        """
        [x] Note:
        ---------
        Because proxies in the ConfirmedQueue are shared by multiple threads
        simultaneously (not the case for HeldQueue), it is possible that the
        proxy has already been removed from the ConfirmedQueue by the time
        another thread determines it should be removed.
        """
        async with self.lock:
            if proxy not in self._queue:
                raise ProxyPoolError(
                    f'Cannot Remove Proxy from {self.__NAME__}',
                    extra={'proxy': proxy})

            ind = self._queue.index(proxy)
            self._delete_nth(ind)
            del self.times_used[proxy.id]
            del self.mapped[proxy.id]

    async def _get_proxy(self):
        """
        [x] TODO:
        --------
        There may be a smarter way to do this, that involves staggering the
        retrieval of the same proxy with asyncio.sleep() based on the number
        of times it was already pulled out.
        """
        async with self.lock:
            least_common = self.times_used.most_common()
            if not least_common:
                raise QueueEmpty(self)

            proxy_id, count = least_common[-1]  # least-used proxy
            proxy = self.mapped[proxy_id]

            # As proxies are confirmed and put back in, this might cause issues,
            # since a proxy may wind up being handed out more times than the
            # limit allows.  We will have to decrement the count when a proxy
            # is put back in.
            if count >= MAX_CONFIRMED_PROXIES:
                raise ProxyPoolError('Count %s exceeds %s.' %
                                     (count, MAX_CONFIRMED_PROXIES))

            # This is the last allowed handout, so remove the proxy from the
            # rotation before returning it.
            if count == MAX_CONFIRMED_PROXIES - 1:
                await self.remove(proxy)

                if settings.logging.log_proxy_queue:
                    self.log.debug(
                        f'Returning & Removing Proxy from {self.__NAME__}',
                        extra={
                            'data': {
                                'Times Used':
                                f"{self.times_used[proxy.id] or 0} (Last Allowed)",
                                f'{self.__NAME__} Size': self.qsize(),
                            },
                            'proxy': proxy,
                        })
            else:
                if settings.logging.log_proxy_queue:
                    self.log.debug(f'Returning Proxy from {self.__NAME__}',
                                   extra={
                                       'data': {
                                           'Times Used':
                                           self.times_used[proxy.id] or 0,
                                           f'{self.__NAME__} Size':
                                           self.qsize(),
                                       },
                                       'proxy': proxy,
                                   })

            self.times_used[proxy.id] += 1
            return proxy

    async def move_to_hold(self, proxy):
        last_request = proxy.last_request(active=True)
        assert last_request.was_timeout_error

        await self.remove(proxy)
        await self.hold.put(proxy)

        if settings.logging.log_proxy_queue:
            self.log.debug(
                f'Moving Proxy from {self.__NAME__} to {self.hold.__NAME__}',
                extra={
                    'data': {
                        'Last Request': last_request.error,
                        f"{self.__NAME__} Size": self.qsize(),
                        f"{self.hold.__NAME__} Size": self.hold.qsize(),
                    },
                    'proxy': proxy,
                })

    async def put(self, proxy, prepopulation=False):
        """
        [x] TODO:
        --------
        Depending on treatment of the Confirmed Queue when proxies already
        exist in the queue, this might be able to be moved to the base class.

        [x] Note:
        ---------
        Because proxies in the ConfirmedQueue are shared by multiple threads
        simultaneously, and a proxy from the ConfirmedQueue is likely to cause
        subsequent successful responses, it is likely that the proxy is
        already in the ConfirmedQueue.

        This means we have to check before we put in.
        """
        async with self.lock:
            # This might happen a lot because confirmed proxies are not removed
            # from the queue!
            if await self.contains(proxy):
                raise ProxyPoolError(f'Cannot Add Proxy to {self.__NAME__}')

            if proxy.id in self.times_used:
                raise ProxyPoolError('Did not expect proxy to be in count.')

            # Have to initialize so that the _get_proxy() method can find it.
            self.times_used[proxy.id] = 0
            self.mapped[proxy.id] = proxy

            await super(ConfirmedQueue, self).put(proxy,
                                                  prepopulation=prepopulation)
Example #14
class HoldQueue(ProxyQueue):

    __NAME__ = 'Hold Queue'
    log = logger.get(__name__, __NAME__)

    __queueid__ = 'hold'

    def __init__(self, pool, lock):
        super(HoldQueue, self).__init__(lock)
        self.pool = pool
        self.confirmed = None

    def validate_for_queue(self, proxy):
        return True

    def raise_for_queue(self, proxy):
        """
        Validates whether or not the proxy is allowed to be in the queue.

        For the Hold Queue, we only have to do this on put(), since the proxy
        is removed from the queue in get(); in the Confirmed Queue the proxy
        stays in the queue, so it has to be validated on get() as well.
        """
        # Proxy can be confirmed over the horizon even if the most recent error
        # is a timeout error, so we cannot do this:
        # >>> if proxy.confirmed_over_threshold_in_horizon():
        # >>>   raise ProxyPoolError("Hold Queue: Found Confirmed Proxy")

        # Most Recent Request Confirmed -> Always Should be in Confirmed Queue
        last_request = proxy.last_request(active=True)
        if last_request.confirmed:
            raise ProxyPoolError(f"Found Confirmed Proxy in {self.__NAME__}")

        if not last_request.was_timeout_error:
            raise ProxyPoolError(
                f"Found Non Holdable Proxy in {self.__NAME__}")

    async def get(self):
        async with self.lock:
            for proxy in self._queue:

                # This raise might be overkill?
                self.raise_for_queue(proxy)

                # Do we need to call self.recycle() at some point?  Do we run
                # the risk of proxies getting stuck in here after their timeouts
                # have passed?
                last_request = proxy.last_request(active=True)
                self.log.critical(
                    f'Checking if Hold Proxy OK {proxy.time_since_used} > {proxy.timeout(last_request.error)}?',
                    extra={'proxy': proxy})  # noqa
                if proxy.time_since_used > proxy.timeout(last_request.error):
                    # await self.remove(proxy)
                    self.log.critical('Hold Proxy Ok')
                    self._queue.remove(proxy)
                    return proxy

        if len(self._queue) == 0:
            self.log.info(f'No Proxy Ready for Use in {self.__NAME__}')
        return None

    async def move_to_confirmed(self, proxy):
        await self.remove(proxy)
        await self.confirmed.put(proxy)

        if settings.logging.log_proxy_queue:
            self.log.debug(
                f'Moving Proxy from {self.__NAME__} to {self.confirmed.__NAME__}',
                extra={
                    'data': {
                        f"{self.__NAME__} Size": self.qsize(),
                        f"{self.confirmed.__NAME__} Size":
                        self.confirmed.qsize(),
                    },
                    'proxy': proxy,
                })

    async def move_to_pool(self, proxy):
        await self.remove(proxy)
        await self.pool.put(proxy)

        if settings.logging.log_proxy_queue:
            self.log.debug(
                f'Moving Proxy from {self.__NAME__} to {self.pool.__NAME__}',
                extra={
                    'data': {
                        f"{self.__NAME__} Size": self.qsize(),
                        f"{self.pool.__NAME__} Size": self.pool.qsize(),
                    },
                    'proxy': proxy,
                })

    # async def recycle(self):
    #     """
    #     Removes proxies from the hold that are no longer required to be in
    #     hold.  If the proxy has been confirmed to have a successful request, the
    #     proxy is put in good, otherwise, the proxy is put back in the pool.
    #     """
    #     async with self.lock:
    #         for proxy in self._queue:
    #             if proxy.hold():
    #                 continue
    #             # Should we use the historical confirmed value or just the last request
    #             # confirmed value?
    #             if proxy.confirmed:
    #                 await self.confirmed.put(proxy)
    #             else:
    #                 await self.pool.put(proxy, evaluate=False)

    async def put(self, proxy):
        """
        [x] TODO:
        --------
        Depending on treatment of the Confirmed Queue when proxies already
        exist in the queue, this might be able to be moved to the base class.

        [x] Note:
        ---------
        Proxies in Hold Queue are not used by multiple threads simultaneously,
        so when one thread determines that the proxy should be put in the
        Hold Queue, it should not already be in there.
        """
        async with self.lock:
            if await self.contains(proxy):
                raise ProxyPoolError(f'Cannot Add Proxy to {self.__NAME__}')
            await super(HoldQueue, self).put(proxy)
from dataclasses import dataclass, field
from datetime import datetime

from dacite import from_dict

import tortoise
from tortoise import fields
from tortoise.models import Model

from instattack import settings
from instattack.lib import logger

from instattack.core.exceptions import ProxyMaxTimeoutError

from .evaluation import evaluate
from .mixins import ProxyMetrics, allow_exception_input


log = logger.get(__name__, subname='Proxy')


@dataclass
class ProxyRequest:

    date: datetime = field(init=False)
    error: str = None
    status_code: int = None

    def __post_init__(self):
        self.date = datetime.now()

    @property
    def confirmed(self):
        return self.error is None
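
A quick usage sketch of the dataclass above (the error and status code values
are illustrative):

ok = ProxyRequest()
assert ok.confirmed  # no error recorded -> treated as a confirmed request

failed = ProxyRequest(error='too_many_requests', status_code=429)
assert not failed.confirmed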