Beispiel #1
0
    def get_non_seed_details(self, queue_id):
        """Fetch stale details for one queue, active rows first.

        The same query is run twice against the ``details`` table: once with
        ``active=True`` limited to ``ACTIVE_LIMIT`` rows and once with
        ``active=False`` limited to ``INACTIVE_LIMIT`` rows.  Both runs keep
        only rows whose ``last_used`` is below ``LAST_USED_CUTOFF`` and order
        them by ``last_used`` ascending.

        Returns:
            A list of ``Detail`` objects (active rows followed by inactive
            rows), or an empty list when ``queue_id`` is ``None``.
        """
        if queue_id is None:
            return []

        sql = """
            SELECT * FROM details 
            WHERE queue_id = %(queue_id)s
            AND active=%(active)s
            AND last_used < %(last_used_cutoff)s
            ORDER BY last_used ASC
            LIMIT %(limit)s;
            """

        collected = []
        # Active rows are queried first so they lead the combined result.
        for is_active, row_cap in ((True, ACTIVE_LIMIT), (False, INACTIVE_LIMIT)):
            bind_values = {
                'queue_id': queue_id,
                'active': is_active,
                'last_used_cutoff': LAST_USED_CUTOFF,
                'limit': row_cap,
            }
            rows = self.do_query(sql, bind_values)
            collected.extend(Detail(**row) for row in rows)

        return collected
Beispiel #2
0
    def get_seed_details(self):
        """Return stale details of the seed queue, active rows first.

        Calls ``init_seed_details()`` and then queries the ``details`` table
        twice for ``SEED_QUEUE_ID`` -- first with ``active=True``, then with
        ``active=False``.  Each query keeps rows whose ``last_used`` is below
        ``LAST_USED_CUTOFF``, orders them by ``last_used`` ascending and is
        limited to ``INITIAL_SEED_COUNT`` rows.

        Returns:
            A list of ``Detail`` objects: active rows followed by inactive
            rows.
        """
        self.init_seed_details()

        query = """
            SELECT * FROM details 
            WHERE queue_id=%(queue_id)s
            AND active=%(active)s
            AND last_used < %(last_used_cutoff)s
            ORDER BY last_used ASC
            LIMIT %(limit)s;
            """
        # Everything except the ``active`` flag is shared by both queries.
        shared_params = {
            "queue_id": SEED_QUEUE_ID,
            "last_used_cutoff": LAST_USED_CUTOFF,
            "limit": INITIAL_SEED_COUNT,
        }
        a_params = dict(shared_params, active=True)
        ia_params = dict(shared_params, active=False)

        active = [Detail(**d) for d in self.do_query(query, a_params)]
        inactive = [Detail(**d) for d in self.do_query(query, ia_params)]

        return active + inactive
Beispiel #3
0
    def create_new_details(self, queue, count=ACTIVE_LIMIT + INACTIVE_LIMIT):
        """Register cache details for proxies the queue has not used yet.

        Reads the proxy ids already recorded in the redis set named
        ``NEW_QUEUE_PROXY_IDS_PREFIX + queue.domain``, passes them together
        with ``queue`` and ``count`` to ``db_mgr.get_unused_proxy_ids``, and
        for every returned id:

        * adds the id to that redis set,
        * requires that the key ``p_<id>`` exists in redis,
        * skips the id when ``d_<queue_key>_p_<id>`` already exists,
        * otherwise registers a new ``Detail`` with ``bypass_db_check=True``.

        Raises:
            Exception: when a returned proxy id has no ``p_<id>`` key in
                redis.
        """
        cache = self.redis_mgr.redis
        seen_set_key = "%s%s" % (NEW_QUEUE_PROXY_IDS_PREFIX, queue.domain)
        already_fetched = list(cache.smembers(seen_set_key))
        candidate_ids = self.db_mgr.get_unused_proxy_ids(
            queue, count, already_fetched)

        for pid in candidate_ids:
            # The id is recorded as fetched before any validation happens.
            cache.sadd(seen_set_key, pid)

            proxy_key = 'p_%s' % pid
            if not cache.exists(proxy_key):
                raise Exception(
                    "Error while trying to create a new detail: proxy key does not exist in redis cache for proxy id %s"
                    % pid)

            detail_key = 'd_%s_%s' % (queue.queue_key, proxy_key)
            if cache.exists(detail_key):
                # A detail for this queue/proxy pair is already cached.
                continue

            fresh_detail = Detail(
                proxy_id=pid,
                proxy_key=proxy_key,
                queue_id=queue.id(),
                queue_key=queue.queue_key,
            )
            self.redis_mgr.register_detail(fresh_detail, bypass_db_check=True)
Beispiel #4
0
    def dequeue(self):
        """Pop the key at the head of the redis list and load its detail.

        Returns:
            A ``Detail`` built from the redis hash stored under the popped
            key.

        Raises:
            RedisDetailQueueEmpty: when ``is_empty()`` reports that the queue
                has no entries.
        """
        if not self.is_empty():
            head_key = self.redis.lpop(self.redis_key)
            stored_fields = self.redis.hgetall(head_key)
            return Detail(**stored_fields)

        raise RedisDetailQueueEmpty(
            "No proxies available for queue key %s" % self.queue.queue_key)
Beispiel #5
0
 def get_detail_by_queue_and_proxy(self, queue_id, proxy_id):
     """Look up the detail row for one (queue, proxy) pair.

     Args:
         queue_id: value matched against ``details.queue_id``.
         proxy_id: value matched against ``details.proxy_id``.

     Returns:
         A ``Detail`` built from the first row the query yields, or ``None``
         when the query yields no row.
     """
     query = "SELECT * FROM details WHERE proxy_id=%(proxy_id)s AND queue_id=%(queue_id)s"
     params = {'queue_id': queue_id, 'proxy_id': proxy_id}
     cursor = self.cursor()
     try:
         cursor.execute(query, params)
         detail_data = cursor.fetchone()
         if detail_data is None:
             return None
         return Detail(**detail_data)
     finally:
         # Always release the cursor, including when execute(), fetchone()
         # or the Detail constructor raises.
         cursor.close()
Beispiel #6
0
    def new_proxy(self, address, port, protocol='http'):
        """Register a proxy and its seed-queue detail in the cache if new.

        When the redis manager already knows a proxy with this address and
        port, only a warning is logged.  Otherwise the proxy is registered,
        a ``Detail`` tying it to ``SEED_QUEUE_ID`` is registered, and the
        detail's key is added to the ``NEW_DETAILS_SET_KEY`` redis set.

        Args:
            address: proxy address, passed to ``Proxy`` and to the lookup.
            port: proxy port, passed to ``Proxy`` and to the lookup.
            protocol: proxy protocol passed to ``Proxy``; defaults to
                ``'http'``.

        Returns:
            None.
        """
        existing = self.redis_mgr.get_proxy_by_address_and_port(address, port)
        if existing is not None:
            # Logger.warn is a deprecated alias; warning() with lazy %-args
            # renders the same message.
            logger.warning(
                "proxy with address %s and port %s already exists in the cache/db.",
                address, port)
            return

        new_proxy = self.redis_mgr.register_proxy(
            Proxy(address, port, protocol))

        new_detail = Detail(proxy_key=new_proxy.proxy_key,
                            queue_id=SEED_QUEUE_ID)
        try:
            self.redis_mgr.register_detail(new_detail)
            self.redis_mgr.redis.sadd(NEW_DETAILS_SET_KEY,
                                      new_detail.detail_key)
        except DetailExistsException:
            # Deliberate best effort: a detail that is already registered is
            # left untouched and is not added to the new-details set.
            pass
Beispiel #7
0
    def sync_to_db(self):
        """Write cache-only queues, proxies and details to the DB, then flush redis.

        Steps, in order:

        1. Load every queue stored under ``qt_*`` and every proxy stored
           under ``pt_*`` from redis.
        2. Collect the detail keys matching ``d_qt*`` or ``d_*pt*``, add each
           to the ``NEW_DETAILS_SET_KEY`` set and load the details.
        3. Insert the queues and proxies, remembering for each cache key the
           first column of the row the insert leaves on the cursor
           (presumably the generated id -- confirm against ``db_mgr``).
        4. Insert the new details, filling in missing ``proxy_id`` /
           ``queue_id`` from those maps.  Details whose proxy insert hit a
           unique violation are skipped.
        5. Update every detail listed in the ``changed_details`` set but not
           in the ``new_details`` set.
        6. Call ``flushall()`` on redis.

        Returns:
            True once the sync has completed.

        Raises:
            Exception: when a changed detail has no ``queue_id`` or
                ``proxy_id``.  Lookup and DB errors propagate unchanged; the
                cursor is closed and redis is NOT flushed in that case.
        """
        logging.info("STARTING SYNC")
        redis = self.redis_mgr.redis

        new_queues = [Queue(**redis.hgetall(q)) for q in redis.keys("qt_*")]
        new_proxies = [Proxy(**redis.hgetall(p)) for p in redis.keys("pt_*")]

        # A key can match both patterns, hence the set.
        new_detail_keys = set(redis.keys('d_qt*') + redis.keys('d_*pt*'))
        for ndk in new_detail_keys:
            redis.sadd(NEW_DETAILS_SET_KEY, ndk)

        new_details = [Detail(**redis.hgetall(d)) for d in new_detail_keys]

        cursor = self.db_mgr.cursor()
        try:
            queue_keys_to_id = {}
            proxy_keys_to_id = {}
            for q in new_queues:
                self.db_mgr.insert_queue(q, cursor)
                queue_keys_to_id[q.queue_key] = cursor.fetchone()[0]

            for p in new_proxies:
                try:
                    self.db_mgr.insert_proxy(p, cursor)
                    proxy_keys_to_id[p.proxy_key] = cursor.fetchone()[0]
                except psycopg2.errors.UniqueViolation:
                    # The proxy already exists in the DB; None marks its
                    # details to be skipped below.
                    # NOTE(review): unless the connection is in autocommit
                    # mode, PostgreSQL aborts the transaction after this
                    # error -- confirm how db_mgr configures it.
                    proxy_keys_to_id[p.proxy_key] = None

            for d in new_details:
                if d.proxy_id is None:
                    new_proxy_id = proxy_keys_to_id[d.proxy_key]
                    if new_proxy_id is None:
                        continue
                    d.proxy_id = new_proxy_id
                if d.queue_id is None:
                    d.queue_id = queue_keys_to_id[d.queue_key]
                self.db_mgr.insert_detail(d, cursor)

            # NOTE(review): these set names are literals while the keys above
            # were added under NEW_DETAILS_SET_KEY -- confirm they agree.
            changed_detail_keys = redis.sdiff('changed_details', 'new_details')
            changed_details = [
                Detail(**redis.hgetall(d)) for d in changed_detail_keys
            ]

            for changed in changed_details:
                if changed.queue_id is None or changed.proxy_id is None:
                    raise Exception(
                        "Unable to get a queue_id or proxy_id for an existing detail"
                    )

                self.db_mgr.update_detail(changed)
        finally:
            # Release the cursor on failure as well as on success.
            cursor.close()

        # Only reached when every insert/update above succeeded.
        redis.flushall()
        logging.info("SYNC COMPLETE")
        return True
Beispiel #8
0
 def get_all_queue_details(self, queue_key):
     """Load every detail whose redis key matches ``d_<queue_key>*``.

     Returns:
         A list with one ``Detail`` per matching key, in the order redis
         returned the keys; empty when nothing matches.
     """
     loaded = []
     for detail_key in self.redis.keys('d_%s*' % queue_key):
         stored_fields = self.redis.hgetall(detail_key)
         loaded.append(Detail(**stored_fields))
     return loaded
Beispiel #9
0
 def get_detail(self, redis_detail_key):
     """Build a ``Detail`` from the redis hash stored at ``redis_detail_key``."""
     stored_fields = self.redis.hgetall(redis_detail_key)
     return Detail(**stored_fields)