Exemple #1
0
 def __init__(self, usage):
     """Look up the score, ttl and speed queues for ``usage``.

     :param usage: one of SCORE_MAPS's keys; any other value is
     replaced by 'https'
     """
     # An unknown usage falls back to 'https' without emitting a warning.
     key = usage if usage in SCORE_MAPS else 'https'
     self.score_queue, self.ttl_queue, self.speed_queue = (
         SCORE_MAPS.get(key),
         TTL_MAPS.get(key),
         SPEED_MAPS.get(key),
     )
Exemple #2
0
 def __init__(self, usage):
     """Store the SCORE_MAPS, TTL_MAPS and SPEED_MAPS entries for ``usage``.

     :param usage: one of SCORE_MAPS's keys; when it is not a key the
     'https' entries are used instead
     """
     if usage not in SCORE_MAPS:
         # invalid usage: quietly switch to the https entries
         usage = 'https'
     lookups = (
         ('score_queue', SCORE_MAPS),
         ('ttl_queue', TTL_MAPS),
         ('speed_queue', SPEED_MAPS),
     )
     for attr_name, mapping in lookups:
         setattr(self, attr_name, mapping.get(usage))
Exemple #3
0
 def __init__(self, usage, strategy='robin', length=10,
              fast_response=5, redis_args=None):
     """
     Store the queue entries for ``usage``, the balancing settings, an
     empty pool and a redis connection.

     :param usage: one of SCORE_MAPS's keys, such as https
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param length: if total available proxies are less than length,
     you must refresh pool
     :param fast_response: if you use greedy strategy, it will be needed
     to decide whether a proxy ip should continue to be used
     :param redis_args: redis connection args, it's a dict, the keys
     include host, port, db and password
     """
     self.score_queue, self.ttl_queue, self.speed_queue = (
         SCORE_MAPS.get(usage),
         TTL_MAPS.get(usage),
         SPEED_MAPS.get(usage),
     )
     self.strategy = strategy
     # the pool is used as a FIFO queue
     self.pool = []
     self.length = length
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     # Only a dict is forwarded as keyword arguments; any other value
     # (including None) results in a call without arguments.
     conn_kwargs = redis_args if isinstance(redis_args, dict) else {}
     self.conn = get_redis_conn(**conn_kwargs)
Exemple #4
0
 def __init__(self, task):
     """
     Store the queue entries for ``task`` and locate the squid executable.

     ``self.squid_path`` is always defined after construction: it is
     SQUID_BIN_PATH when that setting is truthy, otherwise the ``squid``
     executable found on the environment path, or None when no such
     executable exists (a notice is printed in that case).

     :param task: one of SCORE_MAPS's keys; any other value is replaced
     by 'https' after printing a notice
     """
     import shutil

     if task not in SCORE_MAPS:
         print('task value is invalid, https task will be used')
         task = 'https'
     self.score_queue = SCORE_MAPS.get(task)
     self.ttl_queue = TTL_MAPS.get(task)
     self.speed_queue = SPEED_MAPS.get(task)
     self.template_path = SQUID_TEMPLATE_PATH
     self.conf_path = SQUID_CONF_PATH
     # shutil.which searches the environment path without spawning a shell
     # and returns None instead of raising, so squid_path can no longer be
     # left unassigned when squid is missing.
     self.squid_path = SQUID_BIN_PATH or shutil.which('squid')
     if not self.squid_path:
         print(
             'no squid is installed on this machine, or the installed dir '
             'is not contained in environment path')
Exemple #5
0
 def set_item_queue(self, url, proxy, score, incr, speed=0):
     """
     Build the score, verified-time and speed items for ``proxy``.

     :param url: url whose scheme selects the task list: self.https_tasks
     when it starts with 'https', self.http_tasks otherwise
     :param proxy: stored as the ``url`` field of every item
     :param score: forwarded to ProxyScoreItem
     :param incr: forwarded to all three item types
     :param speed: forwarded to ProxySpeedItem as ``response_time``
     :return: a list holding, for each selected task, a score item, a
     verified-time item and a speed item (in that order), each tagged
     with its ``queue``
     """
     # Prefix test instead of a substring test: an http url that merely
     # contains "https" in its host, path or query is not an https url.
     tasks = self.https_tasks if url.startswith('https') else self.http_tasks
     # One timestamp per call so every item shares the same verified_time.
     verified_time = int(time.time())
     items = []
     for task in tasks:
         score_item = ProxyScoreItem(url=proxy, score=score, incr=incr)
         ttl_item = ProxyVerifiedTimeItem(
             url=proxy, verified_time=verified_time, incr=incr)
         speed_item = ProxySpeedItem(
             url=proxy, response_time=speed, incr=incr)
         score_item['queue'] = SCORE_MAPS.get(task)
         ttl_item['queue'] = TTL_MAPS.get(task)
         speed_item['queue'] = SPEED_MAPS.get(task)
         items.append(score_item)
         items.append(ttl_item)
         items.append(speed_item)
     return items
Exemple #6
0
 def set_item_queue(self, url, proxy, score, incr, speed=0):
     """
     Create the queue-tagged items describing one check of ``proxy``.

     :param url: url used to pick the task list; a url beginning with
     'https' selects self.https_tasks, anything else self.http_tasks
     :param proxy: value stored in the ``url`` field of each item
     :param score: value passed to ProxyScoreItem
     :param incr: value passed to every item
     :param speed: value passed to ProxySpeedItem as ``response_time``
     :return: list of items, three per selected task, ordered score,
     verified time, speed
     """
     # Check the prefix only; a substring test would also match http urls
     # that carry "https" somewhere after the scheme.
     if url.startswith('https'):
         tasks = self.https_tasks
     else:
         tasks = self.http_tasks
     # Read the clock once so all items of this call agree on the time.
     now = int(time.time())
     items = []
     for task in tasks:
         score_item = ProxyScoreItem(url=proxy, score=score, incr=incr)
         score_item['queue'] = SCORE_MAPS.get(task)
         ttl_item = ProxyVerifiedTimeItem(
             url=proxy, verified_time=now, incr=incr)
         ttl_item['queue'] = TTL_MAPS.get(task)
         speed_item = ProxySpeedItem(
             url=proxy, response_time=speed, incr=incr)
         speed_item['queue'] = SPEED_MAPS.get(task)
         items.extend((score_item, ttl_item, speed_item))
     return items
Exemple #7
0
 def __init__(self, usage, strategy='robin', length=10, fast_response=5):
     """
     Store the queue entries for ``usage``, the balancing settings, an
     empty pool and a redis connection.

     :param usage: one of SCORE_MAPS's keys, such as https
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param length: if total available proxies are less than length,
     you must refresh pool
     :param fast_response: if you use greedy strategy, it will be needed
     to decide whether a proxy ip should continue to be used
     """
     # entries looked up by usage
     self.score_queue = SCORE_MAPS.get(usage)
     self.ttl_queue = TTL_MAPS.get(usage)
     self.speed_queue = SPEED_MAPS.get(usage)

     # balancing settings
     self.strategy = strategy
     self.pool = []  # used as a FIFO queue
     self.length = length
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]

     self.conn = get_redis_conn()