Code Example #1
File: py_cli.py  Project: Alien-Leon/phone_analysis
 def __init__(self, usage, strategy='robin', length=10,
              fast_response=5, redis_args=None):
     """
     :param usage: one of SCORE_MAPS's keys, such as https
     :param length: if total available proxies are less than length,
     you must refresh pool
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param fast_response: if you use greedy strategy, it will be needed to
     decide whether a proxy ip should continue to be used
     :param redis_args: redis connection args; it's a dict whose keys include host, port, db and password
     """
     self.score_queue = SCORE_MAPS.get(usage)
     self.ttl_queue = TTL_MAPS.get(usage)
     self.speed_queue = SPEED_MAPS.get(usage)
     self.strategy = strategy
     # pool is a queue, which is FIFO
     self.pool = list()
     self.length = length
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     if isinstance(redis_args, dict):
         self.conn = get_redis_conn(**redis_args)
     else:
         self.conn = get_redis_conn()
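
A minimal usage sketch of the constructor above, assuming the enclosing class is a ProxyFetcher-style client like the one in the later haipproxy examples (the class name, import path and argument values are assumptions, not taken from this project):

from py_cli import ProxyFetcher   # import path is an assumption

fetcher = ProxyFetcher(
    'https',                      # usage: one of SCORE_MAPS's keys
    strategy='greedy',            # or 'robin'
    length=10,                    # refresh the pool when fewer proxies remain
    fast_response=5,              # response-time threshold used by the greedy strategy
    redis_args={'host': '127.0.0.1', 'port': 6379, 'db': 0, 'password': None},
)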
Code Example #2
File: py_cli.py  Project: zhiyue/haipproxy
 def __init__(self,
              usage,
              strategy='robin',
              fast_response=5,
              redis_args=None):
     """
     :param usage: one of SCORE_MAPS's keys, such as https
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param fast_response: if you use greedy strategy, it will be needed to
     decide whether a proxy ip should continue to be used
     :param redis_args: redis connection args; it's a dict whose keys
     include host, port, db and password
     """
     # if there are multiple parent classes, super() only delegates to the first parent according to the MRO
     super().__init__(usage)
     self.strategy = strategy
     # pool is a queue, which is FIFO
     self.pool = list()
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     if isinstance(redis_args, dict):
         self.conn = get_redis_conn(**redis_args)
     else:
         self.conn = get_redis_conn()
     t = threading.Thread(target=self._refresh_periodically)
     t.setDaemon(True)
     t.start()
Code Example #3
File: py_cli.py  Project: yb123speed/haipproxy
 def __init__(self, usage, strategy='robin', fast_response=5, redis_args=None):
     """
     :param usage: one of SCORE_MAPS's keys, such as https
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param fast_response: if you use greedy strategy, it will be needed to
     decide whether a proxy ip should continue to be used
     :param redis_args: redis connection args; it's a dict whose keys
     include host, port, db and password
     """
     # if there are multiple parent classes, super() only delegates to the first parent according to the MRO
     super().__init__(usage)
     self.strategy = strategy
     # pool is a queue, which is FIFO
     self.pool = list()
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     if isinstance(redis_args, dict):
         self.conn = get_redis_conn(**redis_args)
     else:
         self.conn = get_redis_conn()
     t = threading.Thread(target=self._refresh_periodically)
     t.setDaemon(True)
     t.start()
Code Example #4
File: py_cli.py  Project: suoyiguke/py_popularize
 def __init__(self,
              usage,
              strategy='robin',
              fast_response=5,
              score_map=SCORE_MAPS,
              ttl_map=TTL_MAPS,
              speed_map=SPEED_MAPS,
              longest_response_time=LONGEST_RESPONSE_TIME,
              lowest_score=LOWEST_SCORE,
              ttl_validated_resource=TTL_VALIDATED_RESOURCE,
              min_pool_size=LOWEST_TOTAL_PROXIES,
              all_data=DATA_ALL,
              redis_args=None):
     """
     :param usage: one of SCORE_MAPS's keys, such as https
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param fast_response: if you use greedy strategy, it will be needed to
     decide whether a proxy ip should continue to be used
     :param score_map: score map of your project, default value is SCORE_MAPS in haipproxy.config.settings
     :param ttl_map: ttl map of your project, default value is TTL_MAPS in haipproxy.config.settings
     :param speed_map: speed map of your project, default value is SPEED_MAPS in haipproxy.config.settings
     :param ttl_validated_resource: time window (in minutes) within which a proxy must have been validated
     :param min_pool_size: min pool size of self.pool
     :param all_data: all proxies are stored in this set
     :param redis_args: redis connection args; it's a dict whose keys include host, port, db and password
     """
     # if there are multiple parent classes, super() only delegates to the first parent according to the MRO
     if usage not in score_map.keys():
         # client_logger.warning('task value is invalid, https task will be used')
         usage = 'https'
     score_queue = score_map.get(usage)
     ttl_queue = ttl_map.get(usage)
     speed_queue = speed_map.get(usage)
     super().__init__(score_queue, ttl_queue, speed_queue,
                      longest_response_time, lowest_score,
                      ttl_validated_resource, min_pool_size)
     self.strategy = strategy
     # pool is a FIFO queue
     self.pool = list()
     self.min_pool_size = min_pool_size
     self.fast_response = fast_response
     self.all_data = all_data
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     if isinstance(redis_args, dict):
         self.conn = get_redis_conn(**redis_args)
     else:
         self.conn = get_redis_conn()
     t = threading.Thread(target=self._refresh_periodically)
     t.setDaemon(True)
     t.start()
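
_refresh_periodically is started as a daemon thread in several of these constructors but is not part of the excerpts; the loop below is only a rough sketch of that idea (the refresh helper name and the sleep interval are assumptions, not the project's real code):

import time

def _refresh_periodically(self):
    # Illustrative only: keep topping up self.pool whenever it falls below
    # the minimum size, then sleep before checking again.
    while True:
        if len(self.pool) < self.min_pool_size:
            self.refresh_pool()   # hypothetical helper that reads proxies back from Redis
        time.sleep(60)            # check interval is an assumption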
Code Example #5
File: scheduler.py  Project: polokobe/haipproxy
    def schedule_task_with_lock(self, task):
        """Crawler scheduler filters tasks according to task type"""
        if not task.get('enable'):
            return None
        task_queue = task.get('task_queue')
        if task_queue not in self.task_queues:
            return None

        conn = get_redis_conn()
        task_name = task.get('name')
        internal = task.get('internal')
        urls = task.get('resource')
        lock_identifier = acquire_lock(conn, task_name)
        if not lock_identifier:
            return False

        pipe = conn.pipeline(True)
        try:
            now = int(time.time())
            pipe.hget(TIMER_RECORDER, task_name)
            r = pipe.execute()[0]
            if not r or (now - int(r.decode('utf-8'))) >= internal * 60:
                pipe.lpush(task_queue, *urls)
                pipe.hset(TIMER_RECORDER, task_name, now)
                pipe.execute()
                # scheduler_logger.info('crawler task {} has been stored into redis successfully'.format(task_name))
                return True
            else:
                return None
        finally:
            release_lock(conn, task_name, lock_identifier)
Code Example #6
File: scheduler.py  Project: yb123speed/haipproxy
    def schedule_task_with_lock(self, task):
        """Crawler scheduler filters tasks according to task type"""
        if not task.get('enable'):
            return None
        task_queue = task.get('task_queue')
        if task_queue not in self.task_queues:
            return None

        conn = get_redis_conn()
        task_name = task.get('name')
        interval = task.get('interval')
        urls = task.get('resource')
        lock_identifier = acquire_lock(conn, task_name)
        if not lock_identifier:
            return False

        pipe = conn.pipeline(True)
        try:
            now = int(time.time())
            pipe.hget(TIMER_RECORDER, task_name)
            r = pipe.execute()[0]
            if not r or (now - int(r.decode('utf-8'))) >= interval * 60:
                pipe.lpush(task_queue, *urls)
                pipe.hset(TIMER_RECORDER, task_name, now)
                pipe.execute()
                # scheduler_logger.info('crawler task {} has been stored into redis successfully'.format(task_name))
                return True
            else:
                return None
        finally:
            release_lock(conn, task_name, lock_identifier)
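
acquire_lock and release_lock are not included in these excerpts; the Redis SET NX EX pattern below is one common way such helpers are written (a sketch under that assumption, not the project's actual implementation):

import time
import uuid

def acquire_lock(conn, lock_name, acquire_timeout=10, lock_timeout=15):
    # Try for acquire_timeout seconds to store a unique token under lock:<name>;
    # the token lets the caller release only a lock it still owns.
    identifier = str(uuid.uuid4())
    end = time.time() + acquire_timeout
    while time.time() < end:
        if conn.set('lock:' + lock_name, identifier, nx=True, ex=lock_timeout):
            return identifier
        time.sleep(0.01)
    return False

def release_lock(conn, lock_name, identifier):
    # Delete the lock only if we still own it (a Lua script would make the
    # check-and-delete atomic; this simple version is only a sketch).
    key = 'lock:' + lock_name
    value = conn.get(key)
    if value is not None and value.decode('utf-8') == identifier:
        conn.delete(key)
        return True
    return False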
Code Example #7
File: sales_save_data.py  Project: zhanrendong/jkzx1
def save_old_sales_data():
    r = utils.get_redis_conn(domain)
    sales_result = r.get('data:sales')
    sales = JSONDecoder().decode(bytes.decode(sales_result))
    sub_branch_result = r.get('data:sub_branch')
    sub_branch = JSONDecoder().decode(bytes.decode(sub_branch_result))

    headers = utils.login(domain, login_body)
    sub_map = {}
    branch_map = {}
    for branch in sub_branch:
        sub_name = branch['subsidiary']
        if sub_name not in sub_map:
            sub_result = utils.call_request(domain, 'reference-data-service',
                                            'refSubsidiaryCreate',
                                            {'subsidiaryName': sub_name},
                                            headers)['result']
            sub_map[sub_name] = sub_result['subsidiaryId']
        branch_name = branch['branch']
        branch_result = utils.call_request(
            domain, 'reference-data-service', 'refBranchCreate', {
                'subsidiaryId': sub_map[sub_name],
                'branchName': branch_name
            }, headers)['result']
        branch_map[branch_name] = branch_result['branchId']
    for sale in sales:
        sale_name = sale['salesName']
        branch_id = branch_map[sale['branch']]
        utils.call_request(domain, 'reference-data-service', 'refSalesCreate',
                           {
                               'branchId': branch_id,
                               'salesName': sale_name
                           }, headers)
Code Example #8
File: sales_get_data.py  Project: zhanrendong/jkzx1
def get_old_sales_data():
    headers = utils.login(domain, login_body)
    sales = utils.call_request(domain, 'reference-data-service',
                               'refSalesList', {}, headers)['result']
    sub_branch = utils.call_request(domain, 'reference-data-service',
                                    'refSubsidiaryBranchList', {},
                                    headers)['result']
    print(sub_branch)
    print(sales)

    r = utils.get_redis_conn(domain)
    sales_result = JSONEncoder().encode(sales)
    sub_branch_result = JSONEncoder().encode(sub_branch)
    r.set('data:sales', str(sales_result))
    r.set('data:sub_branch', str(sub_branch_result))
Code Example #9
File: py_cli.py  Project: yssource/haipproxy
 def __init__(self, usage, strategy='robin', length=10, fast_response=5):
     """
     :param usage: one of SCORE_MAPS's keys, such as https
     :param length: if total available proxies are less than length,
     you must refresh pool
     :param strategy: the load balance of proxy ip, the value is
     one of ['robin', 'greedy']
     :param fast_response: if you use greedy strategy, it will be needed to
     decide whether a proxy ip should continue to be used
     """
     self.score_queue = SCORE_MAPS.get(usage)
     self.ttl_queue = TTL_MAPS.get(usage)
     self.speed_queue = SPEED_MAPS.get(usage)
     self.strategy = strategy
     # pool is a queue, which is FIFO
     self.pool = list()
     self.length = length
     self.fast_response = fast_response
     self.handlers = [RobinStrategy(), GreedyStrategy()]
     self.conn = get_redis_conn()
Code Example #10
File: squid.py  Project: jedicode/WeiboProject
    def update_conf(self):
        conn = get_redis_conn()
        start_time = int(time.time()) - TTL_VALIDATED_RESOURCE * 60
        pipe = conn.pipeline(False)
        pipe.zrevrangebyscore(self.score_queue, '+inf', LOWEST_SCORE)
        pipe.zrevrangebyscore(self.ttl_queue, '+inf', start_time)
        pipe.zrangebyscore(self.speed_queue, 0, 1000 * LONGEST_RESPONSE_TIME)
        scored_proxies, ttl_proxies, speed_proxies = pipe.execute()
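        # note: this 'and' chain yields speed_proxies when all three lists are
        # non-empty (or the first empty list otherwise); it is not an intersection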
        proxies = scored_proxies and ttl_proxies and speed_proxies

        if not proxies:
            proxies = scored_proxies and ttl_proxies

        if not proxies:
            proxies = ttl_proxies

        proxies = decode_all(proxies)
        conts = list()
        with open(self.template_path, 'r') as fr, open(self.conf_path,
                                                       'w') as fw:
            original_conf = fr.read()
            if not proxies:
                fw.write(original_conf)
                client_logger.info('no proxies got at this turn')
            else:
                conts.append(original_conf)
                # if two proxies use the same ip and different ports and no name
                # is assigned, a cache_peer error will be raised.
                for index, proxy in enumerate(proxies):
                    _, ip_port = proxy.split('://')
                    ip, port = ip_port.split(':')
                    conts.append(
                        self.default_conf_detail.format(ip, port, index))
                conts.extend(self.other_confs)
                conf = '\n'.join(conts)
                fw.write(conf)
        # in docker, execute with shell will fail
        subprocess.call([self.squid_path, '-k', 'reconfigure'], shell=False)
        client_logger.info('update squid conf successfully')
Code Example #11
File: squid.py  Project: yb123speed/haipproxy
 def update_conf(self):
     conn = get_redis_conn()
     proxies = self.get_available_proxies(conn)
     conts = list()
     with open(self.template_path, 'r') as fr, open(self.conf_path, 'w') as fw:
         original_conf = fr.read()
         if not proxies:
             fw.write(original_conf)
             # client_logger.info('no proxies got at this turn')
         else:
             conts.append(original_conf)
             # if two proxies use the same ip and different ports and no name
             # is assigned, cache_peer error will raise.
             for index, proxy in enumerate(proxies):
                 _, ip_port = proxy.split('://')
                 ip, port = ip_port.split(':')
                 conts.append(self.default_conf_detail.format(ip, port, index))
             conts.extend(self.other_confs)
             conf = '\n'.join(conts)
             fw.write(conf)
     # in docker, execute with shell will fail
     subprocess.call([self.squid_path, '-k', 'reconfigure'], shell=False)
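
default_conf_detail and other_confs are class attributes that do not appear in these excerpts; judging by how they are formatted above (ip, port, index), the template is presumably a Squid cache_peer line of roughly this shape (an assumption, not the project's exact template):

# Assumed shape of the attributes used by update_conf(); values are illustrative.
default_conf_detail = ('cache_peer {} parent {} 0 no-query proxy-only '
                       'round-robin name=proxy-{}')
other_confs = ['never_direct allow all']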
Code Example #12
    def schedule_task_with_lock(self, task):
        """Validator scheduler filters tasks according to task name
        since it's task name stands for task type"""
        if not task.get('enable'):
            return None
        task_queue = task.get('task_queue')
        if task_queue not in self.task_queues:
            return None

        conn = get_redis_conn()
        internal = task.get('internal')
        task_name = task.get('name')
        resource_queue = task.get('resource')
        lock_identifier = acquire_lock(conn, task_name)
        if not lock_identifier:
            return False
        pipe = conn.pipeline(True)
        try:
            now = int(time.time())
            pipe.hget(TIMER_RECORDER, task_name)
            pipe.zrevrangebyscore(resource_queue, '+inf', '-inf')
            r, proxies = pipe.execute()
            if not r or (now - int(r.decode('utf-8'))) >= internal * 60:
                if not proxies:
                    print('fetched no proxies from task {}'.format(task_name))
                    return None

                pipe.sadd(task_queue, *proxies)
                pipe.hset(TIMER_RECORDER, task_name, now)
                pipe.execute()
                print(
                    'validator task {} has been stored into redis successfully'
                    .format(task_name))
                return True
            else:
                return None
        finally:
            release_lock(conn, task_name, lock_identifier)
Code Example #13
File: squid.py  Project: zhiyue/haipproxy
 def update_conf(self):
     conn = get_redis_conn()
     proxies = self.get_available_proxies(conn)
     conts = list()
     with open(self.template_path, 'r') as fr, open(self.conf_path, 'w') as fw:
         original_conf = fr.read()
         if not proxies:
             fw.write(original_conf)
             # client_logger.info('no proxies got at this turn')
         else:
             conts.append(original_conf)
             # if two proxies use the same ip and different ports and no name
             # is assigned, a cache_peer error will be raised.
             for index, proxy in enumerate(proxies):
                 _, ip_port = proxy.split('://')
                 ip, port = ip_port.split(':')
                 conts.append(self.default_conf_detail.format(ip, port, index))
             conts.extend(self.other_confs)
             conf = '\n'.join(conts)
             fw.write(conf)
     # in docker, execute with shell will fail
     subprocess.call([self.squid_path, '-k', 'reconfigure'], shell=False)
     # client_logger.info('update squid conf successfully')
Code Example #14
File: scheduler.py  Project: yb123speed/haipproxy
    def schedule_task_with_lock(self, task):
        """Validator scheduler filters tasks according to task name
        since its task name stands for task type"""
        if not task.get('enable'):
            return None
        task_queue = task.get('task_queue')
        if task_queue not in self.task_queues:
            return None

        conn = get_redis_conn()
        interval = task.get('interval')
        task_name = task.get('name')
        resource_queue = task.get('resource')
        lock_identifier = acquire_lock(conn, task_name)
        if not lock_identifier:
            return False
        pipe = conn.pipeline(True)
        try:
            now = int(time.time())
            pipe.hget(TIMER_RECORDER, task_name)
            pipe.zrevrangebyscore(resource_queue, '+inf', '-inf')
            r, proxies = pipe.execute()
            if not r or (now - int(r.decode('utf-8'))) >= interval * 60:
                if not proxies:
                    # scheduler_logger.warning('fetched no proxies from task {}'.format(task_name))
                    print('fetched no proxies from task {}'.format(task_name))
                    return None

                pipe.sadd(task_queue, *proxies)
                pipe.hset(TIMER_RECORDER, task_name, now)
                pipe.execute()
                # scheduler_logger.info('validator task {} has been stored into redis successfully'.format(task_name))
                return True
            else:
                return None
        finally:
            release_lock(conn, task_name, lock_identifier)
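
The task dicts consumed by schedule_task_with_lock are read key by key above; a hedged example of the fields it expects (the field names come from the .get() calls, the values are made up):

task = {
    'name': 'https',                  # used as the lock name and as the TIMER_RECORDER field
    'enable': 1,                      # tasks with a falsy 'enable' are skipped
    'task_queue': 'validator:https',  # must be listed in self.task_queues
    'interval': 20,                   # minutes between consecutive runs
    'resource': 'validated:https',    # zset the candidate proxies are read from
}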
Code Example #15
 def open_spider(self, spider):
     self.redis_con = get_redis_conn(db=META_DATA_DB)
Code Example #16
File: controller.py  Project: suoyiguke/py_popularize
def init_db():
    redis_client = get_redis_conn(**redis_args)
    return redis_client
Code Example #17
File: zhihu_spider.py  Project: yb123speed/haipproxy
def init_db():
    redis_client = get_redis_conn(db=1)
    return redis_client
Code Example #18
File: crawler.py  Project: yb123speed/haipproxy
 def __init__(self, retries=5):
     self.retries = retries
     self.fetcher = ProxyFetcher('zhihu', strategy='greedy')
     self.conn = get_redis_conn(db=1)
     self.scheme = 'https'
Code Example #19
File: zhihu_spider.py  Project: zqqq/haipproxy
def init_db():
    redis_client = get_redis_conn(db=1)
    return redis_client
Code Example #20
File: crawler.py  Project: suoyiguke/py_popularize
 def __init__(self, retries=5):
     self.retries = retries
     self.fetcher = ProxyFetcher('zhihu', **self.client_configs)
     self.conn = get_redis_conn(**self.redis_args)
     self.scheme = 'https'
Code Example #21
 def __init__(self, retries=5):
     self.retries = retries
     self.fetcher = ProxyFetcher('zhihu', strategy='greedy')
     self.conn = get_redis_conn(db=1)
     self.scheme = 'https'
Code Example #22
File: redis_spiders.py  Project: yb123speed/haipproxy
    def setup_redis(self, crawler):
        """send signals when the spider is free"""
        self.redis_batch_size = SPIDER_FEED_SIZE
        self.redis_con = get_redis_conn()

        crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
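
The spider_idle callback connected here is not shown; in Redis-fed Scrapy spiders it typically schedules the next batch of requests and raises DontCloseSpider so the crawler stays alive. A sketch under that assumption (the helper name is hypothetical):

from scrapy.exceptions import DontCloseSpider

def spider_idle(self):
    # Sketch only: pull another batch of start requests from Redis and keep
    # the spider running instead of letting it close when the queue is empty.
    self.schedule_next_requests()   # hypothetical helper that reads from self.redis_con
    raise DontCloseSpider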
Code Example #23
    def setup_redis(self, crawler):
        """send signals when the spider is free"""
        self.redis_batch_size = SPIDER_FEED_SIZE
        self.redis_con = get_redis_conn()

        crawler.signals.connect(self.spider_idle, signal=signals.spider_idle)
Code Example #24
                for i in redis_conn.lrange(word + '_url_list', 0, -1)
            ]
            tfidf_list = [
                float(i.decode())
                for i in redis_conn.lrange(word + '_tfidf_list', 0, -1)
            ]
            for url in url_list:
                if url in result_dict:
                    result_dict[url] += (1 + tfidf_list[url_list.index(url)])
                else:
                    result_dict[url] = tfidf_list[url_list.index(url)]
    sorted_url_list = [
        [url_tuple[0], redis_conn.hget('url2title', url_tuple[0]).decode()]
        for url_tuple in sorted(result_dict.items(), key=lambda d: d[1], reverse=True)[:20]
    ]

    for url in sorted_url_list:
        cur.execute("""select content from url_hash where url_hash=%s""",
                    (mymd5(url[0]), ))
        url.append(find_keyword(cur.fetchone()[0], keyword_list))

    return jsonify(
        dict(lengths=len(sorted_url_list),
             urls=sorted_url_list,
             keyword=keyword,
             keyword_list=keyword_list)), 200


if __name__ == '__main__':
    redis_conn = get_redis_conn(sys.argv[1])
    mysql_conn = get_MySQL_conn(sys.argv[1])
    all_word_set = redis_conn.smembers('all_word_list')
    app.run(debug=True)
Code Example #25
# encoding=utf-8
import sys
sys.path.append("/root/audit")
from utils import get_redis_conn
from config import *

################################################
#
# Used to manually add billing (order) data; the data comes from the
# nginx+lua access log.
# Log file: '/var/log/nginx/charging_resource.log'
#
################################################

redis_con = get_redis_conn()

data = 'POST++instance++5b26203117ff41cbae690248106b96cd++{"server": {"security_groups": [{"name": "default"}], "OS-DCF:diskConfig": "MANUAL", "id": "d6bd688f-f77c-4a06-adb8-5ea8728723cf", "links": [{"href": "http://nova/v2.1/daa89f2bdee1431abd2794fd38598da9/servers/d6bd688f-f77c-4a06-adb8-5ea8728723cf", "rel": "self"}, {"href": "http://nova/daa89f2bdee1431abd2794fd38598da9/servers/d6bd688f-f77c-4a06-adb8-5ea8728723cf", "rel": "bookmark"}], "adminPass": "******"}}'


def add_data2redis(data):
    redis_con.sadd(charging_data, data)


if __name__ == '__main__':
    add_data2redis(data=data)
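
For reference, add_data2redis stores each entry in a Redis set, so the stored records can be read back with smembers (charging_data is the key imported from config in the snippet above):

# Entries come back as bytes from redis-py.
for raw in redis_con.smembers(charging_data):
    print(raw.decode('utf-8'))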

Code Example #26
 def __init__(self, proxy_mode=1, retries=5):
     self.proxy_mode = proxy_mode
     self.retries = retries
     self.fetcher = ProxyFetcher('zhihu', strategy='greedy', length=5)
     self.conn = get_redis_conn(db=1)
Code Example #27
File: pipelines.py  Project: yb123speed/haipproxy
 def open_spider(self, spider):
     self.redis_con = get_redis_conn(db=META_DATA_DB)
Code Example #28
 def __init__(self):
     self.redis_con = get_redis_conn(**redis_args)