Example 1
    def _get_all_ip_proxy(self, _k=high_proxy_list_key_name) -> list:
        '''
        Get all ip proxies
        :param _k: raw key name
        :return:
        '''
        _ = deserializate_pickle_object(
            self.redis_cli.get(name=get_uuid3(_k)) or dumps([]))

        return _
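
The method above reads a pickled proxy list from Redis under a uuid3-derived key, falling back to an empty pickled list so that unpickling never fails on a missing key. A minimal self-contained sketch of the same read pattern, assuming a standard redis-py client and approximating fzutils' get_uuid3 / deserializate_pickle_object with the stdlib uuid and pickle modules:

import pickle
import uuid

import redis  # redis-py, assumed available


def uuid3_key(raw_key: str) -> str:
    # Approximation of fzutils.safe_utils.get_uuid3: derive a stable
    # uuid3 string from the raw key name (the namespace is an assumption).
    return str(uuid.uuid3(uuid.NAMESPACE_DNS, raw_key))


def get_all_ip_proxy(redis_cli: redis.Redis, raw_key: str) -> list:
    # Fall back to a pickled empty list when the key is absent, so
    # pickle.loads always receives valid bytes.
    payload = redis_cli.get(uuid3_key(raw_key)) or pickle.dumps([])
    return pickle.loads(payload)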
Example 2
from fzutils.data.pickle_utils import (
    deserializate_pickle_object,
    serialize_obj_item_2_dict,
)
from fzutils.safe_utils import get_uuid3
from fzutils.sql_utils import BaseRedisCli
from fzutils.data.list_utils import list_remove_repeat_dict
from fzutils.linux_utils import _get_simulate_logger
from fzutils.ip_utils import get_local_external_network_ip

lg = set_logger(log_file_name=SPIDER_LOG_PATH +
                str(get_shanghai_time())[0:10] + '.log',
                console_log_level=INFO,
                file_log_level=ERROR)
redis_cli = BaseRedisCli()
_key = get_uuid3(proxy_list_key_name)  # key under which proxy_list is stored
_h_key = get_uuid3(high_proxy_list_key_name)
# local external network ip
local_ip = ''
# ip pool obj
ip_pools_obj = IpPoolsObj(_k=high_proxy_list_key_name)


def get_proxy_process_data():
    '''
    Crawl proxies and update the stored value in redis
    :return:
    '''
    def _create_tasks_list(**kwargs):
        urls = kwargs.get('urls')
        page_range = kwargs.get('page_range', {})
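
The fragment above is cut off inside _create_tasks_list. A hedged sketch of what such a helper typically does, expanding url templates over their page ranges (the '{}' template convention and the page_range shape are assumptions, not the project's confirmed layout):

def create_tasks_list(urls: list, page_range: dict) -> list:
    # Hypothetical expansion: one concrete url per page for every
    # template; templates without a configured range yield one url.
    tasks = []
    for index, url_template in enumerate(urls):
        start, end = page_range.get(index, (1, 1))
        for page in range(start, end + 1):
            tasks.append(url_template.format(page))
    return tasks

Usage: create_tasks_list(urls=['http://example.com/free/{}'], page_range={0: (1, 3)}) would yield three page urls (the url is illustrative only).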
Example 3
# coding:utf-8
'''
@author = super_fazai
@File    : utils.py
@connect : [email protected]
'''

from fzutils.sql_utils import BaseRedisCli
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from pprint import pprint
from pickle import dumps

print(get_uuid3('proxy_tasks'))
# _ = BaseRedisCli()
# pprint(deserializate_pickle_object(_.get('5e421d78-a394-3b44-aae1-fd86aa127255') or dumps([])))
Example 4
    def __init__(self):
        self.redis_cli = BaseRedisCli()
        # self._k = get_uuid3('proxy_tasks')
        self._k = get_uuid3(high_proxy_list_key_name)
Example 5
from fzutils.time_utils import get_shanghai_time
from fzutils.internet_utils import (
    get_random_pc_ua,
    get_random_phone_ua,
)
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from fzutils.celery_utils import init_celery_app
from celery.utils.log import get_task_logger
from fzutils.sql_utils import BaseRedisCli
from fzutils.common_utils import json_2_dict

app = init_celery_app()
lg = get_task_logger(
    'proxy_tasks')  # logger for the current task; inside tasks, keep using the native celery log object
_key = get_uuid3('proxy_tasks')  # key under which proxy_list is stored
redis_cli = BaseRedisCli()


@app.task  # methods decorated with @app.task cannot modify class attributes
def _get_proxy(random_parser_list_item_index, proxy_url) -> list:
    '''
    spiders: fetch high-anonymity proxy ips
    :return:
    '''
    def parse_body(body):
        '''Parse the response body of a proxy-list url'''
        _ = []
        parser_obj = parser_list[random_parser_list_item_index]
        try:
            part_selector = parser_obj.get('part', '')
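
parse_body dispatches on an entry from the project-level parser_list, whose full structure is not shown here. A hedged sketch of the table-driven idea, swapping the project's selector strings for a hypothetical regex-based entry (every field beyond 'part' is an assumption):

import re

# Hypothetical parser table: one entry per proxy site, describing how
# to extract ip/port pairs from that site's response body.
parser_list = [
    {'part': re.compile(r'(\d{1,3}(?:\.\d{1,3}){3})\D+(\d{2,5})')},
]


def parse_body(body: str, parser_index: int) -> list:
    # Pull (ip, port) pairs out of the body with the chosen entry.
    parser_obj = parser_list[parser_index]
    part_selector = parser_obj.get('part')
    return [{'ip': ip, 'port': int(port)}
            for ip, port in part_selector.findall(body)]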
Example 6
    def __init__(self):
        self.redis_cli = BaseRedisCli()
        self._k = get_uuid3('proxy_tasks')
Example 7
# coding:utf-8
'''
@author = super_fazai
@File    : utils.py
@connect : [email protected]
'''

from fzutils.sql_utils import BaseRedisCli
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from fzutils.linux_utils import kill_process_by_name
from fzutils.time_utils import get_shanghai_time
from fzutils.common_utils import get_random_int_number
from fzutils.common_utils import retry
from pprint import pprint
from pickle import dumps
from time import sleep
from random import choice
from settings import high_proxy_list_key_name

print(get_uuid3('proxy_tasks'))
print(get_uuid3(high_proxy_list_key_name))
_ = BaseRedisCli()
# pprint(deserializate_pickle_object(_.get('5e421d78-a394-3b44-aae1-fd86aa127255') or dumps([])))

# kill the celery workers
kill_process_by_name(process_name='celery')
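
The last line stops the celery workers by process name before a restart. A minimal approximation of kill_process_by_name, assuming a Unix host where pkill is on PATH (fzutils' actual implementation may differ):

import subprocess


def kill_process_by_name(process_name: str) -> None:
    # Ask pkill to SIGTERM every matching process; a non-zero exit
    # code just means nothing matched, so we don't raise on it.
    subprocess.run(['pkill', '-f', process_name], check=False)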