def _get_all_ip_proxy(self, _k=high_proxy_list_key_name) -> list:
    '''
    Get all ip proxies.
    :param _k: raw key name
    :return:
    '''
    _ = deserializate_pickle_object(
        self.redis_cli.get(name=get_uuid3(_k)) or dumps([]))

    return _
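# Usage sketch, not part of the original class (assumptions: run at module
# level where IpPoolsObj and high_proxy_list_key_name are in scope, and the
# pool holds a list of proxy dicts):
from random import choice

ip_pools_obj = IpPoolsObj(_k=high_proxy_list_key_name)
all_proxies = ip_pools_obj._get_all_ip_proxy()
proxy = choice(all_proxies) if all_proxies else None  # None when the pool is empty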
from fzutils.data.pickle_utils import (
    deserializate_pickle_object,
    serialize_obj_item_2_dict,
)
from fzutils.safe_utils import get_uuid3
from fzutils.sql_utils import BaseRedisCli
from fzutils.data.list_utils import list_remove_repeat_dict
from fzutils.linux_utils import _get_simulate_logger
from fzutils.ip_utils import get_local_external_network_ip
from fzutils.time_utils import get_shanghai_time
from logging import INFO, ERROR
# assumed to live in settings, like high_proxy_list_key_name in the other modules here
from settings import (
    SPIDER_LOG_PATH,
    proxy_list_key_name,
    high_proxy_list_key_name,
)

lg = set_logger(
    log_file_name=SPIDER_LOG_PATH + str(get_shanghai_time())[0:10] + '.log',
    console_log_level=INFO,
    file_log_level=ERROR)
redis_cli = BaseRedisCli()
_key = get_uuid3(proxy_list_key_name)           # key under which the proxy_list is stored
_h_key = get_uuid3(high_proxy_list_key_name)
# local external network ip
local_ip = ''
# ip pool obj
ip_pools_obj = IpPoolsObj(_k=high_proxy_list_key_name)

def get_proxy_process_data():
    '''
    Crawl proxies and update the values in redis.
    :return:
    '''
    def _create_tasks_list(**kwargs):
        urls = kwargs.get('urls')
        page_range = kwargs.get('page_range', {})
# coding:utf-8

'''
@author = super_fazai
@File : utils.py
@connect : [email protected]
'''

from fzutils.sql_utils import BaseRedisCli
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from pprint import pprint
from pickle import dumps

print(get_uuid3('proxy_tasks'))

# _ = BaseRedisCli()
# pprint(deserializate_pickle_object(_.get('5e421d78-a394-3b44-aae1-fd86aa127255') or dumps([])))
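# Round-trip sketch, not in the original script (assumption: get_uuid3 is
# deterministic, so the uuid printed above is the redis key for 'proxy_tasks';
# values are stored pickled, hence the dumps([]) fallback on a cache miss):
_redis = BaseRedisCli()
proxy_list = deserializate_pickle_object(
    _redis.get(get_uuid3('proxy_tasks')) or dumps([]))
pprint(proxy_list)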
def __init__(self):
    self.redis_cli = BaseRedisCli()
    # self._k = get_uuid3('proxy_tasks')
    self._k = get_uuid3(high_proxy_list_key_name)
from fzutils.time_utils import get_shanghai_time
from fzutils.internet_utils import (
    get_random_pc_ua,
    get_random_phone_ua,
)
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from fzutils.celery_utils import init_celery_app
from fzutils.sql_utils import BaseRedisCli
from fzutils.common_utils import json_2_dict
from celery.utils.log import get_task_logger

app = init_celery_app()
lg = get_task_logger('proxy_tasks')  # logger of the current task; inside tasks keep using the native celery log object
_key = get_uuid3('proxy_tasks')      # key under which the proxy_list is stored
redis_cli = BaseRedisCli()

@app.task  # methods decorated as a task cannot modify class attributes
def _get_proxy(random_parser_list_item_index, proxy_url) -> list:
    '''
    spiders: fetch high-anonymity proxy ips.
    :return:
    '''
    def parse_body(body):
        '''Parse the url body.'''
        _ = []
        parser_obj = parser_list[random_parser_list_item_index]
        try:
            part_selector = parser_obj.get('part', '')
def __init__(self):
    self.redis_cli = BaseRedisCli()
    self._k = get_uuid3('proxy_tasks')
# coding:utf-8

'''
@author = super_fazai
@File : utils.py
@connect : [email protected]
'''

from fzutils.sql_utils import BaseRedisCli
from fzutils.safe_utils import get_uuid3
from fzutils.data.pickle_utils import deserializate_pickle_object
from fzutils.linux_utils import kill_process_by_name
from fzutils.time_utils import get_shanghai_time
from fzutils.common_utils import get_random_int_number
from fzutils.common_utils import retry
from pprint import pprint
from pickle import dumps
from time import sleep
from random import choice

from settings import high_proxy_list_key_name

print(get_uuid3('proxy_tasks'))
print(get_uuid3(high_proxy_list_key_name))

_ = BaseRedisCli()
# pprint(deserializate_pickle_object(_.get('5e421d78-a394-3b44-aae1-fd86aa127255') or dumps([])))

# kill the celery workers
kill_process_by_name(process_name='celery')
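# Pick-one sketch, not in the original script (assumption: the choice/sleep
# imports above suggest sampling from the high-anonymity pool once the
# workers have refilled it):
def _pick_one_high_proxy():
    '''Return a random item from the high proxy pool, or None when empty.'''
    high_list = deserializate_pickle_object(
        _.get(get_uuid3(high_proxy_list_key_name)) or dumps([]))
    return choice(high_list) if high_list else None

pprint(_pick_one_high_proxy())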