##################################
# Crawler management (including plugin module loading)
##################################
from common.log.log_util import LogUtil as log
import sys
import os
import traceback
import importlib.util
from common.manager import *

logger = log.getLogger(__name__)


# NOTE(review): the line structure and nesting below were reconstructed from a
# whitespace-collapsed source in which the whole module sat behind a leading
# `#` -- confirm the nesting against the original layout.
class PluginManager(Manager):
    """Manager that looks for plugin modules under this file's directory."""

    def __init__(self):
        super(PluginManager, self).__init__()

    def _load_module(self):
        """Collect candidate plugin files and make their directories importable.

        Walks the directory that contains this file, offers every file path
        to ``self._add_newest_module`` together with the plugin file-name
        pattern, and appends the directory of each collected result to
        ``sys.path`` when it is not already listed.
        """
        spd_home = os.path.dirname(os.path.abspath(__file__))
        results = []
        for root, plugin_dirs, files in os.walk(spd_home):
            for name in files:
                # NOTE(review): in this pattern `py?` makes the `y` optional,
                # so names ending in `.p` match as well -- confirm whether
                # `\.py$` was intended. `_add_newest_module` is defined
                # outside the visible source.
                results = self._add_newest_module(results, os.path.join(root, name), r'plg.*\.py?$')
        for result in results:
            path = os.path.dirname(result)
            # Register each plugin directory on the import path only once.
            if path not in sys.path:
                sys.path.append(path)
            filename, ext = os.path.splitext(result)
            # NOTE(review): the reviewed source ends at the statement above;
            # any further steps of this method are not visible here.
# __*__coding:utf-8__*__ ####################### # Module for retrieving URL host information ####################### from common.log.log_util import LogUtil as log from common.plugin import Plugin from functools import partial from pluginbase import PluginBase from common.net.webUtil import Request from common.utils.printdata import * logging = log.getLogger(__name__) class CifyPlugin(Plugin): def __init__(self): super(CifyPlugin, self).__init__() self._id = 10004 self.plugin_dict = {} self.load_plugins() self.http_client = Request() def load_plugins(self): try: here = os.path.abspath(os.path.dirname(__file__)) get_path = partial(os.path.join, here) plugin_dir = get_path('cms') plugin_base = PluginBase(package='waf_plugins', searchpath=[plugin_dir])
# NOTE(review): the line above holds a whole module head collapsed onto one
# physical line; because it starts with `#`, Python reads all of it as a comment.
# It is left untouched here because the visible text stops inside an unclosed
# `try:` in `load_plugins`, so it cannot be safely re-laid-out from this view.
#
# What the visible text shows:
#   * `CifyPlugin` subclasses `Plugin`. Its `__init__` sets `_id` to 10004,
#     creates an empty `plugin_dict`, calls `load_plugins()`, and only then
#     assigns `http_client = Request()`, so `load_plugins` runs before
#     `http_client` exists on the instance.
#   * `load_plugins` resolves the `cms` directory next to this file and builds
#     a `PluginBase(package='waf_plugins', searchpath=[plugin_dir])`.
#   * `os` is used but is not among the visible explicit imports -- presumably
#     it arrives through `from common.utils.printdata import *`; verify.
#   * The module-level name `logging` is bound to a project logger, which
#     would shadow the standard-library `logging` module if that were
#     imported under the same name in this file.
from xml.dom.minidom import parse
from common.log.log_util import LogUtil
import sys
from common.net.webUtil import Request
from common.net.url import WrappedUrl
from common.net.proxy.rulermanager import RulerManager
import random
import os

logger = LogUtil.getLogger('debug')


# NOTE(review): the line structure and nesting below were reconstructed from a
# whitespace-collapsed source -- confirm the nesting against the original layout.
class IPProxy(object):
    """Builds a list of proxy IPs from a configured web page and picks one."""

    def __init__(self):
        # Join against the absolute module directory: plain concatenation of
        # dirname(__file__) + '/config_ruler.xml' degrades to the root path
        # '/config_ruler.xml' whenever __file__ has no directory component.
        self.config_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)), 'config_ruler.xml'
        )
        self.rulerManager = RulerManager()
        self.ip_list = []
        self.web = Request()

    def generate(self):
        """Refresh ``self.ip_list`` and return one proxy IP.

        Reads ``(ruler_id, url)`` from ``self.config_read()``, wraps the URL
        in a ``WrappedUrl`` with ``allow_cache=True``, stores the result of
        ``get_ip_list`` in ``self.ip_list`` and returns whatever
        ``self.get_random_ip()`` yields. ``config_read`` and ``get_random_ip``
        are defined outside the visible source.
        """
        # Build the proxy IP pool.
        ruler_id, url = self.config_read()
        wurl = WrappedUrl(url, allow_cache=True)
        self.ip_list = self.get_ip_list(wurl, ruler_id)
        proxy_ip = self.get_random_ip()
        return proxy_ip

    def get_ip_list(self, wurl, ruler_id):
        """Fetch the page behind *wurl* and load the ruler modules.

        Only the opening of this method is visible in the reviewed source, so
        how the response and the rulers are combined is not documented here.
        """
        resp = self.web.request(wurl)
        web_text = resp.content
        self.rulerManager.load()
        ruler_list, ruler_hash = self.rulerManager.get_modules()
        # NOTE(review): the reviewed source ends at the statement above; the
        # remainder of this method (including its return value) is not
        # visible here.