Example #1
0
##################################
# 爬虫管理(包含)#
##################################
from common.log.log_util import LogUtil as log
import sys
import os
import traceback
import importlib.util
from common.manager import *

# Module-level logger, requested under this module's name via the project's LogUtil wrapper.
logger = log.getLogger(__name__)


class PluginManager(Manager):
    def __init__(self):
        """Initialise the plugin manager by running the base ``Manager`` constructor.

        This constructor sets no attributes of its own.
        """
        super().__init__()

    def _load_module(self):
        spd_home = os.path.dirname(os.path.abspath(__file__))
        results = []
        for root, plugin_dirs, files in os.walk(spd_home):
            for name in files:
                results = self._add_newest_module(results,
                                                  os.path.join(root, name),
                                                  r'plg.*\.py?$')
        for result in results:
            path = os.path.dirname(result)
            if path not in sys.path:
                sys.path.append(path)
            filename, ext = os.path.splitext(result)
Example #2
0
# -*- coding: utf-8 -*-

#######################
# 检索url主机信息模块
#######################
from common.log.log_util import LogUtil as log
from common.plugin import Plugin
from functools import partial
from pluginbase import PluginBase
from common.net.webUtil import Request
from common.utils.printdata import *

# Module-level logger, requested under this module's name via the project's LogUtil wrapper.
# NOTE(review): the name `logging` is also the name of the standard-library module, so a
# later `import logging` in this file would rebind it -- consider renaming to `logger`.
logging = log.getLogger(__name__)


class CifyPlugin(Plugin):
    def __init__(self):
        """Set up the plugin: fixed id, empty plugin table, plugin loading, HTTP client.

        Statements run in this order: the base ``Plugin`` constructor, the id
        and ``plugin_dict`` assignments, ``load_plugins()``, and only then the
        creation of ``http_client``.
        """
        super(CifyPlugin, self).__init__()
        # Hard-coded identifier (presumably unique per plugin -- TODO confirm).
        self._id = 10004
        # NOTE(review): presumably populated by load_plugins() -- confirm against its full body.
        self.plugin_dict = {}
        self.load_plugins()
        # HTTP helper; Request is imported from common.net.webUtil.
        self.http_client = Request()

    def load_plugins(self):
        try:
            here = os.path.abspath(os.path.dirname(__file__))
            get_path = partial(os.path.join, here)
            plugin_dir = get_path('cms')

            plugin_base = PluginBase(package='waf_plugins',
                                     searchpath=[plugin_dir])
Example #3
0
from xml.dom.minidom import parse
from common.log.log_util import LogUtil
import sys
from common.net.webUtil import Request
from common.net.url import WrappedUrl
from common.net.proxy.rulermanager import RulerManager
import random
import os

# Module-level logger requested under the fixed name 'debug' (not this module's __name__).
logger = LogUtil.getLogger('debug')


class IPProxy(object):
    def __init__(self):
        """Prepare the proxy helper: config path, ruler manager, IP list, HTTP client.

        ``config_path`` points at ``config_ruler.xml`` in the directory that
        contains this module. ``ip_list`` starts out empty.
        """
        # Make the module directory absolute before joining: when ``__file__``
        # is a bare relative name, ``os.path.dirname`` returns '' and plain
        # concatenation with '/config_ruler.xml' would address the filesystem
        # root instead of the file that sits next to this module.
        module_dir = os.path.dirname(os.path.abspath(__file__))
        self.config_path = os.path.join(module_dir, 'config_ruler.xml')
        self.rulerManager = RulerManager()
        self.ip_list = []
        self.web = Request()

    def generate(self):
        """Generate the proxy IP pool and return one proxy IP from it.

        Reads the ruler id and source URL via ``config_read()``, wraps the URL
        with caching allowed, stores the result of ``get_ip_list()`` on
        ``self.ip_list`` and returns whatever ``get_random_ip()`` produces.
        """
        ruler_id, source_url = self.config_read()
        wrapped = WrappedUrl(source_url, allow_cache=True)
        self.ip_list = self.get_ip_list(wrapped, ruler_id)
        return self.get_random_ip()

    def get_ip_list(self, wurl, ruler_id):
        resp = self.web.request(wurl)
        web_text = resp.content
        self.rulerManager.load()
        ruler_list, ruler_hash = self.rulerManager.get_modules()