Exemple #1
0
 def __init__(self):
     """Create the per-task-type queues, the logger and the task managers."""
     # task queue
     self._idowntskqueue = queue.Queue()
     # cmd queue
     self._cmdqueue = queue.Queue()
     # iscan task queue
     self._iscanqueue = queue.Queue()
     # iscout task queue
     self._iscoutqueue = queue.Queue()
     # autotask queue
     self._autoqueue = queue.Queue()
     # added: dnsreq queue
     self._dnsqueue = queue.Queue()
     # (original note: the iscan and iscout queues may be added here)
     self._logger: MsLogger = MsLogManager.get_logger("TaskManager")
     # spider manager
     self._task_allot = SpiderManagerAllot()
     # cmd manager
     self._cmd_allot = CmdManager()
     # task parser
     self._taskparser: TaskFileParser = TaskFileParser()
     # added: iscan task
     self._iscan_allot = ScanManager()
     # added: iscout task
     self._iscout_allot = ScoutManager()
     # added: autotask
     self._autotask_allot = AutoTaskManager()
     # added: dns
     self._dnsreq = DnsReq()
Exemple #2
0
    def __init__(
            self,
            description: str,
            platform: str,
            datamatcher: DataMatcher = None,
            maxsegcount: int = 1000,
            enc: str = "utf-8",
    ):
        """Validate the outputer settings and store them on the instance.

        description: unique description of this outputer (non-empty str).
        platform: platform this outputer belongs to (non-empty str).
        datamatcher: used when it is a DataMatcher, otherwise a fresh
            DataMatcher() is used.
        maxsegcount: maximum number of data segments per send; any value
            that is not a positive int falls back to 1000.
        enc: charset name; must be known to charsets.contains_charset.

        Raises Exception when description, platform or enc is invalid.
        """
        if not (isinstance(description, str) and description != ""):
            raise Exception("Invalid description for outputer")
        if not (isinstance(platform, str) and platform != ""):
            raise Exception("Invalid platform for outputer")

        # Unique description of this outputer
        self._description: str = description
        # Platform this outputer belongs to
        self._platform: str = platform

        # Maximum number of data segments sent at once
        count_is_usable = isinstance(maxsegcount, int) and maxsegcount > 0
        self._maxsegcount: int = maxsegcount if count_is_usable else 1000

        # The default matcher is always built, exactly as before
        fallback_matcher = DataMatcher()
        self._datamacher: DataMatcher = (
            datamatcher
            if isinstance(datamatcher, DataMatcher) else fallback_matcher)

        enc_is_valid = (isinstance(enc, str) and enc != ""
                        and charsets.contains_charset(enc))
        if not enc_is_valid:
            raise Exception("Invalid charset param 'enc' for OutputerBase")
        self._enc: str = enc

        logger_name = "Output_" + self._platform
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)
Exemple #3
0
 def __init__(self):
     """Build the client: logger, database, input manager, task/download
     managers, outputers and the business switch."""
     self._logger: MsLogger = MsLogManager.get_logger("IdownClient")
     # Initialise sqlite first of all, because data has to be stored and read
     DbManager()
     self._inputmanagement = InputManagement(inputconfig, self.on_data_in)
     # proxy pool manager
     self._loaded_proxy_spiders = []
     # self._init_proxy()
     #  -------------------------------- task related
     self._taskmanger = TaskManager()
     self._taskdownload = TaskDownload()
     self._cookie_keeper = TaskCookieAlive()
     self._clientcollect = CollectClientInfo()
     # added: iscan task download start
     self._iscandownload = ScanDownloadManager()
     # added: iscout task download start
     self._iscoutdownload = ScoutDownloadManager()
     # added: autotask download start
     self._automateddownload = AutoTaskDownloadManager()
     # added: deletion of useless files produced by the program, by judy 2020/08/20
     self._dppf = DPPF()
     # initialise the outputers
     OutputManagement.static_initial(outputconfig, stdconfig)
     # task start switch
     # NOTE(review): eval() executes whatever text the config value holds;
     # confirm clientbusiness can only come from a trusted source.
     self.__switch = eval(basic_client_config.clientbusiness)
     self.all_business = EClientBusiness.ALL.value
Exemple #4
0
    def __init__(self,
                 uniquename: str,
                 fields: dict,
                 extendfields: dict = None):
        """Validate and store the converter name, fields and extend fields.

        uniquename: non-empty str identifying the converter.
        fields: non-empty dict whose values are all ConverterField.
        extendfields: optional dict; only entries with a non-empty str key
            are kept.

        Raises Exception when uniquename or fields is invalid.
        """
        if not (isinstance(uniquename, str) and uniquename != ""):
            raise Exception("Specified converter unique name is invalid.")
        if not (isinstance(fields, dict) and len(fields) >= 1):
            raise Exception(
                "Specified converter fields is not a dict or no field specified: %s"
                % uniquename)

        if any(not isinstance(item, ConverterField)
               for item in fields.values()):
            raise Exception(
                "Specified converter filed is not a ConverterField")

        self._uniquename: str = uniquename
        self._fields: dict = fields

        # Copy only the extend fields whose key is a usable name
        self._extendfields: dict = {}
        if isinstance(extendfields, dict):
            self._extendfields.update(
                (key, value) for key, value in extendfields.items()
                if isinstance(key, str) and key != "")

        self._logger: MsLogger = MsLogManager.get_logger(self._uniquename)
Exemple #5
0
    def __init__(self, task: IscanTask):
        """Bind the plugin to *task* and set up paths, logger and counters.

        Raises Exception when the task's search count limit is 0.
        """
        self.task = task
        self.suffix = "iscan_search"
        self.tmppath = clienttaskconfig.tmppath
        self.outpath = clienttaskconfig.outputpath

        # Plugin name, also used as the logger name prefix
        self._name = type(self).__name__
        logger_name = "{}_{}".format(self._name, self.task.taskid)
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)

        self._sqlfunc = DbManager

        # Shared information: limit on the number of search results
        self.count = 0
        self.count_limit = int(self.task.cmd.stratagyscan.search.count)
        # The number of records to query must not be 0
        if not self.count_limit:
            raise Exception("Task count cant be None")

        # Pause-download feature: stop flag, not stopped by default
        # (task value 1 = keep downloading -> False, 0 = stop -> True)
        self._stop_sign = False
        # The program is executing
        self._running = True
        # Log suffix, created by judy 2020/08/12
        self._log_suffix = "prg_log"
        self.output_count = 0
Exemple #6
0
    def __init__(self, task):
        """Store *task*, create the logger and index cmsver by lower-cased key."""
        self.task = task
        self._logger: MsLogger = MsLogManager.get_logger("webalyzer")

        # Same values as cmsver, keyed by the lower-cased name
        self._cmsver_lower = {
            name.lower(): value for name, value in cmsver.items()
        }
Exemple #7
0
class ZmapParser:
    """Parser for zmap scan result files."""

    _logger: MsLogger = MsLogManager.get_logger("ZmapParser")

    def __init__(self):
        self._name = type(self).__name__

    @classmethod
    def parse_open_ports(cls,
                         task: IscanTask,
                         level: int,
                         outfi: str,
                         port: str,
                         transprotocol: str = 'tcp') -> iter:
        """Yield one PortInfo per non-blank line of the zmap output file.

        task, level, transprotocol: passed through to PortInfo unchanged.
        outfi: path of the result file; nothing is yielded when it is not
            an existing file.
        port: port the scan was run for; converted with int() per record.

        Every non-blank line is stripped and used as the ip argument of
        PortInfo. Blank lines are skipped; previously the first blank line
        ended the parse and silently dropped every address after it.

        Any exception raised while reading or building records is logged
        and ends the iteration; it is not propagated to the caller.
        """
        try:
            if not os.path.isfile(outfi):
                return

            with open(outfi, mode='r', encoding='utf-8') as fs:
                for line in fs:
                    ip = line.strip()
                    if ip == '':
                        # A blank line is not the end of the file: keep going
                        continue

                    p: PortInfo = PortInfo(task, level, ip, int(port),
                                           transprotocol)
                    yield p

        except Exception:
            cls._logger.error("Parse zmap scan result error: {}".format(
                traceback.format_exc()))
Exemple #8
0
    def __init__(self):
        """Create the logger, the two nmap scanner helpers and the tmp dir path."""
        self._logger: MsLogger = MsLogManager.get_logger("Nmap")

        # One scanner object for open ports, one for alive hosts
        self._scanner_openports: NmapScannerOpenPorts = NmapScannerOpenPorts()
        self._scanner_alivehosts: NmapScannerAliveHosts = NmapScannerAliveHosts(
        )
        # Temporary directory path taken from the client task configuration
        self.tmpdir = clienttaskconfig.tmppath
Exemple #9
0
 def __init__(self):
     """Create the spider adapter, logger, bookkeeping dict and its lock."""
     self._spideradapter = SpiderAdapter()
     # The DbManager class itself is stored, not an instance
     self._sqlfunc = DbManager
     self._logger: MsLogger = MsLogManager.get_logger('SpiderManagent')
     # presumably tracks spiders currently being handled; verify against usage
     self._spider_dealing_dict: dict = {}
     self._spider_threads_locker = threading.Lock()
     # Client id read from the basic client configuration
     self._clientid = basic_client_config.clientid
Exemple #10
0
    def __init__(self):
        """Set up the DB accessor, logger, tmp/output paths and a file lock."""
        # The DbManager class itself is stored, not an instance
        self._sqlfunc = DbManager
        self._logger: MsLogger = MsLogManager.get_logger('AutoPlugin')
        # Paths taken from the client task configuration
        self.tmppath = clienttaskconfig.tmppath
        self.outpath = clienttaskconfig.outputpath

        # File lock (re-entrant)
        self.file_locker = threading.RLock()
Exemple #11
0
    def __init__(self):
        """Initialise the proxy spider base, its logger and its HTTP accessor.

        The logger is stored as ``_logger``, the attribute name used by the
        sibling proxy spiders. ``_loggr`` (the original, misspelled name) is
        kept as an alias to the same logger so existing references keep
        working.
        """
        ProxySpiderbase.__init__(self, False)

        self._logger: MsLogger = MsLogManager.get_logger(
            self.__class__.__name__)
        # Backward-compatible alias for the historical attribute name
        self._loggr: MsLogger = self._logger

        # Go slowly: interval=1 means one request per second
        self._ha: HttpAccess = HttpAccess(interval=1)
Exemple #12
0
    def __init__(
        self,
        uniquename: str,
        datamatcher: DataMatcher,
        maxwaitcount: int = 1,
        maxwaittime: float = 3,
        relation_inputer_src: list = None,
    ):
        """Initialise the dispatcher state, queues, locks and worker threads.

        uniquename: non-empty str; used in the logger and thread names.
        datamatcher: forwarded to DealerBase.__init__.
        maxwaitcount: kept when it is a positive int, otherwise 1.
        maxwaittime: kept when it is a positive int/float, otherwise 3.
        relation_inputer_src: stored as given.

        Raises Exception when uniquename is invalid. The threads are only
        created here, not started.
        """
        DealerBase.__init__(self, datamatcher)

        if not (isinstance(uniquename, str) and uniquename != ""):
            raise Exception("Task dispatcher uniquename cannot be None")

        self._strategymngr = StrategyManager()
        self._uniquename = uniquename

        logger_name = "dispatcher_{}".format(uniquename)
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)

        # Fields
        self._relation_inputer_src = relation_inputer_src

        count_is_usable = isinstance(maxwaitcount, int) and maxwaitcount > 0
        self._maxwaitcount: int = maxwaitcount if count_is_usable else 1

        time_is_usable = (type(maxwaittime) in (int, float)
                          and maxwaittime > 0)
        self._maxwaittime: float = maxwaittime if time_is_usable else 3

        # Incremental sleep time
        self._increace_sleep_sec: int = 1

        # Executors
        self._stdconvert = StandardConvertManagement(stdconvertconfig)

        # Processing queues and threads
        self._task_queue = queue.Queue()
        self._dispatch_queue: dict = {}
        self._dispatch_queue_locker = threading.RLock()
        # Holds the data->tasks mapping so files can be handled according
        # to the task processing result
        self._data_map: list = []
        self._data_map_locker = threading.RLock()

        allocate_name = "taskallocate_{}".format(self._uniquename)
        self._t_task_allocate = threading.Thread(
            target=self._task_allocate, name=allocate_name, daemon=True)

        judge_name = "taskokjudge_{}".format(self._uniquename)
        self._t_task_ok_judge = threading.Thread(
            target=self._task_ok_judge, name=judge_name, daemon=True)

        self._timeok: bool = False
        self._timeok_locker = threading.Lock()
        self._t_timer = threading.Thread(
            target=self._timer_thread, name="tasktimer", daemon=True)
Exemple #13
0
    def __init__(self, servicetype: str) -> None:
        """Store the service type and derive the grabber name and logger.

        Raises Exception when servicetype is not a non-empty str.
        """
        if not (isinstance(servicetype, str) and servicetype != ""):
            raise Exception(
                "Invalid service type for initialing Logic Banner Grabber.")

        self._servicetype: str = servicetype
        # The name is the service type prefixed with "LD_"
        self._name: str = "LD_{}".format(self._servicetype)

        self._logger: MsLogger = MsLogManager.get_logger(self._name)
Exemple #14
0
    def static_init(
            cls,
            proxy_fetch_thread_count: int = 1,
            max_pool_item_count: int = 100,
            verify_thread_count: int = 5,
            recheck_interval_sec: float = 180,
            dbconfig: ProxyDbConfig = None,
            logger_hook=None,
            proxyspiders: list = None,
    ):
        """
        Provide a uniform proxy API by creating the shared ProxyManager once.

        proxy_fetch_thread_count: number of fetch threads for each
            proxy-fetching plugin.
        max_pool_item_count: maximum number of proxy IPs in the local proxy
            pool; fetching is paused above this number.
        verify_thread_count: number of threads verifying proxy IP validity.
        recheck_interval_sec: interval, in seconds, for re-verifying proxy
            IPs already stored.
        dbconfig: local storage configuration.
        logger_hook: a function used to print logs, with the signature
            log_func(self, msg: str, level: MsLogLevel); when it is not
            callable, cls._loghook is used instead.
        proxyspiders: list of proxy IP spider instances.

        Usage flow (as given in the original documentation):
        1. Create your ProxySpider, inheriting from ProxySpiderbase, and
           implement it.
            manager = ProxyManager()
            spider = YourProxySpider()
            manager.append_custom_proxy_spider(spider)
            manager.append/set_proxy_fetch_settings(params)
            manager.start_fetch_proxy()
            ProxyItem = manager.get_one_proxy(params)
            proxies = manager.get_proxies(count, params)
            manager.stop_fetch_proxy()
            manager.dispose()
        """

        # Nothing to do when initialisation already happened
        if cls.__inst_initialed:
            return
        with cls.__inst_locker:
            # Check again while holding the lock: another caller may have
            # finished initialising between the first check and here
            if cls.__inst_initialed:
                return

            cls._logger = MsLogManager.get_logger(ProxyMngr.__name__)

            # Fall back to the class's own log hook
            if not callable(logger_hook):
                logger_hook = cls._loghook
            cls.__inst: ProxyManager = ProxyManager(
                proxy_fetch_thread_count=proxy_fetch_thread_count,
                max_pool_item_count=max_pool_item_count,
                verify_thread_count=verify_thread_count,
                recheck_interval_sec=recheck_interval_sec,
                dbconfig=dbconfig,
                logger_hook=logger_hook,
                proxyspiders=proxyspiders,
            )

            # Expose the manager's pool on the class
            cls._pool = cls.__inst.pool

            # Set last, after the instance and pool are in place
            cls.__inst_initialed = True
Exemple #15
0
 def __init__(self, loggername: str = None):
     """Pick the plugin name, create its logger and set defaults.

     loggername: used as the name when it is a non-empty str; otherwise
         the class name is used.
     """
     # Plugin name
     has_custom_name = isinstance(loggername, str) and loggername != ""
     self._name = loggername if has_custom_name else type(self).__name__
     self._logger: MsLogger = MsLogManager.get_logger(
         "{}".format(self._name))
     # Validity time of the verification code.
     # NOTE(review): the original comment says "usually 3 minutes" while
     # the value is 60 - confirm the intended unit/value.
     self._effective_time = 60
     self._sqlfunc = DbManager
Exemple #16
0
    def __init__(self):
        """Initialise the proxy spider base, logger, proxy regex and HTTP accessor."""
        ProxySpiderbase.__init__(self, False)

        # Logger named after the concrete class
        self._logger: MsLogger = MsLogManager.get_logger(
            self.__class__.__name__)

        # Captures "<digits and dots>:<digits>" as two groups
        self._reproxy = re.compile(r"([\d.]+?):(\d+)", re.S)

        # HTTP accessor used to verify HTTP proxies; interval is the time
        # between two HTTP requests (seconds)
        self._ha: HttpAccess = HttpAccess(interval=1)
    def __init__(self, cfg: ConvertConfig):
        """Validate the converter configuration and keep a reference to it.

        Raises Exception when cfg is not a ConvertConfig, or when any of
        its converters is not a ConverterBase (sub)class instance.
        """
        # None is rejected here too: it is not a ConvertConfig instance
        if not isinstance(cfg, ConvertConfig):
            raise Exception("Invalid standard converter config.")

        converters_ok = all(
            issubclass(converter.__class__, ConverterBase)
            for converter in cfg._converters.values())
        if not converters_ok:
            raise Exception("Specified convert is invalid.")

        self.converterconfig = cfg
        self._logger: MsLogger = MsLogManager.get_logger("stdconvertmanager")
Exemple #18
0
    def __init__(self, cfg: InputConfig, ondatain: callable):
        """Store the inputer configuration and the data-in callback.

        Raises Exception when cfg is not an InputConfig.
        """
        # None is rejected here too: it is not an InputConfig instance
        if not isinstance(cfg, InputConfig):
            raise Exception("Specified inputer config is invalid")

        self._config = cfg
        self._ondatain: callable = ondatain

        # Outgoing queue, so the data monitor thread does not block
        self._datain_queue: queue.Queue = queue.Queue()
        self._logger: MsLogger = MsLogManager.get_logger("inputmanager")
Exemple #19
0
    def __init__(self):
        """Create the logger, initialise outputers, input and service managers,
        and prepare (without starting) the dicapp generation thread."""
        self._logger: MsLogger = MsLogManager.get_logger("idownserver")
        OutputManagement.static_initial(outputconfig, stdconfig)
        self._inputmanagement = InputManagement(inputconfig, self.on_data_in)
        self._servicemanager = ServiceManager()

        # Generate dicapp at intervals (dicapp may become dynamic later,
        # because collecting clients may go offline, crash, etc.)
        self._t_dicapp = threading.Thread(target=self._generate_dicapp,
                                          name='gen_dicapp',
                                          daemon=True)
Exemple #20
0
    def __init__(self):
        """Initialise the proxy spider base, logger, proxy regex, HTTP accessor
        and the service key."""
        ProxySpiderbase.__init__(self, False)

        # Logger named after the concrete class
        self._logger: MsLogger = MsLogManager.get_logger(self.__class__.__name__)

        # Captures a double-quoted "<digits and dots>:<digits>" as two groups
        self._reproxy = re.compile(r'"([\d.]+?):(\d+)"', re.S)

        # HTTP accessor used to verify HTTP proxies; interval is the time
        # between two HTTP requests (seconds)
        self._ha: HttpAccess = HttpAccess(interval=1)
        # Key for the paid Qiyun proxy service
        # NOTE(review): this credential is hard-coded in source; consider
        # moving it to configuration and rotating the key.
        self.key = 'dd0b192e8199af0b47faf005aac4483b1efff860'
Exemple #21
0
    def __init__(self):
        """Create the logger, publish the configured dealers on the class and
        create the task deliver manager."""
        self._logger: MsLogger = MsLogManager.get_logger("ServiceManager")

        # Collection of all dealers:
        #   client status manager
        #   new task allocator
        #   task feedback data handler
        #   result data handler
        # Note: assigned on the class, so it is shared by all instances
        ServiceManager._dealers = dealerconfig._dealers

        # Task deliverer (this dealer receives no files; it handles local
        # database messages, which is why it is kept separate)
        self._delivermanager = TaskDeliverManager()
Exemple #22
0
    def __init__(self, name: str, weight: float = 1, isforced: bool = True):
        """Validate and store the strategy name, weight and forced flag.

        name: non-empty str.
        weight: exactly an int or float (bool is rejected) and not negative.
        isforced: a bool.

        Raises Exception when any argument is invalid.
        """
        if not (isinstance(name, str) and name != ""):
            raise Exception("Strategy param name is invalid.")
        if type(weight) not in (int, float) or weight < 0:
            raise Exception("Strategy param weight is invalid: %s" % name)
        if not isinstance(isforced, bool):
            raise Exception("Strategy param isforced is invalid: %s" % name)

        self._name, self._weight, self._isforced = name, weight, isforced

        logger_name = "strategy_{}".format(name)
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)
Exemple #23
0
    def __init__(self):
        """Create the cookie queue, keeper, logger, default cmd and the
        in-progress bookkeeping with its lock."""
        self._cookie_queue = Queue()
        self._sqlfunc = DbManager
        self._cookie_keeper = SpiderCookieKeep()
        self._logger: MsLogger = MsLogManager.get_logger("CookieKeepAlive")

        # Default configuration, read from the database
        default_cmd_row = self._sqlfunc.get_default_idown_cmd()
        default_cmd_text: str = default_cmd_row.get("cmd")
        self.d_cmd = IdownCmd(default_cmd_text)

        # Tasks currently being processed.
        # (original note: while new tasks are being processed, the loop
        # download tasks are not executed)
        self._dealing_queue: dict = {}
        self._dealing_queue_locker = threading.Lock()
Exemple #24
0
    def __init__(self, token: str):
        """Store the API token, fill it into the header template and set up
        the login state, logger, HTTP accessor and empty user fields."""
        # NOTE(review): assert is removed when Python runs with -O, so this
        # check is not a reliable input validation.
        assert isinstance(token, str)
        self._token: str = token
        # self._header is read before it is assigned on the instance, so the
        # %-template presumably comes from the class (not visible here)
        self._header = self._header % self._token

        self._is_logined: bool = False
        self._login_locker = threading.RLock()

        self._logger: MsLogger = MsLogManager.get_logger("GitAPIv4")
        self._ha: HttpAccess = HttpAccess()

        # User details start out unset
        self._user_name: str = None
        self._user_login: str = None
        self._user_id: str = None
 def __init__(self):
     """Create the bookkeeping dict and lock, the logger, one helper per
     spider operation and the default cmd."""
     # Queue of tasks currently being processed
     self._spider_manage_queue_dict: dict = {}
     self._spider_manage_dealing_queue_locker = threading.Lock()
     self._logger: MsLogger = MsLogManager.get_logger("SpiderManagerAllot")

     # One helper object per kind of spider operation
     self._batch_login_test = SpiderBatchLoginTest()
     self._download_task_store = SpiderDownloadTaskStore()
     self._login_only = SpiderLoginOnly()
     self._logout = SpiderLogout()
     self._online_check = SpiderOnlineCheck()
     self._register_check = SpiderRegisterCheck()
     self._store_vercode = SpiderStoreInput()

     # Default configuration, read from the database
     default_cmd_row = DbManager.get_default_idown_cmd()
     default_cmd_text: str = default_cmd_row.get("cmd")
     self.defcmd: IdownCmd = IdownCmd(default_cmd_text)
Exemple #26
0
    def __init__(self,
                 task: Task,
                 appcfg: AppCfg,
                 clientid: str,
                 logger_name_ext: str = ""):
        """Validate the arguments and initialise the plugin state.

        task: the Task this plugin works on.
        appcfg: the AppCfg of the plugin.
        clientid: non-empty str.
        logger_name_ext: optional suffix appended to the logger name when
            it is neither None nor "".

        Raises Exception when task, appcfg or clientid is invalid.
        """
        if not isinstance(task, Task):
            raise Exception("Task is invalid.")
        if not isinstance(appcfg, AppCfg):
            raise Exception("AppConfig is invalid.")
        if not (isinstance(clientid, str) and clientid != ""):
            raise Exception("Invalid clientid")

        self.task = task
        self._clientid: str = clientid
        self._appcfg = appcfg

        # Logger and plugin name
        self._name = type(self).__name__
        logger_name = "{}_{}".format(self._name, self.task.batchid)
        if logger_name_ext is not None and logger_name_ext != "":
            logger_name = "{}_{}".format(logger_name, logger_name_ext)
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)

        # HTTP library object
        self._ha: HttpAccess = HttpAccess()

        # Common fields holding information about the account this plugin
        # logs in with; they are needed all over the place
        self._userid: str = None  # the site's unique identifier of the user
        self._account: str = self.task.account  # account name usable for login
        self._username: str = None  # user nickname
        self._globaltelcode: str = self.task.globaltelcode  # international dialling code
        self._phone: str = self.task.phone  # phone number
        self._url: str = self.task.url
        self._host: str = self.task.host
        self._cookie: str = self.task.cookie

        # State objects
        self._errorcount: int = 0
        self.is_running: bool = False
        self.running_task = []
        # Verification code validity is set to 900 seconds; 15 minutes is
        # plenty, since such codes are usually valid for 10 minutes at most
        self._effective_time = 900
        self._sqlfunc = DbManager
        # Thread running
        self._running = True
        # Stop flag, not stopped by default
        # (task value 1 = keep downloading -> False, 0 = stop -> True)
        self._stop_sign = False
Exemple #27
0
    def __init__(self, explicitfilters: ExplicitFilters = None):
        """Create the logger and polling state and split the configured
        strategies into forced and unforced lists.

        explicitfilters: stored as given.
        """
        # Other
        logger_name = self.__class__.__name__
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)

        # Initialise the strategies
        self._polling_index: int = 0
        self._polling_index_locker = threading.RLock()

        # These are already sorted by hand, so later code can simply
        # iterate over them with a for loop
        self.all_stgs: list = StrategyBuisinessBase.__stgconfig._strategies
        forced_bucket: list = []
        unforced_bucket: list = []
        for stg in self.all_stgs:
            if stg._isforced:
                forced_bucket.append(stg)
            else:
                unforced_bucket.append(stg)
        self.forced_stgs: list = forced_bucket
        self.unforced_stgs: list = unforced_bucket

        self._explicit_filters: ExplicitFilters = explicitfilters
    def __init__(self, toolmark: str):
        """Create the logger and a fresh, empty temp directory for this tool.

        toolmark: non-empty str; used as the name of the tool's own
            sub-directory under the base temp directory.

        The base directory is the module-level ``tmpdir``. When that value
        cannot be resolved to a str path (for example it is None), the
        fallback "./_clienttmpdir" is used. Previously os.path.abspath()
        raised TypeError for such values before the fallback check could
        run, so the fallback was never reached.

        An existing directory for this toolmark is removed first, then the
        directory is created.

        Raises Exception when toolmark is invalid.
        """

        if not isinstance(toolmark, str) or toolmark == "":
            raise Exception("Zgrab2 scanner toolmark is invalid")

        self._logger: MsLogger = MsLogManager.get_logger(type(self).__name__)

        self._toolmark: str = toolmark

        # Resolve the base directory; abspath() rejects None and other
        # non-path values with TypeError, which selects the fallback
        try:
            basedir = os.path.abspath(tmpdir)
        except TypeError:
            basedir = None
        if not isinstance(basedir, str):
            basedir = os.path.abspath("./_clienttmpdir")

        self._tmpdir: str = os.path.abspath(
            os.path.join(basedir, self._toolmark))
        # Start from an empty directory: drop leftovers of earlier runs
        if os.path.isdir(self._tmpdir):
            helper_dir.remove_dirs(self._tmpdir)

        os.makedirs(self._tmpdir)
Exemple #29
0
    def __init__(self):
        """Initialise the strategy base class and create one strategy object
        per task type, collected in self._strategies in creation order."""
        self._strategies: list = []

        StrategyBuisinessBase.static_init(stgconfig)

        # Created in the same order in which they are listed below
        self._stgidowntask: StrategyIDownTask = StrategyIDownTask()
        self._stgiscantask: StrategyIScanTask = StrategyIScanTask()
        self._stgiscouttask: StrategyIScoutTask = StrategyIScoutTask()
        self._stgautotask: StrategyAutoTask = StrategyAutoTask()
        self._strategies.extend((
            self._stgidowntask,
            self._stgiscantask,
            self._stgiscouttask,
            self._stgautotask,
        ))

        # Other
        self._logger: MsLogger = MsLogManager.get_logger("strategymanager")
Exemple #30
0
    def __init__(self, task: IscoutTask):
        """Bind the plugin to *task* and set up paths, HTTP accessor, logger
        and output bookkeeping."""
        self.task = task
        self.tmppath = clienttaskconfig.tmppath
        self.outpath = clienttaskconfig.outputpath
        self._ha = HttpAccess()

        # Plugin name, also used as the logger name prefix
        self._name = type(self).__name__
        logger_name = "{}_{}".format(self._name, self.task.taskid)
        self._logger: MsLogger = MsLogManager.get_logger(logger_name)
        self._sqlfunc = DbManager

        # Maximum number of output records
        self.max_output = 10000
        # Added for the reason field, which has to map to the matching tool
        self.dtools = dtools
        # Added data statistics, modified by judy 2020/08/10
        self.output_count = 0
        # Log suffix, created by judy 2020/08/12
        self._log_suffix = 'prg_log'