def __init__(self):
    """Task manager: creates one queue per task kind and one allotter per business type."""
    # idown task queue
    self._idowntskqueue = queue.Queue()
    # cmd queue
    self._cmdqueue = queue.Queue()
    # iscan task queue
    self._iscanqueue = queue.Queue()
    # iscout task queue
    self._iscoutqueue = queue.Queue()
    # auto task queue
    self._autoqueue = queue.Queue()
    # dnsreq queue (added later)
    self._dnsqueue = queue.Queue()
    self._logger: MsLogger = MsLogManager.get_logger("TaskManager")
    # spider manager
    self._task_allot = SpiderManagerAllot()
    # cmd manager
    self._cmd_allot = CmdManager()
    # task file parser
    self._taskparser: TaskFileParser = TaskFileParser()
    # iscan task allotter
    self._iscan_allot = ScanManager()
    # iscout task allotter
    self._iscout_allot = ScoutManager()
    # auto task allotter
    self._autotask_allot = AutoTaskManager()
    # dns requester
    self._dnsreq = DnsReq()
def __init__(
    self,
    description: str,
    platform: str,
    datamatcher: DataMatcher = None,
    maxsegcount: int = 1000,
    enc: str = "utf-8",
):
    """Outputer base: validates identity/platform/charset and stores send settings.

    Raises:
        Exception: on an empty description/platform or an unsupported charset.
    """
    if not (isinstance(description, str) and description != ""):
        raise Exception("Invalid description for outputer")
    if not (isinstance(platform, str) and platform != ""):
        raise Exception("Invalid platform for outputer")
    # unique description of this outputer
    self._description: str = description
    # platform this outputer belongs to
    self._platform: str = platform
    # max number of data segments per send; invalid values fall back to 1000
    self._maxsegcount: int = (
        maxsegcount if isinstance(maxsegcount, int) and maxsegcount > 0 else 1000
    )
    # NOTE: attribute keeps the original spelling "_datamacher" so existing callers work
    default_matcher = DataMatcher()
    self._datamacher: DataMatcher = (
        datamatcher if isinstance(datamatcher, DataMatcher) else default_matcher
    )
    if (not isinstance(enc, str) or enc == ""
            or not charsets.contains_charset(enc)):
        raise Exception("Invalid charset param 'enc' for OutputerBase")
    self._enc: str = enc
    self._logger: MsLogger = MsLogManager.get_logger("Output_{}".format(
        self._platform))
def __init__(self): self._logger: MsLogger = MsLogManager.get_logger("IdownClient") # 初始化sqlite, 最开始就初始化,因为需要存入数据和读取数据 DbManager() self._inputmanagement = InputManagement(inputconfig, self.on_data_in) # proxy代理池管理器 self._loaded_proxy_spiders = [] # self._init_proxy() # --------------------------------task相关 self._taskmanger = TaskManager() self._taskdownload = TaskDownload() self._cookie_keeper = TaskCookieAlive() self._clientcollect = CollectClientInfo() # 新增iscantask下载启动 self._iscandownload = ScanDownloadManager() # 新增iscouttask下载启动 self._iscoutdownload = ScoutDownloadManager() # 新增autotask下载启动 self._automateddownload = AutoTaskDownloadManager() # 新增删除程序产生无用文件 by judy 2020/08/20 self._dppf = DPPF() # 初始化输出器 OutputManagement.static_initial(outputconfig, stdconfig) # 任务启动开关 self.__switch = eval(basic_client_config.clientbusiness) self.all_business = EClientBusiness.ALL.value
def __init__(self, uniquename: str, fields: dict, extendfields: dict = None):
    """Converter base: validates the field table and stores optional extend fields.

    Raises:
        Exception: on an invalid unique name, an empty/invalid field dict,
            or a field value that is not a ConverterField.
    """
    if not (isinstance(uniquename, str) and uniquename != ""):
        raise Exception("Specified converter unique name is invalid.")
    if not isinstance(fields, dict) or len(fields) < 1:
        raise Exception(
            "Specified converter fields is not a dict or no field specified: %s"
            % uniquename)
    for fld in fields.values():
        if not isinstance(fld, ConverterField):
            raise Exception(
                "Specified converter filed is not a ConverterField")
    self._uniquename: str = uniquename
    self._fields: dict = fields
    # only non-empty string keys are copied from extendfields
    self._extendfields: dict = {}
    if isinstance(extendfields, dict) and len(extendfields) > 0:
        for key, value in extendfields.items():
            if isinstance(key, str) and key != "":
                self._extendfields[key] = value
    self._logger: MsLogger = MsLogManager.get_logger(self._uniquename)
def __init__(self, task: IscanTask):
    """Iscan search plugin base: binds the task and the search/output limits.

    Raises:
        Exception: when the task's configured search count is 0.
    """
    self.task = task
    self.suffix = "iscan_search"
    self.tmppath = clienttaskconfig.tmppath
    self.outpath = clienttaskconfig.outputpath
    # self._ha = HttpAccess()
    # plugin name
    self._name = type(self).__name__
    self._logger: MsLogger = MsLogManager.get_logger(
        f"{self._name}_{self.task.taskid}"
    )
    self._sqlfunc = DbManager
    # shared info
    # limits the number of search results
    self.count = 0
    self.count_limit = int(self.task.cmd.stratagyscan.search.count)
    # the configured count must not be 0
    if self.count_limit == 0:
        raise Exception("Task count cant be None")
    # pause-download support; stop flag, default: don't stop
    # False means keep downloading, True means stop
    self._stop_sign = False
    # program is running
    self._running = True
    # log file suffix, create by judy 2020/08/12
    self._log_suffix = "prg_log"
    self.output_count = 0
def __init__(self, task):
    """Webalyzer wrapper: pre-computes a case-insensitive CMS version lookup table."""
    self.task = task
    self._logger: MsLogger = MsLogManager.get_logger("webalyzer")
    # lower-case all keys once so later lookups are case-insensitive
    self._cmsver_lower = {name.lower(): ver for name, ver in cmsver.items()}
class ZmapParser:
    """Parses zmap scan output files (one IP per line) into PortInfo objects."""

    _logger: MsLogger = MsLogManager.get_logger("ZmapParser")

    def __init__(self):
        self._name = type(self).__name__

    @classmethod
    def parse_open_ports(cls, task: IscanTask, level: int, outfi: str,
                         port: str, transprotocol: str = 'tcp') -> iter:
        """yield Port"""
        try:
            if not os.path.isfile(outfi):
                return
            with open(outfi, mode='r', encoding='utf-8') as fs:
                while True:
                    line = fs.readline().strip()
                    # an empty line (including EOF) terminates parsing —
                    # zmap writes exactly one IP per line
                    if not isinstance(line, str) or line == '':
                        break
                    yield PortInfo(task, level, line.strip(), int(port),
                                   transprotocol)
        except Exception:
            cls._logger.error("Parse zmap scan result error: {}".format(
                traceback.format_exc()))
def __init__(self):
    """Nmap facade: wraps the open-port and alive-host scanners."""
    self._logger: MsLogger = MsLogManager.get_logger("Nmap")
    # scanner for open ports
    self._scanner_openports: NmapScannerOpenPorts = NmapScannerOpenPorts()
    # scanner for alive hosts
    self._scanner_alivehosts: NmapScannerAliveHosts = NmapScannerAliveHosts()
    self.tmpdir = clienttaskconfig.tmppath
def __init__(self):
    """Spider management: tracks spiders currently being processed per client."""
    self._spideradapter = SpiderAdapter()
    # DbManager is used as a namespace of sql helper functions
    self._sqlfunc = DbManager
    self._logger: MsLogger = MsLogManager.get_logger('SpiderManagent')
    # spiders currently being processed
    self._spider_dealing_dict: dict = {}
    self._spider_threads_locker = threading.Lock()
    self._clientid = basic_client_config.clientid
def __init__(self):
    """Auto plugin base: db access, work directories and a file lock."""
    self._sqlfunc = DbManager
    self._logger: MsLogger = MsLogManager.get_logger('AutoPlugin')
    self.tmppath = clienttaskconfig.tmppath
    self.outpath = clienttaskconfig.outputpath
    # guards concurrent file access
    self.file_locker = threading.RLock()
def __init__(self):
    """Proxy spider with a rate-limited HTTP accessor.

    NOTE(review): attribute is spelled ``_loggr`` (not ``_logger``) — kept
    as-is because other methods of this class may reference it.
    """
    ProxySpiderbase.__init__(self, False)
    self._loggr: MsLogger = MsLogManager.get_logger(
        self.__class__.__name__)
    # go slowly: interval=1 means one request per second
    self._ha: HttpAccess = HttpAccess(interval=1)
def __init__(
    self,
    uniquename: str,
    datamatcher: DataMatcher,
    maxwaitcount: int = 1,
    maxwaittime: float = 3,
    relation_inputer_src: list = None,
):
    """Task dispatcher: batches incoming data and allocates tasks on worker threads.

    Raises:
        Exception: when uniquename is not a non-empty string.
    """
    DealerBase.__init__(self, datamatcher)
    if not isinstance(uniquename, str) or uniquename == "":
        raise Exception("Task dispatcher uniquename cannot be None")
    self._strategymngr = StrategyManager()
    self._uniquename = uniquename
    self._logger: MsLogger = MsLogManager.get_logger("dispatcher_%s" %
                                                     uniquename)
    # fields
    self._relation_inputer_src = relation_inputer_src
    # batch size before dispatching; invalid values fall back to 1
    self._maxwaitcount: int = 1
    if isinstance(maxwaitcount, int) and maxwaitcount > 0:
        self._maxwaitcount = maxwaitcount
    # max seconds to wait before dispatching a partial batch; fallback 3
    self._maxwaittime: float = 3
    if type(maxwaittime) in [int, float] and maxwaittime > 0:
        self._maxwaittime = maxwaittime
    # incremental sleep time (seconds)
    self._increace_sleep_sec: int = 1
    # executors
    self._stdconvert = StandardConvertManagement(stdconvertconfig)
    # processing queue and threads
    self._task_queue = queue.Queue()
    self._dispatch_queue: dict = {}
    self._dispatch_queue_locker = threading.RLock()
    # data->tasks mapping, used to handle files according to task results
    self._data_map: list = []
    self._data_map_locker = threading.RLock()
    self._t_task_allocate = threading.Thread(target=self._task_allocate,
                                             name="taskallocate_{}".format(
                                                 self._uniquename),
                                             daemon=True)
    self._t_task_ok_judge = threading.Thread(target=self._task_ok_judge,
                                             name="taskokjudge_{}".format(
                                                 self._uniquename),
                                             daemon=True)
    self._timeok: bool = False
    self._timeok_locker = threading.Lock()
    self._t_timer = threading.Thread(target=self._timer_thread,
                                     name="tasktimer",
                                     daemon=True)
def __init__(self, servicetype: str) -> None:
    """Logic banner grabber bound to one service type.

    Raises:
        Exception: when servicetype is not a non-empty string.
    """
    if not (isinstance(servicetype, str) and servicetype != ""):
        raise Exception(
            "Invalid service type for initialing Logic Banner Grabber.")
    self._servicetype: str = servicetype
    # logger name is derived from the service type
    self._name: str = f"LD_{self._servicetype}"
    self._logger: MsLogger = MsLogManager.get_logger(self._name)
def static_init(
    cls,
    proxy_fetch_thread_count: int = 1,
    max_pool_item_count: int = 100,
    verify_thread_count: int = 5,
    recheck_interval_sec: float = 180,
    dbconfig: ProxyDbConfig = None,
    logger_hook=None,
    proxyspiders: list = None,
):
    """Provide uniform proxy API (one-time, thread-safe initialization).\n
    proxy_fetch_thread_count: fetch thread count for each proxy spider plugin\n
    max_pool_item_count: max proxy IPs in the local pool; fetching pauses beyond it\n
    verify_thread_count: thread count for proxy IP validity verification\n
    recheck_interval_sec: re-verification interval (seconds) for stored proxy IPs\n
    dbconfig: local storage configuration\n
    logger_hook: a logging callback, signature:\n
    log_func(self, msg: str, level: MsLogLevel)\n
    proxyspiders: list of proxy spider instances\n
    Usage: 1. Create your ProxySpider, inheriting ProxySpiderbase, and implement it.\n
    manager = ProxyManager()\n
    spider = YourProxySpider()\n
    manager.append_custom_proxy_spider(spider)\n
    manager.append/set_proxy_fetch_settings(params)\n
    manager.start_fetch_proxy()\n
    ProxyItem = manager.get_one_proxy(params)\n
    proxies = manager.get_proxies(count, params)\n
    manager.stop_fetch_proxy()\n
    manager.dispose()\n
    """
    # double-checked locking: fast path without the lock, re-check inside it
    if cls.__inst_initialed:
        return
    with cls.__inst_locker:
        if cls.__inst_initialed:
            return
        cls._logger = MsLogManager.get_logger(ProxyMngr.__name__)
        # fall back to the class's own log hook when none is supplied
        if not callable(logger_hook):
            logger_hook = cls._loghook
        cls.__inst: ProxyManager = ProxyManager(
            proxy_fetch_thread_count=proxy_fetch_thread_count,
            max_pool_item_count=max_pool_item_count,
            verify_thread_count=verify_thread_count,
            recheck_interval_sec=recheck_interval_sec,
            dbconfig=dbconfig,
            logger_hook=logger_hook,
            proxyspiders=proxyspiders,
        )
        cls._pool = cls.__inst.pool
        cls.__inst_initialed = True
def __init__(self, loggername: str = None):
    """Plugin base with an optional custom logger name."""
    # default to the concrete class name unless a non-empty name is supplied
    name = type(self).__name__
    if isinstance(loggername, str) and loggername != "":
        name = loggername
    self._name = name
    self._logger: MsLogger = MsLogManager.get_logger(f"{self._name}")
    # verification-code validity window (seconds); the original note said
    # "usually 3 minutes" but the value is 60 — TODO confirm
    self._effective_time = 60
    self._sqlfunc = DbManager
def __init__(self):
    """Proxy spider: compiles the ip:port pattern and a rate-limited HTTP accessor."""
    ProxySpiderbase.__init__(self, False)
    self._logger: MsLogger = MsLogManager.get_logger(
        self.__class__.__name__)
    # matches "ip:port" pairs in fetched pages
    self._reproxy = re.compile(r"([\d.]+?):(\d+)", re.S)
    # http accessor used to verify HTTP proxies; interval = seconds
    # between two consecutive requests
    self._ha: HttpAccess = HttpAccess(interval=1)
def __init__(self, cfg: ConvertConfig):
    """Standard convert manager over a validated converter configuration.

    Raises:
        Exception: on an invalid config, or a converter that is not a
            ConverterBase subclass instance.
    """
    if cfg is None or not isinstance(cfg, ConvertConfig):
        raise Exception("Invalid standard converter config.")
    # every configured converter must derive from ConverterBase
    for converter in cfg._converters.values():
        if not issubclass(converter.__class__, ConverterBase):
            raise Exception("Specified convert is invalid.")
    self.converterconfig = cfg
    self._logger: MsLogger = MsLogManager.get_logger("stdconvertmanager")
def __init__(self, cfg: InputConfig, ondatain: callable):
    """Input manager: queues incoming data so monitor threads never block.

    Raises:
        Exception: when cfg is not an InputConfig.
    """
    if cfg is None or not isinstance(cfg, InputConfig):
        raise Exception("Specified inputer config is invalid")
    self._config = cfg
    self._ondatain: callable = ondatain
    # outbound queue decoupling the data monitor thread from consumers
    self._datain_queue: queue.Queue = queue.Queue()
    self._logger: MsLogger = MsLogManager.get_logger("inputmanager")
def __init__(self):
    """Server entry: initializes output, input and service managers."""
    self._logger: MsLogger = MsLogManager.get_logger("idownserver")
    OutputManagement.static_initial(outputconfig, stdconfig)
    self._inputmanagement = InputManagement(inputconfig, self.on_data_in)
    self._servicemanager = ServiceManager()
    # periodically regenerate dicapp (it may need to be dynamic because
    # collector clients can go offline or crash)
    self._t_dicapp = threading.Thread(target=self._generate_dicapp,
                                      name='gen_dicapp',
                                      daemon=True)
def __init__(self):
    """Paid qiyun proxy spider: fetches quoted ip:port pairs using an API key."""
    ProxySpiderbase.__init__(self, False)
    self._logger: MsLogger = MsLogManager.get_logger(self.__class__.__name__)
    # matches "ip:port" inside double quotes
    self._reproxy = re.compile(r'"([\d.]+?):(\d+)"', re.S)
    # http accessor used to verify HTTP proxies; interval = seconds
    # between two consecutive requests
    self._ha: HttpAccess = HttpAccess(interval=1)
    # API key for the paid qiyun proxy service
    self.key = 'dd0b192e8199af0b47faf005aac4483b1efff860'
def __init__(self): self._logger: MsLogger = MsLogManager.get_logger("ServiceManager") # 所有处理器集合 # 采集端状态管理器 # 新任务分配器 # 任务回馈数据处理器 # 结果数据处理器 ServiceManager._dealers = dealerconfig._dealers # 任务发送器(这个处理器不接收任何文件,是处理本地数据库消息的处理器,所以单独出来) self._delivermanager = TaskDeliverManager()
def __init__(self, name: str, weight: float = 1, isforced: bool = True):
    """One named strategy carrying a weight and a forced flag.

    Raises:
        Exception: when any parameter fails validation.
    """
    if not (isinstance(name, str) and name != ""):
        raise Exception("Strategy param name is invalid.")
    # bool is excluded on purpose: type() check, not isinstance()
    if type(weight) not in [int, float] or weight < 0:
        raise Exception("Strategy param weight is invalid: %s" % name)
    if not isinstance(isforced, bool):
        raise Exception("Strategy param isforced is invalid: %s" % name)
    self._name = name
    self._weight = weight
    self._isforced = isforced
    self._logger: MsLogger = MsLogManager.get_logger("strategy_%s" % name)
def __init__(self):
    """Cookie keep-alive service: queues cookies and tracks in-flight tasks."""
    self._cookie_queue = Queue()
    self._sqlfunc = DbManager
    self._cookie_keeper = SpiderCookieKeep()
    self._logger: MsLogger = MsLogManager.get_logger("CookieKeepAlive")
    # default command configuration
    _defaultcmd: str = self._sqlfunc.get_default_idown_cmd().get("cmd")
    self.d_cmd = IdownCmd(_defaultcmd)
    # queue of tasks currently being processed
    self._dealing_queue: dict = {}
    # while new tasks are being processed, the cyclic download loop is skipped
    self._dealing_queue_locker = threading.Lock()
def __init__(self, token: str):
    """Git API v4 client bound to one access token."""
    assert isinstance(token, str)
    self._token: str = token
    # bake the token into the prepared header template
    self._header = self._header % self._token
    self._is_logined: bool = False
    self._login_locker = threading.RLock()
    self._logger: MsLogger = MsLogManager.get_logger("GitAPIv4")
    self._ha: HttpAccess = HttpAccess()
    # filled in after a successful login
    self._user_name: str = None
    self._user_login: str = None
    self._user_id: str = None
def __init__(self):
    """Spider manager allotter: routes tasks to the proper spider handlers."""
    # queues of tasks currently being processed
    self._spider_manage_queue_dict: dict = {}
    self._spider_manage_dealing_queue_locker = threading.Lock()
    self._logger: MsLogger = MsLogManager.get_logger("SpiderManagerAllot")
    self._batch_login_test = SpiderBatchLoginTest()
    self._download_task_store = SpiderDownloadTaskStore()
    self._login_only = SpiderLoginOnly()
    self._logout = SpiderLogout()
    self._online_check = SpiderOnlineCheck()
    self._register_check = SpiderRegisterCheck()
    self._store_vercode = SpiderStoreInput()
    # default command configuration
    _defcmdstr: str = DbManager.get_default_idown_cmd().get("cmd")
    self.defcmd: IdownCmd = IdownCmd(_defcmdstr)
def __init__(self, task: Task, appcfg: AppCfg, clientid: str, logger_name_ext: str = ""):
    """Spider plugin base bound to one task/app/client.

    Raises:
        Exception: on an invalid task, appcfg or clientid.
    """
    if not isinstance(task, Task):
        raise Exception("Task is invalid.")
    if not isinstance(appcfg, AppCfg):
        raise Exception("AppConfig is invalid.")
    if not isinstance(clientid, str) or clientid == "":
        raise Exception("Invalid clientid")
    self.task = task
    self._clientid: str = clientid
    self._appcfg = appcfg
    # logger and plugin name
    self._name = type(self).__name__
    loggername = f"{self._name}_{self.task.batchid}"
    if not logger_name_ext is None and not logger_name_ext == "":
        loggername += "_{}".format(logger_name_ext)
    self._logger: MsLogger = MsLogManager.get_logger(loggername)
    # http library object
    self._ha: HttpAccess = HttpAccess()
    # common fields holding widely-used info about the logged-in account
    # _userid: the site's unique identifier for the user
    self._userid: str = None
    # _account: an account name usable for login
    self._account: str = self.task.account
    # _username: user nickname
    self._username: str = None
    # _globaltelcode: international dialing code
    self._globaltelcode: str = self.task.globaltelcode
    # _phone: phone number
    self._phone: str = self.task.phone
    self._url: str = self.task.url
    self._host: str = self.task.host
    self._cookie: str = self.task.cookie
    # state fields
    self._errorcount: int = 0
    self.is_running: bool = False
    self.running_task = []
    # verification-code validity set to 900 seconds; 15 minutes is plenty
    # (codes are usually valid for at most 10 minutes)
    self._effective_time = 900
    # self._outputtgfile = OutputManage()
    self._sqlfunc = DbManager
    # thread running flag
    self._running = True
    # stop flag, default: don't stop; False means keep downloading, True means stop
    self._stop_sign = False
def __init__(self, explicitfilters: ExplicitFilters = None):
    """Strategy business base: loads the pre-ordered strategies and polling state."""
    # misc
    self._logger: MsLogger = MsLogManager.get_logger(
        self.__class__.__name__)
    # polling strategy state
    self._polling_index: int = 0
    self._polling_index_locker = threading.RLock()
    # strategies are already manually ordered; later code simply iterates them
    # NOTE: double-underscore access relies on name mangling within this class
    self.all_stgs: list = StrategyBuisinessBase.__stgconfig._strategies
    self.forced_stgs: list = [s for s in self.all_stgs if s._isforced]
    self.unforced_stgs: list = [
        s for s in self.all_stgs if not s._isforced
    ]
    self._explicit_filters: ExplicitFilters = explicitfilters
def __init__(self, toolmark: str):
    """Zgrab2 scanner wrapper: prepares a clean per-tool temp directory.

    Raises:
        Exception: when toolmark is not a non-empty string.
    """
    if not isinstance(toolmark, str) or toolmark == "":
        raise Exception("Zgrab2 scanner toolmark is invalid")
    self._logger: MsLogger = MsLogManager.get_logger(type(self).__name__)
    self._toolmark: str = toolmark
    self._tmpdir: str = os.path.abspath(tmpdir)
    # NOTE(review): os.path.abspath always returns str, so this fallback
    # looks unreachable — kept as defensive code
    if not isinstance(self._tmpdir, str):
        self._tmpdir = os.path.abspath("./_clienttmpdir")
    self._tmpdir = os.path.abspath(
        os.path.join(self._tmpdir, self._toolmark))
    # start from an empty directory on each run
    if os.path.isdir(self._tmpdir):
        helper_dir.remove_dirs(self._tmpdir)
    os.makedirs(self._tmpdir)
def __init__(self):
    """Strategy manager: registers the four task-type strategies in order."""
    self._strategies: list = []
    StrategyBuisinessBase.static_init(stgconfig)
    self._stgidowntask: StrategyIDownTask = StrategyIDownTask()
    self._strategies.append(self._stgidowntask)
    self._stgiscantask: StrategyIScanTask = StrategyIScanTask()
    self._strategies.append(self._stgiscantask)
    self._stgiscouttask: StrategyIScoutTask = StrategyIScoutTask()
    self._strategies.append(self._stgiscouttask)
    self._stgautotask: StrategyAutoTask = StrategyAutoTask()
    self._strategies.append(self._stgautotask)
    # misc
    self._logger: MsLogger = MsLogManager.get_logger("strategymanager")
def __init__(self, task: IscoutTask):
    """Iscout plugin base: binds the task, work directories and output limits."""
    self.task = task
    self.tmppath = clienttaskconfig.tmppath
    self.outpath = clienttaskconfig.outputpath
    self._ha = HttpAccess()
    # plugin name
    self._name = type(self).__name__
    self._logger: MsLogger = MsLogManager.get_logger(
        f"{self._name}_{self.task.taskid}")
    self._sqlfunc = DbManager
    # max number of output entries
    self.max_output = 10000
    # reason field, matched against the strike tools
    self.dtools = dtools
    # output statistics, modify by judy 2020/08/10
    self.output_count = 0
    # log file suffix, create by judy 2020/08/12
    self._log_suffix = 'prg_log'