def scan_plugin_fixture():
    """
    Yield a ``sql_basic.ScanPlugin`` instance prepared for testing.

    Before the plugin is created, a ReportModel for "www.test-host.com_80"
    and an empty failed-task set are registered as internal shared objects
    in the Communicator. The plugin's ``send_request`` and ``report``
    attributes are then rebound to the ``send_request`` / ``report``
    callables that are in scope where this fixture is defined.
    """
    shared_objects = (
        ("report_model", ReportModel("www.test-host.com_80")),
        ("failed_task_set", set()),
    )
    for shared_key, shared_value in shared_objects:
        Communicator().set_internal_shared(shared_key, shared_value)

    plugin = sql_basic.ScanPlugin()
    replacements = (("send_request", send_request), ("report", report))
    for attr_name, func in replacements:
        # Bind the replacement so it receives the plugin as ``self``.
        setattr(plugin, attr_name, types.MethodType(func, plugin))
    yield plugin
def new_scanner(self, module_params):
    """
    Create a new scan task.

    Parameters:
        module_params - dict with the structure {
            "host": str, target host,
            "port": int, target port,
            "config": dict, configuration
        }

    Raises:
        exceptions.MaximumScannerExceede - no free scanner slot is available
        exceptions.TargetIsScanning - the target is already being scanned by
            another task
    """
    self._check_alive()

    # Pick the first free slot; the slot index doubles as the scanner id.
    idle_scanner = next(
        (slot for slot in range(self.max_scanner)
         if self.scanner_list[slot] is None),
        None)
    if idle_scanner is None:
        raise exceptions.MaximumScannerExceede

    host = module_params["host"]
    port = module_params["port"]

    already_scanning = any(
        info is not None and info["host"] == host and info["port"] == port
        for info in self.scanner_list)
    if already_scanning:
        raise exceptions.TargetIsScanning

    # An empty incremental update is applied for this target before forking.
    self._incremental_update_config(host + "_" + str(port), {})

    scanner_name = "Scanner_" + str(idle_scanner)
    Communicator().reset_all_value(scanner_name)
    pid = ForkProxy().fork({
        "module_cls": modules.Scanner,
        "instance_id": idle_scanner,
        "module_params": {
            "host": host,
            "port": port
        }
    })
    Communicator().set_value("pid", pid, scanner_name)

    self.scanner_list[idle_scanner] = {
        "pid": pid,
        "host": host,
        "port": port,
        "cancel": 0,
        "pause": 0
    }
def send_data(self, rasp_result_ins):
    """
    Send a RaspResult instance to its rasp_result queue.

    The queue name is "rasp_result_queue_" followed by the id returned by
    ``rasp_result_ins.get_result_queue_id()``. After sending, the
    "rasp_result_request" counter is incremented.

    Parameters:
        rasp_result_ins - the RaspResult instance to send
    """
    target_queue = "rasp_result_queue_%s" % (
        rasp_result_ins.get_result_queue_id(),)
    request_id = rasp_result_ins.get_request_id()
    Logger().info(
        "Send scan request data with id:{} to queue:{}".format(
            request_id, target_queue))

    Communicator().send_data(target_queue, rasp_result_ins)
    Communicator().increase_value("rasp_result_request")
def launch(self):
    """
    Main function of the launcher.

    Creates the Communicator, the module logger and the ForkProxy, starts
    the Preprocessor and Monitor processes (recording their pids), installs
    a SIGCHLD handler and then blocks in ``ForkProxy().listen()``.
    A KeyboardInterrupt during listening sets ``self.exit`` to True.
    """
    def _start_module(module_cls):
        # Start one module process and return its pid.
        module_proc = modules.Process(module_cls)
        module_proc.start()
        return module_proc.pid

    self._set_affinity()

    # These components are created before any child module is started.
    Communicator()
    Logger().init_module_logger()
    ForkProxy()
    Logger().info("Launcher init success!")

    self.preprocessor_pid = _start_module(modules.Preprocessor)
    Logger().info("Preprocessor fork success!")

    self.monitor_pid = _start_module(modules.Monitor)
    Logger().info("Monitor fork success!")

    signal.signal(signal.SIGCHLD, self._wait_child)
    try:
        ForkProxy().listen()
    except KeyboardInterrupt:
        self.exit = True
async def _fetch_task_from_db(self):
    """
    Fetch new (non-scan) requests for the current target from the database.

    First reports the accumulated failed tasks via ``mark_result`` and
    clears the failed-task set. Then polls ``get_new_scan`` until at least
    one task is returned or the "cancel" shared value becomes non-zero.
    Every fetched task is handed to every loaded plugin, and
    ``scan_queue_remaining`` is increased by the number of fetched tasks.
    """
    await self.new_scan_model.mark_result(
        self.mark_id, list(self.failed_task_set))
    self.failed_task_set.clear()

    base_delay = 1
    idle_rounds = 0

    while Communicator().get_value("cancel") == 0:
        tasks = await self.new_scan_model.get_new_scan(self.fetch_count)
        fetched = len(tasks)

        if fetched == 0:
            # Back off linearly, capped at 10 * base_delay seconds.
            idle_rounds = min(idle_rounds + 1, 10)
            await asyncio.sleep(base_delay * idle_rounds)
            continue

        for task in tasks:
            for name in self.plugin_loaded:
                # Task format: {"id": id, "data": rasp_result_json}
                self.plugin_loaded[name].add_task(task)
                Logger().debug("Send task with id: {} to plugins.".format(task["id"]))
        self.scan_queue_remaining += fetched
        return
def kill_scanner(self, scanner_id):
    """
    Forcibly stop a scanner process.

    The process is first asked to terminate; if it has not exited within
    5 seconds it is killed and given another 5 seconds.

    Parameters:
        scanner_id - int, id of the scanner process to stop

    Returns:
        True if the process was stopped (its scanner info is removed and its
        shared "pid" value is reset to 0), False if it is still running.

    Raises:
        exceptions.InvalidScannerId - no pid is recorded for scanner_id, or
            the recorded pid does not belong to a running process
    """
    pid = self._scanner_info.get_pid(scanner_id)
    if pid is None:
        raise exceptions.InvalidScannerId

    try:
        proc = psutil.Process(pid)
    except psutil.NoSuchProcess:
        Logger().warning("Try to kill not running scanner!")
        raise exceptions.InvalidScannerId

    proc.terminate()
    try:
        proc.wait(timeout=5)
    except psutil.TimeoutExpired:
        proc.kill()
        try:
            proc.wait(timeout=5)
        except psutil.TimeoutExpired:
            # Still alive after SIGKILL: report failure through the return
            # value below instead of leaking TimeoutExpired to the caller.
            pass

    if proc.is_running():
        return False

    self._scanner_info.remove_scanner_info(scanner_id)
    module_name = "Scanner_" + str(scanner_id)
    Communicator().set_value("pid", 0, module_name)
    return True
async def post(self):
    """
    Handle a POST request.

    The body must be sent with a Content-Type starting with
    "application/json". Scan results are forwarded with ``send_data``;
    everything else goes through ``dedup_data``.

    Responses:
        {"status": 0, "msg":"ok"}           - data accepted
        {"status": 1, "msg":"data invalid"} - an OriExpectedException was
            raised while processing (the "invalid_data" counter is increased)
        HTTP 500                             - any other exception
    """
    # Bound before the try block so both handlers below can always log it.
    data = self.request.body
    try:
        content_type = self.request.headers.get("Content-Type", "None")
        if not content_type.startswith("application/json"):
            raise exceptions.ContentTypeInvalid

        Logger().info("Received request data: " + data.decode('utf-8'))
        rasp_result_ins = rasp_result.RaspResult(data)
        if rasp_result_ins.is_scan_result():
            self.send_data(rasp_result_ins)
        else:
            await self.dedup_data(rasp_result_ins)
        self.write('{"status": 0, "msg":"ok"}\n')
    except exceptions.OriExpectedException:
        self.write('{"status": 1, "msg":"data invalid"}\n')
        Communicator().increase_value("invalid_data")
        Logger().warning(
            "Invalid data: {} posted to http server, rejected!".format(
                data))
    except Exception as e:
        Logger().error(
            "Unexpected error occured when process data:{}".format(data),
            exc_info=e)
        self.send_error(500)
    return
def add_result(self, rasp_result):
    """
    Store a RaspResult in the cache, signal its arrival event, and purge
    expired entries from the front of the cache.

    Each cache entry is indexed as: [0] an object whose ``set()`` is called
    on arrival, [1] an expiry timestamp compared against ``time.time()``,
    [2] the stored result. If the result's scan request id has no entry
    (it was never registered), the result is dropped and the
    "dropped_rasp_result" counter is increased.

    Parameters:
        rasp_result - the RaspResult instance to add
    """
    scan_request_id = rasp_result.get_scan_request_id()
    collection = self.rasp_result_collection

    try:
        entry = collection[scan_request_id]
        entry[2] = rasp_result
        entry[0].set()
    except KeyError:
        Communicator().increase_value("dropped_rasp_result")
        Logger().warning("Drop no registered rasp result data: {}".format(str(rasp_result)))

    # Entries are examined oldest-first; stop at the first unexpired one.
    while collection:
        oldest_key = next(iter(collection))
        oldest = collection[oldest_key]
        if not (oldest[1] < time.time()):
            break
        # NOTE(review): entries whose first element is a plain dict are
        # purged silently; the reason is not visible here - confirm.
        if type(oldest[0]) is not dict:
            Logger().debug("Rasp result with id: {} timeout, dropped".format(oldest_key))
        collection.popitem(last=False)
async def async_run(self):
    """
    Main coroutine of the scanner.

    Registers SIGINT/SIGTERM handlers, initialises the audit context, starts
    one task per loaded plugin plus the rasp-result fetch task, then runs
    ``_fetch_new_scan`` until it returns. Afterwards all tasks are cancelled
    and awaited, and the shared values are reset.
    """
    # Register signal handlers.
    loop = asyncio.get_event_loop()
    for signame in {'SIGINT', 'SIGTERM'}:
        loop.add_signal_handler(
            getattr(signal, signame),
            functools.partial(self._exit, signame, loop))

    # Initialise the audit context.
    await audit_tools.context.Context().async_init()

    # Start the plugins.
    plugin_tasks = [
        loop.create_task(self.plugin_loaded[plugin_name].async_run())
        for plugin_name in self.plugin_loaded
    ]

    # Start the coroutine that reads the scan-result queue.
    task_fetch_rasp_result = loop.create_task(self._fetch_from_queue())

    # Run the new-scan fetch loop until it finishes.
    await self._fetch_new_scan()

    # Stop every task, then reset the shared memory.
    task_fetch_rasp_result.cancel()
    await asyncio.wait({task_fetch_rasp_result})
    for task in plugin_tasks:
        task.cancel()
    if plugin_tasks:
        # asyncio.wait raises ValueError on an empty set, which would skip
        # the shared-memory reset below when no plugin is loaded.
        await asyncio.wait(set(plugin_tasks),
                           return_when=asyncio.ALL_COMPLETED)
    Communicator().reset_all_value()
def get_scan_plugin_logger(self, plugin_name):
    """
    Configure and return the logger of a scan plugin.

    The logger writes to "<module_log_path>/plugin_<plugin_name>.log"
    through a rotating file handler sized by the "log.rotate_size" (MiB)
    and "log.rotate_num" settings. It is detached from the logging
    hierarchy (no parent, no propagation) and uses ``self._log_level``.

    Parameters:
        plugin_name - str, name of the scan plugin

    Returns:
        logging.Logger, the configured logger
    """
    log_path = self.module_log_path
    handler = logging.handlers.RotatingFileHandler(
        log_path + "/plugin_" + plugin_name + ".log",
        mode='a',
        maxBytes=Config().get_config("log.rotate_size") * 1024 * 1024,
        backupCount=Config().get_config("log.rotate_num"))

    # The logger name includes the current module name; previously the
    # literal text "module_name_" was used and the fetched name was ignored.
    module_name = Communicator().get_module_name()
    logger = logging.getLogger(
        "openrasp_iast." + module_name + "_" + plugin_name)

    log_fmt = '[%(asctime)s - %(levelname)s] %(message)s [file: %(pathname)s , line %(lineno)d]'
    date_fmt = '%Y-%m-%d %H:%M:%S'
    fmt = logging.Formatter(fmt=log_fmt, datefmt=date_fmt)
    handler.setFormatter(fmt)

    logger.parent = None
    logger.propagate = False
    # Close handlers from an earlier call so their log files are released.
    for stale_handler in list(logger.handlers):
        stale_handler.close()
    logger.handlers = []
    logger.addHandler(handler)
    logger.setLevel(self._log_level)
    return logger
def new_scanner(self, module_params):
    """
    Create a new scan task.

    Parameters:
        module_params - dict with the structure {
            "host": str, target host,
            "port": int, target port,
            "config": dict, configuration
        }

    Raises:
        exceptions.TargetIsScanning - the target is already being scanned by
            another task
        exceptions.MaximumScannerExceede - no idle scanner is available
    """
    host, port = module_params["host"], module_params["port"]
    host_port = common.concat_host(host, port)

    if self._scanner_info.is_scanning(host_port):
        raise exceptions.TargetIsScanning

    scanner_id = self._scanner_info.get_idle_scanner()
    if scanner_id is None:
        raise exceptions.MaximumScannerExceede

    # Make sure a scan configuration exists for this target.
    self._config.mod_config(host_port, {})

    # Reset the scanner's shared values, then fork the scanner process.
    shared_name = "Scanner_" + str(scanner_id)
    Communicator().reset_all_value(shared_name)
    pid = ForkProxy().fork({
        "module_cls": modules.Scanner,
        "instance_id": scanner_id,
        "module_params": {
            "host": host,
            "port": port
        }
    })

    # Record the pid in shared memory and in the scanner bookkeeping.
    Communicator().set_value("pid", pid, shared_name)
    self._scanner_info.set_scanner_info(scanner_id, pid, host, port)
def _incremental_update_config(self, host_port, config):
    """
    Incrementally update the runtime scan configuration of a target.

    The current configuration is taken from the in-memory cache, else from
    the database, else from a copy of the default configuration. The keys
    present in ``config`` are merged in, the version is increased by one,
    and the result is written to the database and the cache. If a scanner
    is registered for the target, its rate boundaries and shared
    "config_version" value are updated as well.

    Parameters:
        host_port - str, host_port identifier of the target
        config - dict, the configuration entries to update; recognised keys
            are "scan_plugin_status", "scan_rate", "white_url_reg" and
            "scan_proxy"
    """
    import copy  # local import: the file's import block is outside this block

    config_model = ConfigModel(table_prefix="", use_async=True,
                               create_table=True, multiplexing_conn=True)
    if host_port not in self._config_cache:
        origin_config_json = config_model.get(host_port)
        if origin_config_json is None:
            # Work on a private copy: the dict is mutated and cached below,
            # and must not alias the shared default configuration.
            origin_config = copy.deepcopy(self._default_config)
        else:
            origin_config = json.loads(origin_config_json)
    else:
        origin_config = self._config_cache[host_port]

    version = origin_config["version"]

    if "scan_plugin_status" in config:
        for plugin_name in config["scan_plugin_status"]:
            origin_config["scan_plugin_status"][plugin_name][
                "enable"] = config["scan_plugin_status"][plugin_name][
                    "enable"]

    if "scan_rate" in config:
        for key in config["scan_rate"]:
            # Negative values leave the current setting untouched.
            if config["scan_rate"][key] >= 0:
                origin_config["scan_rate"][key] = config["scan_rate"][key]
        # Keep the interval range consistent: max is raised to min.
        if origin_config["scan_rate"][
                "min_request_interval"] > origin_config["scan_rate"][
                    "max_request_interval"]:
            origin_config["scan_rate"][
                "max_request_interval"] = origin_config["scan_rate"][
                    "min_request_interval"]

    if "white_url_reg" in config:
        origin_config["white_url_reg"] = config["white_url_reg"]

    if "scan_proxy" in config:
        origin_config["scan_proxy"] = config["scan_proxy"]

    # Update the config version in the database, the cache and shared memory.
    origin_config["version"] = version + 1
    config_model.update(host_port, json.dumps(origin_config))
    self._config_cache[host_port] = origin_config

    # Update the rate control of the running scanner, if any.
    # NOTE(review): the attribute is spelled `_scannner_info` (three n's);
    # kept as-is because its definition is outside this block - confirm.
    scanner_id = self._scannner_info.get_scanner_id(host_port)
    if scanner_id is not None:
        self._set_boundary_value(scanner_id, origin_config["scan_rate"])
        Communicator().set_value("config_version", origin_config["version"],
                                 "Scanner_" + str(scanner_id))
async def dedup_data(self, rasp_result_ins):
    """
    De-duplicate a non-scan request (new request data).

    A request is dropped when the dedup plugin returns no hash, when its
    hash is found by the LRU check, or when the storage reports that it was
    not stored. Otherwise it is counted as a new request.

    Parameters:
        rasp_result_ins - the RaspResult instance to de-duplicate

    Raises:
        exceptions.DatabaseError - storing the request failed; the hash is
            removed from the LRU before the error is re-raised
    """
    self.update_setting()

    hash_str = self.dedup_plugin.get_hash(rasp_result_ins)
    if hash_str is None:
        Logger().debug(
            "Drop white list request with request_id: {}".format(
                rasp_result_ins.get_request_id()))
        Communicator().increase_value("duplicate_request")
        return

    host_port = rasp_result_ins.get_host_port()
    try:
        # check() raising KeyError marks the hash as not seen yet.
        self.dedup_lru.check(host_port, hash_str)
        Logger().info(
            "Drop duplicate request with request_id: {} (request in lru)"
            .format(rasp_result_ins.get_request_id()))
        Communicator().increase_value("duplicate_request")
        return
    except KeyError:
        rasp_result_ins.set_hash(hash_str)

    try:
        data_stored = await self.new_request_storage.put(rasp_result_ins)
    except exceptions.DatabaseError as e:
        self.dedup_lru.delete_key(host_port, hash_str)
        raise e

    if data_stored:
        message = "Get new request with request_id: {}"
        counter = "new_request"
    else:
        message = "Drop duplicate request with request_id: {}"
        counter = "duplicate_request"
    Logger().info(message.format(rasp_result_ins.get_request_id()))
    Communicator().increase_value(counter)
def _init_db(self):
    """
    Initialise the database models of the scan target.

    Creates the NewRequestModel for "<target_host>_<target_port>", resets
    its unscanned items, and registers a ReportModel with the same prefix
    as the internal shared "report_model".
    """
    prefix = "_".join((self.target_host, str(self.target_port)))

    self.new_scan_model = NewRequestModel(prefix)
    self.new_scan_model.reset_unscanned_item()

    Communicator().set_internal_shared("report_model", ReportModel(prefix))
def update_setting(self):
    """
    Check for a pending preprocessor action and apply it.

    When an action is available, every host_port listed under its
    "lru_clean" key has its request storage reset and its LRU cleaned.
    """
    pending_action = Communicator().get_preprocessor_action()
    if pending_action is None:
        return

    # Apply the "clean LRU" action for each listed target.
    for target in pending_action["lru_clean"]:
        self.new_request_storage.reset(target)
        self.dedup_lru.clean_lru(target)
def test_send_rasp_result_data(preprocessor_fixture):
    """
    Test the handling of rasp_result type JSON data.

    Posts a rasp_result payload, expects an HTTP 200 answer with status 0,
    then polls "rasp_result_queue_0" (up to 10 attempts, 0.5 s apart) and
    checks that the queued item carries the posted request id.
    """
    json_data = http_data["rasp_result"]
    try:
        r = http_sender.send_json(json_data, api_path)
    except Exception as e:
        raise AssertionError(
            "Sending rasp_result data failed: {!r}".format(e)) from e

    assert r.status_code == 200
    assert json.loads(r.text)["status"] == 0

    data = None
    for _ in range(10):
        try:
            data = Communicator().get_data_nowait("rasp_result_queue_0")
        except Exception:
            time.sleep(0.5)
        else:
            # Stop polling once an item arrived; otherwise the loop keeps
            # sleeping and could overwrite the item with a later one.
            break

    assert data is not None, "No data arrived in rasp_result_queue_0"
    assert data.get_request_id() == json_data["context"]["requestId"]
def __init__(self):
    """
    Initialise the scan plugin base state.

    When running inside a module whose name starts with "Scanner", a
    dedicated plugin logger is used; otherwise the global Logger is used.
    The shared "report_model" and "failed_task_set" must have been set in
    the Communicator before instantiation; if they are missing, an error
    is logged and the process exits with status 1.
    """
    running_in_scanner = Communicator().get_module_name().startswith("Scanner")
    self.logger = (
        Logger().get_scan_plugin_logger(self.plugin_info["name"])
        if running_in_scanner else Logger())

    self._enable = True  # whether the plugin is enabled
    self._white_reg = None  # whitelist for scanned urls
    self._proxy_url = None  # proxy used for scanning
    self._scan_queue = queue.Queue()  # task queue
    self._last_scan_id = 0  # database id of the most recently finished task
    self._scan_num = 0  # number of urls scanned so far
    self._has_failed_reuqest = False  # set when a request failed to connect

    config = Config()
    self._request_timeout = config.get_config("scanner.request_timeout")
    self._max_concurrent_task = config.get_config(
        "scanner.max_concurrent_request")

    # The shared report_model and failed_task_set have to be registered
    # before a ScanPluginBase subclass is instantiated.
    try:
        self._report_model = Communicator().get_internal_shared(
            "report_model")
        self._failed_set = Communicator().get_internal_shared(
            "failed_task_set")
    except exceptions.InternalSharedKeyError:
        Logger().error(
            "Try to init scan_plugin before set internal shared key in Communicator! Check 'error.log' for more information."
        )
        sys.exit(1)

    self._request_session = audit_tools.Session()
    self._request_data = audit_tools.RequestData
    self.mutant_helper = audit_tools.MutantHelper()
    self.checker = audit_tools.Checker()

    if running_in_scanner:
        self.logger.info("Scanner plugin {} init success!".format(
            self.plugin_info["name"]))
def _is_pause(self):
    """
    Tell whether the scanner is paused.

    Returns:
        bool - False when the shared "pause" value of this module is 0,
        True otherwise
    """
    pause_flag = Communicator().get_value("pause", self.module_name)
    return pause_flag != 0
def test_launcher():
    """
    Start the launcher in a subprocess, check that it spawned more than two
    descendant processes, terminate it, and verify that all descendants
    exit afterwards.
    """
    proc = multiprocessing.Process(target=Launcher().launch)
    proc.start()
    time.sleep(2)
    module_procs = psutil.Process(proc.pid).children(recursive=True)

    assert len(module_procs) > 2
    proc.terminate()
    proc.join(5)
    if proc.is_alive():
        # The pid placeholder was previously never filled in.
        raise Exception(
            "launcher process with pid {} may not be killed success!".format(
                proc.pid))

    # Wait two monitor schedule intervals before checking the children.
    time.sleep(Config().get_config("monitor.schedule_interval") * 2)
    for child in module_procs:
        try:
            child.wait(5)
        except psutil.TimeoutExpired:
            assert False, "child process {} did not exit".format(child.pid)
    Communicator.reset()
def _check_alive(self):
    """
    Refresh the liveness state of the current scan tasks.

    Every occupied slot whose shared "pid" value is 0 is released by
    setting its ``scanner_list`` entry back to None.
    """
    finished_slots = [
        slot for slot in range(self.max_scanner)
        if self.scanner_list[slot] is not None
        and Communicator().get_value("pid", "Scanner_" + str(slot)) == 0
    ]
    for slot in finished_slots:
        self.scanner_list[slot] = None
def _is_req_reach_limit(self):
    """
    Tell whether the number of concurrent requests reached its limit.

    Returns:
        bool - True when ``current_requests_num`` is greater than or equal
        to the shared "max_concurrent_request" value
    """
    limit = Communicator().get_value("max_concurrent_request")
    return self.current_requests_num >= limit
def get_auto_start(self):
    """
    Get the state of the auto-start-scan switch.

    Returns:
        bool - True when the Monitor's shared "auto_start" value equals 1
    """
    switch_value = Communicator().get_value("auto_start", "Monitor")
    return switch_value == 1
def __init__(self, **kwargs):
    """
    Initialise the scanner.

    Parameters:
        kwargs - must contain "host" and "port" of the scan target
    """
    # Target taken from the keyword arguments.
    self.target_host, self.target_port = kwargs["host"], kwargs["port"]

    self._init_scan_config()

    # Shared set used to record failed requests so they can be marked.
    failed_tasks = set()
    self.failed_task_set = failed_tasks
    Communicator().set_internal_shared("failed_task_set", failed_tasks)

    # The module id is the part of the module name after the last "_".
    self.module_id = Communicator().get_module_name().rpartition("_")[2]

    Communicator().set_value("max_concurrent_request", 1)
    min_interval = Config().get_config("scanner.min_request_interval")
    Communicator().set_value("request_interval", min_interval)

    self._init_db()
    self._init_plugin()

    # Apply the runtime configuration.
    self._update_scan_config()
def __init__(self, table_prefix=None, use_async=True, create_table=True, multiplexing_conn=False):
    """
    Initialise the model and its database connection.

    Parameters:
        table_prefix - table name prefix, built from the scan target as
            host + "_" + str(port); when None no table model is created and
            the instance only serves the base class methods
        use_async - whether to use the async-capable database class,
            True by default
        create_table - whether to create the table when it does not exist,
            True by default
        multiplexing_conn - any truthy value selects the shared
            ``BaseModel.mul_database`` connection instead of opening a new
            one, False by default
            (NOTE(review): the earlier documentation also described an int
            value as a connection-pool size; that is not visible in this
            block - confirm)

    Errors:
        A missing table with create_table=False raises
        exceptions.TableNotExist internally; it and any other exception are
        passed to ``self._handle_exception``.
    """
    self.use_async = use_async
    try:
        if multiplexing_conn:
            database = BaseModel.mul_database
        else:
            db_cls = (peewee_async.MySQLDatabase
                      if self.use_async else peewee.MySQLDatabase)
            database = db_cls(**self.connect_para)
            database.connect()

        # Without a table_prefix no table model is set up.
        if table_prefix is not None:
            self._model = self._create_model(database, table_prefix)
            if not self._model.table_exists():
                if not create_table:
                    raise exceptions.TableNotExist
                try:
                    database.create_tables([self._model])
                    Logger().debug("Create table {}_{}".format(
                        table_prefix, self.__class__.__name__))
                    if self.__class__.__name__ == "NewRequestModel":
                        Communicator().update_target_list_status()
                except peewee.InternalError:
                    # Ignored on purpose by the original code.
                    pass

        self.database = database
    except exceptions.TableNotExist as e:
        self._handle_exception(
            "Table with prefix {} not found!".format(table_prefix), e)
    except Exception as e:
        self._handle_exception("Mysql Connection Fail!", e)
async def _fetch_from_queue(self):
    """
    Fetch the RaspResult of scan requests and pass them to the receiver.

    Runs forever: on every round the scan configuration is refreshed when
    the shared "config_version" is newer than the local one, then one item
    is read from "rasp_result_queue_<module_id>" without blocking and
    handed to ``RaspResultReceiver().add_result``. When the queue is empty
    the coroutine sleeps, with a delay growing from 0.1 s up to 1 s.
    """
    queue_name = "rasp_result_queue_" + self.module_id
    base_delay = 0.1
    idle_rounds = 0

    Logger().debug("Fetch task is running, use queue: " + queue_name)
    while True:
        remote_version = Communicator().get_value("config_version")
        if remote_version > self.scan_config["version"]:
            self._update_scan_config()

        try:
            item = Communicator().get_data_nowait(queue_name)
            Logger().debug("From rasp_result_queue got data: " + str(item))
            result_receiver.RaspResultReceiver().add_result(item)
            Logger().debug("Send data to rasp_result receiver: {}".format(
                item.get_request_id()))
            idle_rounds = 0
        except exceptions.QueueEmpty:
            # Linear back-off, capped at 10 * base_delay.
            idle_rounds = min(idle_rounds + 1, 10)
            await asyncio.sleep(base_delay * idle_rounds)
def test_clean_lru(preprocessor_fixture):
    """
    Test cleaning the LRU.

    Empties the result table of the test target, requests an LRU clean for
    it, posts one new request and expects exactly one row in the table.
    """
    result_table = "www.test-host.com_80_ResultList"

    helper.clean_table(result_table)
    Communicator().set_clean_lru(["www.test-host.com_80"])

    try:
        response = http_sender.send_json(http_data["new_request_1"], api_path)
    except Exception:
        assert False
    else:
        assert response.status_code == 200
        assert json.loads(response.text)["status"] == 0
        assert helper.get_data_count(result_table) == 1
def preprocessor_fixture():
    """
    Start a Preprocessor module process for a test and tear it down after.

    Setup resets the database, creates the Communicator and Logger and
    starts the process, which is yielded to the test. Teardown terminates
    the process and the descendants found at teardown time, then resets the
    database and the Communicator.
    """
    helper.reset_db()
    Communicator()
    Logger()

    preprocessor = modules.Process(modules.Preprocessor)
    preprocessor.start()

    yield preprocessor

    root = psutil.Process(preprocessor.pid)
    # Collected before terminating the root process.
    descendants = root.children(recursive=True)
    try:
        root.terminate()
        root.wait(10)
        preprocessor.join(5)
        for descendant in descendants:
            descendant.terminate()
            descendant.wait(10)
    except psutil.TimeoutExpired:
        raise Exception("Module process may not be killed success!")

    helper.reset_db()
    Communicator.reset()
def _terminate_modules(self):
    """
    Stop all other modules.

    Collects the shared pids of every scanner instance, the Preprocessor
    and the preprocessor HTTP processes, kills the process tree of each
    non-zero pid, and finally kills the parent process when its pid is
    greater than 1.
    """
    scanner_count = Config().get_config("scanner.max_module_instance")
    pids = [
        Communicator().get_value("pid", "Scanner_" + str(index))
        for index in range(scanner_count)
    ]
    pids.append(Communicator().get_value("pid", "Preprocessor"))
    pids.extend(Communicator().get_pre_http_pid())

    # A pid of 0 is skipped.
    for pid in pids:
        if pid != 0:
            self._kill_proc_tree(pid)

    parent_pid = os.getppid()
    if parent_pid > 1:
        try:
            psutil.Process(parent_pid).kill()
        except Exception as e:
            Logger().error("Kill launcher failed", exc_info=e)
def get_tables(self):
    """
    Get the database table prefixes of all scan targets.

    The cached list is rebuilt only when the shared target-list status is
    greater than the status seen at the last rebuild. A prefix is the name
    of a table ending in "_resultlist" (case-insensitive) with that suffix
    removed.

    Returns:
        list of str - the cached table prefixes
    """
    current_status = Communicator().get_target_list_status()
    if current_status > self._table_status:
        suffix = "_resultlist"
        all_tables = BaseModel(multiplexing_conn=True).get_tables()
        self._table_list = [
            name[:-len(suffix)] for name in all_tables
            if name.lower().endswith(suffix)
        ]
        self._table_status = current_status
    return self._table_list
def run(self):
    """
    Start the HTTP server.

    Binds the configured port (exiting with status 1 on OSError), starts
    the configured number of server processes, re-initialises the
    Communicator for the current process, records the process pid
    (retrying every 3 seconds until it is accepted) and finally runs the
    tornado IO loop.
    """
    config = Config()
    server = tornado.httpserver.HTTPServer(
        self.app,
        max_buffer_size=config.get_config("preprocessor.max_buffer_size"))

    try:
        server.bind(Config().get_config("preprocessor.http_port"))
    except OSError as e:
        Logger().critical("Preprocessor bind port error!", exc_info=e)
        sys.exit(1)

    # Several child processes are created here, so the Communicator has to
    # be initialised again.
    server.start(Config().get_config("preprocessor.process_num"))
    Communicator().init_new_module(type(self).__name__)

    # Record the pid, retrying until it is accepted.
    while not Communicator().set_pre_http_pid(os.getpid()):
        running = ", ".join(str(x) for x in Communicator().get_pre_http_pid())
        Logger().error("Preprocessor HTTP Server set pid failed! Running pids: {}".format(running))
        time.sleep(3)

    tornado.ioloop.IOLoop.current().start()