def _worker(self):
    current_name = threading.current_thread().name
    logger.info("{} start!".format(current_name))
    while self.is_running():
        # Fetch a task; if nothing is available, loop again.
        task_priority, search_task = self.get_task_from_queue()
        # Explicit None checks so a priority of 0 is not mistaken for "no task".
        if task_priority is None or search_task is None:
            continue
        # Unpack the task data.
        srid = search_task.get("search_rule_id")
        rule_name = search_task.get("search_rule_name")
        rule_content = search_task.get("search_rule_content")
        logger.debug("parse task data done.")
        # Request each result page in turn.
        for page_num in range(1, self.search_page_max_size + 1):
            # Build the request payload and headers.
            request_data = self.build_request_data(rule_content, page_num)
            request_header = self.build_request_header()
            if request_header is None:
                logger.error("No available token found. Skipping this search task.")
                break
            # Send the request; a None response means a stop signal was received.
            response = self._request_page(request_header, request_data)
            if response is None:
                break
            logger.debug("response.text: {}".format(response.text))
            # logger.debug("response header: {}".format(response.headers))
            # Normal response, start parsing. parse_response returns:
            # return_val = {
            #     "filter_tasks": [],
            #     "has_next_page": True,
            #     "error": None
            # }
            results = self.parse_response(response, srid, rule_name)
            if results["error"]:
                # Parsing failed; should the current page be requested again?
                continue
            # Push the generated filter tasks onto the filter queue.
            for task in results["filter_tasks"]:
                self.push_to_queue(task_priority, task)
            # Use has_next_page to decide whether to request the next page.
            if not results["has_next_page"]:
                logger.debug("Skip remaining pages because 'has_next_page' is False.")
                break
    logger.info("{} end!".format(current_name))
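# The loop above relies on get_task_from_queue() returning (None, None) when no
# task is available, so the worker can re-check is_running() instead of blocking
# forever on an empty queue. A minimal sketch of that contract; the queue
# attribute name and the 1-second timeout are assumptions, not from the source:
def get_task_from_queue(self):
    try:
        # PriorityTask is assumed to expose .priority and .data attributes.
        task = self.search_task_queue.get(timeout=1)
        return task.priority, task.data
    except queue.Empty:
        return None, None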
def __sigint_signal_handler(self, sig, frame):
    """Handle the CTRL+C (SIGINT) signal."""
    logger.info("Receive exit signal.")
    self.Engines.REFRESH_ENGINE.stop()
    self.Engines.SEARCH_ENGINE.stop()
    self.Engines.FILTER_ENGINE.stop()
    self.Engines.SAVE_ENGINE.stop()
    self.Engines.MONITOR_REFRESH_ENGINE.stop()
    self.Engines.MONITOR_ENGINE.stop()
    self.Engines.MONITOR_SAVE_ENGINE.stop()
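# The handler above only takes effect if it is registered for SIGINT in the main
# thread, typically in the application's start-up code. A minimal sketch,
# assuming this runs as a method of the same class that defines the handler;
# everything except the handler name is an assumption:
import signal

def _install_signal_handlers(self):
    # Route CTRL+C to the engine shutdown logic instead of the default KeyboardInterrupt.
    signal.signal(signal.SIGINT, self.__sigint_signal_handler)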
def _worker(self): logger.info("RefreshEngine start!") refresh_task_queue = self.app_ctx.MessageQueues.SEARCH_TASK_QUEUE while self.status == self.EngineStatus.RUNNING: logger.debug("start build search task.") rows = GeyeSearchRuleModel.objects.filter(is_deleted=0, status=1).all() current_time = datetime.datetime.now() for row in rows: delay = int(row.delay) if row.last_refresh_time + datetime.timedelta( minutes=delay) < current_time: # 该刷新了,添加到任务队列中去 # 添加一个字典,如果后续改成分布式,需要改成JSON字符串 # Task格式: # tuple(priority, _task) # build task _data = { "search_rule_id": row.id, "search_rule_name": row.name, "search_rule_content": row.rule, } # task = (row.priority, _data) task = PriorityTask(row.priority, _data) logger.debug("task: {}".format(task)) while True: try: refresh_task_queue.put_nowait(task) break except queue.Full: logger.warning("SearchTask队列已满,等待3秒后重试") self.ev.wait(3) continue # 更新任务的最后刷新时间 row.last_refresh_time = current_time row.save() self.ev.wait(settings.REFRESH_INTERVAL) logger.info("RefreshEngine end!")
def _worker(self): logger.info("{} start!".format(self.name)) while self.status == self.EngineStatus.RUNNING: task_priority, task = self.get_task_from_queue() if not task_priority or not task: continue filter_task = task["filter_task"] if GeyeLeaksModel.instance.is_exist(filter_task["sha"]): # 已经有这条记录了,continue continue # 存储数据 try: GeyeLeaksModel.objects.create( repo_name=filter_task["repo_name"], author=filter_task["author"], path=filter_task["path"], filename=filter_task["filename"], sha=filter_task["sha"], full_code_url=filter_task["full_code_url"], url=filter_task["url"], code=task["code"], srid=filter_task["srid"], frid=task["frid"], status=task["status"], pushed=task["pushed"], ) except DBError as e: logger.error("SaveEngine error: {}".format(e)) # todo: send error message continue # post-action # todo: send notification # todo: clone repo logger.info("{} end!".format(self.name))
def _worker(self): logger.info("{name} start!".format(name=self.name)) while self.__running(): logger.debug("start build monitor task.") rows: List[GeyeMonitorRules] = GeyeMonitorRules.instance.get_all() current_time = datetime.datetime.now() for _row in rows: interval = _row.interval if _row.last_fetch_time + datetime.timedelta( minutes=interval) < current_time: task = PriorityTask( _row.priority, { "task_type": _row.task_type, "event_type": _row.event_type, "rule_content": _row.rule_content, "rule_id": _row.id, }) logger.debug( "Create monitor task: {task}".format(task=task)) while self.__running(): try: self._monitor_task_queue.put_nowait(task) break except queue.Full: self.ev.wait(3) continue # 更新rule的最后刷新时间 _row.last_fetch_time = current_time _row.save() self.ev.wait(30) logger.info("{name} stop!".format(name=self.name))
def _worker(self):
    c_name = threading.current_thread().name
    logger.info("{} start!".format(c_name))
    self._real_worker()
    # Use the module logger, not the root logging module, for consistent output.
    logger.info("{} stop.".format(c_name))
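# The wrapper above only logs around _real_worker(), which suggests each engine
# runs its _worker in a dedicated thread. A minimal sketch of how such an engine
# could be started; apart from _worker and self.name, every name here is an
# assumption:
import threading

def start(self):
    # Daemon thread so a hard process exit does not hang on a blocked worker.
    self._thread = threading.Thread(target=self._worker, name=self.name, daemon=True)
    self._thread.start()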
def _worker(self): logger.info("{name} start!".format(name=self.name)) while self.is_running(): # 从队列中获取任务 task_priority, task = self.__get_task_from_queue() if not task_priority or not task: continue # 任务格式 # { # "data": [{}..], # "monitor_rule_id": int, # } # data中的每一项结构如下 # { # "event_id": event_id, # "event_type": event_type, # "actor_url": actor_url, # "actor_login": actor_login, # "actor_display_name": actor_display_name, # "repo_name": repo_name, # "repo_url": repo_url, # "org_name": org_name, # "org_url": org_url, # "created_time": created_time, # "payloads": {} # } monitor_rule_id = task.get("monitor_rule_id") if not monitor_rule_id: continue dataset = task.get("data") for item in dataset: e_id = item.get("event_id") if not e_id: continue # 检查event_id是否已经存在了 if GeyeMonitorResultsModel.instance.is_exist_by_event_id(event_id=e_id): continue monitor_results = GeyeMonitorResultsModel() monitor_results.monitor_rule_id = monitor_rule_id monitor_results.event_id = e_id monitor_results.event_type = item.get("event_type") monitor_results.actor_url = item.get("actor_url") monitor_results.actor_login = item.get("actor_login") monitor_results.actor_display_name = item.get("actor_display_name") monitor_results.org_name = item.get("org_name") monitor_results.org_url = item.get("org_url") monitor_results.repo_url = item.get("repo_url") monitor_results.repo_name = item.get("repo_name") monitor_results.event_created_time = item.get("created_time") monitor_results.content = json.dumps(item.get("payloads") or {}) monitor_results.save() logger.info("{name} stop!".format(name=self.name))
def _worker(self):
    current_name = threading.current_thread().name
    logger.info("{} start!".format(current_name))
    while self.status == self.EngineStatus.RUNNING:
        # task_priority is the priority configured on the search rule.
        task_priority, task = self.get_task_from_queue()
        if task_priority is None or task is None:
            continue

        # Pre-filtering by hash was considered: if the leaks table already
        # contains this hash, skip the remaining checks. That can cause false
        # negatives: a file that already hit rule A and was stored would be
        # skipped when rule B is matched later.
        # result = self.check_hash(task)

        # Get all filter rules to apply: global filters first, then the rules
        # bound to this search rule.
        all_filter_rules: List[GeyeFilterRuleModel] = self.get_filter_rules(task["srid"])
        logger.debug("Get all filter rules: {}".format(all_filter_rules))

        # Fetch the full code content.
        response_result = self.get_raw_code(task["full_code_url"])
        if not response_result["success"]:
            # On failure the task used to be re-queued, but that can deadlock
            # the worker, so it is simply dropped now.
            # self.put_task_to_queue(target_queue=self.filter_task_queue, task=(task_priority, task))
            # logger.debug("Re-put done. continue.")
            logger.error("Failed to fetch raw code, URL: {url}".format(url=task["full_code_url"]))
            continue
        raw_code = response_result["code"]

        # Match the code against each filter rule.
        logger.debug("#### [start] SEARCH RULE: {}".format(task["search_rule_name"]))
        logger.debug("#### Content URL: {}".format(task["full_code_url"]))
        for _rule in all_filter_rules:
            logger.debug("==== filter rule: {}, content: {}".format(_rule, _rule.rule))
            result = self.do_filter(_rule, task, raw_code)
            # Any error during matching aborts the whole filter pass.
            if not result or result["error"]:
                break

            # Decide whether the rule is hit based on its positive/negative type;
            # `hit` indicates whether this rule matched.
            if _rule.rule_type == 1:
                # Positive match: a hit means the pattern was found.
                hit = True if result["found"] else False
            elif _rule.rule_type == 2:
                # Negative match: a hit means the pattern was NOT found.
                hit = True if not result["found"] else False
            else:
                logger.error("Error rule_type: {}".format(_rule.rule_type))
                break
            logger.debug("filter end. hit result: %s", hit)

            # Based on the result, either keep matching or save the record.
            if hit:
                _action = _rule.action
                # 1 - do nothing, continue with the next rule, don't save;
                #     can act as a precondition for other rules
                # 2 - mark as false positive, stop matching, don't save;
                #     excludes content that is definitely not a leak
                # 3 - mark as false positive, stop matching, save
                # 4 - mark as confirmed, stop matching, save
                # 5 - mark as to-be-confirmed, stop matching, save
                if _action == 1:
                    logger.debug("Action: None -> continue next.")
                    continue
                elif _action == 2:
                    logger.debug("Action: Ignore -> no save -> end filter.")
                    break
                elif _action == 3:
                    logger.debug("Action: Ignore -> save -> end filter.")
                    save_task = (task_priority, {
                        "code": result["code"],
                        "status": LeaksStatusConstant.IGNORE,
                        "pushed": 0,
                        "frid": _rule.id,
                        "filter_task": task,
                        "filter_rule_name": _rule.name
                    })
                    self.put_task_to_queue(save_task, target_queue=self.save_task_queue)
                    break
                elif _action == 4:
                    logger.debug("Action: Confirm -> save -> end filter.")
                    save_task = (task_priority, {
                        "code": result["code"],
                        "status": LeaksStatusConstant.CONFIRM,
                        "pushed": 0,
                        "frid": _rule.id,
                        "filter_task": task,
                        "filter_rule_name": _rule.name
                    })
                    self.put_task_to_queue(save_task, target_queue=self.save_task_queue)
                    break
                elif _action == 5:
                    logger.debug("Action: To-be-confirmed -> save -> end filter.")
                    save_task = (task_priority, {
                        "code": result["code"],
                        "status": LeaksStatusConstant.TO_BE_CONFIRMED,
                        "pushed": 0,
                        "frid": _rule.id,
                        "filter_task": task,
                        "filter_rule_name": _rule.name
                    })
                    self.put_task_to_queue(save_task, target_queue=self.save_task_queue)
                    break
                else:
                    logger.error("Unknown action value: {}".format(_action))
            else:
                logger.debug("no hit, continue filter next rule.")
                continue
        logger.debug("#### [end] SEARCH RULE: {}".format(task["search_rule_name"]))
    logger.info("{} end!".format(current_name))
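# Actions 3, 4 and 5 above build an identical save task and differ only in the
# status constant, so a small mapping could replace the three duplicated elif
# branches. A sketch of that refactoring, using only names that already appear
# in the worker above; it is meant to slot into the `if hit:` block, followed by
# the same `break`:
ACTION_TO_STATUS = {
    3: LeaksStatusConstant.IGNORE,
    4: LeaksStatusConstant.CONFIRM,
    5: LeaksStatusConstant.TO_BE_CONFIRMED,
}

def build_save_task(task_priority, task, _rule, result):
    # Returns the (priority, payload) tuple that the SaveEngine expects.
    return (task_priority, {
        "code": result["code"],
        "status": ACTION_TO_STATUS[_rule.action],
        "pushed": 0,
        "frid": _rule.id,
        "filter_task": task,
        "filter_rule_name": _rule.name,
    })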
def _worker(self): logger.info("{name} start!".format(name=self.name)) while self.__is_running(): task_priority, task = self.__get_task() if task_priority is None or task is None: self.__wait(1) continue # 解析task中的数据 # { # "task_type": _row.task_type, # 可选值来自 MonitorTaskTypeConstant,监控的维度 # "event_type": _row.event_type, # 可选值来自MonitorEventTypeConstant,监控的事件类型,多个值用逗号分隔 # "rule_content": _row.rule_content, # 根据task_type有不同含义 # "rule_id": _row.id, # } logger.debug("get task: {}".format(task)) task_type = task.get("task_type", None) event_type: str = task.get("event_type", None) rule_content = task.get("rule_content", None) monitor_rule_id = task.get("rule_id", None) if not task_type or not event_type or not rule_content or not monitor_rule_id: self.__wait(1) continue # 根据task_type 获取不同的API接口 api_url = MonitorAPIUrl.get(task_type, None) if not api_url: logger.error("task_type有误,无法获取API!") continue api_url = api_url.format(**json.loads(rule_content)) # 请求API获取数据 results = self.__fetch_api(api_url) if not results["success"]: logger.error( "Fetch API failed! {err}".format(err=results["reason"])) continue logger.debug("results: {}".format(results)) # 从API的返回中parse对应的时间内容,event_type可以为多个事件,返回格式如下 # ret_val = { # "success": False, # "message": "Unknown Error", # "data": [], # typing: List[Dict] # } parse_result = EventParser.parse(event_type.split(","), results["data"]) if not parse_result.get("success"): logger.error(parse_result.get("message")) continue else: # 把数据扔到队列里去,把event存起来 self.__put_task( task_priority, { "data": parse_result.get("data"), "monitor_rule_id": monitor_rule_id, }) logger.info("{name} stop!".format(name=self.name))