Example #1
0
    def _worker(self):
        """Search-engine worker loop.

        Pulls search tasks off the queue, pages through the search API for
        each task, and pushes the generated filter tasks onto the filter
        queue. Runs until ``self.is_running()`` turns false.
        """
        current_name = threading.current_thread().name
        logger.info("{} start!".format(current_name))

        while self.is_running():
            # Fetch a task; loop again if nothing was available.
            task_priority, search_task = self.get_task_from_queue()
            # Compare against None so a legitimate falsy priority (e.g. 0)
            # or payload does not cause the task to be silently dropped.
            if task_priority is None or search_task is None:
                continue

            # Unpack the task payload.
            srid = search_task.get("search_rule_id")
            rule_name = search_task.get("search_rule_name")
            rule_content = search_task.get("search_rule_content")
            logger.debug("parse task data done.")

            # Request every result page in order.
            for page_num in range(1, self.search_page_max_size + 1):
                # Build the request body and headers for this page.
                request_data = self.build_request_data(rule_content, page_num)
                request_header = self.build_request_header()
                if request_header is None:
                    logger.error(
                        "No available token found. Jumping search operator.")
                    break

                # Issue the request; a None response means a stop signal was
                # received, so abandon the remaining pages immediately.
                response = self._request_page(request_header, request_data)
                if response is None:
                    break
                logger.debug("response.text: {}".format(response.text))

                # Parse the page. Expected result shape:
                # {
                #     "filter_tasks": [],
                #     "has_next_page": True,
                #     "error": None
                # }
                results = self.parse_response(response, srid, rule_name)
                if results["error"]:
                    # Parsing failed; move on to the next page.
                    # TODO(review): should the current page be retried instead?
                    continue

                # Queue every generated filter task at the task's priority.
                for task in results["filter_tasks"]:
                    self.push_to_queue(task_priority, task)

                # Stop paging once the parser reports there is no next page.
                if not results["has_next_page"]:
                    logger.debug(
                        "Jump remains page because of 'has_next_page' is False."
                    )
                    break

        logger.info("{} end!".format(current_name))
Example #2
0
    def __sigint_signal_handler(self, sig, frame):
        """Handle CTRL+C: ask every engine to stop, in dependency order."""
        logger.info("Receive exit signal.")

        engines = (
            self.Engines.REFRESH_ENGINE,
            self.Engines.SEARCH_ENGINE,
            self.Engines.FILTER_ENGINE,
            self.Engines.SAVE_ENGINE,
            self.Engines.MONITOR_REFRESH_ENGINE,
            self.Engines.MONITOR_ENGINE,
            self.Engines.MONITOR_SAVE_ENGINE,
        )
        for engine in engines:
            engine.stop()
Example #3
0
    def _worker(self):
        """Refresh-engine loop.

        Periodically scans the active search rules and, for every rule whose
        refresh interval has elapsed, enqueues a prioritized search task.
        """
        logger.info("RefreshEngine start!")

        refresh_task_queue = self.app_ctx.MessageQueues.SEARCH_TASK_QUEUE

        while self.status == self.EngineStatus.RUNNING:
            logger.debug("start build search task.")
            rows = GeyeSearchRuleModel.objects.filter(is_deleted=0,
                                                      status=1).all()
            current_time = datetime.datetime.now()

            for row in rows:
                delay = int(row.delay)
                if row.last_refresh_time + datetime.timedelta(
                        minutes=delay) < current_time:
                    # Rule is due: build the task payload as a dict. If this
                    # ever becomes distributed, switch to a JSON string.
                    # Task format: tuple(priority, _task)
                    _data = {
                        "search_rule_id": row.id,
                        "search_rule_name": row.name,
                        "search_rule_content": row.rule,
                    }
                    task = PriorityTask(row.priority, _data)
                    logger.debug("task: {}".format(task))
                    # Retry while the queue is full, but re-check the engine
                    # status each round so a stop request is not blocked by
                    # an endless full-queue retry loop.
                    enqueued = False
                    while self.status == self.EngineStatus.RUNNING:
                        try:
                            refresh_task_queue.put_nowait(task)
                            enqueued = True
                            break
                        except queue.Full:
                            logger.warning("SearchTask队列已满,等待3秒后重试")
                            self.ev.wait(3)

                    # Only advance the rule's refresh timestamp when the task
                    # actually made it onto the queue; otherwise the rule
                    # stays due and is retried on the next scan.
                    if enqueued:
                        row.last_refresh_time = current_time
                        row.save()

            self.ev.wait(settings.REFRESH_INTERVAL)

        logger.info("RefreshEngine end!")
Example #4
0
    def _worker(self):
        """Save-engine loop: persist filter results as leak records."""
        logger.info("{} start!".format(self.name))

        while self.status == self.EngineStatus.RUNNING:
            task_priority, task = self.get_task_from_queue()
            # Compare against None so a falsy-but-valid priority (e.g. 0)
            # does not cause the task to be dropped.
            if task_priority is None or task is None:
                continue

            filter_task = task["filter_task"]

            # Deduplicate by sha: skip records that are already stored.
            if GeyeLeaksModel.instance.is_exist(filter_task["sha"]):
                continue

            # Persist the leak record.
            try:
                GeyeLeaksModel.objects.create(
                    repo_name=filter_task["repo_name"],
                    author=filter_task["author"],
                    path=filter_task["path"],
                    filename=filter_task["filename"],
                    sha=filter_task["sha"],
                    full_code_url=filter_task["full_code_url"],
                    url=filter_task["url"],
                    code=task["code"],
                    srid=filter_task["srid"],
                    frid=task["frid"],
                    status=task["status"],
                    pushed=task["pushed"],
                )
            except DBError as e:
                logger.error("SaveEngine error: {}".format(e))
                # todo: send error message
                continue

            # post-action
            # todo: send notification
            # todo: clone repo

        logger.info("{} end!".format(self.name))
    def _worker(self):
        """Monitor-refresh loop.

        Scans all monitor rules and enqueues a prioritized monitor task for
        every rule whose fetch interval has elapsed.
        """
        logger.info("{name} start!".format(name=self.name))

        while self.__running():
            logger.debug("start build monitor task.")

            rows: List[GeyeMonitorRules] = GeyeMonitorRules.instance.get_all()
            current_time = datetime.datetime.now()

            for _row in rows:
                interval = _row.interval
                if _row.last_fetch_time + datetime.timedelta(
                        minutes=interval) < current_time:
                    task = PriorityTask(
                        _row.priority, {
                            "task_type": _row.task_type,
                            "event_type": _row.event_type,
                            "rule_content": _row.rule_content,
                            "rule_id": _row.id,
                        })
                    logger.debug(
                        "Create monitor task: {task}".format(task=task))
                    # Retry while the queue is full; the running check lets a
                    # stop request break out of the retry loop.
                    enqueued = False
                    while self.__running():
                        try:
                            self._monitor_task_queue.put_nowait(task)
                            enqueued = True
                            break
                        except queue.Full:
                            self.ev.wait(3)

                    # Only advance the rule's fetch timestamp when the task
                    # was actually queued; if the put loop was interrupted by
                    # shutdown, the rule stays due instead of silently
                    # skipping a refresh cycle.
                    if enqueued:
                        _row.last_fetch_time = current_time
                        _row.save()

            self.ev.wait(30)

        logger.info("{name} stop!".format(name=self.name))
Example #6
0
 def _worker(self):
     """Thin worker wrapper: log lifecycle, delegate to ``_real_worker``."""
     c_name = threading.current_thread().name
     logger.info("{} start!".format(c_name))
     self._real_worker()
     # Use the module logger (not the root `logging` module) so the stop
     # message goes through the same handlers as the start message.
     logger.info("{} stop.".format(c_name))
Example #7
0
    def _worker(self):
        """Monitor-save loop: persist parsed GitHub events as monitor results."""
        logger.info("{name} start!".format(name=self.name))

        while self.is_running():
            # Pull the next task; loop again if nothing was available.
            task_priority, task = self.__get_task_from_queue()
            # Compare against None so a falsy-but-valid priority (e.g. 0)
            # does not cause the task to be dropped.
            if task_priority is None or task is None:
                continue

            # Task format:
            # {
            #   "data": [{}..],
            #   "monitor_rule_id": int,
            # }
            # Each item in "data" looks like:
            # {
            #     "event_id": event_id,
            #     "event_type": event_type,
            #     "actor_url": actor_url,
            #     "actor_login": actor_login,
            #     "actor_display_name": actor_display_name,
            #     "repo_name": repo_name,
            #     "repo_url": repo_url,
            #     "org_name": org_name,
            #     "org_url": org_url,
            #     "created_time": created_time,
            #     "payloads": {}
            # }

            monitor_rule_id = task.get("monitor_rule_id")
            if not monitor_rule_id:
                continue

            # Default to an empty list so a missing/None "data" field cannot
            # raise TypeError in the loop below.
            dataset = task.get("data") or []
            for item in dataset:

                e_id = item.get("event_id")
                if not e_id:
                    continue

                # Deduplicate: skip events that are already stored.
                if GeyeMonitorResultsModel.instance.is_exist_by_event_id(event_id=e_id):
                    continue

                monitor_results = GeyeMonitorResultsModel()
                monitor_results.monitor_rule_id = monitor_rule_id
                monitor_results.event_id = e_id
                monitor_results.event_type = item.get("event_type")

                monitor_results.actor_url = item.get("actor_url")
                monitor_results.actor_login = item.get("actor_login")
                monitor_results.actor_display_name = item.get("actor_display_name")

                monitor_results.org_name = item.get("org_name")
                monitor_results.org_url = item.get("org_url")

                monitor_results.repo_url = item.get("repo_url")
                monitor_results.repo_name = item.get("repo_name")

                monitor_results.event_created_time = item.get("created_time")

                # Store the raw payload as JSON text.
                monitor_results.content = json.dumps(item.get("payloads") or {})

                monitor_results.save()

        logger.info("{name} stop!".format(name=self.name))
Example #8
0
    def _worker(self):
        """Filter-engine loop.

        For each filter task: fetch the raw code, run it through every
        applicable filter rule in order, and queue a save task when a rule
        hits with a save action.
        """
        current_name = threading.current_thread().name

        logger.info("{} start!".format(current_name))

        # Actions 3/4/5 all mean "save, then stop filtering"; they only
        # differ in the status stored with the record and the log message.
        save_actions = {
            3: (LeaksStatusConstant.IGNORE,
                "Action: Ignore -> save -> end filter."),
            4: (LeaksStatusConstant.CONFIRM,
                "Action: Confirm -> save -> end filter."),
            5: (LeaksStatusConstant.TO_BE_CONFIRMED,
                "Action: To-be-confirmed -> save -> end filter."),
        }

        while self.status == self.EngineStatus.RUNNING:
            # task_priority is the priority set on the originating search rule.
            task_priority, task = self.get_task_from_queue()
            # Compare against None so a priority of 0 is still processed.
            if task is None or task_priority is None:
                continue

            # A sha pre-filter was considered here but left disabled: a file
            # that already matched rule A would then be skipped when matching
            # rule B, causing missed findings.
            # result = self.check_hash(task)

            # Collect all rules to run: global filters first, then the
            # filters attached to this search rule.
            all_filter_rules: List[
                GeyeFilterRuleModel] = self.get_filter_rules(task["srid"])
            logger.debug("Get all filter rules: {}".format(all_filter_rules))

            # Fetch the full source of the matched file.
            response_result = self.get_raw_code(task["full_code_url"])
            if not response_result["success"]:
                # Re-queueing the task here could wedge the worker, so the
                # task is dropped and the failure logged instead.
                logger.error(
                    "获取raw code失败,URL:{url}".format(url=task["full_code_url"]))
                continue
            raw_code = response_result["code"]

            # Run the rules in order.
            logger.debug("#### [start] SEARCH RULE: {}".format(
                task["search_rule_name"]))
            logger.debug("#### Content URL: {}".format(task["full_code_url"]))
            for _rule in all_filter_rules:
                logger.debug("==== filter rule: {}, content: {}".format(
                    _rule, _rule.rule))
                result = self.do_filter(_rule, task, raw_code)

                # Any error during matching aborts the whole rule chain.
                if not result or result["error"]:
                    break

                # rule_type 1 = positive match (hit when found),
                # rule_type 2 = negative match (hit when NOT found).
                if _rule.rule_type == 1:
                    hit = bool(result["found"])
                elif _rule.rule_type == 2:
                    hit = not result["found"]
                else:
                    logger.error("Error rule_type: {}".format(_rule.rule_type))
                    break
                logger.debug("filter end. hit result: %s", hit)

                if not hit:
                    logger.debug("no hit, continue filter next rule.")
                    continue

                _action = _rule.action
                # Action meanings:
                #   1 - do nothing, keep matching (useful as a precondition
                #       for other rules); no save
                #   2 - mark false positive, stop matching, no save
                #   3 - mark false positive, stop matching, save
                #   4 - mark confirmed, stop matching, save
                #   5 - mark to-be-confirmed, stop matching, save
                if _action == 1:
                    logger.debug("Action: None -> continue next.")
                    continue
                elif _action == 2:
                    logger.debug(
                        "Action: Ignore -> no save -> end filter.")
                    break
                elif _action in save_actions:
                    status, message = save_actions[_action]
                    logger.debug(message)
                    save_task = (task_priority, {
                        "code": result["code"],
                        "status": status,
                        "pushed": 0,
                        "frid": _rule.id,
                        "filter_task": task,
                        "filter_rule_name": _rule.name
                    })
                    self.put_task_to_queue(
                        save_task, target_queue=self.save_task_queue)
                    break
                else:
                    # Unknown action: log it and fall through to the next rule.
                    logger.error(
                        "Unknown action value: {}".format(_action))

            logger.debug("#### [end] SEARCH RULE: {}".format(
                task["search_rule_name"]))

        logger.info("{} end!".format(current_name))
Example #9
0
    def _worker(self):
        """Monitor worker: fetch API data for each monitor task, parse the
        requested event types and hand the results to the save queue."""
        logger.info("{name} start!".format(name=self.name))

        while self.__is_running():
            priority, current_task = self.__get_task()
            if priority is None or current_task is None:
                self.__wait(1)
                continue

            # Task payload (built by the monitor refresh engine):
            #   task_type    - monitored dimension (MonitorTaskTypeConstant)
            #   event_type   - comma-separated event types
            #                  (MonitorEventTypeConstant)
            #   rule_content - meaning depends on task_type
            #   rule_id      - id of the originating monitor rule
            logger.debug("get task: {}".format(current_task))
            task_type = current_task.get("task_type", None)
            event_type: str = current_task.get("event_type", None)
            rule_content = current_task.get("rule_content", None)
            monitor_rule_id = current_task.get("rule_id", None)
            incomplete = not (task_type and event_type and rule_content
                              and monitor_rule_id)
            if incomplete:
                self.__wait(1)
                continue

            # Resolve the API endpoint for this task type.
            api_url = MonitorAPIUrl.get(task_type, None)
            if not api_url:
                logger.error("task_type有误,无法获取API!")
                continue
            api_url = api_url.format(**json.loads(rule_content))

            # Call the API.
            results = self.__fetch_api(api_url)
            if not results["success"]:
                logger.error(
                    "Fetch API failed! {err}".format(err=results["reason"]))
                continue

            logger.debug("results: {}".format(results))

            # Parse the requested event types out of the API payload.
            # EventParser.parse returns:
            #   {"success": bool, "message": str, "data": List[Dict]}
            wanted_events = event_type.split(",")
            parse_result = EventParser.parse(wanted_events, results["data"])
            if parse_result.get("success"):
                # Queue the parsed events so they get persisted.
                self.__put_task(
                    priority, {
                        "data": parse_result.get("data"),
                        "monitor_rule_id": monitor_rule_id,
                    })
            else:
                logger.error(parse_result.get("message"))
                continue

        logger.info("{name} stop!".format(name=self.name))