Ejemplo n.º 1
0
    def post(request):
        """
        Create a token record from the POSTed parameters.

        ``tokenName``, ``tokenContent`` and ``status`` are validated through
        ``RequestValidator.check_params`` with ``check_empty=True``.

        :param request: the incoming request
        :return: JsonResponse with code 1004 on validation failure, 1001 plus
                 the stored record on success, 1002 if no object was created
        """
        logger.debug("POST: {}".format(request.body))

        required_fields = ["tokenName", "tokenContent", "status"]
        validation = RequestValidator.check_params(
            request, check_empty=True, check_params=required_fields)
        if validation.has_error:
            logger.error("error: {}".format(validation.error_message))
            return JsonResponse({
                "code": 1004,
                "message": validation.error_message,
            })

        fields = validation.params
        created = GeyeTokenModel.objects.create(
            token_name=fields.get("tokenName", None),
            token=fields.get("tokenContent", None),
            status=fields.get("status", None),
            # New tokens start with a fixed remaining quota.
            remain_limit=99999,
        )
        if not created:
            return JsonResponse({"code": 1002, "message": "添加失败!"})

        payload = {
            "id": created.id,
            "tokenName": created.token_name,
            "tokenContent": created.token,
            "status": created.status,
            "remainLimit": created.remain_limit,
        }
        return JsonResponse({
            "code": 1001,
            "message": "添加成功!",
            "data": payload,
        })
Ejemplo n.º 2
0
    def post(request):
        """
        Validate the POSTed filter-rule fields and update the rule.

        The integer fields are converted only to validate them; the update
        itself receives the parameters exactly as the validator returned them.

        :param request: the incoming request
        :return: JsonResponse carrying 1001 on success, 1002 when the update
                 reports failure, or the code of the first failed check
        """
        logger.debug("POST: {}".format(request.body))

        # Reject the request if any required parameter is missing or empty.
        required = [
            "id", "name", "ruleType", "ruleEngine", "ruleContent",
            "status", "action", "position", "priority",
        ]
        result = RequestValidator.check_params(
            request, check_empty=True, check_params=required)
        if result.has_error:
            logger.error("error: {}".format(result.error_message))
            return JsonResponse({
                "code": 1004,
                "message": result.error_message
            })
        params = result.params

        # The filter rule being updated has to exist already.
        rule_exists = GeyeFilterRuleModel.instance.is_exist(
            params.get("id", None))
        if not rule_exists:
            return JsonResponse({"code": 1003, "message": "规则ID不存在!"})

        if not params.get("name"):
            return JsonResponse({"code": 1003, "message": "规则名称有误!"})

        # One row per integer field, checked in this order:
        # (parameter, default, accepted values, error code, error message)
        int_checks = (
            ("ruleType", 1, (1, 2), 1005, "ruleType有误!"),
            ("ruleEngine", 1, (1, 2), 1006, "ruleEngine有误!"),
            ("status", 1, (1, 0), 1007, "status有误!"),
            ("action", 1, range(1, 6), 1007, "action有误!"),
            ("position", 1, range(1, 6), 1008, "position有误!"),
            ("priority", 5, range(0, 11), 1009, "priority有误!"),
        )
        for key, default, accepted, code, message in int_checks:
            value = CommonConvert.ensure_int(params.get(key, default))
            if value not in accepted:
                return JsonResponse({"code": code, "message": message})

        updated = GeyeFilterRuleModel.instance.update_filter_rule(params)
        if not updated:
            return JsonResponse({"code": 1002, "message": "更新失败!"})
        return JsonResponse({"code": 1001, "message": "更新成功!"})
Ejemplo n.º 3
0
    def post(request):
        """
        Update an existing token and return its masked representation.

        :param request: the incoming request
        :return: JsonResponse with code 1004 when validation fails or the
                 token id is unknown, 1001 plus the updated record (token
                 passed through ``mask_token``) on success, 1002 otherwise
        """
        logger.debug("POST: {}".format(request.body))

        validation = RequestValidator.check_params(
            request,
            check_empty=True,
            check_params=["id", "tokenName", "tokenContent", "status"],
        )
        if validation.has_error:
            logger.error("error: {}".format(validation.error_message))
            return JsonResponse({
                "code": 1004,
                "message": validation.error_message,
            })

        fields = validation.params

        # Only hit the existence check when an id was actually supplied.
        token_id = fields.get("id", None)
        known = bool(token_id) and GeyeTokenModel.instance.is_exist(token_id)
        if not known:
            return JsonResponse({"code": 1004, "message": "token id不存在!"})

        updated = GeyeTokenModel.instance.update_token(fields)
        if not updated:
            return JsonResponse({"code": 1002, "message": "更新失败!"})

        payload = {
            "id": updated.id,
            "tokenName": updated.token_name,
            "tokenContent": mask_token(updated.token),
            "status": updated.status,
            "remainLimit": updated.remain_limit,
        }
        return JsonResponse({
            "code": 1001,
            "message": "更新成功!",
            "data": payload,
        })
Ejemplo n.º 4
0
    def get(request):
        """
        Return the CSRF token for this request as the response body.

        :param request: the incoming request
        :return: HttpResponse whose content is the CSRF token
        """
        logger.debug("COOKIES: {}".format(request.COOKIES))
        token = django.middleware.csrf.get_token(request)
        return HttpResponse(token)
Ejemplo n.º 5
0
    def _request_page(self, request_header,
                      request_data) -> Optional[requests.Response]:
        """
        Request one page of search results and react to the status code.

        :param request_header: mapping; its "token_id" and "header" entries
                               are read
        :param request_data: request data handed to make_request unchanged
        :return: the response, or None when make_request returns None, the
                 token is rejected (401), the rate limit was hit five times
                 (403), or the engine is no longer running
        """
        logger.debug("request_data: {} || request_header: {}".format(
            request_data, request_header))

        token_id = request_header["token_id"]
        header = request_header["header"]

        def store_remain_limit(value):
            # Persist the remaining quota on the non-deleted token row.
            GeyeTokenModel.instance.filter(
                is_deleted=0, pk=token_id).update(remain_limit=value)

        rate_limit_hits = 0
        while self.is_running():
            # make_request retries internally and returns None once it gives
            # up or the engine has been told to stop.
            response: Optional[requests.Response] = self.make_request(
                header, request_data)
            if response is None:
                return None

            status_code = response.status_code
            logger.debug("status_code: {} || response header: {}".format(
                response.status_code, response.headers))

            if status_code == 401:
                # The token itself is bad; retrying cannot help.
                logger.error(
                    "401 - Bad credentials, see: https://developer.github.com/v3"
                )
                store_remain_limit(-1)
                return None

            if status_code == 403:
                # Rate limited: mark the token as exhausted, then wait 60s
                # and retry, giving up on the fifth hit.
                store_remain_limit(0)
                rate_limit_hits += 1
                if rate_limit_hits >= 5:
                    return None
                logger.error(
                    "403 - API rate limit exceeded. Wait 60s and will retry..."
                )
                self.ev.wait(60)
                continue

            # Any other status: record the quota reported by the server and
            # hand the response back.
            store_remain_limit(
                int(response.headers.get("X-RateLimit-Remaining", 0)))
            return response

        return None
Ejemplo n.º 6
0
    def post(request):
        """
        Update an existing search rule from the POSTed parameters.

        ``priority`` and ``delay`` may arrive as strings or as other values;
        strings must consist of decimal digits and are converted to int,
        anything else is passed through unchanged.

        :param request: the incoming request
        :return: JsonResponse with code 1004 (validation / bad id), 1003
                 (unknown id, bad priority or delay), 1001 when at least one
                 row was updated, 1002 otherwise
        """
        logger.debug("POST: {}".format(request.body))
        result = RequestValidator.check_params(request,
                                               check_empty=True,
                                               check_params=[
                                                   "id", "ruleName",
                                                   "ruleContent", "status",
                                                   "needNotification", "clone",
                                                   "delay", "priority"
                                               ])
        if result.has_error:
            em = result.error_message
            logger.error("error_message: {}".format(em))
            return JsonResponse({"code": 1004, "message": em})

        request_data = result.params

        # The rule id must be present and must refer to an existing rule.
        srid = request_data.get("id", None)
        if not srid:
            return JsonResponse({"code": 1004, "message": "规则ID有误!"})
        if not GeyeSearchRuleModel.instance.is_exist_by_pk(srid):
            return JsonResponse({"code": 1003, "message": "规则ID不存在!"})

        rule_name = request_data.get("ruleName")
        rule_content = request_data.get("ruleContent")
        status = request_data.get("status")
        delay = request_data.get("delay")
        priority = request_data.get("priority")

        # isdecimal() rather than isdigit(): isdigit() also accepts characters
        # such as superscript digits ("²") that int() cannot convert, which
        # would raise ValueError below instead of returning a clean error.
        if isinstance(priority, str) and not priority.isdecimal():
            return JsonResponse({"code": 1003, "message": "优先级有误!"})
        if isinstance(delay, str) and not delay.isdecimal():
            return JsonResponse({"code": 1003, "message": "刷新间隔有误!"})

        # str -> int
        delay = int(delay) if isinstance(delay, str) else delay
        priority = int(priority) if isinstance(priority, str) else priority

        # needNotification and clone are required by the validator above but
        # their submitted values are not used: both are always stored as 0.
        need_notification = 0
        clone = 0

        # QuerySet.update() returns the number of rows it matched.
        updated_rows = GeyeSearchRuleModel.instance.filter(
            is_deleted=0, id=srid).update(name=rule_name,
                                          rule=rule_content,
                                          status=status,
                                          priority=priority,
                                          delay=delay,
                                          need_notification=need_notification,
                                          clone=clone)

        if updated_rows:
            return JsonResponse({"code": 1001, "message": "更新规则成功!"})
        else:
            return JsonResponse({"code": 1002, "message": "更新规则失败!"})
Ejemplo n.º 7
0
    def post(request):
        """
        Soft-delete a global filter rule identified by the "id" field of the
        JSON request body.

        :param request: the incoming request
        :return: JsonResponse with code 1004 when the body is not a JSON
                 object, the id is missing, or the rule does not exist; 1001
                 on success; 1002 when the delete reports failure
        """
        # A malformed body must not crash the view: treat anything that is
        # not a JSON object as a request without an id.
        try:
            body = json.loads(request.body)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError.
            body = None
        frid = body.get("id", None) if isinstance(body, dict) else None
        logger.debug("frid: {}".format(frid))
        if not frid or not GeyeFilterRuleModel.instance.is_exist_global(frid):
            return JsonResponse({"code": 1004, "message": "规则ID不存在!"})

        if GeyeFilterRuleModel.instance.fake_delete_global(frid):
            return JsonResponse({"code": 1001, "message": "删除成功!"})
        else:
            return JsonResponse({"code": 1002, "message": "删除失败!"})
Ejemplo n.º 8
0
    def post(request):
        """
        Soft-delete a search rule identified by the "id" field of the JSON
        request body.

        :param request: the incoming request
        :return: JsonResponse with code 1004 when the body is not a JSON
                 object or carries no id, 1003 when the rule does not exist,
                 1002 when the delete reports failure, 1001 on success
        """
        # A malformed body must not crash the view: treat anything that is
        # not a JSON object as a request without an id.
        try:
            body = json.loads(request.body)
        except ValueError:
            # json.JSONDecodeError and UnicodeDecodeError both derive from
            # ValueError.
            body = None
        srid = body.get("id", None) if isinstance(body, dict) else None
        logger.debug("srid: {}".format(srid))
        if not srid:
            return JsonResponse({"code": 1004, "message": "规则id有误!"})

        if not GeyeSearchRuleModel.instance.is_exist_by_pk(srid):
            return JsonResponse({"code": 1003, "message": "规则id不存在!"})

        if not GeyeSearchRuleModel.instance.fake_delete(pk=srid):
            return JsonResponse({"code": 1002, "message": "删除失败!"})
        else:
            return JsonResponse({"code": 1001, "message": "删除成功!"})
Ejemplo n.º 9
0
    def _worker(self):
        """
        Periodically turn due search rules into tasks on the search queue.

        While the engine status is RUNNING, every enabled, non-deleted search
        rule whose ``last_refresh_time`` plus ``delay`` minutes lies in the
        past is wrapped in a PriorityTask and queued; its
        ``last_refresh_time`` is then set to the time of the scan. Between
        scans the worker waits ``settings.REFRESH_INTERVAL`` on ``self.ev``.
        """
        logger.info("RefreshEngine start!")

        refresh_task_queue = self.app_ctx.MessageQueues.SEARCH_TASK_QUEUE

        while self.status == self.EngineStatus.RUNNING:
            logger.debug("start build search task.")
            rows = GeyeSearchRuleModel.objects.filter(is_deleted=0,
                                                      status=1).all()
            current_time = datetime.datetime.now()

            for row in rows:
                delay = int(row.delay)
                if row.last_refresh_time + datetime.timedelta(
                        minutes=delay) < current_time:
                    # The rule is due. The payload is a plain dict; it would
                    # have to become a JSON string if the queue were ever
                    # distributed.
                    _data = {
                        "search_rule_id": row.id,
                        "search_rule_name": row.name,
                        "search_rule_content": row.rule,
                    }
                    task = PriorityTask(row.priority, _data)
                    logger.debug("task: {}".format(task))

                    # Retry while the queue is full, but re-check the engine
                    # status on every attempt so a stop request cannot leave
                    # this loop spinning forever on a full queue.
                    enqueued = False
                    while self.status == self.EngineStatus.RUNNING:
                        try:
                            refresh_task_queue.put_nowait(task)
                            enqueued = True
                            break
                        except queue.Full:
                            logger.warning("SearchTask队列已满,等待3秒后重试")
                            self.ev.wait(3)

                    if not enqueued:
                        # The engine stopped before the task was queued:
                        # leave last_refresh_time alone so the rule is still
                        # due on the next run.
                        break

                    # Remember when this rule was last scheduled.
                    row.last_refresh_time = current_time
                    row.save()

            self.ev.wait(settings.REFRESH_INTERVAL)

        logger.info("RefreshEngine end!")
Ejemplo n.º 10
0
    def post(request):
        """
        Update an existing monitor rule from the POSTed parameters.

        :param request: the incoming request
        :return: JsonResponse with code 1004 on validation failure, 1003 for
                 an unknown taskType / eventType or a rule that does not
                 exist, 1001 on success
        """

        logger.debug(f"POST data: {request.body}")

        # Validate the parameters. "id" is part of the list because the rule
        # to update is looked up by it below.
        validator = RequestValidator()
        result = validator.check_params(request,
                                        check_params=[
                                            "id", "taskType", "eventType",
                                            "interval", "priority",
                                            "ruleContent", "status"
                                        ],
                                        check_empty=True)
        if result.has_error:
            return JsonResponse({
                "code": 1004,
                "message": result.error_message
            })

        # Check the values against the known task and event types.
        params = result.params
        task_type = params.get("taskType")
        event_type = params.get("eventType")
        if task_type not in MonitorTaskTypeConstant.lst():
            return JsonResponse({"code": 1003, "message": "taskType有误!"})
        for _post_event_type in event_type:
            if _post_event_type not in MonitorEventTypeConstant.lst():
                return JsonResponse({"code": 1003, "message": "eventType有误!"})

        # Update the row inside a transaction, locking it with
        # select_for_update() while it is modified.
        with transaction.atomic():
            obj: GeyeMonitorRules = GeyeMonitorRules.instance.select_for_update(). \
                filter(is_deleted=False, pk=params.get("id")).first()
            if not obj:
                return JsonResponse({"code": 1003, "message": "规则不存在!"})

            obj.task_type = task_type
            # The submitted event types are stored as one comma-joined string.
            obj.event_type = ",".join(event_type)
            obj.rule_content = params.get("ruleContent")
            obj.status = params.get("status")
            obj.interval = params.get("interval")
            obj.priority = params.get("priority")
            obj.save()

            return JsonResponse({"code": 1001, "message": "更新成功!"})
Ejemplo n.º 11
0
 def get(request):
     """
     List every global filter rule.

     :param request: the incoming request (not read)
     :return: JsonResponse with code 1001 and one dict per rule in "data"
     """
     rows = GeyeFilterRuleModel.instance.all_global_filter_rule()
     logger.debug("rows: {}".format(rows))
     data = [
         {
             "id": rule.id,
             "name": rule.name,
             "ruleType": rule.rule_type,
             "ruleEngine": rule.rule_engine,
             "ruleContent": rule.rule,
             "status": rule.status,
             "parentId": rule.parent_id,
             "action": rule.action,
             "position": rule.position,
             "priority": rule.priority,
         }
         for rule in rows
     ]
     return JsonResponse({"code": 1001, "message": "获取成功!", "data": data})
Ejemplo n.º 12
0
    def get(request):
        """
        Return one search rule together with its own filter rules.

        The rule is looked up by the ``id`` and/or ``rule_name`` query
        parameters; at least one of them has to be supplied.

        :param request: the incoming request
        :return: JsonResponse with code 1004 when neither parameter is given,
                 1003 when no rule matches, 1001 plus the rule and its filter
                 rules (highest priority first) otherwise
        """
        srid = request.GET.get("id", None)
        rule_name = request.GET.get("rule_name", None)
        logger.debug("srid: {}, rule_name: {}".format(srid, rule_name))
        if not srid and not rule_name:
            return JsonResponse({"code": 1004, "message": "id和rule_name均有误"})

        search_rule_obj = GeyeSearchRuleModel.instance.get_detail(
            pk=srid, rule_name=rule_name)
        if not search_rule_obj:
            return JsonResponse({"code": 1003, "message": "规则不存在!"})

        # Use the id of the rule that was actually found, not the raw "id"
        # query parameter: the latter is None when the lookup was done by
        # rule_name, which would select the wrong filter rules.
        filter_rule_obj = GeyeFilterRuleModel.instance.filter(
            is_deleted=0,
            parent_id=search_rule_obj.id).order_by("-priority").all()

        rv = {
            "search_rule": {
                "ruleName": search_rule_obj.name,
                "ruleContent": search_rule_obj.rule,
                "status": search_rule_obj.status,
                "priority": search_rule_obj.priority,
                "delay": search_rule_obj.delay,
                "needNotification": int(search_rule_obj.need_notification),
                "clone": int(search_rule_obj.clone),
            },
            "filter_rule": [{
                "id": fr.id,
                "name": fr.name,
                "ruleType": fr.rule_type,
                "ruleEngine": fr.rule_engine,
                "ruleContent": fr.rule,
                "status": fr.status,
                "parentId": fr.parent_id,
                "action": fr.action,
                "position": fr.position,
                "priority": fr.priority
            } for fr in filter_rule_obj],
        }

        return JsonResponse({"code": 1001, "message": "success", "data": rv})
Ejemplo n.º 13
0
    def _real_worker(self):
        """
        Consume search tasks, query the first three result pages for each
        rule and queue the combined results under the task's priority.

        Each task is expected to provide "rule_id" and "rule_content"; tasks
        without rule content are skipped.
        """
        while self.is_running():
            priority, task = self._get_task()
            # Compare the priority with None explicitly: 0 is falsy, so a
            # plain truthiness test would silently discard priority-0 tasks.
            if priority is None or not task:
                continue

            # Unpack the task payload.
            rule_content = task.get("rule_content")
            rule_id = task.get("rule_id")
            if not rule_content:
                continue

            # Request parameters: "q" is the search keyword and "p" (set per
            # iteration below) is the page number.
            params = {
                "tab": "public",
                "scope": "/",
                "type": "content",
                "q": self.__encode_keyword(rule_content)
            }

            # Search three pages by default.
            results = []
            for p in range(1, 4):
                params["p"] = p

                # The cookie header is rebuilt for every page.
                header = {
                    "Cookie": self._get_cookie_header()
                }
                logger.debug("yuque header: {}".format(header))
                # NOTE(review): the response is handed to parse_response
                # without a None check - confirm make_request cannot return
                # None here.
                response = self.make_request(self.SEARCH_API, header, params)
                parsed_result = self.parse_response(response)
                results.extend(parsed_result)

            # Hand the collected results on to the next queue.
            self._put_task(PriorityTask(priority, {"rule_id": rule_id, "results": results}))
Ejemplo n.º 14
0
    def _worker(self):
        """
        Consume result tasks and store one GeyeYuqueLeaksModel row per result.

        A task is read as a mapping with "rule_id" and "results"; every entry
        of "results" is read as a mapping describing one hit.
        """
        logger.debug("{} start.".format(self.name))
        while self.is_running():
            _, task = self._get_task()
            if not task:
                continue

            rule_id = task.get("rule_id")
            # Tolerate a task without results instead of failing on None.
            result_list = task.get("results") or []

            for result in result_list:
                leak = GeyeYuqueLeaksModel()
                leak.title = result.get("title")
                leak.go_url = result.get("url")
                leak.url = result.get("raw_url")
                leak.book_name = result.get("book_name")
                leak.group_name = result.get("group_name")
                leak.abstract = result.get("abstract")
                # NOTE(review): looking up the empty key always yields None
                # unless a result really has a "" key - confirm the intent.
                leak.search_rule_obj = result.get("")
                leak.search_rule_id = rule_id
                leak.status = 1
                # The timestamps describe the individual hit, so read them
                # from the result first; the task-level value is kept as a
                # fallback for producers that put them on the task.
                leak.content_updated_at = result.get(
                    "content_updated_at", task.get("content_updated_at"))
                leak.first_published_at = result.get(
                    "first_published_at", task.get("first_published_at"))
                leak.paper_created_at = result.get(
                    "created_at", task.get("created_at"))
                leak.paper_updated_at = result.get(
                    "updated_at", task.get("updated_at"))
                leak.save()
                logger.debug("Save yuque leak <<{}>>".format(
                    result.get("title")))

        logger.debug("{} end.".format(self.name))
Ejemplo n.º 15
0
    def _worker(self):
        """
        Periodically turn due monitor rules into tasks on the monitor queue.

        While the engine is running, every rule returned by
        ``GeyeMonitorRules.instance.get_all()`` whose ``last_fetch_time``
        plus ``interval`` minutes lies in the past is wrapped in a
        PriorityTask and queued; its ``last_fetch_time`` is then set to the
        time of the scan. The worker waits 30 on ``self.ev`` between scans.
        """
        logger.info("{name} start!".format(name=self.name))

        while self.__running():
            logger.debug("start build monitor task.")

            rows: List[GeyeMonitorRules] = GeyeMonitorRules.instance.get_all()
            current_time = datetime.datetime.now()

            for _row in rows:
                interval = _row.interval
                if _row.last_fetch_time + datetime.timedelta(
                        minutes=interval) < current_time:
                    task = PriorityTask(
                        _row.priority, {
                            "task_type": _row.task_type,
                            "event_type": _row.event_type,
                            "rule_content": _row.rule_content,
                            "rule_id": _row.id,
                        })
                    logger.debug(
                        "Create monitor task: {task}".format(task=task))

                    # Retry every 3 while the queue is full, as long as the
                    # engine keeps running.
                    enqueued = False
                    while self.__running():
                        try:
                            self._monitor_task_queue.put_nowait(task)
                            enqueued = True
                            break
                        except queue.Full:
                            self.ev.wait(3)

                    if not enqueued:
                        # The engine stopped before the task was queued: do
                        # not mark the rule as fetched, otherwise this run
                        # would be skipped for a whole interval.
                        break

                    # Remember when this rule was last scheduled.
                    _row.last_fetch_time = current_time
                    _row.save()

            self.ev.wait(30)

        logger.info("{name} stop!".format(name=self.name))
Ejemplo n.º 16
0
    def make_request(self, header, data) -> Optional[requests.Response]:
        """
        Send the search request, retrying on request errors.

        :param header: request headers, including the token information
        :param data: the search parameters
        :return: the response of the first attempt that does not raise, or
                 None when every attempt failed or the engine stopped
        """

        # Pick the proxy settings once for all attempts.
        proxies = random.choice(self.all_proxies) if self.use_proxies else None

        # todo: hard-coded for now, planned to move into the configuration
        max_attempts = 5

        for attempt in range(1, max_attempts + 1):
            if not self.is_running():
                return None
            try:
                logger.debug("before requests.get()")
                response = requests.get(self.search_api_url,
                                        params=data,
                                        headers=header,
                                        timeout=12,
                                        proxies=proxies)
                logger.debug("after requests.get()")
                return response
            except requests.RequestException as e:
                logger.error(
                    "Error while make request. requests.RequestException: {}".
                    format(e))

            # Only wait when another attempt will follow; previously the
            # counter was checked before the request, so just four requests
            # were sent and the last failure was followed by a useless wait.
            if attempt < max_attempts:
                logger.error("Try re-request after 5s.")
                self.ev.wait(5)

        logger.warning("请求超出最大次数!")
        return None
Ejemplo n.º 17
0
    def _worker(self):
        """
        Consume search tasks and request the result pages for each rule.

        For every task, pages 1..``self.search_page_max_size`` are requested
        in order; the filter tasks parsed from each page are pushed on under
        the priority of the search task. Paging stops early when no token is
        available, when a request yields no response, or when the parsed
        result reports that there is no next page.
        """
        current_name = threading.current_thread().name
        logger.info("{} start!".format(current_name))

        while self.is_running():
            # Fetch a task; keep looping when nothing was obtained. The
            # priority is compared with None explicitly because 0 is falsy
            # and a truthiness test would silently drop priority-0 tasks.
            task_priority, search_task = self.get_task_from_queue()
            if task_priority is None or not search_task:
                continue

            # Unpack the task payload.
            srid = search_task.get("search_rule_id")
            rule_name = search_task.get("search_rule_name")
            rule_content = search_task.get("search_rule_content")
            logger.debug("parse task data done.")

            # Request the pages one by one.
            for page_num in range(1, self.search_page_max_size + 1):
                # Build the request.
                request_data = self.build_request_data(rule_content, page_num)
                request_header = self.build_request_header()
                if request_header is None:
                    logger.error(
                        "No available token found. Jumping search operator.")
                    break

                # A None response means the request was given up on (or the
                # engine is stopping): abandon the remaining pages.
                response = self._request_page(request_header, request_data)
                if response is None:
                    break
                logger.debug("response.text: {}".format(response.text))

                # The keys read from the parsed result below are
                # "filter_tasks", "has_next_page" and "error".
                results = self.parse_response(response, srid, rule_name)
                if results["error"]:
                    # Parsing failed; the page is skipped rather than
                    # requested again.
                    continue

                # Push the generated filter tasks to the filter queue.
                for task in results["filter_tasks"]:
                    self.push_to_queue(task_priority, task)

                # Stop paging when there is no next page.
                if not results["has_next_page"]:
                    logger.debug(
                        "Jump remains page because of 'has_next_page' is False."
                    )
                    break

        logger.info("{} end!".format(current_name))
Ejemplo n.º 18
0
 def put_task_to_queue(self,
                       task,
                       target_queue: queue.PriorityQueue = None):
     """
     Put a task on a queue, retrying every second while the queue is full
     and the engine status is RUNNING.

     :param task: indexable whose items 0 and 1 are handed to PriorityTask
     :param target_queue: queue to put the task on; ``self.filter_task_queue``
                          is used when it is falsy
     :return: None
     """
     destination = target_queue if target_queue else self.filter_task_queue

     while self.status == self.EngineStatus.RUNNING:
         try:
             destination.put_nowait(PriorityTask(task[0], task[1]))
         except queue.Full:
             # Find the attribute name under which this queue is stored on
             # the instance, purely for the log message; the last match wins.
             matches = [
                 attr for attr, value in self.__dict__.items()
                 if value is destination
             ]
             q_name = matches[-1] if matches else "unknown"
             logger.debug("{q_name}已满,1秒后重试.".format(q_name=q_name))
             self.ev.wait(1)
         else:
             break
Ejemplo n.º 19
0
    def post(request):
        """
        Create a monitor rule from the POSTed parameters.

        :param request: the incoming request
        :return: JsonResponse with code 1004 on validation failure, 1003 for
                 an unknown taskType / eventType, 1001 plus the new rule on
                 success, 1002 when no object was created
        """

        logger.debug(f"POST data: {request.body}")

        # Validate presence of the required parameters.
        required = [
            "taskType", "eventType", "interval", "priority", "ruleContent",
            "status"
        ]
        result = RequestValidator().check_params(
            request, check_params=required, check_empty=True)
        if result.has_error:
            return JsonResponse({
                "code": 1004,
                "message": result.error_message
            })

        # Validate the values against the known task and event types.
        params = result.params
        task_type = params.get("taskType")
        event_type = params.get("eventType")
        logger.debug(f"TaskTypeConstantList: {MonitorTaskTypeConstant.lst()}")
        logger.debug(
            f"EventTypeConstantList: {MonitorEventTypeConstant.lst()}")
        if task_type not in MonitorTaskTypeConstant.lst():
            return JsonResponse({"code": 1003, "message": "taskType有误!"})
        if any(item not in MonitorEventTypeConstant.lst()
               for item in event_type):
            return JsonResponse({"code": 1003, "message": "eventType有误!"})

        # Insert the row; the event types are stored comma-joined.
        created = GeyeMonitorRules.instance.create(
            task_type=task_type,
            event_type=",".join(event_type),
            rule_content=params.get("ruleContent"),
            status=params.get("status"),
            interval=params.get("interval"),
            priority=params.get("priority"))
        if not created:
            return JsonResponse({"code": 1002, "message": "添加失败"})
        return JsonResponse({
            "code": 1001,
            "message": "添加成功",
            "data": created.convert_to_dict()
        })
Ejemplo n.º 20
0
    def _worker(self):
        """
        Consume filter tasks and run every applicable filter rule on them.

        For each task the filter rules for ``task["srid"]`` are loaded, the
        full code behind ``task["full_code_url"]`` is fetched, and the rules
        are applied in order until one of them ends the matching. Depending
        on the action of the rule that was hit, a save task is put on
        ``self.save_task_queue`` under the priority of the filter task.
        """
        current_name = threading.current_thread().name

        logger.info("{} start!".format(current_name))

        # Actions that end the matching AND persist the result, mapped to the
        # status stored with it and the message written to the debug log.
        saving_actions = {
            3: (LeaksStatusConstant.IGNORE,
                "Action: Ignore -> save -> end filter."),
            4: (LeaksStatusConstant.CONFIRM,
                "Action: Confirm -> save -> end filter."),
            5: (LeaksStatusConstant.TO_BE_CONFIRMED,
                "Action: To-be-confirmed -> save -> end filter."),
        }

        while self.status == self.EngineStatus.RUNNING:
            # task_priority is the priority given on the search rule. It is
            # compared with None explicitly because 0 is falsy and a
            # truthiness test would silently drop priority-0 tasks.
            task_priority, task = self.get_task_from_queue()
            if task_priority is None or not task:
                continue

            # All rules to apply: global filters first, then the rule's own.
            all_filter_rules: List[
                GeyeFilterRuleModel] = self.get_filter_rules(task["srid"])
            logger.debug("Get all filter rules: {}".format(all_filter_rules))

            # Fetch the complete code. On failure the task is dropped rather
            # than re-queued (re-queueing was noted as a possible cause of a
            # stuck worker).
            response_result = self.get_raw_code(task["full_code_url"])
            if not response_result["success"]:
                logger.error(
                    "获取raw code失败,URL:{url}".format(url=task["full_code_url"]))
                continue
            raw_code = response_result["code"]

            # Apply the rules in order.
            logger.debug("#### [start] SEARCH RULE: {}".format(
                task["search_rule_name"]))
            logger.debug("#### Content URL: {}".format(task["full_code_url"]))
            for _rule in all_filter_rules:
                logger.debug("==== filter rule: {}, content: {}".format(
                    _rule, _rule.rule))
                result = self.do_filter(_rule, task, raw_code)

                # An error during matching ends the matching for this task.
                if not result or result["error"]:
                    break

                # rule_type 1 is a positive rule (a match is a hit), 2 is a
                # negative rule (no match is a hit).
                if _rule.rule_type == 1:
                    hit = bool(result["found"])
                elif _rule.rule_type == 2:
                    hit = not result["found"]
                else:
                    logger.error("Error rule_type: {}".format(_rule.rule_type))
                    break
                logger.debug("filter end. hit result: %s", hit)

                if not hit:
                    logger.debug("no hit, continue filter next rule.")
                    continue

                # What happens on a hit depends on the rule's action:
                #   1 - nothing, go on with the next rule, nothing is saved
                #       (usable as a precondition for other rules)
                #   2 - false positive: stop matching, nothing is saved
                #   3 - false positive: stop matching, save
                #   4 - confirmed: stop matching, save
                #   5 - to be confirmed: stop matching, save
                _action = _rule.action
                if _action == 1:
                    logger.debug("Action: None -> continue next.")
                    continue
                if _action == 2:
                    logger.debug(
                        "Action: Ignore -> no save -> end filter.")
                    break
                if _action in saving_actions:
                    leak_status, log_message = saving_actions[_action]
                    logger.debug(log_message)
                    save_task = (task_priority, {
                        "code": result["code"],
                        "status": leak_status,
                        "pushed": 0,
                        "frid": _rule.id,
                        "filter_task": task,
                        "filter_rule_name": _rule.name
                    })
                    self.put_task_to_queue(
                        save_task, target_queue=self.save_task_queue)
                    break

                # Unknown action: report it and move on to the next rule.
                logger.error(
                    "Unknown action value: {}".format(_action))

            logger.debug("#### [end] SEARCH RULE: {}".format(
                task["search_rule_name"]))

        logger.info("{} end!".format(current_name))
Ejemplo n.º 21
0
    def parse_response(response):
        """
        Parse the response returned by the yuque search API.

        For every hit one additional HTTP request is issued to resolve the
        redirected ("raw") URL of the article.

        :param response: object exposing ``json()``; the decoded payload is read
            as ``{"data": {"totalHits": ..., "numHits": ..., "hits": [...]}}``
        :return: list with one dict per hit holding the keys ``abstract``,
            ``book_name``, ``group_name``, ``id``, ``title``, ``url``,
            ``raw_url`` and the five timestamp fields copied from ``record``
        """
        result = response.json()
        # A missing or null "data" / "hits" entry yields an empty result
        # instead of an AttributeError / TypeError.
        data = result.get("data") or {}
        total_hits = data.get("totalHits")
        num_hits = data.get("numHits")

        # Log the counters for debugging.
        logger.debug("total_hits: {}, num_hits: {}".format(total_hits, num_hits))

        # The matched documents live under "hits".
        hits_list = data.get("hits") or []

        ret_list = []
        for hits in hits_list:
            # Timestamps come from the optional "record" object; each one
            # defaults to "" when the object or the field is absent.
            record = hits.get("record") or {}
            url = hits.get("url", "NO_URL_FIELD")

            # Request the article once more to obtain its real URL.
            # TODO: always enabled for now; move this switch to the config file.
            paper_full_url = "https://yuque.com{}".format(url)
            history = requests.get(paper_full_url, timeout=9).history
            # ``history`` is empty when the request was not redirected, and the
            # last hop may carry no "location" header; in both cases fall back
            # to the URL that was requested instead of crashing or emitting
            # "https://yuque.comNone".
            redirect_path = history[-1].headers.get("location") if history else None
            if redirect_path:
                paper_raw_url = "https://yuque.com{}".format(redirect_path)
            else:
                paper_raw_url = paper_full_url

            ret_list.append({
                "abstract": hits.get("abstract", "NO_ABSTRACT_FIELD"),
                "book_name": hits.get("book_name", "NO_BOOK_NAME_FIELD"),
                "group_name": hits.get("group_name", "NO_GROUP_NAME_FIELD"),
                "id": hits.get("id", "NO_ID_FIELD"),
                "title": hits.get("title", "NO_TITLE_FIELD"),
                "url": url,
                "raw_url": paper_raw_url,
                "content_updated_at": record.get("content_updated_at", ""),
                "first_published_at": record.get("first_published_at", ""),
                "published_at": record.get("published_at", ""),
                "created_at": record.get("created_at", ""),
                "updated_at": record.get("updated_at", ""),
            })
        return ret_list
Ejemplo n.º 22
0
    def post(request):
        """
        Create a filter rule from the POSTed parameters.

        Every parameter is validated in turn; the first invalid one aborts the
        request with its own error code. On success the stored rule is echoed
        back in the ``data`` field of the JSON response.
        """
        logger.debug("POST: {}".format(request.body))

        # Make sure none of the expected parameters is empty.
        required_keys = [
            "name", "ruleType", "ruleEngine", "ruleContent",
            "status", "action", "position", "priority",
        ]
        result = RequestValidator.check_params(
            request, check_empty=True, check_params=required_keys)
        if result.has_error:
            logger.error("error: {}".format(result.error_message))
            return JsonResponse({"code": 1004, "message": result.error_message})
        params = result.params

        def reject(code, message):
            # Build the error response shared by all validation failures.
            return JsonResponse({"code": code, "message": message})

        def int_param(key, default):
            # Fetch a parameter and coerce it through the project's int helper.
            return CommonConvert.ensure_int(params.get(key, default))

        name = params.get("name")
        if not name:
            return reject(1003, "规则名称有误!")

        rule_type = int_param("ruleType", 1)
        if rule_type not in (1, 2):
            return reject(1005, "ruleType有误!")

        rule_engine = int_param("ruleEngine", 1)
        if rule_engine not in (1, 2):
            return reject(1006, "ruleEngine有误!")

        rule_content = params.get("ruleContent", "")
        if not rule_content:
            return reject(1004, "ruleContent不能为空")

        status = int_param("status", 1)
        if status not in (1, 0):
            return reject(1007, "status有误!")

        action = int_param("action", 1)
        if action not in range(1, 6):
            return reject(1007, "action有误!")

        position = int_param("position", 1)
        if position not in range(1, 6):
            return reject(1008, "position有误!")

        priority = int_param("priority", 5)
        if priority not in range(0, 11):
            return reject(1009, "priority有误!")

        obj = GeyeFilterRuleModel.instance.create(
            name=name,
            rule_type=rule_type,
            rule_engine=rule_engine,
            rule=rule_content,
            status=status,
            parent_id=0,
            action=action,
            position=position,
            priority=priority,
        )
        if not obj:
            return reject(1002, "添加失败!")

        created = {
            "id": obj.id,
            "name": obj.name,
            "ruleType": obj.rule_type,
            "ruleEngine": obj.rule_engine,
            "ruleContent": obj.rule,
            "status": obj.status,
            "parentId": obj.parent_id,
            "action": obj.action,
            "position": obj.position,
            "priority": obj.priority,
        }
        return JsonResponse({"code": 1001, "message": "添加成功!", "data": created})
Ejemplo n.º 23
0
    def post(request: HttpRequest):
        """
        Create a search rule and, optionally, its default filter rule.

        Response codes written to the JSON body:
          * 1001 - rule created, ``data`` holds the new rule id
          * 1002 - a rule with the same name already exists
          * 1003 - ``priority`` or ``delay`` is a non-numeric string
          * 1004 - a required parameter is missing or empty

        :param request: incoming HTTP request carrying the rule parameters
        :return: JsonResponse with ``code``, ``message`` and ``data``
        """
        logger.debug("POST: {}".format(request.body))
        r_json = {"code": 1001, "message": "", "data": ""}

        # Basic check that none of the expected parameters is empty.
        result = RequestValidator.check_params(request, [
            "ruleName", "ruleContent", "status", "defaultFilter", "delay",
            "priority", "notification", "clone"
        ],
                                               check_empty=True)
        logger.debug("check result: {}".format(result))
        if result.has_error:
            r_json["code"] = 1004
            r_json["message"] = result.error_message
            logger.error("error_message: {}".format(result.error_message))
            return JsonResponse(r_json)
        request_data = result.params

        rule_name = request_data.get("ruleName")
        rule_content = request_data.get("ruleContent")

        # Reject duplicated rule names.
        if GeyeSearchRuleModel.instance.is_exist(rule_name):
            r_json["code"] = 1002
            r_json["message"] = "规则名称已存在!"
            return JsonResponse(r_json)

        status = request_data.get("status", 0)
        default_filter = request_data.get("defaultFilter", 1)
        # NOTE(review): int() raises ValueError for a non-numeric value;
        # confirm the validator guarantees a numeric defaultFilter.
        default_filter = int(default_filter)
        delay: str = request_data.get("delay", "30")
        priority: str = request_data.get("priority", "5")

        # Validate priority and delay.
        if isinstance(priority, str) and not priority.isdigit():
            r_json["code"] = 1003
            r_json["message"] = "非法的优先级!"
            return JsonResponse(r_json)
        if isinstance(delay, str) and not delay.isdigit():
            r_json["code"] = 1003
            r_json["message"] = "非法的搜索间隔时间!"
            # Stop here: without this return the invalid delay was stored and
            # the error above was overwritten by the success response below.
            return JsonResponse(r_json)

        # Notification and auto-clone are not enabled yet.
        notification = 0
        clone = 0

        # Persist the search rule.
        obj = GeyeSearchRuleModel.instance.create(
            name=rule_name,
            rule=rule_content,
            status=status,
            priority=priority,
            last_refresh_time=None,
            delay=delay,
            need_notification=notification,
            clone=clone)

        # When the default filter is requested, attach it to the new rule.
        if default_filter:
            # The default filter ends the matching when the searched keyword
            # is not found in the content.
            GeyeFilterRuleModel.instance.create(name="DefaultFilter",
                                                rule_type=2,
                                                rule_engine=2,
                                                rule=rule_content,
                                                status=1,
                                                parent_id=obj.id,
                                                action=2,
                                                position=4,
                                                priority=10)

        r_json["code"] = 1001
        r_json["message"] = "创建成功!"
        r_json["data"] = obj.id
        return JsonResponse(r_json)
Ejemplo n.º 24
0
    def regex_filter(rule_content, filter_content, frid) -> dict:
        """
        Match ``filter_content`` against the regex ``rule_content`` using the
        engine selected by ``settings.REGEX_ENGINE``.

        :param rule_content: the regular expression to apply
        :param filter_content: the text to search
        :param frid: filter rule id, only used in log messages
        :return: dict with the keys ``error`` (the match could not be carried
            out), ``found`` and ``code`` (the latter two are copied from the
            engine's result)
        """
        # Read the setting once per call: this allows switching the engine at
        # runtime while keeping a single, consistent value for this call.
        regex_engine = settings.REGEX_ENGINE

        # Value returned to the caller.
        filter_result = {"error": False, "found": False, "code": ""}

        if regex_engine == "inner":
            # The inner engine runs in a child process so that a runaway
            # pattern can be killed after a timeout.
            logger.debug("Use 'inner' regex engine.")
            result_queue = multiprocessing.Queue()
            p = Process(target=RuleEngine._regex_inner_engine,
                        args=(
                            rule_content,
                            filter_content,
                            result_queue,
                        ))
            p.start()

            # Give the match up to 60 seconds.
            # NOTE(review): the multiprocessing docs advise draining a queue
            # before joining its producer; a very large result might keep the
            # child alive here and be reported as a timeout - confirm.
            p.join(60)
            if p.is_alive():
                logger.error(
                    "[INNER REGEX] filter timeout! frid: {}".format(frid))
                p.terminate()
                p.join()

                # Drop the queue explicitly to avoid leaking memory.
                del result_queue
                filter_result["error"] = True
                return filter_result

            # Fetch the result produced by the child process.
            try:
                _result = result_queue.get_nowait()
                filter_result["found"] = _result["found"]
                filter_result["code"] = _result["code"]
                return filter_result
            except queue.Empty:
                # The process finished without delivering anything.
                logger.error(
                    "Empty result get from queue! frid: {}".format(frid))
                filter_result["error"] = True
                return filter_result
        elif regex_engine == "grep":
            # Both arguments are quoted before being handed to the grep engine.
            rule = shlex.quote(rule_content)
            content = shlex.quote(filter_content)
            _result = RuleEngine._regex_grep_engine(rule, content)
            filter_result["error"] = _result["error"]
            filter_result["found"] = _result["found"]
            filter_result["code"] = _result["code"]
            return filter_result
        else:
            logger.error("Un-support regex-engine '{}' !".format(regex_engine))
            # No match was attempted: report it as an error, like every other
            # failure path, rather than as a clean "not found".
            filter_result["error"] = True
            return filter_result
Ejemplo n.º 25
0
    def _worker(self):
        """
        Worker loop: take monitor tasks, call the matching API, parse the
        events out of the answer and queue them for storage.

        Runs until ``self.__is_running()`` turns false. A task that cannot be
        processed is logged and skipped; it never stops the loop.
        """
        logger.info("{name} start!".format(name=self.name))

        while self.__is_running():
            task_priority, task = self.__get_task()
            if task_priority is None or task is None:
                self.__wait(1)
                continue

            # Fields read from the task:
            # {
            #     "task_type": ...,     # value from MonitorTaskTypeConstant, the monitored dimension
            #     "event_type": ...,    # values from MonitorEventTypeConstant, comma separated
            #     "rule_content": ...,  # meaning depends on task_type
            #     "rule_id": ...,
            # }
            logger.debug("get task: {}".format(task))
            task_type = task.get("task_type", None)
            event_type: str = task.get("event_type", None)
            rule_content = task.get("rule_content", None)
            monitor_rule_id = task.get("rule_id", None)
            if not task_type or not event_type or not rule_content or not monitor_rule_id:
                self.__wait(1)
                continue

            # Each task_type maps to its own API URL template.
            api_url = MonitorAPIUrl.get(task_type, None)
            if not api_url:
                logger.error("task_type有误,无法获取API!")
                continue

            # rule_content is JSON whose keys fill the template placeholders.
            # Malformed JSON, a non-object value or a missing key must not
            # bring the worker down, so such a task is logged and skipped.
            try:
                api_url = api_url.format(**json.loads(rule_content))
            except (ValueError, TypeError, KeyError) as e:
                logger.error(
                    "Invalid rule_content, rule_id: {rule_id}, error: {err!r}".format(
                        rule_id=monitor_rule_id, err=e))
                continue

            # Call the API to fetch the data.
            results = self.__fetch_api(api_url)
            if not results["success"]:
                logger.error(
                    "Fetch API failed! {err}".format(err=results["reason"]))
                continue

            logger.debug("results: {}".format(results))

            # Extract the events from the API answer; event_type may name
            # several events. The parser result is read as:
            # {
            #     "success": False,
            #     "message": "Unknown Error",
            #     "data": [],  # typing: List[Dict]
            # }
            parse_result = EventParser.parse(event_type.split(","),
                                             results["data"])
            if not parse_result.get("success"):
                logger.error(parse_result.get("message"))
                continue
            else:
                # Hand the events over to the queue so they get stored.
                self.__put_task(
                    task_priority, {
                        "data": parse_result.get("data"),
                        "monitor_rule_id": monitor_rule_id,
                    })

        logger.info("{name} stop!".format(name=self.name))