コード例 #1
0
ファイル: setting.py プロジェクト: samshuai/Hawkeye
    def post(self):
        """Validate, optionally test, and persist a webhook notice setting.

        Request arguments (parsed with ``reqparse``):
            webhook: required URL; it must use ``https`` and be hosted on
                ``oapi.dingtalk.com`` or ``qyapi.weixin.qq.com``.
            domain: system URL host, used as the link target of the test
                message.
            enabled: boolean flag stored with the setting (default False).
            test: boolean flag (default False). When true, a test message is
                posted to the webhook and the outcome is returned; nothing is
                written to ``setting_col`` in that case.

        Returns:
            A ``jsonify`` response whose payload has ``status``, ``msg`` and
            ``result`` keys. ``status`` is 201 on success and 400 on any
            validation, delivery or persistence failure.
        """
        parser = reqparse.RequestParser()
        parser.add_argument('webhook',
                            type=str,
                            required=True,
                            help='WebHook URL')
        parser.add_argument('domain', type=str, help='System URL Host')
        parser.add_argument('enabled',
                            type=inputs.boolean,
                            default=False,
                            help='Enabled Notice')
        parser.add_argument('test',
                            type=inputs.boolean,
                            default=False,
                            help='Test Notice')
        args = parser.parse_args()

        # Parse the URL once and reuse the result for every check below.
        webhook = args.get('webhook')
        parsed = urlparse(webhook)
        allowed_hosts = ('oapi.dingtalk.com', 'qyapi.weixin.qq.com')
        if parsed.netloc not in allowed_hosts or parsed.scheme != 'https':
            data = {'status': 400, 'msg': '错误的 webhook 地址', 'result': []}
            return jsonify(data)

        if args.get('test'):
            text = '### 规则名称: [WebHook告警测试]({})'.format(args.get('domain'))
            if parsed.netloc == 'oapi.dingtalk.com':
                test_content = {
                    "msgtype": "markdown",
                    "markdown": {
                        "title": "GitHub泄露",
                        "text": text
                    },
                    "at": {
                        "atMobiles": [],
                        "isAtAll": False
                    }
                }
            else:
                test_content = {
                    "msgtype": "markdown",
                    "markdown": {
                        "content": text
                    }
                }

            network_error = {
                'status': 400,
                'msg': '发送失败,请检查服务器网络',
                'result': []
            }
            try:
                # A timeout keeps the request handler from hanging forever
                # when the webhook host is unreachable.
                response = requests.post(webhook,
                                         json=test_content,
                                         timeout=10)
            except requests.RequestException:
                # Connection errors and timeouts are the actual "network"
                # failures; report them instead of letting them propagate.
                return jsonify(network_error)
            if not response.ok:
                return jsonify(network_error)

            try:
                errmsg = response.json().get('errmsg')
            except (ValueError, AttributeError):
                # Body was not a JSON object; surface the raw text instead.
                errmsg = response.text
            if errmsg == 'ok':
                data = {
                    'status': 201,
                    'msg': '已发送,请前往钉钉/企业微信群查看',
                    'result': []
                }
            else:
                data = {
                    'status': 400,
                    'msg': '发送失败,WebHook 响应: {}'.format(errmsg),
                    'result': []
                }
            return jsonify(data)

        # 'test' is a request-only flag and must not be stored.
        del args['test']
        setting_col.update_one({'webhook': webhook}, {'$set': args},
                               upsert=True)
        # Confirm the upsert by counting documents that match the webhook.
        result = setting_col.count({'webhook': webhook})
        if result > 0:
            data = {'status': 201, 'msg': '设置成功', 'result': result}
        else:
            data = {'status': 400, 'msg': '设置失败', 'result': result}
        return jsonify(data)
コード例 #2
0
extract = tldextract.TLDExtract(cache_file='{}/.tld_set'.format(base_path))

if setting_col.count({
        'key': 'task',
        'minute': {
            '$exists': True
        },
        'page': {
            '$exists': True
        }
}):
    minute = int(setting_col.find_one({'key': 'task'}).get('minute'))
    setting_col.update_one(
        {'key': 'task'},
        {'$set': {
            'key': 'task',
            'pid': os.getpid(),
            'last': timestamp()
        }},
        upsert=True)

else:
    minute = 10
    setting_col.update_one({'key': 'task'}, {
        '$set': {
            'key': 'task',
            'pid': os.getpid(),
            'minute': 10,
            'page': 3,
            'last': timestamp()
        }
    },
コード例 #3
0
def search(query, page, g, github_username):
    """Fetch one page of code-search hits for a rule and store new leaks.

    Args:
        query: mapping with at least ``tag`` and ``keyword`` entries.
        page: zero-based page index passed to ``repos.get_page``.
        g: client object exposing ``search_code`` and ``get_rate_limit``
            (presumably a PyGithub ``Github`` instance; confirm with caller).
        github_username: account name whose ``rate_remaining`` is refreshed
            in ``github_col``.

    Returns:
        None. Results are written to ``result_col``, the rule status to
        ``query_col``, and notices are sent via ``send_mail`` and
        ``webhook_notice``.

    On a failure while iterating the page, the error is logged. The task is
    rescheduled through ``search.schedule`` with a fresh account only when
    the exception carries API ``data`` that does not mention 'Not Found'.
    """
    mail_notice_list = []
    webhook_notice_list = []
    logger.info('开始抓取: tag is {} keyword is {}, page is {}'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    try:
        repos = g.search_code(query=query.get('keyword'),
                              sort="indexed",
                              order="desc")
        github_col.update_one({'username': github_username}, {
            '$set': {
                'rate_remaining': int(g.get_rate_limit().search.remaining)
            }
        })

    except Exception as error:
        logger.critical(error)
        logger.critical("触发限制啦")
        return
    try:
        for repo in repos.get_page(page):
            # Heartbeat: record the worker pid and the time of last activity.
            setting_col.update_one({'key': 'task'}, {
                '$set': {
                    'key': 'task',
                    'pid': os.getpid(),
                    'last': timestamp()
                }
            },
                                   upsert=True)
            # Skip hits whose blob sha is already stored.
            if result_col.count({'_id': repo.sha}):
                continue
            try:
                code = str(repo.content).replace('\n', '')
            except Exception:
                # Best effort: keep the hit even if the content is missing.
                code = ''
            leakage = {
                'link': repo.html_url,
                'project': repo.repository.full_name,
                'project_url': repo.repository.html_url,
                '_id': repo.sha,
                'language': repo.repository.language,
                'username': repo.repository.owner.login,
                'avatar_url': repo.repository.owner.avatar_url,
                'filepath': repo.path,
                'filename': repo.name,
                'security': 0,
                'ignore': 0,
                'tag': query.get('tag'),
                'code': code,
            }
            try:
                leakage['affect'] = get_affect_assets(repo.decoded_content)
            except Exception as error:
                logger.critical('{} {}'.format(error, leakage.get('link')))
                leakage['affect'] = []
            # Stop this page once the API reports no remaining quota.
            if int(repo.raw_headers.get('x-ratelimit-remaining')) == 0:
                logger.critical('剩余使用次数: {}'.format(
                    repo.raw_headers.get('x-ratelimit-remaining')))
                return
            last_modified = datetime.datetime.strptime(
                repo.last_modified, '%a, %d %b %Y %H:%M:%S %Z')
            leakage['datetime'] = last_modified
            leakage['timestamp'] = last_modified.timestamp()
            in_blacklist = False
            for blacklist in blacklist_col.find({}):
                if blacklist.get('text').lower() in leakage.get(
                        'link').lower():
                    logger.warning('{} 包含白名单中的 {}'.format(
                        leakage.get('link'), blacklist.get('text')))
                    in_blacklist = True
            if in_blacklist:
                continue
            # Projects flagged as ignored are never stored again.
            if result_col.count({
                    "project": leakage.get('project'),
                    "ignore": 1
            }):
                continue
            # Notify only when no pending record exists for this file yet.
            if not result_col.count({
                    "project": leakage.get('project'),
                    "filepath": leakage.get("filepath"),
                    "security": 0
            }):
                mail_notice_list.append(
                    '上传时间:{} 地址: <a href={}>{}/{}</a>'.format(
                        leakage.get('datetime'), leakage.get('link'),
                        leakage.get('project'), leakage.get('filename')))
                webhook_notice_list.append('[{}/{}]({}) 上传于 {}'.format(
                    leakage.get('project').split('.')[-1],
                    leakage.get('filename'), leakage.get('link'),
                    leakage.get('datetime')))
            try:
                result_col.insert_one(leakage)
            except errors.DuplicateKeyError:
                logger.info('已存在')

            logger.info('抓取关键字:{} {}'.format(query.get('tag'),
                                             leakage.get('link')))
    except Exception as error:
        # Log first so the failure is recorded even if rescheduling raises.
        logger.critical(error)
        logger.error('抓取: tag is {} keyword is {}, page is {} 失败'.format(
            query.get('tag'), query.get('keyword'), page + 1))
        # Not every exception has a ``data`` attribute, and when present it
        # may be a dict; compare against its text so the 'Not Found' check
        # inspects the message rather than dict keys.
        error_data = getattr(error, 'data', None)
        if error_data is not None and 'Not Found' not in str(error_data):
            g, github_username = new_github()
            search.schedule(args=(query, page, g, github_username),
                            delay=huey.pending_count() +
                            huey.scheduled_count())

        return
    logger.info('抓取: tag is {} keyword is {}, page is {} 成功'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    query_col.update_one({'tag': query.get('tag')}, {
        '$set': {
            'last': int(time.time()),
            'status': 1,
            'reason': '抓取第{}页成功'.format(page),
            'api_total': repos.totalCount,
            'found_total': result_col.count({'tag': query.get('tag')})
        }
    })
    if setting_col.count({
            'key': 'mail',
            'enabled': True
    }) and mail_notice_list:
        main_content = '<h2>规则名称: {}</h2><br>{}'.format(
            query.get('tag'), '<br>'.join(mail_notice_list))
        send_mail(main_content)
    logger.info(len(webhook_notice_list))
    webhook_notice(query.get('tag'), webhook_notice_list)
コード例 #4
0
ファイル: test.py プロジェクト: bypdhuorg/Hawkeye
def run():
    """Run one search per enabled query for each configured page round.

    Preconditions, each logged and causing an early return when unmet:
      * at least one enabled query exists in ``query_col``;
      * at least one account in ``github_col`` has ``rate_remaining`` > 5;
      * the ``task`` setting document has a ``page`` field.

    For every round and every enabled query (oldest ``last`` first), a
    random account is chosen, the page following the query's ``page_pre``
    is computed (wrapping to the first page past the end), and ``search``
    is called for it.
    """
    query_count = query_col.count({'enabled': True})
    logger.info('需要处理的关键词总数: {}'.format(query_count))
    if not query_count:
        logger.warning('请添加关键词')
        return
    if not github_col.count({'rate_remaining': {'$gt': 5}}):
        logger.error('请配置github账号')
        return
    if not setting_col.count({'key': 'task', 'page': {'$exists': True}}):
        logger.error('请在页面上配置任务参数')
        return

    setting_col.update_one({'key': 'task'}, {'$set': {'pid': os.getpid()}})
    page = int(setting_col.find_one({'key': 'task'}).get('page'))

    for _ in range(page):
        for query in query_col.find({'enabled': True}).sort('last', 1):
            # The guard above checks ``rate_remaining`` while this filter
            # uses ``rate_limit``; the result can therefore be empty, which
            # would make ``random.choice`` raise IndexError.
            candidates = list(
                github_col.find({
                    "rate_limit": {
                        "$gt": 5
                    }
                }).sort('rate_remaining', -1))
            if not candidates:
                logger.error('请配置github账号')
                return
            github_account = random.choice(candidates)
            github_username = github_account.get('username')
            github_password = github_account.get('password')
            rate_remaining = github_account.get('rate_remaining')
            logger.info(github_username)
            logger.info(rate_remaining)
            g = Github(
                github_username,
                github_password,
                per_page=PER_PAGE,
                user_agent=
                'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.81 Safari/537.36'
            )

            # Prefer the cached total; otherwise ask the API for it.
            api_total = query.get('api_total')
            if api_total:
                total = api_total
            else:
                repos = g.search_code(query=query.get('keyword'),
                                      sort="indexed",
                                      order="desc")
                total = repos.totalCount
            # Cap the number of results considered at 1000.
            total = min(total, 1000)

            stored_page = query.get('page_pre')
            page_pre = int(stored_page) if stored_page is not None else -1
            # The client pages by PER_PAGE, so the page count must use the
            # same size rather than a hard-coded constant.
            page_all = math.ceil(total / PER_PAGE)
            if page_all == 0:
                continue
            # Wrap around to the first page after the last one.
            if page_pre + 1 >= page_all:
                page_pre = -1
            page_now = page_pre + 1

            search(query, page_now, g, github_username)