Beispiel #1
0
def update_rate_remain():
    """Refresh the stored search rate-limit figures for every account.

    For each document in ``github_col`` a ``Github`` client is built from the
    stored ``username``/``password`` and the search rate limit's ``remaining``
    and ``limit`` values are written back to the document matched by
    ``username``. A failure for one account is logged and does not stop the
    remaining accounts from being processed.
    """
    for account in github_col.find():
        github_username = account.get('username')
        github_password = account.get('password')
        try:
            g = Github(github_username, github_password)
            # Query the rate limit once so both stored values come from the
            # same snapshot instead of two separate get_rate_limit() calls.
            search_rate = g.get_rate_limit().search
            github_col.update_one(
                {'username': github_username},
                {'$set': {'rate_remaining': int(search_rate.remaining),
                          'rate_limit': int(search_rate.limit)}})
        except Exception as error:
            logger.error(error)
Beispiel #2
0
def search(query, page, g, github_username):
    """Fetch one page of code-search results and record new findings.

    Runs ``g.search_code`` for ``query['keyword']``, walks the results on the
    requested page and stores every result whose ``sha`` is not yet present in
    ``result_col``. Results whose link contains a ``blacklist_col`` entry, or
    whose project already has an ignored record, are skipped. New
    project/filepath pairs are collected and reported by mail (when the
    ``mail`` setting is enabled) and through ``webhook_notice``.

    Args:
        query: Mapping read via ``.get('tag')`` and ``.get('keyword')``.
        page: Page index handed to ``repos.get_page``; log messages show it
            as ``page + 1``.
        g: Client object providing ``search_code`` and ``get_rate_limit``.
        github_username: Username used to locate the account document in
            ``github_col`` when storing the remaining rate limit.

    Returns:
        None. The function returns early when the initial search fails, when
        a result reports zero remaining rate limit, or when processing the
        page raises.
    """
    mail_notice_list = []
    webhook_notice_list = []
    logger.info('开始抓取: tag is {} keyword is {}, page is {}'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    try:
        repos = g.search_code(query=query.get('keyword'),
                              sort="indexed",
                              order="desc")
        github_col.update_one({'username': github_username}, {
            '$set': {
                'rate_remaining': int(g.get_rate_limit().search.remaining)
            }
        })

    except Exception as error:
        logger.critical(error)
        logger.critical("触发限制啦")
        return
    try:
        for repo in repos.get_page(page):
            # Heartbeat: record the worker pid and the time of the last
            # processed result under the 'task' setting.
            setting_col.update_one({'key': 'task'}, {
                '$set': {
                    'key': 'task',
                    'pid': os.getpid(),
                    'last': timestamp()
                }
            },
                                   upsert=True)
            if not result_col.count({'_id': repo.sha}):
                try:
                    code = str(repo.content).replace('\n', '')
                except Exception:
                    # Best effort: a result whose content cannot be read is
                    # still stored, just without the code body.
                    code = ''
                leakage = {
                    'link': repo.html_url,
                    'project': repo.repository.full_name,
                    'project_url': repo.repository.html_url,
                    '_id': repo.sha,
                    'language': repo.repository.language,
                    'username': repo.repository.owner.login,
                    'avatar_url': repo.repository.owner.avatar_url,
                    'filepath': repo.path,
                    'filename': repo.name,
                    'security': 0,
                    'ignore': 0,
                    'tag': query.get('tag'),
                    'code': code,
                }
                try:
                    leakage['affect'] = get_affect_assets(repo.decoded_content)
                except Exception as error:
                    logger.critical('{} {}'.format(error, leakage.get('link')))
                    leakage['affect'] = []
                # NOTE(review): returning here drops the notifications that
                # were collected so far for this page -- confirm intended.
                if int(repo.raw_headers.get('x-ratelimit-remaining')) == 0:
                    logger.critical('剩余使用次数: {}'.format(
                        repo.raw_headers.get('x-ratelimit-remaining')))
                    return
                last_modified = datetime.datetime.strptime(
                    repo.last_modified, '%a, %d %b %Y %H:%M:%S %Z')
                leakage['datetime'] = last_modified
                leakage['timestamp'] = last_modified.timestamp()
                # Every matching entry is logged, so the loop deliberately
                # does not stop at the first hit.
                in_blacklist = False
                for blacklist in blacklist_col.find({}):
                    if blacklist.get('text').lower() in leakage.get(
                            'link').lower():
                        logger.warning('{} 包含白名单中的 {}'.format(
                            leakage.get('link'), blacklist.get('text')))
                        in_blacklist = True
                if in_blacklist:
                    continue
                # Skip projects that already have a record flagged as ignored.
                if result_col.count({
                        "project": leakage.get('project'),
                        "ignore": 1
                }):
                    continue
                # Only notify when this project/filepath has no stored record
                # with security == 0 yet.
                if not result_col.count({
                        "project": leakage.get('project'),
                        "filepath": leakage.get("filepath"),
                        "security": 0
                }):
                    mail_notice_list.append(
                        '上传时间:{} 地址: <a href={}>{}/{}</a>'.format(
                            leakage.get('datetime'), leakage.get('link'),
                            leakage.get('project'), leakage.get('filename')))
                    webhook_notice_list.append('[{}/{}]({}) 上传于 {}'.format(
                        leakage.get('project').split('.')[-1],
                        leakage.get('filename'), leakage.get('link'),
                        leakage.get('datetime')))
                try:
                    result_col.insert_one(leakage)
                    logger.info(leakage.get('project'))
                except errors.DuplicateKeyError:
                    logger.info('已存在')

                logger.info('抓取关键字:{} {}'.format(query.get('tag'),
                                                 leakage.get('link')))
    except Exception as error:
        # Only some exceptions carry a ``data`` payload; reading it
        # unconditionally raised AttributeError inside this handler and hid
        # the real failure. The payload may be a dict, so the text is matched
        # against its string form rather than against its keys.
        error_data = getattr(error, 'data', None)
        if error_data is not None and 'Not Found' not in str(error_data):
            # Retry the same page later with a different account.
            g, github_username = new_github()
            search.schedule(args=(query, page, g, github_username),
                            delay=huey.pending_count() +
                            huey.scheduled_count())
        logger.critical(error)
        logger.error('抓取: tag is {} keyword is {}, page is {} 失败'.format(
            query.get('tag'), query.get('keyword'), page + 1))

        return
    logger.info('抓取: tag is {} keyword is {}, page is {} 成功'.format(
        query.get('tag'), query.get('keyword'), page + 1))
    # NOTE(review): 'reason' stores ``page`` while the log lines above use
    # ``page + 1`` -- confirm which numbering the consumers expect.
    query_col.update_one({'tag': query.get('tag')}, {
        '$set': {
            'last': int(time.time()),
            'status': 1,
            'reason': '抓取第{}页成功'.format(page),
            'api_total': repos.totalCount,
            'found_total': result_col.count({'tag': query.get('tag')})
        }
    })
    if setting_col.count({
            'key': 'mail',
            'enabled': True
    }) and len(mail_notice_list):
        main_content = '<h2>规则名称: {}</h2><br>{}'.format(
            query.get('tag'), '<br>'.join(mail_notice_list))
        send_mail(main_content)
    logger.info(len(webhook_notice_list))
    webhook_notice(query.get('tag'), webhook_notice_list)