Esempio n. 1
0
def exp_query(delta=0):
    """Fetch the third-party exp rows for the requested day(s) and for the current month.

    :param delta: a single day offset, or a list of day offsets, handed to
        ``time_delta`` to build the ``%Y-%m-%d`` stamp(s) that are matched
        against ``exp_publishedtime``.
    :return: ``(day_rows, month_rows)`` -- the rows whose publish time matches
        the day stamp(s), and the rows whose publish time matches the current
        ``%Y-%m`` stamp.
    """

    def _published_like(stamp):
        # Rows of exp.exps whose publish time contains the given stamp.
        _, rows = cve_query_where(
            db='exp',
            table='exps',
            key=['*'],
            where='exp_publishedtime like "%{}%"'.format(stamp))
        return rows

    if not isinstance(delta, list):
        # Single offset: hand back the query result untouched.
        day_rows = _published_like(
            time_delta(delta=delta, format='%Y-%m-%d'))
    else:
        # Several offsets: concatenate the per-day results in order.
        day_rows = []
        for offset in delta:
            stamp = time_delta(delta=offset, format='%Y-%m-%d')
            day_rows = day_rows + _published_like(stamp)

    month_rows = _published_like(time_delta(delta=0, format='%Y-%m'))

    return day_rows, month_rows
Esempio n. 2
0
def exp_add():
    """
    Collect the exp additions flagged by third-party data sources.

    Every call gathers the daily additions (exploit-db and github; the seebug
    source is currently disabled and contributes an empty input), stores them
    via ``exp_table`` and labels the affected CVEs via ``exp_label``.
    When the day-of-month is 30, the CVE ids reported by ``cvedetails_crawler``
    and ``mitre_expdb_all`` are labelled as well.

    :return day_exp_add: list of tuples produced by ``exp_5tuple``
    """
    # Daily update -> exps & nvd: exploit-db.com, seebug, github
    a4 = exp_5tuple(key='exploit-db', value=expdb_exp_add())
    # seebug collection is switched off, so an empty (unshared) list is passed.
    a5 = exp_5tuple(key='seebug', value=[])
    a6 = exp_5tuple(key='github', value=github_exp_add())
    day_exp_add = a4 + a5 + a6
    exp_table(day_exp_add)
    cves = [i[0] for i in day_exp_add]
    exp_label(all_exp=cves)

    # Monthly update -> nvd: cvedetails.com, mitre
    month_day = time_delta(delta=0, format='%Y-%m-%d')
    # The day field is a string; it must be converted before comparing with 30
    # (comparing the raw string to an int is always False).
    # NOTE(review): a month without a 30th day (February) never triggers this.
    if int(month_day.split('-')[2]) == 30:
        a1 = list(cvedetails_crawler().keys())
        a3 = list(mitre_expdb_all(reparse=True).keys())
        month_exp_add = a1 + a3
        exp_label(all_exp=month_exp_add)

    return day_exp_add
Esempio n. 3
0
def auto_email(content,msg_to='*****@*****.**'):
    """Render ``content`` (markdown) to HTML and mail it through smtp.163.com.

    The sender address and password are read from the ``Mail`` section
    (``sendmail`` / ``password``) of ``conf/info.conf``.

    :param content: markdown text of the report; a "More informations" link
        is appended after rendering.
    :param msg_to: recipient address.
    :return: None. SMTP errors are reported on stdout, not raised.
    """
    conf=configparser.ConfigParser()
    conf.read('conf/info.conf')
    msg_from=conf.get('Mail','sendmail')
    password=conf.get('Mail','password')

    subject = "CVE-Flow:CVE Threat Intelligence on {}".format(time_delta(format='%Y-%m-%d'))
    content = mistune.markdown(content, escape=True, hard_wrap=True)
    link = "More informations:https://github.com/404notf0und/CVE-Flow/blob/master/README.md"
    content=content+link
    msg = MIMEText(content, 'html', 'utf-8')

    msg['Subject'] = subject
    msg['From'] = msg_from
    msg['To'] = msg_to

    # Stays None when the connection itself fails, so the cleanup below does
    # not trip over an unbound name and hide the original error.
    client = None
    try:
        client = smtplib.SMTP_SSL('smtp.163.com', smtplib.SMTP_SSL_PORT)
        print("[+] Connect to mail server")

        client.login(msg_from, password)
        print("[+] Successfully log in")

        client.sendmail(msg_from, msg_to, msg.as_string())
        print("[+] Successfully send mail")
    except smtplib.SMTPException:
        print("[!] Send mail error")
    finally:
        if client is not None:
            client.quit()
Esempio n. 4
0
def auto_push():
    """Stage everything, commit with a timestamped message and push to origin/master.

    The whole git pipeline runs through the shell with ``os.system``; a
    non-zero exit status is reported on stdout.

    :return: None
    """
    # %M is minutes; the previous "%H:%m:%S" repeated the month in the
    # minutes position.
    ts = "cve-flow_bot_pushed_at_" + time_delta(format="%Y-%m-%d-%H:%M:%S")
    cmd = "git add . && git commit -m '%s' && git push origin master" % (ts)

    ret = os.system(cmd)
    if ret != 0:
        print("%s failed" % cmd)
0
def exp_query():
    """Fetch the third-party exp rows for the ``delta=-1`` day and for the current month.

    :return: ``(day_rows, month_rows)`` -- rows of ``exp.exps`` whose
        ``exp_publishedtime`` contains the ``%Y-%m-%d`` stamp built with
        ``delta=-1``, and rows whose ``exp_publishedtime`` contains the
        ``%Y-%m`` stamp built with ``delta=0``.
    """
    where_template = 'exp_publishedtime like "%{}%"'

    day_stamp = time_delta(delta=-1, format='%Y-%m-%d')
    _, day_rows = cve_query_where(db='exp',
                                  table='exps',
                                  key=['*'],
                                  where=where_template.format(day_stamp))

    month_stamp = time_delta(delta=0, format='%Y-%m')
    _, month_rows = cve_query_where(db='exp',
                                    table='exps',
                                    key=['*'],
                                    where=where_template.format(month_stamp))

    return day_rows, month_rows
Esempio n. 6
0
def auto_push():
    """Stage everything, commit with a timestamped message and push to origin/master.

    The whole git pipeline runs through the shell with ``os.system``.

    :return: the exit status reported by ``os.system`` (0 on success)
    """
    print("[+] Auto pushed to github.com/404notf0und/CVE-Flow")
    # %M is minutes; the previous "%H:%m:%S" repeated the month in the
    # minutes position.
    ts = "cve-flow_bot_pushed_at_"+time_delta(format="%Y-%m-%d-%H:%M:%S")
    cmd = "git add . && git commit -m '%s' && git push origin master" % (ts)

    ret = os.system(cmd)
    if ret != 0:
        print("%s failed" % cmd)
    return ret
Esempio n. 7
0
def draw_report(day_exp_add=None,
                day_exp_proba=None,
                month_exp_add=None,
                month_exp_proba=None):
    """
    Generate three markdown reports: monthly, yearly and overall.

    One ``report_<tag>.md`` file is written through ``draw_md`` for each of
    the tags ``%Y-%m``, ``%Y`` and the first two characters of the year.
    The report whose tag is 7 characters long (the ``%Y-%m`` one) is also
    copied to ``README.md``.

    All four parameters are forwarded unchanged to ``draw_md``.
    """
    print('[+] Start generate report.md')
    month_tag = time_delta(delta=0, format="%Y-%m")
    year_tag = time_delta(delta=0, format="%Y")
    overall_tag = str(year_tag[0:2])

    for tag in (month_tag, year_tag, overall_tag):
        report_path = 'report_{}.md'.format(tag)
        draw_md(time=tag,
                fpath=report_path,
                day_exp_add=day_exp_add,
                day_exp_proba=day_exp_proba,
                month_exp_add=month_exp_add,
                month_exp_proba=month_exp_proba)
        if len(str(tag)) != 7:
            continue
        # Only the "YYYY-MM" tag has length 7: publish it as the default page.
        shutil.copyfile(report_path, "README.md")

    print('[+] Done!')
Esempio n. 8
0
def cve_monitor(monitor_init=False):
    """
    Parse the stock NVD feeds offline, or update incrementally from the online
    "modified" feed, and insert the result into data/cve.db.

    :param monitor_init: the stock import (wipe the nvd table, unzip and parse
        every local feed) runs only when this equals the string 'True'; any
        other value -- including the boolean True -- selects the incremental
        branch. NOTE(review): presumably the value comes straight from a
        config file; confirm with the caller.
    :return ret: result of the last ``sqlite_insert`` call, or False when no
        file was processed
    :return cve_day_add: tuple_dict, cve added in given time (from the last
        processed file; empty unless that file is a "modified" feed)
    :return exp_day_add: tuple_dict, exp added in given time (same scope)
    """
    json_list = []
    # Build the list of files to parse according to the stock-import switch
    if monitor_init == 'True':
        # Clear the existing CVE data
        so = SQLite('data/cve.db')
        so.execute('delete from nvd')

        for z in glob.glob('data/json/nvdcve-1.1-*.json.zip'):
            with zipfile.ZipFile(z) as zf:
                print("[+] UNZIP %s" % z)
                zf.extractall(path=path('data/json'))

        json_list.extend(glob.glob('data/json/nvdcve-1.1-*.json'))
    else:
        # Download the latest "modified" feed
        modified_zip, modified_link = (
            'nvdcve-1.1-modified.json.zip',
            'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-modified.json.zip'
        )
        json_download(modified_zip, modified_link)
        json_list.extend([path('../data/json', 'nvdcve-1.1-modified.json')])

    # Defaults keep the final return well-defined when there is nothing to
    # parse (e.g. a stock import with no extracted json files).
    ret = False
    exp_day_add = dict()
    cve_day_add = dict()

    # Parse stock and incremental cve data the same way
    for j in json_list:
        # Pre-insert step: derive the cve/exp additions from the modified feed.
        # Both dicts are reset for every file.
        exp_day_add = dict()
        cve_day_add = dict()
        if "modified" in j:
            sql, modified_data = json2tuple_dict(j)
            modified_time = time_delta(delta=-1, format="%Y-%m-%d")
            cve_exps = day_modified_exp(modified_data,
                                        time=modified_time,
                                        key='Exploit')
            cve_list = list(cve_exps.values())
            cve_day_add = day_modified_exp(modified_data, time=modified_time)
            for cve in cve_list:
                cve_id = cve[0]
                # Is the CVE already stored at all?
                so, exid = cve_exists_where(
                    db='cve',
                    table='nvd',
                    key=['CVE_Items_cve_CVE_data_meta_ID'],
                    where='CVE_Items_cve_CVE_data_meta_ID="{d}"'.format(
                        d=cve_id))
                # Is it stored without an "Exploit" reference tag?
                so1, eid = cve_exists_where(
                    db='cve',
                    table='nvd',
                    key=['CVE_Items_cve_CVE_data_meta_ID'],
                    where=
                    'CVE_Items_cve_CVE_data_meta_ID="{d}" and CVE_Items_cve_references_reference_data_tags not like "%Exploit%"'
                    .format(d=cve_id))
                if exid:
                    if eid:
                        print(
                            '[+] %s occurs to pre cve.db with no exploit,so added'
                            % cve_id)
                        exp_day_add[cve_id] = cve
                    else:
                        print(
                            '[+] %s occurs to prev cve.db with exploit,so give up'
                            % cve_id)
                else:
                    print(
                        '[+] %s never occurs to prev cve.db with exploit,so added'
                        % cve_id)
                    exp_day_add[cve_id] = cve

        # Insert the exps newly reported by nvd into exp.db
        first_part_exp_add = list(exp_day_add.values())
        exp_table(first_part_exp_add)
        # Now insert the file itself into cve.db
        sql, cve_data = json2tuple_dict(j)
        ret = sqlite_insert(sql, cve_data, dir_name='data/cve.db')
        print("[+] Parsed %s to sqlite3" % j)

    return ret, cve_day_add, exp_day_add
Esempio n. 9
0
def _rank_exp_proba(cve, nlp, model):
    """Score nvd rows with the model and return them ranked by probability.

    :param cve: rows of the nvd table, laid out according to ``cve_tags``
    :param nlp: fitted text transformer (provides ``transform``)
    :param model: trained model (provides ``predict``)
    :return: DataFrame with columns CVE_ID, Description, PubDate, EXP_Proba,
        sorted by EXP_Proba in descending order
    """
    cve_df = pd.DataFrame(cve, columns=cve_tags)
    x = cve_df['CVE_Items_cve_description_description_data_value'].astype(
        'str')
    pre = pd.DataFrame(model.predict(nlp.transform(x)))
    ranked = pd.concat([
        cve_df[[
            'CVE_Items_cve_CVE_data_meta_ID',
            'CVE_Items_cve_description_description_data_value',
            'CVE_Items_publishedDate'
        ]], pre
    ],
                       axis=1)
    ranked.columns = ['CVE_ID', 'Description', 'PubDate', 'EXP_Proba']
    return ranked.sort_values(by='EXP_Proba', ascending=False)


def exp_model(delta=0):
    """
    Train the exp model and predict exp probability for new CVEs.

    The model is trained on every call from the whole nvd table, then applied
    to the CVEs published on the day selected by ``delta`` and to the CVEs
    published in the current month.

    :param delta: day offset handed to ``time_delta`` for the daily prediction
    :return exp_proba: list of [CVE_ID, Description, PubDate, EXP_Proba] rows
        for the day, or [] when no CVE matched
    :return exp_proba2: list of [CVE_ID, Description, PubDate, EXP_Proba,
        Ground_Truth] rows for the month, or [] when no CVE matched
    """
    # Two independent lists: a chained assignment would hand both results the
    # very same list object.
    exp_proba = []
    exp_proba2 = []

    # Training
    so, cve = cve_query(db='cve', table='nvd', key=['*'])
    cve_df = pd.DataFrame(cve, columns=cve_tags)
    x = cve_df['CVE_Items_cve_description_description_data_value'].astype(
        'str')
    y = cve_df['CVE_EXP_label'].astype('int')
    nlp = wordindex(char_level=False)
    fx, fy = nlp.fit_transform(x, y)
    train_x, valid_x, train_y, valid_y = train_test_split(fx,
                                                          fy,
                                                          random_state=2019,
                                                          test_size=0.3)
    model = textcnn(input_type='wordindex',
                    max_len=nlp.max_length,
                    input_dim=nlp.input_dim,
                    output_dim=16,
                    class_num=1)
    model.fit(train_x,
              train_y,
              validation_data=(valid_x, valid_y),
              epochs=1,
              batch_size=128)

    # Prediction: CVEs published on the selected day
    modified_time = time_delta(delta=delta, format="%Y-%m-%d")
    so, cve = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(modified_time))
    if cve:
        model.summary()
        exp_proba = _rank_exp_proba(cve, nlp, model).values.tolist()
    else:
        print('[INFO] No CVE Today')

    # Prediction: CVEs published in the current month
    month = time_delta(format="%Y-%m")
    so, cve = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(month))
    if cve:
        month_frame = _rank_exp_proba(cve, nlp, model)

        # Mark the CVEs that already have a known exp: 1, otherwise 'None'
        so, exist_cid = cve_exists(db='exp', table='exps', key=['cve_id'])
        month_frame['Ground_Truth'] = month_frame.apply(
            lambda row: 1 if row['CVE_ID'] in exist_cid else 'None', axis=1)
        exp_proba2 = month_frame.values.tolist()
    else:
        print('[INFO] No CVE Month')

    return exp_proba, exp_proba2
Esempio n. 10
0
def _predict_exp_proba(cve_df, nlp, model):
    """Score a CVE frame with the model and return it ranked by probability.

    :param cve_df: DataFrame holding at least the CVE id, description and
        published-date columns
    :param nlp: fitted text transformer (provides ``transform``)
    :param model: trained model (provides ``predict``)
    :return: DataFrame with columns CVE_ID, Description, PubDate, EXP_Proba,
        sorted by EXP_Proba in descending order
    """
    x = cve_df['CVE_Items_cve_description_description_data_value'].astype(
        'str')
    pre = pd.DataFrame(model.predict(nlp.transform(x)))
    ranked = pd.concat([
        cve_df[[
            'CVE_Items_cve_CVE_data_meta_ID',
            'CVE_Items_cve_description_description_data_value',
            'CVE_Items_publishedDate'
        ]], pre
    ],
                       axis=1)
    ranked.columns = ['CVE_ID', 'Description', 'PubDate', 'EXP_Proba']
    return ranked.sort_values(by='EXP_Proba', ascending=False)


def exp_model(epoch, delta=0):
    """
    Load (or retrain) the exp model and predict exp probability for new CVEs.

    :param epoch: dict of the CVEs added in the current run; each value is a
        5-item record (id, description, published date, last-modified date,
        source). A falsy value skips the per-run prediction.
    :param delta: a single day offset, or a list/tuple of day offsets, handed
        to ``time_delta`` to select the day(s) of the daily prediction
    :return epoch_exp_proba: ranked rows for ``epoch``, or None
    :return day_exp_proba: ranked rows for the selected day(s), or None
    :return month_exp_proba: ranked rows for the current month with an extra
        Ground_Truth column, or None
    """
    so, cve = cve_query(db='cve', table='nvd', key=['*'])
    cve_df = pd.DataFrame(cve, columns=cve_tags)
    cve_csv = cve_df[[
        'CVE_Items_cve_CVE_data_meta_ID',
        'CVE_Items_cve_description_description_data_value',
        'CVE_Items_publishedDate', 'CVE_EXP_label'
    ]]
    cve_csv.to_csv('CVE_EXP_2020.csv', index=0)
    # Counter model decay: retrain once a month.
    # The day is compared with 404, which no day-of-month can equal, so the
    # retraining is effectively switched off (author's note: the ECS host
    # does not have enough memory).
    month_day = time_delta(delta=0, format='%Y-%m-%d')
    if int(month_day.split('-')[2]) == 404:
        print('[+] Retrain model')
        x = cve_df['CVE_Items_cve_description_description_data_value'].astype(
            'str')
        y = cve_df['CVE_EXP_label'].astype('int')
        nlp = wordindex(char_level=False)
        fx, fy = nlp.fit_transform(x, y)
        joblib.dump(nlp, 'data/model/nlp.h5')
        train_x, valid_x, train_y, valid_y = train_test_split(
            fx, fy, random_state=2019, test_size=0.3)
        model = textcnn(input_type='wordindex',
                        max_len=nlp.max_length,
                        input_dim=nlp.input_dim,
                        output_dim=16,
                        class_num=1)
        model.fit(train_x,
                  train_y,
                  validation_data=(valid_x, valid_y),
                  epochs=1,
                  batch_size=128)
        joblib.dump(model, 'data/model/textcnn.h5')

    print('[+] Load predict model')
    nlp = joblib.load('data/model/nlp.h5')
    model = joblib.load('data/model/textcnn.h5')

    # Prediction: CVEs added in the current run
    if epoch:
        cve_epoch = pd.DataFrame(
            list(epoch.values()),
            columns=[
                'CVE_Items_cve_CVE_data_meta_ID',
                'CVE_Items_cve_description_description_data_value',
                'CVE_Items_publishedDate', 'CVE_Items_lastModifiedDate',
                'Source'
            ])
        print(cve_epoch)
        epoch_exp_proba = _predict_exp_proba(cve_epoch, nlp,
                                             model).values.tolist()
    else:
        epoch_exp_proba = None
        print('[INFO] No CVE Today Epoch')

    # Prediction: CVEs published on the selected day(s).
    # A bare int (including the default 0) is treated as a one-element list;
    # iterating it directly would raise TypeError.
    deltas = delta if isinstance(delta, (list, tuple)) else [delta]
    cve = []
    for d in deltas:
        modified_time = time_delta(delta=d, format="%Y-%m-%d")
        so, tmp = cve_query_where(
            db='cve',
            table='nvd',
            key=['*'],
            where='CVE_Items_publishedDate like "%{}%"'.format(modified_time))
        cve.extend(tmp)
    if cve:
        day_df = pd.DataFrame(cve, columns=cve_tags)
        day_exp_proba = _predict_exp_proba(day_df, nlp, model).values.tolist()
    else:
        day_exp_proba = None
        print('[INFO] No CVE Today')

    # Prediction: CVEs published in the current month
    month = time_delta(format="%Y-%m")
    so, cve = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(month))
    if cve:
        month_df = pd.DataFrame(cve, columns=cve_tags)
        month_frame = _predict_exp_proba(month_df, nlp, model)

        # Mark the CVEs that already have a known exp: 1, otherwise 'None'
        so, exist_cid = cve_exists(db='exp', table='exps', key=['cve_id'])
        month_frame['Ground_Truth'] = month_frame.apply(
            lambda row: 1 if row['CVE_ID'] in exist_cid else 'None', axis=1)
        month_exp_proba = month_frame.values.tolist()
    else:
        month_exp_proba = None
        print('[INFO] No CVE Month')
    with open(path('../data/log', 'cveflow.log'), 'a+') as f:
        f.write('[Done] CVE EXP Prediction')
    return epoch_exp_proba, day_exp_proba, month_exp_proba