def exp_query(delta=0):
    """Fetch third-party exp rows for the requested day(s) and for the month.

    :param delta: a single offset or a list of offsets forwarded to
        ``time_delta`` (presumably day offsets relative to today -- confirm
        against ``time_delta``).
    :return: ``(day_rows, month_rows)``. For a list ``delta`` the per-day
        results are concatenated in order; otherwise the single query result
        is returned as-is.
    """

    def rows_published_like(stamp):
        # Substring match of the stamp against the exp_publishedtime column.
        _, rows = cve_query_where(
            db='exp',
            table='exps',
            key=['*'],
            where='exp_publishedtime like "%{}%"'.format(stamp))
        return rows

    if isinstance(delta, list):
        day_rows = []
        for offset in delta:
            stamp = time_delta(delta=offset, format='%Y-%m-%d')
            day_rows = day_rows + rows_published_like(stamp)
    else:
        day_rows = rows_published_like(
            time_delta(delta=delta, format='%Y-%m-%d'))

    # The month query always uses offset 0, whatever ``delta`` was.
    month_rows = rows_published_like(time_delta(delta=0, format='%Y-%m'))
    return day_rows, month_rows
def exp_add():
    """Collect the daily (and, on day 30, monthly) exp additions.

    Daily: rows from exploit-db and github are normalised with
    ``exp_5tuple``, stored with ``exp_table`` and their first field (used as
    the CVE id) is passed to ``exp_label``.

    Monthly: on the 30th, the keys returned by ``cvedetails_crawler`` and
    ``mitre_expdb_all`` are passed to ``exp_label`` as well.

    :return day_exp_add: tuple_list, the combined daily rows
    """
    # Daily update -> exps & nvd: exploit-db.com, seebug, github
    expdb_rows = exp_5tuple(key='exploit-db', value=expdb_exp_add())
    # The seebug crawler call is disabled, so an empty list is normalised.
    # A fresh list is passed so nothing else can share (and see changes to)
    # the same object.
    seebug_rows = exp_5tuple(key='seebug', value=[])
    github_rows = exp_5tuple(key='github', value=github_exp_add())

    day_exp_add = expdb_rows + seebug_rows + github_rows
    exp_table(day_exp_add)
    cves = [row[0] for row in day_exp_add]
    exp_label(all_exp=cves)

    # Monthly update -> nvd: cvedetails.com, mitre
    month_day = time_delta(delta=0, format='%Y-%m-%d')
    # The day field is a string; it must be converted before comparing with
    # an int, otherwise the condition can never be true.
    # NOTE(review): months without a 30th (February) never trigger this.
    if int(month_day.split('-')[2]) == 30:
        cvedetails_ids = list(cvedetails_crawler().keys())
        mitre_ids = list(mitre_expdb_all(reparse=True).keys())
        month_exp_add = cvedetails_ids + mitre_ids
        exp_label(all_exp=month_exp_add)

    return day_exp_add
def auto_email(content, msg_to='*****@*****.**'):
    """Render ``content`` from markdown to HTML and mail it.

    The sender address and password are read from the ``[Mail]`` section of
    ``conf/info.conf``. The message is sent through ``smtp.163.com`` over
    SSL. SMTP errors are reported on stdout and not re-raised.

    :param content: markdown text of the report
    :param msg_to: recipient address
    """
    conf = configparser.ConfigParser()
    conf.read('conf/info.conf')
    msg_from = conf.get('Mail', 'sendmail')
    password = conf.get('Mail', 'password')

    subject = "CVE-Flow:CVE Threat Intelligence on {}".format(
        time_delta(format='%Y-%m-%d'))
    content = mistune.markdown(content, escape=True, hard_wrap=True)
    link = "More informations:https://github.com/404notf0und/CVE-Flow/blob/master/README.md"
    content = content + link

    msg = MIMEText(content, 'html', 'utf-8')
    msg['Subject'] = subject
    msg['From'] = msg_from
    msg['To'] = msg_to

    try:
        # The context manager issues QUIT and closes the socket on exit.
        # If the connection itself fails there is nothing to close, and the
        # real error is reported instead of a secondary unbound-name error.
        with smtplib.SMTP_SSL('smtp.163.com',
                              smtplib.SMTP_SSL_PORT) as client:
            print("[+] Connect to mail server")
            client.login(msg_from, password)
            print("[+] Successfully log in")
            client.sendmail(msg_from, msg_to, msg.as_string())
            print("[+] Successfully send mail")
    except smtplib.SMTPException as e:
        print("[!] Send mail error: %s" % e)
def auto_push():
    """Stage, commit and push the working tree to ``origin master``.

    The commit message is a bot tag followed by a timestamp. A non-zero exit
    status from the shell command is reported on stdout.
    """
    # %M is the minute field; %m would repeat the month number there.
    # Assumes time_delta forwards ``format`` to strftime -- confirm.
    ts = "cve-flow_bot_pushed_at_" + time_delta(format="%Y-%m-%d-%H:%M:%S")
    cmd = "git add . && git commit -m '%s' && git push origin master" % (ts)
    ret = os.system(cmd)
    if ret != 0:
        print("%s failed" % cmd)
def exp_query():
    """Fetch third-party exp rows for the previous day and for the month.

    Two queries run against the ``exps`` table of the ``exp`` database, each
    a substring match on ``exp_publishedtime``: first with the ``%Y-%m-%d``
    stamp at offset -1, then with the ``%Y-%m`` stamp at offset 0.

    :return: ``(day_rows, month_rows)``
    """
    where_template = 'exp_publishedtime like "%{}%"'
    results = []
    for offset, fmt in ((-1, '%Y-%m-%d'), (0, '%Y-%m')):
        stamp = time_delta(delta=offset, format=fmt)
        _, rows = cve_query_where(
            db='exp',
            table='exps',
            key=['*'],
            where=where_template.format(stamp))
        results.append(rows)

    day_rows, month_rows = results
    return day_rows, month_rows
def auto_push():
    """Stage, commit and push the working tree to ``origin master``.

    The commit message is a bot tag followed by a timestamp.

    :return: the exit status returned by ``os.system`` (0 on success)
    """
    print("[+] Auto pushed to github.com/404notf0und/CVE-Flow")
    # %M is the minute field; %m would repeat the month number there.
    # Assumes time_delta forwards ``format`` to strftime -- confirm.
    ts = "cve-flow_bot_pushed_at_" + time_delta(format="%Y-%m-%d-%H:%M:%S")
    cmd = "git add . && git commit -m '%s' && git push origin master" % (ts)
    ret = os.system(cmd)
    if ret != 0:
        print("%s failed" % cmd)
    return ret
def draw_report(day_exp_add=None,
                day_exp_proba=None,
                month_exp_add=None,
                month_exp_proba=None):
    """Generate three markdown reports: month, year and overall.

    ``draw_md`` is called once per time tag with the same four data
    arguments, writing to ``report_<tag>.md``. The report whose tag is seven
    characters long (the ``%Y-%m`` one) is then copied to ``README.md``.
    """
    print('[+] Start generate report.md')

    month_tag = time_delta(delta=0, format="%Y-%m")
    year_tag = time_delta(delta=0, format="%Y")
    # First two characters of the year string name the overall report.
    all_tag = str(year_tag[0:2])

    payload = dict(day_exp_add=day_exp_add,
                   day_exp_proba=day_exp_proba,
                   month_exp_add=month_exp_add,
                   month_exp_proba=month_exp_proba)

    for tag in (month_tag, year_tag, all_tag):
        report_path = 'report_{}.md'.format(tag)
        draw_md(time=tag, fpath=report_path, **payload)
        if len(str(tag)) == 7:
            monthly_path = report_path

    shutil.copyfile(monthly_path, "README.md")
    print('[+] Done!')
def cve_monitor(monitor_init=False):
    """
    Parse the stock feeds offline, or update incrementally online.

    :param monitor_init: stock parsing runs only when this equals the string
        ``'True'`` (the value is taken from the config file); any other
        value, including boolean ``True``, selects the incremental path.
    :return ret:boolean, if parsed json to sqlite3 or not (``False`` when
        there was no file to parse)
    :return cve_day_add:tuple_dict, cve added in given time
    :return exp_day_add:tuple_dict, exp added in given time
    """
    json_list = []
    # Use the config-file switch to build the list of files to parse.
    if monitor_init == 'True':
        # Wipe the existing CVE data.
        so = SQLite('data/cve.db')
        so.execute('delete from nvd')
        zips = glob.glob('data/json/nvdcve-1.1-*.json.zip')
        for z in zips:
            with zipfile.ZipFile(z) as zf:
                print("[+] UNZIP %s" % z)
                zf.extractall(path=path('data/json'))
        json_list.extend(glob.glob('data/json/nvdcve-1.1-*.json'))
    else:
        # Download the latest "modified" feed.
        modified_zip, modified_link = (
            'nvdcve-1.1-modified.json.zip',
            'https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-modified.json.zip'
        )
        json_download(modified_zip, modified_link)
        json_list.extend([path('../data/json', 'nvdcve-1.1-modified.json')])

    # Defaults so the return below is well-defined when json_list is empty
    # (stock mode with no feed files on disk).
    ret = False
    cve_day_add = dict()
    exp_day_add = dict()

    # Parse stock and incremental cve data the same way.
    for j in json_list:
        # Pre-insert step: extract the newly added cve/exp from "modified".
        # Both dicts are reset per file, so the returned values describe the
        # last file processed.
        exp_day_add = dict()
        cve_day_add = dict()
        if "modified" in j:
            sql, modified_data = json2tuple_dict(j)
            modified_time = time_delta(delta=-1, format="%Y-%m-%d")
            cve_exps = day_modified_exp(modified_data,
                                        time=modified_time,
                                        key='Exploit')
            cve_list = list(cve_exps.values())
            cve_day_add = day_modified_exp(modified_data, time=modified_time)
            for cve in cve_list:
                # Is this CVE id already present in cve.db?
                so, exid = cve_exists_where(
                    db='cve',
                    table='nvd',
                    key=['CVE_Items_cve_CVE_data_meta_ID'],
                    where='CVE_Items_cve_CVE_data_meta_ID="{d}"'.format(
                        d=cve[0]))
                # Is it present with no "Exploit" reference tag?
                so1, eid = cve_exists_where(
                    db='cve',
                    table='nvd',
                    key=['CVE_Items_cve_CVE_data_meta_ID'],
                    where=
                    'CVE_Items_cve_CVE_data_meta_ID="{d}" and CVE_Items_cve_references_reference_data_tags not like "%Exploit%"'
                    .format(d=cve[0]))
                if exid:
                    if eid:
                        print(
                            '[+] %s occurs to pre cve.db with no exploit,so added'
                            % cve[0])
                        exp_day_add[cve[0]] = cve
                    else:
                        print(
                            '[+] %s occurs to prev cve.db with exploit,so give up'
                            % cve[0])
                else:
                    print(
                        '[+] %s never occurs to prev cve.db with exploit,so added'
                        % cve[0])
                    exp_day_add[cve[0]] = cve
            # Insert the exps newly flagged by nvd today into exp.db.
            first_part_exp_add = list(exp_day_add.values())
            exp_table(first_part_exp_add)
        # Insert into cve.db.
        sql, cve_data = json2tuple_dict(j)
        ret = sqlite_insert(sql, cve_data, dir_name='data/cve.db')
        print("[+] Parsed %s to sqlite3" % j)
    return ret, cve_day_add, exp_day_add
def exp_model(delta=0):
    """
    Train the exp model, then score the CVEs of one day and of the month.

    A text-CNN is fitted on every row of the ``nvd`` table (description ->
    ``CVE_EXP_label``) on each call. It then scores the CVEs whose published
    date matches the ``%Y-%m-%d`` stamp at ``delta`` and those matching the
    ``%Y-%m`` stamp.

    :param delta: offset forwarded to ``time_delta`` for the day query
    :return: ``(exp_proba, exp_proba2)``. Each is a list of rows sorted by
        descending probability, or ``[]`` when the query found nothing. The
        monthly rows carry an extra ground-truth field.
    """
    exp_proba = exp_proba2 = []

    text_col = 'CVE_Items_cve_description_description_data_value'
    report_cols = [
        'CVE_Items_cve_CVE_data_meta_ID', text_col, 'CVE_Items_publishedDate'
    ]

    def ranked_scores(rows, show_summary):
        # Score the rows and return a frame sorted by descending probability.
        frame = pd.DataFrame(rows, columns=cve_tags)
        features = nlp.transform(frame[text_col].astype('str'))
        if show_summary:
            model.summary()
        scores = pd.DataFrame(model.predict(features))
        ranked = pd.concat([frame[report_cols], scores], axis=1)
        ranked.columns = ['CVE_ID', 'Description', 'PubDate', 'EXP_Proba']
        return ranked.sort_values(by='EXP_Proba', ascending=False)

    # Training
    _, all_rows = cve_query(db='cve', table='nvd', key=['*'])
    corpus = pd.DataFrame(all_rows, columns=cve_tags)
    texts = corpus[text_col].astype('str')
    labels = corpus['CVE_EXP_label'].astype('int')
    nlp = wordindex(char_level=False)
    fx, fy = nlp.fit_transform(texts, labels)
    train_x, valid_x, train_y, valid_y = train_test_split(fx,
                                                          fy,
                                                          random_state=2019,
                                                          test_size=0.3)
    model = textcnn(input_type='wordindex',
                    max_len=nlp.max_length,
                    input_dim=nlp.input_dim,
                    output_dim=16,
                    class_num=1)
    model.fit(train_x,
              train_y,
              validation_data=(valid_x, valid_y),
              epochs=1,
              batch_size=128)

    # Test: score the CVEs published on the requested day.
    day_stamp = time_delta(delta=delta, format="%Y-%m-%d")
    _, day_rows = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(day_stamp))
    if day_rows:
        exp_proba = ranked_scores(day_rows, True).values.tolist()
    else:
        print('[INFO] No CVE Today')

    # Test: score the CVEs published this month.
    month_stamp = time_delta(format="%Y-%m")
    _, month_rows = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(month_stamp))
    if month_rows:
        monthly = ranked_scores(month_rows, False)
        _, exist_cid = cve_exists(db='exp', table='exps', key=['cve_id'])
        # 1 when the CVE id is among the known exp ids, else the string 'None'.
        monthly['Ground_Truth'] = monthly.apply(
            lambda row: 1 if row['CVE_ID'] in exist_cid else 'None', axis=1)
        exp_proba2 = monthly.values.tolist()
    else:
        print('[INFO] No CVE Month')

    return exp_proba, exp_proba2
def exp_model(epoch, delta=0):
    """
    Load the exp model and score three sets of CVEs.

    The full ``nvd`` table is first exported to ``CVE_EXP_2020.csv``. The
    fitted text encoder and model are loaded from ``data/model`` and used to
    score (1) the rows in ``epoch``, (2) the CVEs published on the day(s)
    selected by ``delta`` and (3) the CVEs published this month.

    :param epoch: mapping whose values are 5-field rows (CVE id, description,
        published date, last-modified date, source); falsy to skip
    :param delta: a single offset or an iterable of offsets forwarded to
        ``time_delta`` for the per-day queries
    :return: ``(epoch_exp_proba, day_exp_proba, month_exp_proba)``. Each is a
        list of rows sorted by descending probability, or ``None`` when there
        was nothing to score. The monthly rows carry an extra ground-truth
        field.
    """
    text_col = 'CVE_Items_cve_description_description_data_value'
    report_cols = [
        'CVE_Items_cve_CVE_data_meta_ID', text_col, 'CVE_Items_publishedDate'
    ]

    def ranked_scores(frame):
        # Score ``frame`` and return it sorted by descending probability.
        features = nlp.transform(frame[text_col].astype('str'))
        scores = pd.DataFrame(model.predict(features))
        ranked = pd.concat([frame[report_cols], scores], axis=1)
        ranked.columns = ['CVE_ID', 'Description', 'PubDate', 'EXP_Proba']
        return ranked.sort_values(by='EXP_Proba', ascending=False)

    so, cve = cve_query(db='cve', table='nvd', key=['*'])
    cve_df = pd.DataFrame(cve, columns=cve_tags)
    cve_csv = cve_df[[
        'CVE_Items_cve_CVE_data_meta_ID', text_col, 'CVE_Items_publishedDate',
        'CVE_EXP_label'
    ]]
    cve_csv.to_csv('CVE_EXP_2020.csv', index=0)

    # Counter model decay: retrain once a month.
    month_day = time_delta(delta=0, format='%Y-%m-%d')
    # Day 404 never occurs, so retraining is effectively switched off
    # (original note: not enough memory on the ECS host).
    if int(month_day.split('-')[2]) == 404:
        print('[+] Retrain model')
        x = cve_df[text_col].astype('str')
        y = cve_df['CVE_EXP_label'].astype('int')
        nlp = wordindex(char_level=False)
        fx, fy = nlp.fit_transform(x, y)
        joblib.dump(nlp, 'data/model/nlp.h5')
        train_x, valid_x, train_y, valid_y = train_test_split(
            fx, fy, random_state=2019, test_size=0.3)
        model = textcnn(input_type='wordindex',
                        max_len=nlp.max_length,
                        input_dim=nlp.input_dim,
                        output_dim=16,
                        class_num=1)
        model.fit(train_x,
                  train_y,
                  validation_data=(valid_x, valid_y),
                  epochs=1,
                  batch_size=128)
        joblib.dump(model, 'data/model/textcnn.h5')

    print('[+] Load predict model')
    nlp = joblib.load('data/model/nlp.h5')
    model = joblib.load('data/model/textcnn.h5')

    # Test: score the CVEs added in the current run.
    if epoch:
        cve_epoch = pd.DataFrame(list(epoch.values()),
                                 columns=[
                                     'CVE_Items_cve_CVE_data_meta_ID',
                                     text_col, 'CVE_Items_publishedDate',
                                     'CVE_Items_lastModifiedDate', 'Source'
                                 ])
        print(cve_epoch)
        epoch_exp_proba = ranked_scores(cve_epoch).values.tolist()
    else:
        epoch_exp_proba = None
        print('[INFO] No CVE Today Epoch')

    # Test: score the CVEs published on the requested day(s).
    # A scalar ``delta`` (including the default 0) is wrapped so the loop
    # below does not fail trying to iterate an int.
    try:
        offsets = list(delta)
    except TypeError:
        offsets = [delta]
    cve = []
    for d in offsets:
        modified_time = time_delta(delta=d, format="%Y-%m-%d")
        so, tmp = cve_query_where(
            db='cve',
            table='nvd',
            key=['*'],
            where='CVE_Items_publishedDate like "%{}%"'.format(modified_time))
        cve = cve + tmp
    if cve:
        day_frame = pd.DataFrame(cve, columns=cve_tags)
        day_exp_proba = ranked_scores(day_frame).values.tolist()
    else:
        day_exp_proba = None
        print('[INFO] No CVE Today')

    # Test: score the CVEs published this month.
    month_stamp = time_delta(format="%Y-%m")
    so, cve = cve_query_where(
        db='cve',
        table='nvd',
        key=['*'],
        where='CVE_Items_publishedDate like "%{}%"'.format(month_stamp))
    if cve:
        month_frame = pd.DataFrame(cve, columns=cve_tags)
        month_ranked = ranked_scores(month_frame)
        so, exist_cid = cve_exists(db='exp', table='exps', key=['cve_id'])
        # 1 when the CVE id is among the known exp ids, else the string 'None'.
        month_ranked['Ground_Truth'] = month_ranked.apply(
            lambda row: 1 if row['CVE_ID'] in exist_cid else 'None', axis=1)
        month_exp_proba = month_ranked.values.tolist()
    else:
        month_exp_proba = None
        print('[INFO] No CVE Month')

    with open(path('../data/log', 'cveflow.log'), 'a+') as f:
        f.write('[Done] CVE EXP Prediction')
    return epoch_exp_proba, day_exp_proba, month_exp_proba