def get_most_100():
    """Load the Hadoop reducer output, keep the 100 rows with the largest
    value column, rewrite table hadoop_respIp_top100 with them, and remove
    the local file.

    Fixes vs. original: chunks are collected and concatenated once
    (the old per-chunk pd.concat was quadratic); an empty result no longer
    executes a malformed INSERT (the old code stripped the trailing 's' of
    "values"); the file is removed with os.remove instead of shelling out
    to `rm -rf`.
    """
    filepath = '/usr/local/pyTraff/tmp/part-r-00000'
    #filepath='test_file/part-r-00000'
    # Stream the file in 50k-row chunks and pre-filter (value > 400)
    # to bound memory use.
    df_reader = pd.read_csv(filepath, sep='\t', header=None, chunksize=50000)
    kept = []
    for chunk in df_reader:
        kept.append(chunk[chunk.iloc[:, 1] > 400])
    df_filtered = pd.concat(kept) if kept else pd.DataFrame()
    df_filtered.sort_values(by=[1], inplace=True, ascending=False)
    df_results = df_filtered.head(100)
    # Rebuild the table from scratch.
    db_base.execute_no_result("delete from hadoop_respIp_top100")
    if len(df_results) > 0:
        # NOTE(review): values are interpolated into the SQL text; this is
        # only acceptable because they come from our own Hadoop output,
        # not from user input.
        rows = ["('%s',%d)" % (line[0], line[1]) for line in df_results.values]
        sql1 = "insert into hadoop_respIp_top100(respIp,value) values" + ",".join(rows)
        db_base.execute_no_result(sql1)
    # The file was just read successfully, so a plain unlink is safe.
    os.remove(filepath)
def write_database():
    """Re-aggregate per-(user, webhost) duration for today and rewrite
    today's slice of table user_web.

    Steps: fetch today's stored rows -> fetch rows parsed from the log ->
    group the union -> delete today's rows -> bulk-insert the sums.
    """
    today = get_time_data()
    stored = get_database_df(today)   # rows already persisted for today
    fresh = get_logdata_df()          # rows parsed from the log
    if len(fresh) == 0:
        return
    grouped = pd.concat([fresh, stored], ignore_index=True).groupby(['user', 'webhost'])
    # Drop today's slice before re-inserting the aggregated values.
    db_base.execute_no_result("delete from user_web where date='%s'" % (today))
    parts = []
    for (who, host), part in grouped:
        parts.append("('%s','%s','%s',%d)" % (today, who, host, part.duration.sum()))
    # Single multi-row INSERT.
    db_base.execute_no_result(
        "insert into user_web(date,user,webhost,duration) values" + ",".join(parts))
def to_database():
    """Combine freshly parsed video-flow rows with today's stored rows,
    sum the byte counters per (video_name, user), and rewrite today's
    slice of video_flow_bytes."""
    today = get_time_data()
    fresh = get_reslut()
    stored = get_database_df(today)   # rows already in the DB for today
    if len(fresh) == 0:
        return
    grouped = pd.concat([fresh, stored],
                        ignore_index=True).groupby(['video_name', 'user'])
    # Remove today's slice, then bulk-insert the aggregated totals.
    db_base.execute_no_result("delete from video_flow_bytes where date='%s'" % (today))
    parts = []
    for (vname, who), part in grouped:
        parts.append("('%s','%s','%s',%d,%d)" % (
            today, vname, who, part.orig_ip_bytes.sum(), part.resp_ip_bytes.sum()))
    db_base.execute_no_result(
        "insert into video_flow_bytes(date,video_name,user,orig_ip_bytes,resp_ip_bytes) values"
        + ",".join(parts))  # batch insert
def write_black(): data3 = get_list_inter()[1] sql2 = "insert into black(black) values" for j in data3: sql2 = sql2 + "('%s')," % (j) #print j sql2 = sql2[:-1] print sql2 db_base.execute_no_result(sql2)
def write_databae():
    """Insert per-protocol traffic counters into protocol_rt.

    Rows whose protocol label contains ',' or '-' (composite labels) are
    skipped; one INSERT is issued per remaining row. (sic: the public
    name keeps the original spelling so callers still resolve.)
    """
    data = get_data()
    if not isinstance(data, pd.DataFrame):
        return
    for proto in list(data.index):
        label = str(proto)
        if ',' in label or '-' in label:
            continue  # skip composite protocol labels
        db_base.execute_no_result(
            "insert into protocol_rt(protocol,incomingTraff,outgoingTraff) values('%s',%d,%d)"
            % (proto, data.loc[proto, 'resp_bytes'], data.loc[proto, 'orig_bytes']))
def write_database():
    """Replace the contents of sessionTop with the current per-connection
    byte counters from get_data()."""
    db_base.execute_no_result("delete from sessionTop")  # drop the old snapshot
    data = get_data()
    template = ("insert into sessionTop(origIp,respIp,origBytes,respBytes)"
                " values('%s','%s',%d,%d)")
    for key in data.index:
        # key is a (orig ip, resp ip) pair; columns hold the byte totals.
        db_base.execute_no_result(template % (
            key[0], key[1], data.loc[key, 'orign'], data.loc[key, 'resp']))
def write_database():
    """Collect the hourly totals from the newest conn log and insert one
    summary row into table main, timestamped one hour in the past."""
    conn_path = get_file('conn')
    if not os.path.exists(conn_path):
        return  # nothing to summarise this hour
    log_file, conn_all, traff_all, traff_orig, traff_resp = get_data(conn_path)
    # Stamp the row with "now minus one hour" -- the hour the log covers.
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(time.time() - 3600))
    db_base.execute_no_result(
        "insert into main(date,main_log,main_conn,main_traff,resp_traff,orig_traff)"
        " values ('%s',%d,%d,%d,%d,%d)"
        % (stamp, log_file, conn_all, traff_all, traff_resp, traff_orig))
def data_to_localDb():
    """Bulk-insert (date, user, browser) tuples from the user-agent
    parser into table browser."""
    rows = get_usagent_content()
    if len(rows) == 0:
        return
    # Each row: [timestamp, user, browser name].
    parts = ["('%s','%s','%s')" % (r[0], r[1], r[2]) for r in rows]
    # One multi-row INSERT instead of one statement per row.
    db_base.execute_no_result(
        "insert into browser(date,user,browser) values" + ",".join(parts))
def to_database():
    """Bulk-insert (date, user, app_name) tuples from the user-agent
    parser into table app."""
    rows = get_usagent_content()
    if len(rows) == 0:
        return
    # Each row: [timestamp, user, application name].
    parts = ["('%s','%s','%s')" % (r[0], r[1], r[2]) for r in rows]
    db_base.execute_no_result(
        "insert into app(date,user,app_name) values" + ",".join(parts))  # batch insert
def data_to_localDb():
    """Bulk-insert (date, user, device_type, device_class) tuples from the
    user-agent parser into table device."""
    rows = get_usagent_content()
    if len(rows) == 0:
        return
    # Each row: [timestamp, user, device type, device class].
    parts = ["('%s','%s','%s','%s')" % (r[0], r[1], r[2], r[3]) for r in rows]
    db_base.execute_no_result(
        "insert into device(date,user,device_type,device_class) values"
        + ",".join(parts))  # batch insert
def write_database():
    """Refresh respIpTop_u: for each top responder IP append a reverse-DNS
    label (or the fixed 'hubu dns' tag for the two campus DNS servers)
    and store (label, value)."""
    db_base.execute_no_result("delete from respIpTop_u")  # start from a clean table
    data = get_data()
    for ip in data.index:
        value = data.loc[ip, 'origIp']
        if ip in ('202.114.144.53', '202.114.144.33'):
            label = ip + '(' + 'hubu dns' + ')'
        else:
            label = ip + '(' + getDNS(ip) + ')'
        db_base.execute_no_result(
            "insert into respIpTop_u(respIp,value) values('%s',%d)" % (label, value))
def write_database():
    """Merge anomaly rows parsed from the log with today's stored rows,
    de-duplicate on (orig_ip, name), and rewrite today's slice of
    anomaly_ip."""
    today = get_time_data()
    combined = pd.concat([get_result_df(), get_database_df(today)],
                         ignore_index=True)
    combined.drop_duplicates(['orig_ip', 'name'], inplace=True)
    # Replace today's slice wholesale.
    db_base.execute_no_result("delete from anomaly_ip where date='%s'" % (today))
    parts = ["('%s','%s','%s')" % (today, row[0], row[1]) for row in combined.values]
    db_base.execute_no_result(
        "insert into anomaly_ip(date,ip,anomalyType) values" + ",".join(parts))
def write_database():
    """Union the anomaly rows from the log with the rows stored for today,
    drop duplicate (orig_ip, name) pairs, then rewrite today's slice of
    table anomaly_ip as one batch INSERT."""
    today = get_time_data()
    fresh = get_result_df()
    stored = get_database_df(today)
    merged = pd.concat([fresh, stored], ignore_index=True)
    merged.drop_duplicates(['orig_ip', 'name'], inplace=True)
    # Delete today's rows before re-inserting the de-duplicated set.
    db_base.execute_no_result("delete from anomaly_ip where date='%s'" % (today))
    parts = []
    for row in merged.values:
        # row: [ip, anomaly type, ...]
        parts.append("('%s','%s','%s')" % (today, row[0], row[1]))
    db_base.execute_no_result(
        "insert into anomaly_ip(date,ip,anomalyType) values" + ",".join(parts))
def data_to_localDb():
    """Re-aggregate file-type counts (log data plus today's stored rows)
    and rewrite today's slice of table file_type."""
    today = get_time_data()
    fresh = get_file_type()
    stored = get_database_df(today)   # today's rows already in the DB
    if len(fresh) == 0:
        return
    grouped = pd.concat([fresh, stored], ignore_index=True).groupby(['file_type'])
    # Drop today's slice before re-inserting the summed counts.
    db_base.execute_no_result("delete from file_type where date='%s'" % (today))
    parts = []
    for key in grouped.groups:
        parts.append("('%s','%s',%d)" % (today, key, grouped.get_group(key).count1.sum()))
    db_base.execute_no_result(
        "insert into file_type(date,file_type,count1) values" + ",".join(parts))  # batch insert
def write_database():
    """Rebuild MAC_analysis from the union of stored rows and the current
    MAC scan: one row per distinct (ip, MAC, manufacturer) triple."""
    stored = get_database_df()
    scanned = mac_analysis_test()
    if len(scanned) == 0:
        return
    grouped = pd.concat([scanned, stored], ignore_index=True).groupby(
        ['user', 'mac', 'pingpai'])
    db_base.execute_no_result("delete from MAC_analysis")  # full rebuild
    # Each group key is a (user, mac, pingpai) tuple -- exactly the values
    # we store, so the per-group frames are never needed.
    parts = ["('%s','%s','%s')" % key for key in grouped.groups]
    db_base.execute_no_result(
        "insert into MAC_analysis(ip,MAC,mfrs) values" + ",".join(parts))
def write_database():
    '''
    1. Fetch today's rows already stored in the DB;
    2. Fetch the rows parsed from the log;
    3. Merge both frames and compute the result;
    4. Delete today's rows from the DB;
    5. Insert the result;
    '''
    logdata_df = get_logdata_df()
    if len(logdata_df) == 0:
        return
    sql1 = "insert into web(date,user,webhost) values"
    # NOTE(review): 'concat_df_group' is never defined in this function --
    # steps 1, 3 and 4 of the docstring are missing, so the loop below
    # raises NameError at runtime. Compare the sibling write_database()
    # for user_web, which builds concat_df_group via
    # pd.concat([...]).groupby([...]); the correct grouping columns for
    # this table cannot be inferred from here -- TODO fix at the call site.
    for gp in concat_df_group.groups:
        _time = gp[0]
        _user = gp[1]
        _webhost = gp[2]
        sql1 = sql1 + "('%s','%s','%s')," % (_time, _user, _webhost)
    sql1 = sql1[:-1]
    db_base.execute_no_result(sql1)  # batch insert
def data_to_localDb():
    """Bulk-insert parsed conn-log records into table main."""
    records = get_file_conn()
    if len(records) == 0:
        return
    parts = []
    for rec in records:
        # Field order per record: time, orig ip, resp ip, proto, service,
        # duration, orig pkts, orig bytes, resp pkts, resp bytes.
        parts.append("('%s','%s','%s','%s','%s',%f,%d,%d,%d,%d)" % (
            rec[0], rec[1], rec[2], rec[3], rec[4],
            float(rec[5]), int(rec[6]), int(rec[7]), int(rec[8]), int(rec[9])))
    db_base.execute_no_result(
        "insert into main(time,ori_ip,resp_ip,proto,service,duration,orig_pkts,orig_bytes,resp_pkts,resp_bytes) values"
        + ",".join(parts))  # batch insert
def data_to_localDb(): data = get_file_anomaly() if len(data) == 0: return sql1 = "insert into anom_b(start,end,ori_ip,ori_o,res_ip,res_o,bec,count) values" for i in data: start_time = i[0] end_time = i[1] orig_ip = i[2] orig_k = i[3] resp_ip = i[4] resp_k = i[5] becu = i[6] cou = i[7] sql1 = sql1 + "('%s','%s','%s','%s','%s','%s','%s','%d')," % ( start_time, end_time, orig_ip, orig_k, resp_ip, resp_k, becu, cou) sql1 = sql1[:-1] print sql1 db_base.execute_no_result(sql1) #批量插入
def to_database(): #time_data=get_time_data() data = get_yc_content() #print data data2 = get_list_inter()[0] data3 = get_list_inter()[1] get_database_df() #数据库中的数据 sql = "insert into white(wh) values" for i in data: sql1 = sql + "('%s')," % (i[0]) #print i[0] sql1 = sql1[:-1] #print sql1 db_base.execute_no_result(sql1) sqll = "insert into gray(gray) values" for j in data2: sql2 = sqll + "('%s')," % (j) #print j sql2 = sql2[:-1] print sql2 db_base.execute_no_result(sql2)
def main():
    """Train the BP neural network on stored samples, score the second
    data set, and bulk-insert the scored rows into bp_abnormal."""
    train_data = read.get_database_df()[0]
    score_data = read.get_database_df()[1]
    # 10 input / 13 hidden / 1 output network.
    ann = BPANN.BPNeuralNetworks(10, 13, 1)
    ann.train(train_data, iterations=700)
    scored = ann.caclulate(score_data)  # renamed local: don't shadow builtin 'list'
    parts = []
    for item in scored:
        date1 = item[0][0]
        ori = item[0][1]
        res = item[0][2]
        abnormal = item[1]
        # Score above 0.5 is flagged anomalous.
        if float(item[1]) > 0.5:
            ab = '异常'
        else:
            ab = '正常'
        parts.append("('%s','%s','%s',%d,'%s')" % (date1, ori, res, abnormal, ab))
    db_base.execute_no_result(
        "insert into bp_abnormal(date,ori_ip,resp_ip,abnormal,ab) values"
        + ",".join(parts))  # batch insert
def write_database():
    """Upsert today's per-user traffic totals into user_traff.

    Users with no stored row today are batch-inserted as-is; users that
    already have a row get the stored totals added to the fresh ones,
    their stale rows are batch-deleted, then the combined rows are
    re-inserted.

    Fix vs. original: membership was tested with
    `ip not in user_dict.iterkeys()`, an O(n) scan per user; plain dict
    membership is O(1). SQL strings are also assembled with join instead
    of repeated concatenation.
    """
    user_data_out = get_user_data()  # freshly aggregated per-user records
    if len(user_data_out) == 0:
        return
    time_data = get_time_data()
    sql0 = ("select date,user,beginTime,endTime,outgoingTraff,incomingTraff"
            " from user_traff where date='%s'" % (time_data))
    user_iterms = db_base.execute_result(sql0)
    # Map ip -> [beginTime, endTime, incomingTraff, outgoingTraff] for rows
    # already stored today (note columns 4/5 are swapped on purpose to get
    # incoming before outgoing).
    user_dict = {}
    for item in user_iterms:
        user_dict[item[1]] = [item[2], item[3], item[5], item[4]]
    user_data_insert = []  # users with no stored row today
    user_data_update = []  # users whose stored row must be merged
    update_ip_list = []
    for user in user_data_out:
        ip = user.get_ip()
        if ip not in user_dict:  # O(1) dict membership
            user_data_insert.append(user)
        else:
            update_ip_list.append(ip)
            stored = user_dict[ip]
            user.min_ts = stored[0]  # keep the stored (earlier) begin time
            user.incoming = user.incoming + stored[2]
            user.outgoing = user.outgoing + stored[3]
            user_data_update.append(user)
    if user_data_insert:
        rows = ["('%s','%s','%s','%s',%d,%d)" % (
                    time_data, u.get_ip(), u.get_min_ts(), u.get_max_ts(),
                    u.get_outgoing(), u.get_incoming())
                for u in user_data_insert]
        db_base.execute_no_result(
            "insert into user_traff(date,user,beginTime,endTime,outgoingTraff,incomingTraff) values"
            + ",".join(rows))  # batch insert
    if user_data_update:
        # Batch-delete the stale rows, then re-insert the merged ones.
        db_base.execute_no_result(
            "delete from user_traff where user in ("
            + ",".join("'%s'" % (ip) for ip in update_ip_list)
            + ") and date='%s'" % (time_data))
        rows = ["('%s','%s','%s','%s',%d,%d)" % (
                    time_data, u.get_ip(), u.min_ts, u.get_max_ts(),
                    u.get_outgoing(), u.get_incoming())
                for u in user_data_update]
        db_base.execute_no_result(
            "insert into user_traff(date,user,beginTime,endTime,outgoingTraff,incomingTraff) values"
            + ",".join(rows))  # batch re-insert
def clear_database():
    """Wipe every row from protocol_rt before a fresh load."""
    db_base.execute_no_result("delete from protocol_rt")