def create_burst_df():
    """Collect the per-file burst results under ``burst_result/*/*`` into one DataFrame.

    The frame is indexed by every day from 2012-01-01 to 2013-03-31 and has one
    column per event name found in the file names.  Each dump that can be
    unpacked into ``(event, day, data)`` stores ``data`` at ``[day, event]``;
    all other cells stay NaN.

    Returns:
        pandas.DataFrame: the burst table described above.
    """
    files = glob.glob('burst_result/*/*')

    # The event name is everything after the first "_" of the file's base name.
    # Parse it once per file and reuse it for both the columns and the check below.
    event_names = {
        fi: "_".join(fi.split('/')[-1].split("_")[1:]) for fi in files
    }
    cols = sorted(set(event_names.values()))

    burst_df = pd.DataFrame(
        index=pd.date_range('20120101', '20130331'), columns=cols)

    for fi in files:
        event_name = event_names[fi]
        print(fi)
        try:
            # some bursts are detected
            ev, day, data = sb.open_dump(fi)
        except Exception:
            # no bursts are detected (per the original author's note).
            # Narrowed from a bare ``except`` so Ctrl-C / SystemExit still
            # propagate instead of being silently skipped.
            continue

        if event_name != ev:
            # The name stored in the dump disagrees with the file name: report and skip.
            print('event name error')
            print(event_name, ev, day, data)
            continue

        burst_df.loc[pd.to_datetime(day), ev] = data

    return burst_df
def burst2get_data(burst_file):
    """Parse a textual burst listing into a mapping of date key -> burst intervals.

    Lines starting with ``(`` open a new date: their comma-separated fields are
    zero-padded to two characters and concatenated into the key.  Lines whose
    stripped text starts with ``[`` add a ``(start, end)`` float tuple, taken
    from the second and third comma-separated fields, to the current date.

    Args:
        burst_file: path of the text file to parse.

    Returns:
        The object loaded by ``sb.open_dump('burst_file')`` with one list of
        ``(start, end)`` tuples stored per parsed date key.
    """
    # NOTE(review): this loads the dump literally named 'burst_file', not the
    # ``burst_file`` argument; an earlier version used
    # ``collections.defaultdict(lambda: 0)`` here.  Confirm which is intended.
    get_data = sb.open_dump('burst_file')

    # ``with`` closes the handle, which the previous bare ``open()`` never did.
    with open(burst_file, "r") as fh:
        for line in fh:
            stripped = line.strip()
            if not stripped:
                # Blank lines used to raise IndexError on ``line.strip()[0]``.
                continue
            if line[0] == '(':
                # ``[1:-2]`` drops the leading "(" and the trailing ")" + newline.
                get_date = "".join(
                    [a.strip().zfill(2) for a in line[1:-2].split(",")])
                get_data[get_date] = []
            elif stripped[0] == "[":
                # ``[1:-2]`` drops the leading "[" and the last two characters.
                fields = stripped[1:-2].split(",")
                st = fields[1].strip()
                en = fields[2].strip()
                get_data[get_date].append((float(st), float(en)))
    return get_data
def cnt_logs(DUMP_NAME, DATE):
    """Return the number of entries in the dump ``dumps/<DATE>/<DUMP_NAME>``.

    Args:
        DUMP_NAME: file name of the dump inside the day directory.
        DATE: day directory name; converted with ``str()`` before use.

    Returns:
        int: ``len()`` of the object loaded by ``sb.open_dump``.
    """
    dump_path = '/'.join(('dumps', str(DATE), DUMP_NAME))
    return len(sb.open_dump(dump_path))
# Load every (src, dst) event pair stored in the causality database.
dbname = 's4causality.db'
conn = sqlite3.connect(dbname)
cur = conn.cursor()
cur.execute('''select srcID,srcHost,dstID,dstHost from event''')
edge = cur.fetchall()

# Treat edges as undirected and drop duplicates.  Each endpoint is named
# "<ID>_<host>".  Deduplicating on frozensets avoids the previous round trip
# through a "."-joined string, which split a pair into more than two pieces
# whenever a host name itself contained a ".".
unique_edges = {
    frozenset((str(e[0]) + "_" + e[1], str(e[2]) + "_" + e[3])) for e in edge
}
edge = [set(pair) for pair in unique_edges]
print(len(edge))

# Event pairs listed in the 'EvPair' column of the co_prob_df dump.
co_burst = sb.open_dump('co_prob_df')
co_burst = list(co_burst['EvPair'].values)
co_burst = [set(x) for x in co_burst]
# Hashable copy so the membership test in the loop is O(1), not a list scan.
co_burst_lookup = {frozenset(x) for x in co_burst}

# Events with at least one non-NaN entry in the burst table.
burst = sb.open_dump('burst_df')
burst_ev = [x for x in burst.columns if len(burst[x].dropna()) != 0]
burst_ev_lookup = set(burst_ev)

# For every edge that is NOT a co-burst pair, record it once per endpoint that
# has bursts, with that endpoint placed first.
# NOTE(review): an edge whose two endpoints are identical yields a one-element
# list and ``ep[1]`` raises IndexError, as it did before -- confirm that
# self-loops cannot occur in the ``event`` table.
burst_noburst = []
for ep in edge:
    if frozenset(ep) in co_burst_lookup:
        continue
    ep = list(ep)
    if ep[0] in burst_ev_lookup:
        burst_noburst.append(ep)
    if ep[1] in burst_ev_lookup:
        burst_noburst.append(ep[::-1])
argv[0], argv[1], argv[2], argv[3]) cur.execute(query) r = cur.fetchall() result = [] for i in r: result.append("".join(i[0].split("-"))) return result if __name__ == "__main__": dbname = 's4causality.db' conn = sqlite3.connect(dbname) cur = conn.cursor() edge_burst = sb.open_dump('rp_edge_coburst') print(len(edge_burst)) burst = sb.open_dump('burst_df') burst_ev = [x for x in burst.columns if len(burst[x].dropna()) != 0] result = [] for evp in edge_burst['EvPair']: bday1 = burst[evp[0]].dropna().index.values bday1 = [str(x).split('T')[0].replace("-", "") for x in bday1] bday2 = burst[evp[1]].dropna().index.values bday2 = [str(x).split('T')[0].replace("-", "") for x in bday2] bday = list(set(bday1) & set(bday2)) eday = get_eday(evp) if len(set(bday) & set(eday)) != 0:
elif line.strip()[0] == "[": st = line.strip()[1:-2].split(",")[1].strip() en = line.strip()[1:-2].split(",")[2].strip() get_data[get_date].append((float(st),float(en))) return get_data event = sys.argv[1] files = glob.glob('dumps/*/{}'.format(event)) x = [] y = [] for fi in files: data = sb.open_dump(fi) print(data[0].date(),":",len(data)) x.append(data[0].date()) y.append(len(data)) # DUMP_NAME = sys.argv[1] # if len(sys.argv) > 2: # PLOT_BURST = int(sys.argv[2]) # else: # PLOT_BURST = 0 # # with open(DUMP_NAME,"rb") as f: # obj = pickle.load(f, encoding="bytes") # # tmp = set( [datetime.datetime(row.year,row.month,row.day) for row in obj ] )
temp_id = int(ev_name.split('_')[0]) if temp_id < 500: return pf + '0000-0499/' + ev_name + '.dump' elif temp_id < 1000: return pf + '0500-0999/' + ev_name + '.dump' elif temp_id < 1500: return pf + '1000-1499/' + ev_name + '.dump' else: return pf + '1500-1999/' + ev_name + '.dump' if __name__ == "__main__": if len(sys.argv) == 3: dump = sys.argv[1] ev_day = sys.argv[2] event = sb.open_dump(dump) print(linear_rms(event, ev_day)) else: burst_df = sb.open_dump(sys.argv[1]) for i in burst_df.iteritems(): tmp = i[1].dropna() if len(tmp) != 0: print(tmp.name) dump_name = get_dump_path(tmp.name) event = sb.open_dump(dump_name) for ev_day in tmp.index: rms = linear_rms(event, ev_day.strftime('%Y%m%d')) if not rms > 0.1: print(ev_day, '\t', rms)
def get_log(DUMP_NAME, DATE):
    """Load and return the dump stored at ``dumps/<DATE>/<DUMP_NAME>``.

    Args:
        DUMP_NAME: file name of the dump inside the day directory.
        DATE: day directory name; converted with ``str()`` before use.

    Returns:
        Whatever ``sb.open_dump`` returns for that path.
    """
    day_dir = 'dumps/' + str(DATE)
    return sb.open_dump(day_dir + '/' + DUMP_NAME)
prefix以下が、 prefix/0000-0499/hoge.dump ''' days = [i.split('/')[-1] for i in glob.glob('dumps/*')] for day in days: hosts = set( [i.split('_')[-1] for i in glob.glob('dumps/{0}/*'.format(day))]) for host in hosts: files = glob.glob('dumps/{0}/*_{1}'.format(day, host)) host_data = [] for fi in files: host_data.extend(sb.open_dump(fi)) with open('dumps_host/{0}/{1}'.format(day, day + '_' + host), 'wb') as f: pickle.dump(host_data, f) # # files = glob.glob('dump_files/0000-0499/*_tokyo-dc-rm.dump') # ワイルドカードが使用可能 # files = glob.glob('{0}/*-*/*'.format(PREFIX)) # ワイルドカードが使用可能 # # host_list = [] # for fi in files: # host_list.append(fi.split('/')[-1].split('.')[0].split('_')[1]) # # print(set(host_list),len(set(host_list))) # # # # with open("host_list.txt","w") as f:
# -*- coding: utf-8 -*-
from scipy import arange, hamming, sin, pi
from scipy.fftpack import fft, ifft, fftfreq
import matplotlib.pyplot as plt
import search_burst as sb
import numpy as np
import pandas as pd
import sys
import datetime

# Usage: <script> <dump file> <YYYYMMDD>
# The dump is iterated as a sequence of datetime-like rows (``.date()`` and
# ``.time()`` are called on each row).
event = sb.open_dump(sys.argv[1])
day = sys.argv[2]

# Split the YYYYMMDD argument into a date object.
ev_year = int(day[:4])
ev_month = int(day[4:6])
ev_day = int(day[6:8])
ev_date = datetime.date(ev_year, ev_month, ev_day)

# Keep only the rows of the requested day and express each one as
# seconds since midnight.
plot_data = [row.time() for row in event if row.date() == ev_date]
ev_data = [row.hour * 3600 + row.minute * 60 + row.second for row in plot_data]

fs = 1  # Sampling rate
L = 2**16  # Signal length

# Impulse train: 10.0 at every second that has at least one event, else 0.0.
# Membership is tested against a set so building the L samples does not scan
# the whole ``ev_data`` list once per sample.
ev_seconds = set(ev_data)
x = [10. if i in ev_seconds else 0. for i in range(L)]

# test data
# x = [10. if i%3600 == 0 else 0. for i in range(L)]

# # Build a 440 Hz sine wave.
# sine_440 = sin(2. * pi * arange(L) * 440. / fs)
# # Build a 600 Hz sine wave.
import sqlite3
import numpy as np
import pandas as pd
import search_burst as sb
import pickle


def search_pair_query(id1, host1, id2, host2):
    """Build the SQL text selecting the dates recorded for an event pair.

    The pair matches in either direction (src/dst swapped).

    Args:
        id1, host1: ID and host of the first event.
        id2, host2: ID and host of the second event.

    Returns:
        str: a ``select date from date ...`` statement with the four values
        interpolated directly into the text.
    """
    # NOTE(review): values are pasted into the SQL text unescaped.  They come
    # from the local 'co_prob_df' dump here; switch to bound parameters if the
    # inputs can ever be untrusted or contain a double quote.
    query = 'select date from date where pairID in(select pairID from event where (srcID={0} and srcHost="{1}" and dstID={2} and dstHost="{3}") or (srcID={2} and srcHost="{3}" and dstID={0} and dstHost="{1}"));'.format(
        id1, host1, id2, host2)
    return query


if __name__ == "__main__":
    co_prob_df = sb.open_dump('co_prob_df')
    dbname = 's4causality.db'
    conn = sqlite3.connect(dbname)
    cur = conn.cursor()

    # Keep the co-burst pairs that also have at least one dated row in the database.
    edge_coburst_pair = []
    for row in co_prob_df['EvPair'].values:
        # Event names are "<ID>_<host>"; split on the first "_" only so a host
        # name containing "_" no longer breaks the two-value unpacking.
        id1, host1 = row[0].split('_', 1)
        id2, host2 = row[1].split('_', 1)
        q = search_pair_query(id1, host1, id2, host2)
        cur.execute(q)
        q_result = cur.fetchall()
        if q_result:
            edge_coburst_pair.append(row)
def cnt_logs(DUMP_NAME, DATE):
    """Return the number of entries in the dump at ``DUMP_NAME``.

    Args:
        DUMP_NAME: path handed to ``sb.open_dump`` unchanged.
        DATE: accepted but not used by this function.

    Returns:
        int: ``len()`` of the loaded object.
    """
    entries = sb.open_dump(DUMP_NAME)
    count = len(entries)
    return count
# coding: UTF-8 ''' coburst, edgeプロット ''' import collections import sys import numpy as np import matplotlib.pyplot as plt import pybursts import datetime import matplotlib.dates as mdates import pickle import search_burst as sb co_prob_df = sb.open_dump('co_prob_df') co_edge_df = sb.open_dump('rp_edge_coburst') xj = co_prob_df['x'] yj = co_prob_df['y_jaccard'] * (10**5) xs = co_prob_df['x'] ys = co_prob_df['y_simpson'] * (10**5) xej = co_edge_df['x'] yej = co_edge_df['y_jaccard'] * (10**5) xes = co_edge_df['x'] yes = co_edge_df['y_simpson'] * (10**5) df_bool = [False] * co_prob_df.shape[0] for i in [ x for x in co_prob_df['EvPair'] if (x[0][:3] == '10_' or x[1][:3] == '11_') or (