def periodic_events(conf, ld, top_dt, end_dt, area, edict, evmap):
    """Find events whose time-series shows a strong self-correlation.

    Log lines between the earliest sampling start and ``end_dt`` are
    collected per event id.  For every sampling term and every candidate
    lag, the self-correlation of the discretized time-series is computed;
    an event is reported when its best correlation reaches the threshold
    ``filter.self_corr_th``.

    Args:
        conf: configuration object; options are read from section "filter".
        ld: log data source providing ``iter_lines(top_dt, end_dt, area)``.
        top_dt: not used by this function (kept for interface compatibility).
        end_dt: end of every sampling window.
        area: area passed through to ``ld.iter_lines``.
        edict: not used by this function (kept for interface compatibility).
        evmap: event map; ``evmap.process_line(line)`` yields the event id.

    Returns:
        list of ``(eid, diff)`` tuples, where ``diff`` is the lag that gave
        the highest self-correlation for that event.

    Raises:
        ValueError: if ``filter.sampling_term`` is empty.
    """
    l_sampling_term = [config.str2dur(diffstr) for diffstr
                       in conf.gettuple("filter", "sampling_term")]
    if len(l_sampling_term) == 0:
        raise ValueError("configuration error in filter.sampling_term")
    search_interval = conf.getboolean("filter", "search_interval")
    corr_th = conf.getfloat("filter", "self_corr_th")
    corr_bin = conf.getdur("filter", "self_corr_bin")
    corr_diff = [config.str2dur(diffstr) for diffstr
                 in conf.gettuple("filter", "self_corr_diff")]

    # Terms are sorted ascending, so the last start datetime belongs to the
    # longest term, i.e. it is the earliest one and covers all the others.
    l_sample_top_dt = [end_dt - tdelta for tdelta in sorted(l_sampling_term)]
    sample_top_dt_max = l_sample_top_dt[-1]

    # prepare time-series of sampling term
    sample_edict = {}
    iterobj = ld.iter_lines(top_dt=sample_top_dt_max, end_dt=end_dt,
                            area=area)
    for line in iterobj:
        eid = evmap.process_line(line)
        sample_edict.setdefault(eid, []).append(line.dt)

    # determine interval candidate
    if search_interval:
        new_corr_diff = set()
        p_cnt = conf.getint("filter", "periodic_count")
        p_term = conf.getdur("filter", "periodic_term")
        p_th = conf.getfloat("filter", "periodic_th")
        for sample_top_dt in l_sample_top_dt:
            for eid, l_dt in sample_edict.iteritems():
                l_window_dt = list(dtutil.limit_dt_seq(l_dt, sample_top_dt,
                                                       end_dt))
                if is_enough_long(l_window_dt, p_cnt, p_term):
                    diff = interval(l_window_dt, p_th)
                    if diff is not None:
                        new_corr_diff.add(datetime.timedelta(seconds=diff))
        new_corr_diff.update(set(corr_diff))
        corr_diff = list(new_corr_diff)

    # get periodic events
    ret = []
    for eid, l_dt in sample_edict.iteritems():
        l_result = []
        for sample_top_dt in l_sample_top_dt:
            # Keep the full series in l_dt: rebinding it here would make
            # every later (longer) window see only the data that survived
            # the previous (shorter) window.
            l_window_dt = list(dtutil.limit_dt_seq(l_dt, sample_top_dt,
                                                   end_dt))
            if len(l_window_dt) == 0:
                continue
            for diff in corr_diff:
                data = dtutil.auto_discretize(l_window_dt, corr_bin)
                corr = self_corr(data, diff, corr_bin)
                l_result.append((corr, diff))
        if len(l_result) == 0:
            # No lag candidate or no data in any window: max() on an empty
            # list would raise ValueError, so the event is simply skipped.
            continue
        max_corr, max_diff = max(l_result)
        if max_corr >= corr_th:
            _logger.debug("Event {0} is periodic (interval: {1})".format(
                eid, max_diff))
            ret.append((eid, max_diff))
    return ret
def graph_cp(conf, dur, output_dirname):
    """Plot change point data and scores, one PDF per event definition.

    The plotted term is the last ``dur`` of the loaded change point data,
    clipped so that it does not start before the data itself.

    Args:
        conf: configuration object; reads "changepoint.temp_cp_data".
        dur: duration string parsed with ``config.str2dur``.
        output_dirname: directory (created via ``common.mkdir``) that
            receives the files "<host>_<gid>.pdf".
    """
    common.mkdir(output_dirname)
    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()

    cpd_top_dt, cpd_end_dt = cpd.term()
    end_dt = cpd_end_dt
    top_dt = cpd_end_dt - length
    if top_dt < cpd_top_dt:
        top_dt = cpd_top_dt

    # Select the backend once, before pyplot is loaded.  Doing this inside
    # the loop re-issued matplotlib.use() after pyplot had been imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates

    for evdef in cpd.iter_evdef():
        fn = "{0}_{1}.pdf".format(evdef.host, evdef.gid)
        ret = cpd.get(evdef, top_dt, end_dt)
        if ret is None:
            continue
        l_label, l_data, l_score = zip(*ret)

        fig = plt.figure()
        ax = fig.add_subplot(111)
        ax.plot(l_label, l_data, "r")
        # scores share the x axis but get their own y axis
        ax2 = ax.twinx()
        ax2.plot(l_label, l_score)

        # a fresh locator/formatter per figure
        days = mdates.WeekdayLocator()
        daysFmt = mdates.DateFormatter('%m-%d')
        ax.xaxis.set_major_locator(days)
        ax.xaxis.set_major_formatter(daysFmt)

        plt.savefig(output_dirname + "/" + fn)
        plt.close()
def replace_edict(conf, edict, evmap, ld, top_dt, end_dt, area):
    """Replace periodic events with the remainder left by the fourier filter.

    For every "<length>_<binsize>" condition in ``filter.dt_cond`` the
    events are converted to per-bin statistics and tested with
    ``fourier.pretest`` / ``fourier.replace``.  A periodic event whose
    remainder is empty is removed; otherwise its timestamps are replaced
    by the remainder and its definition is marked as
    ``EventDefinitionMap.type_periodic_remainder``.

    Args:
        conf: configuration object.
        edict: mapping of event id to its timestamps (not modified).
        evmap: event definition map (not modified; a copy is edited).
        ld: log data source, used when a condition needs another term.
        top_dt: start of the current term.
        end_dt: end of the current term.
        area: area passed through to ``resize_edict``.

    Returns:
        The result of ``_remap_eid`` on the edited copies of ``edict`` and
        ``evmap``.
    """

    def revert_event(data, top_dt, end_dt, binsize):
        # Turn per-bin counts back into one timestamp per non-empty bin.
        assert top_dt + len(data) * binsize == end_dt
        return [top_dt + i * binsize
                for i, val in enumerate(data) if val > 0]

    ret_edict = copy.deepcopy(edict)
    ret_evmap = _copy_evmap(evmap)
    s_eid_periodic = set()

    for dt_cond in conf.gettuple("filter", "dt_cond"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        # The span must be end - top; "top_dt - end_dt" is negative and so
        # never equals a configured length, which left this branch dead.
        if dt_length == end_dt - top_dt:
            temp_edict = edict
        else:
            temp_edict = resize_edict(ld, evmap, end_dt, dt_length, area)
        # NOTE(review): statistics are always taken over top_dt..end_dt,
        # even when temp_edict was built for a different length - confirm
        # this is intended.
        d_stat = event2stat(temp_edict, top_dt, end_dt, binsize,
                            binarize=False)

        for eid, l_stat in d_stat.iteritems():
            if eid in s_eid_periodic:
                # already handled under an earlier condition
                continue
            if not fourier.pretest(conf, l_stat, binsize):
                continue

            _logger.debug("periodicity test for eid {0}".format(eid))
            flag, remain_data, period = fourier.replace(
                conf, l_stat, binsize)
            if not flag:
                continue

            _logger.debug("eid {0} is periodic ({1}, {2})".format(
                eid, dt_length, binsize))
            s_eid_periodic.add(eid)
            remain_cnt = sum(remain_data)
            if remain_cnt == 0:
                _logger.debug("remove eid {0} from dataset".format(eid))
                # eid may exist only in the resized dict, so tolerate
                # its absence from the copy of the original edict
                ret_edict.pop(eid, None)
                ret_evmap.pop(eid)
            else:
                _logger.debug("replace eid {0} (count:{1})".format(
                    eid, remain_cnt))
                ret_edict[eid] = revert_event(remain_data,
                                              top_dt, end_dt, binsize)
                ret_evmap.update_event(
                    eid, evmap.info(eid),
                    EventDefinitionMap.type_periodic_remainder,
                    int(period.total_seconds()))

    return _remap_eid(ret_edict, ret_evmap)
def heat_score(conf, dur, filename):
    """Draw a heat map of change point scores and save it to ``filename``.

    One row is drawn per event definition and one column per label
    returned by ``cpd.get``; colors use a logarithmic scale.

    Args:
        conf: configuration object; reads "changepoint.temp_cp_data".
        dur: duration string parsed with ``config.str2dur``; the plotted
            term is the last ``dur`` of the loaded data, clipped to the
            start of the data.
        filename: output path handed to ``plt.savefig``.

    Raises:
        ValueError: if the change point data has no event definition.
    """
    import numpy as np

    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()

    cpd_top_dt, cpd_end_dt = cpd.term()
    top_dt = cpd_end_dt - length
    if top_dt < cpd_top_dt:
        top_dt = cpd_top_dt
    end_dt = cpd_end_dt

    result = []
    l_label = None
    for evdef in cpd.iter_evdef():
        l_label, l_data, l_score = zip(*cpd.get(evdef, top_dt, end_dt))
        result.append(l_score)
    if l_label is None:
        # The former "for ... else" ran its else branch unconditionally
        # and hit a NameError on l_label when nothing was iterated.
        raise ValueError("no event definition in change point data")

    xlen = len(l_label)
    ylen = cpd.len_evdef()
    data = np.array(result)

    # diagnostic output kept from the original implementation
    print(xlen)
    print(ylen)
    # pcolormesh takes cell corners, hence one more point per axis
    x, y = np.meshgrid(np.arange(xlen + 1), np.arange(ylen + 1))
    print(x)
    print(y)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import explot
    import matplotlib.colors

    cm = explot.generate_cmap(["orangered", "white"])
    # the lower bound is clamped to at least 1.0 for the log scale
    plt.pcolormesh(x, y, data,
                   norm=matplotlib.colors.LogNorm(
                       vmin=max(data.min(), 1.0), vmax=data.max()),
                   cmap=cm)
    xt_v, xt_l = explot.dt_ticks((0, xlen), (top_dt, end_dt),
                                 cpd.binsize(), recent=True)
    plt.xticks(xt_v, xt_l, rotation=336)
    plt.xlim(xmax=xlen)
    plt.ylim(ymax=ylen)
    plt.colorbar()
    plt.savefig(filename)
def heat_score(conf, dur, filename):
    """Draw a heat map of change point scores and save it to ``filename``.

    One row is drawn per event definition and one column per label
    returned by ``cpd.get``; colors use a logarithmic scale.

    Args:
        conf: configuration object; reads "changepoint.temp_cp_data".
        dur: duration string parsed with ``config.str2dur``; the plotted
            term is the last ``dur`` of the loaded data, clipped to the
            start of the data.
        filename: output path handed to ``plt.savefig``.

    Raises:
        ValueError: if the change point data has no event definition.
    """
    import numpy as np

    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()

    cpd_top_dt, cpd_end_dt = cpd.term()
    top_dt = cpd_end_dt - length
    if top_dt < cpd_top_dt:
        top_dt = cpd_top_dt
    end_dt = cpd_end_dt

    result = []
    l_label = None
    for evdef in cpd.iter_evdef():
        l_label, l_data, l_score = zip(*cpd.get(evdef, top_dt, end_dt))
        result.append(l_score)
    if l_label is None:
        # The former "for ... else" ran its else branch unconditionally
        # and hit a NameError on l_label when nothing was iterated.
        raise ValueError("no event definition in change point data")

    xlen = len(l_label)
    ylen = cpd.len_evdef()
    data = np.array(result)

    # diagnostic output kept from the original implementation
    print(xlen)
    print(ylen)
    # pcolormesh takes cell corners, hence one more point per axis
    x, y = np.meshgrid(np.arange(xlen + 1), np.arange(ylen + 1))
    print(x)
    print(y)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import explot
    import matplotlib.colors

    cm = explot.generate_cmap(["orangered", "white"])
    # the lower bound is clamped to at least 1.0 for the log scale
    plt.pcolormesh(x, y, data,
                   norm=matplotlib.colors.LogNorm(
                       vmin=max(data.min(), 1.0), vmax=data.max()),
                   cmap=cm)
    xt_v, xt_l = explot.dt_ticks((0, xlen), (top_dt, end_dt),
                                 cpd.binsize(), recent=True)
    plt.xticks(xt_v, xt_l, rotation=336)
    plt.xlim(xmax=xlen)
    plt.ylim(ymax=ylen)
    plt.colorbar()
    plt.savefig(filename)
def __init__(self, conf, fflag):
    """Set up the filter from section "filter_self_corr" of ``conf``.

    Opens the log database, reads the input file name, the target term
    (falling back to the whole term of the database when none is
    configured), the durations, bin size and threshold, and finally loads
    previously stored data when ``self.loaded()`` reports it exists.
    """
    section = "filter_self_corr"

    self.ld = log_db.LogData(conf)
    self.filename = conf.get(section, "indata_filename")

    configured_term = conf.getterm(section, "term")
    if configured_term is not None:
        self.top_dt, self.end_dt = configured_term
    else:
        self.top_dt, self.end_dt = self.ld.whole_term()

    self.l_dur = list(map(config.str2dur, conf.getlist(section, "dur")))
    self.binsize = conf.getdur(section, "bin_size")
    self.th = conf.getfloat(section, "threshold")

    self.d_result = {}
    self.d_info = {}
    self.fflag = fflag

    if self.loaded():
        self.load()
def filter_edict_remove(conf, edict, evmap, ld, top_dt, end_dt, area, alg):
    """Remove periodic events from the event dictionary and event map.

    For every "<length>_<binsize>" condition in ``filter.dt_cond`` the
    events are converted to per-bin statistics; events that pass
    ``fourier.pretest`` are tested with the algorithm named by ``alg``
    and, when judged periodic, dropped from the returned data.

    Args:
        conf: configuration object.
        edict: mapping of event id to its timestamps (not modified).
        evmap: event definition map (not modified; a copy is edited).
        ld: log data source, used when a condition needs another term.
        top_dt: start of the current term.
        end_dt: end of the current term.
        area: area passed through to ``resize_edict``.
        alg: "fourier" or "corr".

    Returns:
        The result of ``_remap_eid`` on the edited copies of ``edict`` and
        ``evmap``.

    Raises:
        ValueError: if an event has to be tested and ``alg`` is unknown.
    """

    def is_removed(conf, l_stat, binsize, alg):
        # Dispatch to the selected periodicity test.
        if alg == "fourier":
            return fourier.remove(conf, l_stat, binsize)
        elif alg == "corr":
            return evfilter.remove_corr(conf, l_stat, binsize)
        else:
            # Falling through returned None, which then failed with an
            # opaque unpacking TypeError at the call site.
            raise ValueError(
                "unknown periodicity test algorithm: {0}".format(alg))

    ret_edict = copy.deepcopy(edict)
    ret_evmap = _copy_evmap(evmap)
    s_eid_periodic = set()

    for dt_cond in conf.gettuple("filter", "dt_cond"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        # The span must be end - top; "top_dt - end_dt" is negative and so
        # never equals a configured length, which left this branch dead.
        if dt_length == end_dt - top_dt:
            temp_edict = edict
        else:
            temp_edict = resize_edict(ld, evmap, end_dt, dt_length, area)
        d_stat = event2stat(temp_edict, top_dt, end_dt, binsize,
                            binarize=False)

        for eid, l_stat in d_stat.iteritems():
            _logger.debug("periodicity test for eid {0} {1}".format(
                eid, evmap.info_str(eid)))
            if eid in s_eid_periodic:
                # already judged periodic under an earlier condition
                continue
            if not fourier.pretest(conf, l_stat, binsize):
                continue
            flag, interval = is_removed(conf, l_stat, binsize, alg)
            if flag:
                _logger.debug("eid {0} is periodic ({1}, {2})".format(
                    eid, interval, binsize))
                s_eid_periodic.add(eid)

    for eid in s_eid_periodic:
        _logger.debug("remove eid {0} from dataset".format(eid))
        # eid may exist only in a resized dict, so tolerate its absence
        # from the copy of the original edict
        ret_edict.pop(eid, None)
        ret_evmap.pop(eid)
    return _remap_eid(ret_edict, ret_evmap)