Example #1
0
def periodic_events(conf, ld, top_dt, end_dt, area, edict, evmap):
    """Return the events that look periodic within the sampling terms.

    Time stamps of every event are collected from ``ld`` for the longest
    configured sampling term ending at ``end_dt``.  For each sampling term
    and each candidate interval the self-correlation of the discretized
    time-series is calculated; an event is reported when its best
    correlation reaches the ``filter.self_corr_th`` option.

    Args:
        conf: Configuration object.  Options are read from the "filter"
            section: sampling_term, search_interval, self_corr_th,
            self_corr_bin, self_corr_diff and, when search_interval is
            enabled, periodic_count, periodic_term and periodic_th.
        ld: Log data source; ``ld.iter_lines(top_dt, end_dt, area)`` is
            iterated to obtain the lines.
        top_dt: Not used by this function (kept for the caller interface).
        end_dt: End of every sampling term.
        area: Passed through to ``ld.iter_lines``.
        edict: Not used by this function (kept for the caller interface).
        evmap: Object whose ``process_line(line)`` returns the event id.

    Returns:
        A list of ``(eid, interval)`` tuples, where ``interval`` is the
        candidate that gave the highest correlation for that event.

    Raises:
        ValueError: If ``filter.sampling_term`` is empty.
    """
    l_sampling_term = [config.str2dur(diffstr) for diffstr
                       in conf.gettuple("filter", "sampling_term")]
    if len(l_sampling_term) == 0:
        raise ValueError("configuration error in filter.sampling_term")
    search_interval = conf.getboolean("filter", "search_interval")
    corr_th = conf.getfloat("filter", "self_corr_th")
    corr_bin = conf.getdur("filter", "self_corr_bin")
    corr_diff = [config.str2dur(diffstr) for diffstr
                 in conf.gettuple("filter", "self_corr_diff")]

    # Terms are sorted ascending, so the last entry is the start of the
    # longest sampling term.
    l_sample_top_dt = [end_dt - tdelta for tdelta in sorted(l_sampling_term)]
    sample_top_dt_max = l_sample_top_dt[-1]

    # prepare time-series of sampling term
    sample_edict = {}
    iterobj = ld.iter_lines(top_dt=sample_top_dt_max, end_dt=end_dt,
                            area=area)
    for line in iterobj:
        eid = evmap.process_line(line)
        sample_edict.setdefault(eid, []).append(line.dt)

    # determine interval candidate
    if search_interval:
        new_corr_diff = set()
        p_cnt = conf.getint("filter", "periodic_count")
        p_term = conf.getdur("filter", "periodic_term")
        p_th = conf.getfloat("filter", "periodic_th")
        for sample_top_dt in l_sample_top_dt:
            for eid, l_dt_all in sample_edict.iteritems():
                l_dt = list(dtutil.limit_dt_seq(l_dt_all, sample_top_dt,
                                                end_dt))
                if is_enough_long(l_dt, p_cnt, p_term):
                    diff = interval(l_dt, p_th)
                    if diff is not None:
                        new_corr_diff.add(datetime.timedelta(seconds=diff))
        # Configured candidates are always kept next to the estimated ones.
        new_corr_diff.update(corr_diff)
        corr_diff = list(new_corr_diff)

    # get periodic events
    ret = []
    for eid, l_dt_all in sample_edict.iteritems():
        l_result = []
        for sample_top_dt in l_sample_top_dt:
            # Always cut the window out of the complete series.  Rebinding
            # the full series here would leave the longer terms with only
            # the data of the shortest term.
            l_dt = list(dtutil.limit_dt_seq(l_dt_all, sample_top_dt, end_dt))
            if len(l_dt) == 0:
                continue
            for diff in corr_diff:
                data = dtutil.auto_discretize(l_dt, corr_bin)
                corr = self_corr(data, diff, corr_bin)
                l_result.append((corr, diff))
        if not l_result:
            # No candidate could be evaluated (e.g. no interval candidates),
            # so the event cannot be judged periodic.
            continue
        max_corr, max_diff = max(l_result)
        if max_corr >= corr_th:
            _logger.debug("Event {0} is periodic (interval: {1})".format(
                    eid, max_diff))
            ret.append((eid, max_diff))
    return ret
Example #2
0
def graph_cp(conf, dur, output_dirname):
    """Plot data and change-point score of each event definition to PDF.

    The change point data stored under the ``changepoint.temp_cp_data``
    option is loaded, and for every event definition one file named
    ``<host>_<gid>.pdf`` is written into ``output_dirname``.  The plotted
    term is the last ``dur`` of the stored term, clamped to its beginning.

    Args:
        conf: Configuration object.
        dur: Duration string, parsed with ``config.str2dur``.
        output_dirname: Directory for the output files; created through
            ``common.mkdir``.
    """
    import os

    common.mkdir(output_dirname)
    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()
    cpd_top_dt, cpd_end_dt = cpd.term()
    # Never start before the beginning of the stored term.
    top_dt = max(cpd_end_dt - length, cpd_top_dt)
    end_dt = cpd_end_dt

    # Select the backend once, before pyplot is imported.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates

    for evdef in cpd.iter_evdef():
        fn = "{0}_{1}.pdf".format(evdef.host, evdef.gid)
        ret = cpd.get(evdef, top_dt, end_dt)
        if ret is None:
            continue
        l_label, l_data, l_score = zip(*ret)

        fig = plt.figure()
        try:
            # Data in red on the left axis, score on a second y axis.
            ax = fig.add_subplot(111)
            ax.plot(l_label, l_data, "r")
            ax2 = ax.twinx()
            ax2.plot(l_label, l_score)
            ax.xaxis.set_major_locator(mdates.WeekdayLocator())
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
            fig.savefig(os.path.join(output_dirname, fn))
        finally:
            # Release the figure even when plotting or saving fails.
            plt.close(fig)
Example #3
0
def graph_cp(conf, dur, output_dirname):
    """Write one PDF graph (data and change-point score) per event definition.

    Change point data is loaded from the directory given by the
    ``changepoint.temp_cp_data`` option.  Each event definition is plotted
    over the last ``dur`` of the stored term (clamped to the start of that
    term) and saved as ``<host>_<gid>.pdf`` in ``output_dirname``.

    Args:
        conf: Configuration object.
        dur: Duration string, parsed with ``config.str2dur``.
        output_dirname: Directory for the output files; created through
            ``common.mkdir``.
    """
    import os

    common.mkdir(output_dirname)
    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()
    cpd_top_dt, cpd_end_dt = cpd.term()
    # Clamp the requested start to the beginning of the stored term.
    top_dt = max(cpd_end_dt - length, cpd_top_dt)
    end_dt = cpd_end_dt

    # The backend is chosen a single time, ahead of the pyplot import.
    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import matplotlib.dates as mdates

    for evdef in cpd.iter_evdef():
        fn = "{0}_{1}.pdf".format(evdef.host, evdef.gid)
        ret = cpd.get(evdef, top_dt, end_dt)
        if ret is None:
            continue
        l_label, l_data, l_score = zip(*ret)

        fig = plt.figure()
        try:
            # Left axis: data (red).  Right axis: score.
            ax = fig.add_subplot(111)
            ax.plot(l_label, l_data, "r")
            ax2 = ax.twinx()
            ax2.plot(l_label, l_score)
            ax.xaxis.set_major_locator(mdates.WeekdayLocator())
            ax.xaxis.set_major_formatter(mdates.DateFormatter('%m-%d'))
            fig.savefig(os.path.join(output_dirname, fn))
        finally:
            # Close the figure on every path so figures do not accumulate.
            plt.close(fig)
Example #4
0
def replace_edict(conf, edict, evmap, ld, top_dt, end_dt, area):
    """Replace periodic events with their non-periodic remainder.

    For every ``<length>_<binsize>`` condition in the ``filter.dt_cond``
    option, each event is converted into a binned series and tested with
    ``fourier.pretest`` / ``fourier.replace``.  An event judged periodic is
    either removed (when nothing remains) or replaced by the time stamps of
    the remaining bins, and its definition is updated to the
    periodic-remainder type.  An event is handled at most once.

    Args:
        conf: Configuration object.
        edict: Mapping of event id to time stamps; not modified (a deep copy
            is edited).
        evmap: Event definition map; edited on a copy made by
            ``_copy_evmap``.
        ld: Log data source, passed to ``resize_edict``.
        top_dt: Start of the term of ``edict``.
        end_dt: End of the term of ``edict``.
        area: Passed to ``resize_edict``.

    Returns:
        The result of ``_remap_eid`` on the edited copies.
    """

    def revert_event(data, top_dt, end_dt, binsize):
        # Convert a binned series back to the start time of every
        # non-empty bin; the series must span exactly top_dt..end_dt.
        assert top_dt + len(data) * binsize == end_dt
        return [top_dt + i * binsize for i, val in enumerate(data) if val > 0]

    ret_edict = copy.deepcopy(edict)
    ret_evmap = _copy_evmap(evmap)
    s_eid_periodic = set()

    for dt_cond in conf.gettuple("filter", "dt_cond"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        # The term length is end - top; "top_dt - end_dt" is negative and
        # never equals a duration, which forced a reload every time.
        if dt_length == end_dt - top_dt:
            temp_edict = edict
        else:
            temp_edict = resize_edict(ld, evmap, end_dt, dt_length, area)
        d_stat = event2stat(temp_edict,
                            top_dt,
                            end_dt,
                            binsize,
                            binarize=False)
        for eid, l_stat in d_stat.iteritems():
            # Skip events already handled under an earlier condition.
            if eid in s_eid_periodic:
                continue
            if not fourier.pretest(conf, l_stat, binsize):
                continue

            _logger.debug("periodicity test for eid {0}".format(eid))
            flag, remain_data, interval = fourier.replace(
                conf, l_stat, binsize)
            if not flag:
                continue

            _logger.debug("eid {0} is periodic ({1}, {2})".format(
                eid, dt_length, binsize))
            s_eid_periodic.add(eid)
            remain_count = sum(remain_data)
            if remain_count == 0:
                _logger.debug(
                    "remove eid {0} from dataset".format(eid))
                ret_edict.pop(eid)
                ret_evmap.pop(eid)
            else:
                _logger.debug("replace eid {0} (count:{1})".format(
                    eid, remain_count))
                ret_edict[eid] = revert_event(remain_data, top_dt,
                                              end_dt, binsize)
                ret_evmap.update_event(
                    eid, evmap.info(eid),
                    EventDefinitionMap.type_periodic_remainder,
                    int(interval.total_seconds()))

    return _remap_eid(ret_edict, ret_evmap)
Example #5
0
def heat_score(conf, dur, filename):
    """Draw a heat map of change-point scores and save it to ``filename``.

    Change point data is loaded from the directory given by the
    ``changepoint.temp_cp_data`` option.  One row is drawn per event
    definition and one column per label returned by ``cpd.get``, over the
    last ``dur`` of the stored term (clamped to its beginning).  Colors use
    a logarithmic scale whose lower bound is at least 1.0.

    Args:
        conf: Configuration object.
        dur: Duration string, parsed with ``config.str2dur``.
        filename: Path of the image file to write.

    Raises:
        ValueError: If the change point data has no event definition.
    """
    import numpy as np
    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()
    cpd_top_dt, cpd_end_dt = cpd.term()
    # Never start before the beginning of the stored term.
    top_dt = max(cpd_end_dt - length, cpd_top_dt)
    end_dt = cpd_end_dt

    result = []
    l_label = None
    for evdef in cpd.iter_evdef():
        l_label, _, l_score = zip(*cpd.get(evdef, top_dt, end_dt))
        result.append(l_score)
    if l_label is None:
        # Without any row there is no label sequence to size the grid with.
        raise ValueError("no event definition found in change point data")
    xlen = len(l_label)
    ylen = cpd.len_evdef()
    data = np.array(result)

    print(xlen)
    print(ylen)
    # pcolormesh takes the cell corners, hence one more point per axis.
    x, y = np.meshgrid(np.arange(xlen + 1), np.arange(ylen + 1))
    print(x)
    print(y)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import explot
    import matplotlib.colors
    cm = explot.generate_cmap(["orangered", "white"])
    plt.pcolormesh(x,
                   y,
                   data,
                   norm=matplotlib.colors.LogNorm(vmin=max(data.min(), 1.0),
                                                  vmax=data.max()),
                   cmap=cm)
    xt_v, xt_l = explot.dt_ticks((0, xlen), (top_dt, end_dt),
                                 cpd.binsize(),
                                 recent=True)
    plt.xticks(xt_v, xt_l, rotation=336)
    plt.xlim(xmax=xlen)
    plt.ylim(ymax=ylen)
    plt.colorbar()
    plt.savefig(filename)
    # Release the figure so repeated calls do not draw over each other.
    plt.close()
Example #6
0
def heat_score(conf, dur, filename):
    """Save a heat map of the change-point scores of all event definitions.

    The data stored under the ``changepoint.temp_cp_data`` option is loaded
    and plotted over the last ``dur`` of its term (clamped to the start of
    that term): one row per event definition, one column per label returned
    by ``cpd.get``.  The color scale is logarithmic with a lower bound of at
    least 1.0.

    Args:
        conf: Configuration object.
        dur: Duration string, parsed with ``config.str2dur``.
        filename: Path of the image file to write.

    Raises:
        ValueError: If the change point data has no event definition.
    """
    import numpy as np
    length = config.str2dur(dur)
    dirname = conf.get("changepoint", "temp_cp_data")
    cpd = ChangePointData(dirname)
    cpd.load()
    cpd_top_dt, cpd_end_dt = cpd.term()
    # Clamp the requested start to the beginning of the stored term.
    top_dt = max(cpd_end_dt - length, cpd_top_dt)
    end_dt = cpd_end_dt

    result = []
    l_label = None
    for evdef in cpd.iter_evdef():
        l_label, _, l_score = zip(*cpd.get(evdef, top_dt, end_dt))
        result.append(l_score)
    if l_label is None:
        # The grid width comes from the labels, so at least one row is needed.
        raise ValueError("no event definition found in change point data")
    xlen = len(l_label)
    ylen = cpd.len_evdef()
    data = np.array(result)

    print(xlen)
    print(ylen)
    # Cell corners are needed, so each axis has one extra grid point.
    x, y = np.meshgrid(np.arange(xlen + 1), np.arange(ylen + 1))
    print(x)
    print(y)

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt
    import explot
    import matplotlib.colors
    cm = explot.generate_cmap(["orangered", "white"])
    plt.pcolormesh(x, y, data, norm=matplotlib.colors.LogNorm(
            vmin=max(data.min(), 1.0), vmax=data.max()), cmap=cm)
    xt_v, xt_l = explot.dt_ticks((0, xlen), (top_dt, end_dt),
            cpd.binsize(), recent=True)
    plt.xticks(xt_v, xt_l, rotation=336)
    plt.xlim(xmax=xlen)
    plt.ylim(ymax=ylen)
    plt.colorbar()
    plt.savefig(filename)
    # Close the figure so a later call starts from a clean one.
    plt.close()
Example #7
0
    def __init__(self, conf, fflag):
        """Set up the filter from the "filter_self_corr" configuration section.

        The log data handle is opened first, then the input file name, the
        target term, the list of durations, the bin size and the threshold
        are read.  When no term is configured, the whole term reported by
        the log data is used.  Finally ``self.load()`` is called if
        ``self.loaded()`` reports true.

        Args:
            conf: Configuration object.
            fflag: Stored unchanged as ``self.fflag``.
        """
        section = "filter_self_corr"

        self.ld = log_db.LogData(conf)
        self.filename = conf.get(section, "indata_filename")

        term = conf.getterm(section, "term")
        if term is not None:
            self.top_dt, self.end_dt = term
        else:
            # No explicit term: fall back to everything the log data covers.
            self.top_dt, self.end_dt = self.ld.whole_term()

        self.l_dur = [config.str2dur(dur_str)
                      for dur_str in conf.getlist(section, "dur")]
        self.binsize = conf.getdur(section, "bin_size")
        self.th = conf.getfloat(section, "threshold")

        # Result containers start empty and may be filled by load() below.
        self.d_result = {}
        self.d_info = {}

        self.fflag = fflag
        if self.loaded():
            self.load()
Example #8
0
def filter_edict_remove(conf, edict, evmap, ld, top_dt, end_dt, area, alg):
    """Remove the events judged periodic by the selected algorithm.

    For every ``<length>_<binsize>`` condition in the ``filter.dt_cond``
    option, each event is converted into a binned series, pre-screened with
    ``fourier.pretest`` and then tested with the algorithm named by ``alg``.
    All events judged periodic under any condition are removed from copies
    of ``edict`` and ``evmap``.

    Args:
        conf: Configuration object.
        edict: Mapping of event id to time stamps; not modified (a deep copy
            is edited).
        evmap: Event definition map; edited on a copy made by
            ``_copy_evmap``.
        ld: Log data source, passed to ``resize_edict``.
        top_dt: Start of the term of ``edict``.
        end_dt: End of the term of ``edict``.
        area: Passed to ``resize_edict``.
        alg: Test to apply: "fourier" or "corr".

    Returns:
        The result of ``_remap_eid`` on the edited copies.

    Raises:
        ValueError: If ``alg`` is not a known algorithm name and an event
            reaches the periodicity test.
    """

    def is_removed(conf, l_stat, binsize, alg):
        # Dispatch to the requested test; each returns (flag, interval).
        if alg == "fourier":
            return fourier.remove(conf, l_stat, binsize)
        elif alg == "corr":
            return evfilter.remove_corr(conf, l_stat, binsize)
        else:
            # Falling through would return None and fail later with an
            # unrelated-looking unpacking error.
            raise ValueError(
                "unknown periodicity filter algorithm: {0!r}".format(alg))

    ret_edict = copy.deepcopy(edict)
    ret_evmap = _copy_evmap(evmap)
    s_eid_periodic = set()

    for dt_cond in conf.gettuple("filter", "dt_cond"):
        dt_length, binsize = [config.str2dur(s) for s in dt_cond.split("_")]
        # The term length is end - top; "top_dt - end_dt" is negative and
        # never equals a duration, which forced a reload every time.
        if dt_length == end_dt - top_dt:
            temp_edict = edict
        else:
            temp_edict = resize_edict(ld, evmap, end_dt, dt_length, area)
        d_stat = event2stat(temp_edict,
                            top_dt,
                            end_dt,
                            binsize,
                            binarize=False)
        for eid, l_stat in d_stat.iteritems():
            _logger.debug("periodicity test for eid {0} {1}".format(
                eid, evmap.info_str(eid)))
            # Skip events already judged periodic under an earlier condition.
            if eid in s_eid_periodic:
                continue
            if not fourier.pretest(conf, l_stat, binsize):
                continue

            flag, interval = is_removed(conf, l_stat, binsize, alg)
            if flag:
                _logger.debug("eid {0} is periodic ({1}, {2})".format(
                    eid, interval, binsize))
                s_eid_periodic.add(eid)

    for eid in s_eid_periodic:
        _logger.debug("remove eid {0} from dataset".format(eid))
        ret_edict.pop(eid)
        ret_evmap.pop(eid)

    return _remap_eid(ret_edict, ret_evmap)