Beispiel #1
0
    def update(self, conf):
        """Append newly available log data to the stored changepoint data.

        For every event of the freshly generated event map, the messages
        between the previously processed end (or the adjusted top of the
        database on the first run) and the adjusted end of the database are
        discretized with self._binsize. Each value is passed to the event's
        changepoint object (cf.update), and the object, the values and the
        returned scores are dumped again.

        Args:
            conf: Configuration handed to log_db.LogData and
                log2event.generate_evmap.

        Returns:
            None. Returns early, after logging a warning, when the database
            does not reach at least one bin beyond the range that has
            already been processed.
        """
        ld = log_db.LogData(conf)
        db_top_dt, db_end_dt = ld.dt_term()
        # New data is only worth processing when the database extends at
        # least one whole bin past the end of the previous update.
        if self._end_dt is not None and \
                db_end_dt < self._end_dt + self._binsize:
            _logger.warning("New data is too small or not found")
            return

        self._evmap = log2event.generate_evmap(conf, ld, None, None)

        if self._end_dt is None:
            # First update: start from the (adjusted) top of the database
            top_dt = dtutil.adj_sep(db_top_dt, self._binsize)
        else:
            # Later updates: continue where the previous one stopped
            top_dt = self._end_dt
        # The last bin will not be added, because it may be uncompleted
        end_dt = dtutil.adj_sep(db_end_dt, self._binsize)
        l_label = dtutil.label(top_dt, end_dt, self._binsize)
        _logger.info("updating changepoint data ({0} - {1})".format(
            top_dt, end_dt))

        for eid in self._evmap.iter_eid():
            evdef = self._evmap.info(eid)
            _logger.info("processing {0}".format(self._evmap.info_str(eid)))

            cf = self._load_cf(evdef)
            l_data, l_score = self._load_data(evdef)
            if cf is None:
                # Nothing stored for this event yet: start from scratch
                cf = self._new_cf()
                l_data = []
                l_score = []

            l_dt = [
                line.dt for line in ld.iter_lines(
                    **self._evmap.iterline_args(eid, top_dt, end_dt))
            ]
            if not l_dt:
                # NOTE(review): l_data / l_score are not extended here while
                # self._dt_label still grows below -- confirm that get()
                # tolerates series shorter than the label list.
                _logger.info("no message found in processing term, passed")
                continue

            _logger.info("{0} messages in given term".format(len(l_dt)))
            for val in dtutil.discretize(l_dt, l_label, binarize=False):
                l_data.append(val)
                l_score.append(cf.update(val))
            self._dump_cf(evdef, cf)
            self._dump_data(evdef, l_data, l_score)

        self._end_dt = end_dt
        self._dt_label += l_label
        if self._top_dt is None:
            self._top_dt = top_dt
        _logger.info("task completed")
    def update(self, conf):
        """Append newly available log data to the stored changepoint data.

        For every event of the freshly generated event map, the messages
        between the previously processed end (or the adjusted top of the
        database on the first run) and the adjusted end of the database are
        discretized with self._binsize. Each value is passed to the event's
        changepoint object (cf.update), and the object, the values and the
        returned scores are dumped again.

        Args:
            conf: Configuration handed to log_db.LogData and
                log2event.generate_evmap.

        Returns:
            None. Returns early, after logging a warning, when the database
            does not reach at least one bin beyond the range that has
            already been processed.
        """
        ld = log_db.LogData(conf)
        db_top_dt, db_end_dt = ld.dt_term()
        # New data is only worth processing when the database extends at
        # least one whole bin past the end of the previous update.
        if self._end_dt is not None and \
                db_end_dt < self._end_dt + self._binsize:
            _logger.warning("New data is too small or not found")
            return

        self._evmap = log2event.generate_evmap(conf, ld, None, None)

        if self._end_dt is None:
            # First update: start from the (adjusted) top of the database
            top_dt = dtutil.adj_sep(db_top_dt, self._binsize)
        else:
            # Later updates: continue where the previous one stopped
            top_dt = self._end_dt
        # The last bin will not be added, because it may be uncompleted
        end_dt = dtutil.adj_sep(db_end_dt, self._binsize)
        l_label = dtutil.label(top_dt, end_dt, self._binsize)
        _logger.info("updating changepoint data ({0} - {1})".format(
                top_dt, end_dt))

        for eid in self._evmap.iter_eid():
            evdef = self._evmap.info(eid)
            _logger.info("processing {0}".format(self._evmap.info_str(eid)))

            cf = self._load_cf(evdef)
            l_data, l_score = self._load_data(evdef)
            if cf is None:
                # Nothing stored for this event yet: start from scratch
                cf = self._new_cf()
                l_data = []
                l_score = []

            l_dt = [line.dt for line in ld.iter_lines(
                    **self._evmap.iterline_args(eid, top_dt, end_dt))]
            if not l_dt:
                # NOTE(review): l_data / l_score are not extended here while
                # self._dt_label still grows below -- confirm that get()
                # tolerates series shorter than the label list.
                _logger.info("no message found in processing term, passed")
                continue

            _logger.info("{0} messages in given term".format(len(l_dt)))
            for val in dtutil.discretize(l_dt, l_label, binarize=False):
                l_data.append(val)
                l_score.append(cf.update(val))
            self._dump_cf(evdef, cf)
            self._dump_data(evdef, l_data, l_score)

        self._end_dt = end_dt
        self._dt_label += l_label
        if self._top_dt is None:
            self._top_dt = top_dt
        _logger.info("task completed")
 def get(self, evdef, top_dt = None, end_dt = None):
     """Return the stored rows of one event within a datetime range.

     Args:
         evdef: Event definition handed to self._load_data.
         top_dt: Start of the requested range. Defaults to self._top_dt.
         end_dt: End of the requested range, used as the exclusive upper
             bound of the slice. Defaults to self._end_dt.

     Returns:
         A list of (label, value, score) tuples built from self._dt_label
         and the loaded data, or None when no data is stored for evdef.

     Raises:
         ValueError: If an adjusted boundary is not found in
             self._dt_label (raised by list.index).
     """
     if top_dt is None:
         top_dt = self._top_dt
     if end_dt is None:
         end_dt = self._end_dt
     l_label = self._dt_label
     l_data, l_score = self._load_data(evdef)
     if len(l_data) == 0:
         return None

     # Boundaries that are not on a bin separator are moved onto one with
     # dtutil.adj_sep (top) / dtutil.radj_sep (end) before the lookup.
     if not dtutil.is_sep(top_dt, self._binsize):
         top_dt = dtutil.adj_sep(top_dt, self._binsize)
     if not dtutil.is_sep(end_dt, self._binsize):
         end_dt = dtutil.radj_sep(end_dt, self._binsize)
     top_index = l_label.index(top_dt)
     end_index = l_label.index(end_dt)

     # zip() is an iterator on Python 3 and cannot be sliced directly;
     # materialising it first gives the same list on Python 2 and 3.
     return list(zip(l_label, l_data, l_score))[top_index:end_index]
Beispiel #4
0
 def get(self, evdef, top_dt=None, end_dt=None):
     """Return the stored rows of one event within a datetime range.

     Args:
         evdef: Event definition handed to self._load_data.
         top_dt: Start of the requested range. Defaults to self._top_dt.
         end_dt: End of the requested range, used as the exclusive upper
             bound of the slice. Defaults to self._end_dt.

     Returns:
         A list of (label, value, score) tuples built from self._dt_label
         and the loaded data, or None when no data is stored for evdef.

     Raises:
         ValueError: If an adjusted boundary is not found in
             self._dt_label (raised by list.index).
     """
     if top_dt is None:
         top_dt = self._top_dt
     if end_dt is None:
         end_dt = self._end_dt
     l_label = self._dt_label
     l_data, l_score = self._load_data(evdef)
     if len(l_data) == 0:
         return None

     # Boundaries that are not on a bin separator are moved onto one with
     # dtutil.adj_sep (top) / dtutil.radj_sep (end) before the lookup.
     if not dtutil.is_sep(top_dt, self._binsize):
         top_dt = dtutil.adj_sep(top_dt, self._binsize)
     if not dtutil.is_sep(end_dt, self._binsize):
         end_dt = dtutil.radj_sep(end_dt, self._binsize)
     top_index = l_label.index(top_dt)
     end_index = l_label.index(end_dt)

     # zip() is an iterator on Python 3 and cannot be sliced directly;
     # materialising it first gives the same list on Python 2 and 3.
     return list(zip(l_label, l_data, l_score))[top_index:end_index]
Beispiel #5
0
def dt_ticks(value_term, dt_term, dt_bin, duration = None, recent = False):
    """Choose tick positions and tick labels for a datetime axis.

    Args:
        value_term (int, int): Inclusive range of axis values. The values
                are paired, in order, with the datetimes generated from
                dt_term in steps of dt_bin.
        dt_term (datetime.datetime, datetime.datetime):
                The range of datetime for ticks, corresponding to value_term.
        dt_bin (datetime.timedelta): Bin length in datetime.
        duration (datetime.timedelta): Distance between two ticks. If None,
                it is derived from the length of dt_term.
        recent (bool): By default ticks are laid out starting from the old
                end of dt_term. Any value other than the object False lays
                them out starting from the recent end instead.

    Returns:
        tuple: (ticks_values, labels). ticks_values holds the axis values
        whose datetime coincides with a tick; labels holds one formatted
        string per tick datetime ("%Y-%m-%d" when the tick distance is one
        day or more, "%H:%M:%S" otherwise).
    """
    import dtutil
    top_dt, end_dt = dt_term

    if duration is None:
        span = end_dt - top_dt
        # (largest term covered, tick distance), in ascending order
        thresholds = (
            (datetime.timedelta(minutes=3), datetime.timedelta(seconds=10)),
            (datetime.timedelta(minutes=10), datetime.timedelta(minutes=1)),
            (datetime.timedelta(minutes=30), datetime.timedelta(minutes=3)),
            (datetime.timedelta(hours=1), datetime.timedelta(minutes=5)),
            (datetime.timedelta(hours=6), datetime.timedelta(minutes=10)),
            (datetime.timedelta(days=1), datetime.timedelta(hours=6)),
            (datetime.timedelta(days=7), datetime.timedelta(days=1)),
            (datetime.timedelta(days=14), datetime.timedelta(days=2)),
            (datetime.timedelta(days=60), datetime.timedelta(days=7)),
            (datetime.timedelta(days=300), datetime.timedelta(days=20)),
        )
        # Fallback for terms longer than every threshold
        duration = datetime.timedelta(days=50)
        for limit, step in thresholds:
            if span <= limit:
                duration = step
                break

    one_day = datetime.timedelta(days=1)
    if duration >= one_day:
        # Day-scale ticks are aligned to day boundaries and shown as dates
        adjsearch_dur = one_day
        label_format = "%Y-%m-%d"
    else:
        adjsearch_dur = duration
        label_format = "%H:%M:%S"

    if recent is False:
        # Walk forward from the first aligned datetime
        tick = dtutil.radj_sep(top_dt, adjsearch_dur)
        assert tick <= end_dt
        ticks_label = [tick]
        tick = tick + duration
        while tick <= end_dt:
            ticks_label.append(tick)
            tick = tick + duration
    else:
        # Walk backward from the last aligned datetime, then restore order
        tick = dtutil.adj_sep(end_dt, adjsearch_dur)
        assert tick >= top_dt
        ticks_label = [tick]
        tick = tick - duration
        while tick >= top_dt:
            ticks_label.append(tick)
            tick = tick - duration
        ticks_label.sort()

    axis_values = range(value_term[0], value_term[1] + 1, 1)
    axis_dts = dtutil.dtrange(top_dt, end_dt, dt_bin, include_end = True)
    ticks_values = []
    cursor = 0  # index of the next tick datetime still waiting for a match
    for val, dt in zip(axis_values, axis_dts):
        if dt == ticks_label[cursor]:
            ticks_values.append(val)
            cursor += 1
            if cursor == len(ticks_label):
                break
    return ticks_values, [tick_dt.strftime(label_format)
                          for tick_dt in ticks_label]