def update(self, conf):
    """Extend the stored change-point data with newly arrived log data.

    For every event in the event map generated from ``conf``, the log
    messages between the previous end of processing (or the adjusted top
    of the database on the first run) and the adjusted end of the
    database are discretized into bins of ``self._binsize``. Each bin
    value is appended to the event's data series and fed to the event's
    change-finder object, whose returned score is appended to the score
    series. The updated objects are dumped per event, and
    ``self._end_dt``, ``self._dt_label`` and ``self._top_dt`` are
    advanced afterwards.

    Args:
        conf: Configuration object handed to ``log_db.LogData`` and
            ``log2event.generate_evmap``.

    Returns:
        None. Returns early (after a warning) when the database does not
        yet reach one bin beyond the previously processed end.
    """
    ld = log_db.LogData(conf)
    db_top_dt, db_end_dt = ld.dt_term()
    # Skip only when the database does NOT yet extend a whole bin past the
    # already-processed end. The comparison used to be "<", which returned
    # early exactly when enough new data was available.
    if self._end_dt is not None and \
            self._end_dt + self._binsize > db_end_dt:
        _logger.warning("New data is too small or not found")
        return

    self._evmap = log2event.generate_evmap(conf, ld, None, None)

    if self._end_dt is None:
        top_dt = dtutil.adj_sep(db_top_dt, self._binsize)
    else:
        top_dt = self._end_dt
    # The last bin will not be added, because it may be uncompleted
    end_dt = dtutil.adj_sep(db_end_dt, self._binsize)
    l_label = dtutil.label(top_dt, end_dt, self._binsize)
    _logger.info("updating changepoint data ({0} - {1})".format(
        top_dt, end_dt))

    for eid in self._evmap.iter_eid():
        evdef = self._evmap.info(eid)
        _logger.info("processing {0}".format(self._evmap.info_str(eid)))

        cf = self._load_cf(evdef)
        l_data, l_score = self._load_data(evdef)
        if cf is None:
            # No stored state for this event yet: start from scratch.
            cf = self._new_cf()
            l_data = []
            l_score = []

        l_dt = [line.dt for line in ld.iter_lines(
            **self._evmap.iterline_args(eid, top_dt, end_dt))]
        if l_dt:
            _logger.info("{0} messages in given term".format(len(l_dt)))
            l_val = dtutil.discretize(l_dt, l_label, binarize=False)
            for val in l_val:
                l_data.append(val)
                score = cf.update(val)
                l_score.append(score)
            self._dump_cf(evdef, cf)
            self._dump_data(evdef, l_data, l_score)
        else:
            # NOTE(review): nothing is appended for this event here, so its
            # series may end up shorter than self._dt_label -- confirm this
            # is intended.
            _logger.info("no message found in processing term, passed")

    self._end_dt = end_dt
    self._dt_label += l_label
    if self._top_dt is None:
        self._top_dt = top_dt
    _logger.info("task completed")
def update(self, conf):
    """Extend the stored change-point data with newly arrived log data.

    For every event in the event map generated from ``conf``, the log
    messages between the previous end of processing (or the adjusted top
    of the database on the first run) and the adjusted end of the
    database are discretized into bins of ``self._binsize``. Each bin
    value is appended to the event's data series and fed to the event's
    change-finder object, whose returned score is appended to the score
    series. The updated objects are dumped per event, and
    ``self._end_dt``, ``self._dt_label`` and ``self._top_dt`` are
    advanced afterwards.

    Args:
        conf: Configuration object handed to ``log_db.LogData`` and
            ``log2event.generate_evmap``.

    Returns:
        None. Returns early (after a warning) when the database does not
        yet reach one bin beyond the previously processed end.
    """
    ld = log_db.LogData(conf)
    db_top_dt, db_end_dt = ld.dt_term()
    # Skip only when the database does NOT yet extend a whole bin past the
    # already-processed end. The comparison used to be "<", which returned
    # early exactly when enough new data was available.
    if self._end_dt is not None and \
            self._end_dt + self._binsize > db_end_dt:
        _logger.warning("New data is too small or not found")
        return

    self._evmap = log2event.generate_evmap(conf, ld, None, None)

    if self._end_dt is None:
        top_dt = dtutil.adj_sep(db_top_dt, self._binsize)
    else:
        top_dt = self._end_dt
    # The last bin will not be added, because it may be uncompleted
    end_dt = dtutil.adj_sep(db_end_dt, self._binsize)
    l_label = dtutil.label(top_dt, end_dt, self._binsize)
    _logger.info("updating changepoint data ({0} - {1})".format(
        top_dt, end_dt))

    for eid in self._evmap.iter_eid():
        evdef = self._evmap.info(eid)
        _logger.info("processing {0}".format(self._evmap.info_str(eid)))

        cf = self._load_cf(evdef)
        l_data, l_score = self._load_data(evdef)
        if cf is None:
            # No stored state for this event yet: start from scratch.
            cf = self._new_cf()
            l_data = []
            l_score = []

        l_dt = [line.dt for line in ld.iter_lines(
            **self._evmap.iterline_args(eid, top_dt, end_dt))]
        if l_dt:
            _logger.info("{0} messages in given term".format(len(l_dt)))
            l_val = dtutil.discretize(l_dt, l_label, binarize=False)
            for val in l_val:
                l_data.append(val)
                score = cf.update(val)
                l_score.append(score)
            self._dump_cf(evdef, cf)
            self._dump_data(evdef, l_data, l_score)
        else:
            # NOTE(review): nothing is appended for this event here, so its
            # series may end up shorter than self._dt_label -- confirm this
            # is intended.
            _logger.info("no message found in processing term, passed")

    self._end_dt = end_dt
    self._dt_label += l_label
    if self._top_dt is None:
        self._top_dt = top_dt
    _logger.info("task completed")
def get(self, evdef, top_dt = None, end_dt = None):
    """Return the stored series of one event restricted to a time range.

    Args:
        evdef: Event definition passed to ``self._load_data``.
        top_dt: Start of the requested range; defaults to
            ``self._top_dt``.
        end_dt: End of the requested range; defaults to ``self._end_dt``.
            The element at the end index is excluded (slice semantics).

    Returns:
        A list of ``(label, data, score)`` tuples taken from
        ``self._dt_label`` and the loaded data/score series, or None if
        no data is stored for the event.

    Note:
        Boundaries that are not bin separators (per ``dtutil.is_sep``)
        are converted with ``dtutil.adj_sep`` (top) or
        ``dtutil.radj_sep`` (end) before being looked up in the label
        sequence; the ``index`` lookup fails if the resulting value is
        not among the labels.
    """
    if top_dt is None:
        top_dt = self._top_dt
    if end_dt is None:
        end_dt = self._end_dt
    l_label = self._dt_label
    l_data, l_score = self._load_data(evdef)
    if len(l_data) == 0:
        return None

    if dtutil.is_sep(top_dt, self._binsize):
        top_index = l_label.index(top_dt)
    else:
        top_index = l_label.index(dtutil.adj_sep(top_dt, self._binsize))
    if dtutil.is_sep(end_dt, self._binsize):
        end_index = l_label.index(end_dt)
    else:
        end_index = l_label.index(dtutil.radj_sep(end_dt, self._binsize))

    # zip() is a lazy iterator on Python 3 and cannot be sliced directly;
    # materializing it first yields the same list on Python 2 and 3.
    return list(zip(l_label, l_data, l_score))[top_index:end_index]
def get(self, evdef, top_dt=None, end_dt=None):
    """Return the stored series of one event restricted to a time range.

    Args:
        evdef: Event definition passed to ``self._load_data``.
        top_dt: Start of the requested range; defaults to
            ``self._top_dt``.
        end_dt: End of the requested range; defaults to ``self._end_dt``.
            The element at the end index is excluded (slice semantics).

    Returns:
        A list of ``(label, data, score)`` tuples taken from
        ``self._dt_label`` and the loaded data/score series, or None if
        no data is stored for the event.

    Note:
        Boundaries that are not bin separators (per ``dtutil.is_sep``)
        are converted with ``dtutil.adj_sep`` (top) or
        ``dtutil.radj_sep`` (end) before being looked up in the label
        sequence; the ``index`` lookup fails if the resulting value is
        not among the labels.
    """
    if top_dt is None:
        top_dt = self._top_dt
    if end_dt is None:
        end_dt = self._end_dt
    l_label = self._dt_label
    l_data, l_score = self._load_data(evdef)
    if len(l_data) == 0:
        return None

    if dtutil.is_sep(top_dt, self._binsize):
        top_index = l_label.index(top_dt)
    else:
        top_index = l_label.index(dtutil.adj_sep(top_dt, self._binsize))
    if dtutil.is_sep(end_dt, self._binsize):
        end_index = l_label.index(end_dt)
    else:
        end_index = l_label.index(dtutil.radj_sep(end_dt, self._binsize))

    # zip() is a lazy iterator on Python 3 and cannot be sliced directly;
    # materializing it first yields the same list on Python 2 and 3.
    return list(zip(l_label, l_data, l_score))[top_index:end_index]
def dt_ticks(value_term, dt_term, dt_bin, duration = None, recent = False):
    """Compute axis tick positions and their datetime labels.

    Args:
        value_term (int, int): First and last axis value (both included),
            paired one by one with the datetime bins of dt_term.
        dt_term (datetime.datetime, datetime.datetime): The range of
            datetime for ticks, corresponding to value_term.
        dt_bin (datetime.timedelta): Bin length in datetime.
        duration (datetime.timedelta): Interval between ticks.
            If None, it is chosen from the length of dt_term.
        recent (bool): By default (False), ticks are laid out starting
            from the old end of the term. Otherwise they are laid out
            backwards from the recent end.

    Returns:
        tuple: (tick values, tick label strings). A value is emitted only
        for a tick that coincides exactly with a bin datetime, matched in
        ascending order; the label list always covers every tick.
    """
    import dtutil

    term_start, term_stop = dt_term

    if duration is None:
        span = term_stop - term_start
        # (largest term length covered, tick interval), ascending
        step_table = (
            (datetime.timedelta(minutes=3), datetime.timedelta(seconds=10)),
            (datetime.timedelta(minutes=10), datetime.timedelta(minutes=1)),
            (datetime.timedelta(minutes=30), datetime.timedelta(minutes=3)),
            (datetime.timedelta(hours=1), datetime.timedelta(minutes=5)),
            (datetime.timedelta(hours=6), datetime.timedelta(minutes=10)),
            (datetime.timedelta(days=1), datetime.timedelta(hours=6)),
            (datetime.timedelta(days=7), datetime.timedelta(days=1)),
            (datetime.timedelta(days=14), datetime.timedelta(days=2)),
            (datetime.timedelta(days=60), datetime.timedelta(days=7)),
            (datetime.timedelta(days=300), datetime.timedelta(days=20)),
        )
        for limit, step in step_table:
            if span <= limit:
                duration = step
                break
        else:
            duration = datetime.timedelta(days=50)

    # Day-or-longer intervals are aligned on days and labelled by date;
    # shorter ones are aligned on the interval itself and labelled by time.
    one_day = datetime.timedelta(days=1)
    if duration >= one_day:
        align_unit = one_day
        label_fmt = "%Y-%m-%d"
    else:
        align_unit = duration
        label_fmt = "%H:%M:%S"

    if recent is False:
        # Walk forwards from the aligned start of the term.
        tick = dtutil.radj_sep(term_start, align_unit)
        assert(tick <= term_stop)
        ticks_label = [tick]
        tick = tick + duration
        while tick <= term_stop:
            ticks_label.append(tick)
            tick = tick + duration
    else:
        # Walk backwards from the aligned end of the term, then restore
        # ascending order.
        tick = dtutil.adj_sep(term_stop, align_unit)
        assert(tick >= term_start)
        ticks_label = [tick]
        tick = tick - duration
        while tick >= term_start:
            ticks_label.append(tick)
            tick = tick - duration
        ticks_label = sorted(ticks_label)

    axis_values = range(value_term[0], value_term[1] + 1, 1)
    bin_dts = dtutil.dtrange(term_start, term_stop, dt_bin,
                             include_end=True)

    # Scan the bins once, consuming ticks in order as they are met.
    ticks_values = []
    cursor = 0
    n_ticks = len(ticks_label)
    for value, bin_dt in zip(axis_values, bin_dts):
        if bin_dt == ticks_label[cursor]:
            ticks_values.append(value)
            cursor += 1
            if cursor == n_ticks:
                break

    return ticks_values, [tick.strftime(label_fmt) for tick in ticks_label]