def _load(self, metrix):
    """
    Load time series.
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
    :return TimeSeries: a TimeSeries object.
    """
    if isinstance(metrix, TimeSeries):
        # Already a TimeSeries: hand the very same object back.
        loaded = metrix
    elif isinstance(metrix, dict):
        loaded = TimeSeries(metrix)
    else:
        # Every other input is first passed through utils.read_csv.
        loaded = TimeSeries(utils.read_csv(metrix))
    return loaded
def _load(self, time_series):
    """
    Load time series into a TimeSeries object.
    :param time_series: a TimeSeries, a dictionary or a path to a csv file(str).
    :return TimeSeries: a TimeSeries object.
    """
    if isinstance(time_series, TimeSeries):
        # Nothing to convert; return the caller's object unchanged.
        return time_series
    # A dictionary is used directly; any other input goes through
    # utils.read_csv before being wrapped.
    source = time_series if isinstance(time_series, dict) else utils.read_csv(time_series)
    return TimeSeries(source)
def _load(self, time_series): """ Load time series. :param time_series: a TimeSeries, a dictionary or a path to a csv file(str). :return TimeSeries: a TimeSeries object. """ if not len(time_series): return None if isinstance(time_series, TimeSeries): return time_series if isinstance(time_series, dict): return TimeSeries(time_series) return TimeSeries(time_series)
def cvtTimeSeries(self, ts):
    """
    Convert a plain list into a TimeSeries keyed by element position.
    :param ts: a TimeSeries, a list, or any other object.
    :return: a new TimeSeries when ts is a list (keys are the 0-based list indices); otherwise ts itself, unchanged.
    """
    if isinstance(ts, TimeSeries) or not isinstance(ts, list):
        # Only lists are converted; everything else passes straight through.
        return ts
    return TimeSeries(dict(enumerate(ts)))
def _compute_anom_data_decay_all(self):
    """
    Compute anomaly scores using a lagging window covering all the data points before.

    Each score is the absolute distance between a value and the exponential
    moving average at the same position, divided by the standard deviation of
    all values when that deviation is non-zero. The denoised result is stored
    in self.anom_scores as a TimeSeries.
    """
    anom_scores = {}
    values = self.time_series.values
    ema = utils.compute_ema(self.smoothing_factor, values)
    stdev = numpy.std(values)
    for i, (timestamp, value) in enumerate(self.time_series_items):
        # Take the magnitude in both branches so a score is never negative,
        # including the zero-stdev fallback where no normalisation is possible.
        deviation = abs(value - ema[i])
        anom_scores[timestamp] = deviation / stdev if stdev else deviation
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _set_scores(self):
    """
    Compute anomaly scores for the time series.
    The score of a point is how far it lies beyond the violated absolute
    threshold, or 0.0 when it violates neither threshold.
    """
    upper = self.absolute_threshold_value_upper
    lower = self.absolute_threshold_value_lower
    # NOTE(review): thresholds are tested for truthiness, so a threshold of 0
    # is skipped exactly like an unset one -- confirm that this is intended.
    anom_scores = {}
    for timestamp, value in self.time_series.items():
        score = 0.0
        if upper and value > upper:
            score = value - upper
        # The lower check runs second and overrides the upper one if both match.
        if lower and value < lower:
            score = lower - value
        anom_scores[timestamp] = score
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _set_scores(self):
    """
    Compute anomaly scores for the time series.
    A score is the absolute gap between a derivative and the exponential
    moving average of the derivatives at the same position, divided by the
    standard deviation of all such gaps when that deviation is non-zero.
    """
    self._compute_derivatives()
    derivatives_ema = utils.compute_ema(self.smoothing_factor, self.derivatives)
    # Index by position (rather than zip) so a length mismatch still raises.
    anom_scores = {
        timestamp: abs(self.derivatives[i] - derivatives_ema[i])
        for i, (timestamp, _) in enumerate(self.time_series_items)
    }
    stdev = numpy.std(list(anom_scores.values()))
    if stdev:
        anom_scores = {
            timestamp: score / stdev
            for timestamp, score in anom_scores.items()
        }
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _set_scores(self):
    """
    Set anomaly scores using a weighted sum.
    Runs the exponential-average and derivative detectors and blends their
    per-timestamp scores; the denoised result is stored in self.anom_scores.
    """
    anom_scores_ema = self.exp_avg_detector.run()
    anom_scores_deri = self.derivative_detector.run()
    anom_scores = {}
    for timestamp in anom_scores_ema.timestamps:
        ema_score = anom_scores_ema[timestamp]
        deri_score = anom_scores_deri[timestamp]
        # Weighted blend of both detectors, never lower than the ema score.
        weighted = ema_score * DEFAULT_DETECTOR_EMA_WEIGHT \
            + deri_score * (1 - DEFAULT_DETECTOR_EMA_WEIGHT)
        score = max(ema_score, weighted)
        # When the ema score is significant, the derivative score may also
        # raise the result.
        if ema_score > DEFAULT_DETECTOR_EMA_SIGNIFICANT:
            score = max(score, deri_score)
        anom_scores[timestamp] = score
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _compute_anom_data_using_window(self):
    """
    Compute anomaly scores using a lagging window.
    For each point, the window holds the point itself plus up to
    self.lag_window_size preceding values. The window score is divided by the
    standard deviation of all values when that deviation is non-zero.
    """
    values = self.time_series.values
    stdev = numpy.std(values)
    window = self.lag_window_size
    anom_scores = {}
    for i, (timestamp, value) in enumerate(self.time_series_items):
        # Near the start there are fewer than `window` predecessors, so the
        # slice begins at the first value instead.
        start = 0 if i < window else i - window
        raw_score = self._compute_anom_score(values[start:i + 1], value)
        anom_scores[timestamp] = raw_score / stdev if stdev else raw_score
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _set_scores(self):
    """
    Compute anomaly scores for the time series by sliding both lagging window and future window.
    Positions too close to either end of the series get a score of 0.
    """
    anom_scores = {}
    self._generate_SAX()
    self._construct_all_SAX_chunk_dict()
    last_scored = self.time_series_length - self.future_window_size
    first_scored = self.lag_window_size
    for i, timestamp in enumerate(self.time_series.timestamps):
        # Only indices in [first_scored, last_scored] are compared window-to-window.
        if first_scored <= i <= last_scored:
            anom_scores[timestamp] = self._compute_anom_score_between_two_windows(i)
        else:
            anom_scores[timestamp] = 0
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def fit(self, X):
    """Fit detector.

    The first column of X is cast to datetime64 and used as the time axis;
    the remaining columns are reduced to a single component with
    IncrementalPCA and used as the series values.

    Parameters
    ----------
    X : dataframe of shape (n_samples, n_features)
        The input samples.

    Returns
    -------
    self
        The detector itself, with ``ts``, ``ts_value`` and ``detector`` set.
    """
    data = X.to_numpy()
    timestamp = np.asarray(data[:, 0].astype(np.datetime64))
    reducer = IncrementalPCA(n_components=1)
    # Flatten the (n_samples, 1) projection into a 1-D array of values.
    value = np.reshape(reducer.fit_transform(data[:, 1:]), -1)
    series = pd.Series(value, index=timestamp)
    # Re-key the series through to_epoch applied to each index entry's string form.
    series.index = series.index.map(lambda d: to_epoch(str(d)))
    lts = TimeSeries(series.to_dict())
    self.ts = timestamp
    self.ts_value = value
    self.detector = anomaly_detector.AnomalyDetector(lts)
    return self
def _set_scores(self):
    """
    Compute anomaly scores for the time series.
    The score of a point is its percentage difference from the baseline when
    that difference is beyond the upper or lower percent threshold, else 0.0.
    """
    upper = self.percent_threshold_upper
    lower = self.percent_threshold_lower
    # NOTE(review): thresholds are tested for truthiness, so a threshold of 0
    # is skipped exactly like an unset one -- confirm that this is intended.
    anom_scores = {}
    for i, (timestamp, value) in enumerate(self.time_series.items()):
        # NOTE(review): the baseline is indexed by position i, not by
        # timestamp -- verify this matches how the baseline series is keyed.
        baseline_value = self.baseline_time_series[i]
        if baseline_value > 0:
            diff_percent = 100 * (value - baseline_value) / baseline_value
        else:
            # Without a positive baseline, any positive value counts as +100%.
            diff_percent = 100.0 if value > 0 else 0.0
        score = 0.0
        if upper and diff_percent > 0 and diff_percent > upper:
            score = diff_percent
        if lower and diff_percent < 0 and diff_percent < lower:
            # Report drops as positive magnitudes.
            score = -1 * diff_percent
        anom_scores[timestamp] = score
    self.anom_scores = TimeSeries(self._denoise_scores(anom_scores))
def _set_scores(self):
    """
    Compute anomaly scores for the time series.
    Anomalous regions come from a rolling sign test against the baseline;
    every point inside a region gets 100 times that region's probability,
    every other point gets 0.
    """
    values = self.time_series.values
    scores = np.zeros(len(values))
    regions = SignTest._rolling_sign_test(
        self.scale * np.array(values),
        self.scale * np.array(self.baseline_time_series.values),
        k=self.scan_window,
        conf=self.confidence,
        alpha=float(self.percent_threshold) / 100,
        offset=self.scale * self.offset)
    # Each result unpacks as ((start, stop), probability); stop is used as an
    # exclusive slice bound.
    for (start, stop), likelihood in regions:
        scores[start:stop] = 100 * likelihood
    self.anom_scores = TimeSeries({
        timestamp: scores[i]
        for i, timestamp in enumerate(self.time_series.timestamps)
    })
def real_anomaly_detection(samples, smoothing_factor):
    """
    Run the "derivative_detector" algorithm over samples and list every anomalous time.
    :param samples: data passed to the TimeSeries constructor.
    :param smoothing_factor: smoothing factor used to compute exponential moving averages.
    :return list: every time from the start to the end (inclusive, in steps of 1) of each anomaly's time window.
    """
    ts = TimeSeries(samples)
    detector = anomaly_detector.AnomalyDetector(
        ts,
        algorithm_name="derivative_detector",
        algorithm_params={
            # smoothing factor used to compute exponential moving averages
            'smoothing_factor': smoothing_factor,
        })
    anomaly_times = []
    for anomaly in detector.get_anomalies():
        window = anomaly.get_time_window()
        start, end = window[0], window[1]
        # Expand the window into individual time steps, end included.
        while start <= end:
            anomaly_times.append(start)
            start += 1
    # Function-call form prints the same on Python 2 and is valid on Python 3.
    print(anomaly_times)
    return anomaly_times