Ejemplo n.º 1
0
 def test_algorithm_params(self):
   """
   Check the optional algorithm_params argument.

   Passing the string '0' must raise exceptions.InvalidDataFormat, and a
   smoothing_factor of 0.3 must give score values that differ from those
   of self.detector1.
   """
   def build_with_bad_params():
     return AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

   self.assertRaises(exceptions.InvalidDataFormat, build_with_bad_params)

   tuned = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
   reference_values = self.detector1.get_all_scores().values
   tuned_values = tuned.get_all_scores().values
   self.assertNotEqual(reference_values, tuned_values)
Ejemplo n.º 2
0
 def test_algorithm_DefaultDetector(self):
   """
   Naming 'default_detector' explicitly must give the same score
   timestamps and values as self.detector1.
   """
   explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
   for field in ('timestamps', 'values'):
     self.assertEqual(getattr(explicit.get_all_scores(), field),
                      getattr(self.detector1.get_all_scores(), field))
Ejemplo n.º 3
0
def clean_anomaly2(df,
                   index_name="15分钟段",
                   var_name="实际功率",
                   anomaly_limit=1.5,
                   base_p=0.3):
    """Drop outlying ``var_name`` rows inside each ``index_name`` group.

    For every group the rows outside a percentile band are removed first
    (2nd-99th percentile when the group median exceeds ``base_p``, else
    1st-98th, both bounds inclusive). The surviving values are z-score
    normalised and scored with the ``exp_avg_detector`` algorithm; only rows
    whose score is below ``anomaly_limit`` are kept. Groups whose standard
    deviation is not greater than zero are kept without scoring.

    :param df: DataFrame containing the ``index_name`` and ``var_name`` columns.
    :param index_name: column used to group the rows.
    :param var_name: column holding the values to clean.
    :param anomaly_limit: rows with a score at or above this value are dropped.
    :param base_p: median level that selects which percentile band is used.
    :return: a new DataFrame with a fresh integer index; an empty frame with
        the columns of ``df`` when ``df`` yields no groups.
    """
    kept = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        band = [2, 99] if np.median(temp[var_name]) > base_p else [1, 98]
        limit_low, limit_up = np.percentile(temp[var_name], band)
        inside = (temp[var_name] <= limit_up) & (temp[var_name] >= limit_low)
        temp = temp[inside].reset_index(drop=True)

        values = temp[var_name]
        spread = np.std(values)
        if spread > 0:
            # Normalise only once the spread is known to be non-zero, so a
            # flat (or empty) group never triggers a division by zero.
            normalised = (values - np.mean(values)) / spread
            my_detector = AnomalyDetector(normalised.to_dict(),
                                          algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            kept.append(temp[np.array(score.values) < anomaly_limit])
        else:
            kept.append(temp)

    if not kept:
        # pd.concat raises ValueError on an empty list; return an empty frame.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(kept, ignore_index=True)
Ejemplo n.º 4
0
 def test_anomaly_threshold(self):
   """
   With the same score_percentile_threshold (0.1), exp_avg_detector and
   derivative_detector must not report equal anomalies.
   """
   shared = {'score_percentile_threshold': 0.1}
   exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared)
   derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared)
   self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
Ejemplo n.º 5
0
 def test_threshold(self):
     """
     Test score threshold=0: exactly one anomaly is expected for self.s1.
     """
     anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
     # Check for None before taking len(): the original asserted the length
     # first, so its None check could never be the assertion that failed.
     self.assertIsNotNone(anomalies)
     self.assertEqual(len(anomalies), 1)
Ejemplo n.º 6
0
 def test_threshold(self):
   """
   Test score threshold=0: exactly one anomaly is expected for self.s1.
   """
   anomalies = AnomalyDetector(self.s1, score_threshold=0).get_anomalies()
   # Check for None before taking len(): the original asserted the length
   # first, so its None check could never be the assertion that failed.
   self.assertIsNotNone(anomalies)
   self.assertEqual(len(anomalies), 1)
Ejemplo n.º 7
0
 def test_score_only(self):
   """
   With score_only=True the detector must report zero anomalies, while the
   same algorithm without that flag returns a non-None result.
   """
   algorithm = 'derivative_detector'
   scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name=algorithm)
   regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
   self.assertIsNotNone(regular.get_anomalies())
   self.assertEqual(len(scores_only.get_anomalies()), 0)
Ejemplo n.º 8
0
 def test_custom_algorithm(self):
   """
   A detector built with algorithm_class=CustomAlgo must return at least
   one anomaly for self.s1 against the self.s2 baseline.
   """
   thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
   found = AnomalyDetector(self.s1,
                           baseline_time_series=self.s2,
                           algorithm_class=CustomAlgo,
                           algorithm_params=thresholds).get_anomalies()
   self.assertIsNotNone(found)
   self.assertGreater(len(found), 0)
Ejemplo n.º 9
0
 def test_algorithm_DefaultDetector(self):
     """
     Naming 'default_detector' explicitly must give the same score
     timestamps and values as self.detector1.
     """
     explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
     for field in ('timestamps', 'values'):
         self.assertEqual(getattr(explicit.get_all_scores(), field),
                          getattr(self.detector1.get_all_scores(), field))
Ejemplo n.º 10
0
 def __init__(self, metrix, related_metrices):
     """
     Load the primary series, detect its anomalies and run the analysis.

     :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
     :param list related_metrices: a list of time series.
     """
     self.metrix = self._load(metrix)
     # The detector receives the raw argument, not the loaded self.metrix.
     detector = AnomalyDetector(metrix)
     self.anomaly_detector = detector
     self.related_metrices = related_metrices
     self.anomalies = detector.get_anomalies()
     self._analyze()
Ejemplo n.º 11
0
def get_anomaly_series(series: pd.Series,
                       algorithm: str = "bitmap_detector") -> pd.Series:
    """Return the anomaly score of every point in ``series``.

    The series is converted to a dict and scored by ``AnomalyDetector`` with
    the requested algorithm. The result carries a default integer index, not
    the index of ``series``.

    :param series: values to score.
    :param algorithm: one of ``bitmap_detector``, ``derivative_detector`` or
        ``exp_avg_detector``; anything else fails the assertion.
    :return: a Series of scores in the detector's iteration order.
    """
    supported = ("bitmap_detector", "derivative_detector", "exp_avg_detector")
    assert algorithm in supported

    all_scores = AnomalyDetector(series.to_dict(),
                                 algorithm_name=algorithm).get_all_scores()
    return pd.Series([value for _, value in all_scores.iteritems()])
Ejemplo n.º 12
0
 def test_absolute_threshold_algorithm(self):
   """
   Test "absolute threshold" algorithm with a upper and lower threshold of 0.2

   It must find at least one anomaly in self.s1, and must raise
   RequiredParametersNotPassed when no algorithm_params are given.
   """
   limits = {'absolute_threshold_value_upper': 0.2,
             'absolute_threshold_value_lower': 0.2}
   found = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                           algorithm_params=limits).get_anomalies()
   self.assertIsNotNone(found)
   self.assertGreater(len(found), 0)

   def build_without_params():
     return AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

   self.assertRaises(exceptions.RequiredParametersNotPassed, build_without_params)
Ejemplo n.º 13
0
 def test_diff_percent_threshold_algorithm(self):
   """
   Test "diff percent threshold" algorithm with a threshold of 20%

   It must find at least one anomaly in self.s1 against the self.s2
   baseline, and must raise RequiredParametersNotPassed when no
   algorithm_params are given.
   """
   thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
   found = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                           algorithm_name='diff_percent_threshold',
                           algorithm_params=thresholds).get_anomalies()
   self.assertIsNotNone(found)
   self.assertGreater(len(found), 0)

   def build_without_params():
     return AnomalyDetector(self.s1, baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

   self.assertRaises(exceptions.RequiredParametersNotPassed, build_without_params)
Ejemplo n.º 14
0
 def isAnomaly(self, data):  # returns whether the last data point is an anomaly
     """
     Tell whether the last element of ``data`` is flagged as an anomaly.

     ``data`` is re-keyed by position (0..len-1) and scored with the
     ``exp_avg_detector`` algorithm using ``self.score_threshold``.

     :param data: indexable sequence of values.
     :return: ``(flag, anomalies)`` where ``flag`` is True when some anomaly's
         ``exact_timestamp`` equals the last position; ``(False, [])`` when
         detection fails with any ordinary exception.
     """
     length = len(data)
     try:
         detector = AnomalyDetector({i: data[i]
                                     for i in range(length)},
                                    algorithm_name='exp_avg_detector',
                                    score_threshold=self.score_threshold)
         anomalies = detector.get_anomalies()
         last_is_anomalous = any(anomaly.exact_timestamp == length - 1
                                 for anomaly in anomalies)
     except Exception:
         # Best effort: a failed detection counts as "no anomaly". Catching
         # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
         return False, []
     return last_is_anomalous, anomalies
Ejemplo n.º 15
0
def get_anoms(anomalous_ts):
    """Return the anomalies found in ``anomalous_ts`` with score threshold 1.5.

    :param anomalous_ts: anything ``dict()`` accepts (e.g. timestamp/value
        pairs); a falsy value returns ``[]`` straight away.
    :return: the detector's anomalies, or ``[]`` when detection fails (the
        traceback is logged).
    """
    if not anomalous_ts:
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through to the empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        logger.error(traceback.format_exc())
        logger.error('error :: AnomalyDetector')
    return anomalies
Ejemplo n.º 16
0
 def test_custom_algorithm(self):
     """
     A detector built with algorithm_class=CustomAlgo must return at least
     one anomaly for self.s1 against the self.s2 baseline.
     """
     thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
     found = AnomalyDetector(self.s1,
                             baseline_time_series=self.s2,
                             algorithm_class=CustomAlgo,
                             algorithm_params=thresholds).get_anomalies()
     self.assertIsNotNone(found)
     self.assertGreater(len(found), 0)
Ejemplo n.º 17
0
 def test_algorithm_params(self):
     """
     Check the optional algorithm_params argument.

     Passing the string '0' must raise ValueError, and a smoothing_factor of
     0.3 must give score values that differ from those of self.detector1.
     """
     def build_with_bad_params():
         return AnomalyDetector(self.s1,
                                algorithm_name='exp_avg_detector',
                                algorithm_params='0')

     self.assertRaises(ValueError, build_with_bad_params)

     tuned = AnomalyDetector(self.s1,
                             algorithm_name="exp_avg_detector",
                             algorithm_params={'smoothing_factor': 0.3})
     reference_values = self.detector1.get_all_scores().values
     tuned_values = tuned.get_all_scores().values
     self.assertNotEqual(reference_values, tuned_values)
    def detect(self, ts):
        """
        Score ``ts`` and return the scores keyed by formatted local time.

        :param ts: time series passed unchanged to ``AnomalyDetector``.
        :return: dict mapping a 'dd-Mon-YYYY HH:MM:SS' local-time string to the
            anomaly score at that timestamp; timestamps that format to the
            same string overwrite each other (last one wins).
        """
        all_scores = AnomalyDetector(ts).get_all_scores()
        overall_stats = {}
        for timestamp, value in all_scores.iteritems():
            label = time.strftime('%d-%b-%Y %H:%M:%S',
                                  time.localtime(timestamp))
            overall_stats[label] = value
        return overall_stats
Ejemplo n.º 19
0
def get_anomalies(series: pd.Series,
                  algorithm: str = "bitmap_detector") -> List[dict]:
    """Detect anomalies in ``series`` and describe each one as a dict.

    :param series: values to analyse; converted with ``to_dict()``.
    :param algorithm: one of ``bitmap_detector``, ``derivative_detector`` or
        ``exp_avg_detector``; anything else fails the assertion.
    :return: one dict per anomaly with the keys ``start_time``, ``end_time``,
        ``top_score_time`` and ``score``.
    """
    supported = ("bitmap_detector", "derivative_detector", "exp_avg_detector")
    assert algorithm in supported

    found = AnomalyDetector(series.to_dict(),
                            algorithm_name=algorithm).get_anomalies()
    summaries = []
    for anomaly in found:
        summaries.append({
            "start_time": anomaly.start_timestamp,
            "end_time": anomaly.end_timestamp,
            "top_score_time": anomaly.exact_timestamp,
            "score": anomaly.anomaly_score,
        })
    return summaries
Ejemplo n.º 20
0
 def __init__(self, ts=None, param=None):
     """
     Build a 'diff_percent_threshold' detector over the converted series.

     :param ts: time series handed to ``self.cvtTimeSeries``; an empty dict
         is used when omitted (a fresh one per call, not a shared default).
     :param param: forwarded as ``algorithm_params`` to ``AnomalyDetector``.
     """
     super(DiffPercentD, self).__init__()
     # Convert once and reuse the result for both the debug print and the
     # detector; the original converted the series twice.
     series = self.cvtTimeSeries({} if ts is None else ts)
     # Function-call form prints the same text on Python 2 and parses on Python 3.
     print(series)
     self.detector = AnomalyDetector(
         series,
         algorithm_name='diff_percent_threshold',
         algorithm_params=param)
Ejemplo n.º 21
0
 def test_absolute_threshold_algorithm(self):
     """
     Test "absolute threshold" algorithm with a upper and lower threshold of 0.2

     It must find at least one anomaly in self.s1, and must raise
     RequiredParametersNotPassed when no algorithm_params are given.
     """
     limits = {
         'absolute_threshold_value_upper': 0.2,
         'absolute_threshold_value_lower': 0.2
     }
     found = AnomalyDetector(self.s1,
                             algorithm_name='absolute_threshold',
                             algorithm_params=limits).get_anomalies()
     self.assertIsNotNone(found)
     self.assertGreater(len(found), 0)

     def build_without_params():
         return AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

     self.assertRaises(exceptions.RequiredParametersNotPassed,
                       build_without_params)
Ejemplo n.º 22
0
 def test_algorithm(self):
     """
     An unknown algorithm_name must raise exceptions.AlgorithmNotFound.
     """
     def build_unknown():
         return AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

     self.assertRaises(exceptions.AlgorithmNotFound, build_unknown)
Ejemplo n.º 23
0
def get_anoms(anomalous_ts):
    """Return the anomalies found in ``anomalous_ts`` with score threshold 1.5.

    :param anomalous_ts: anything ``dict()`` accepts (e.g. timestamp/value
        pairs); a falsy value is logged as an error and returns ``[]``.
    :return: the detector's anomalies, or ``[]`` when detection fails (the
        traceback is logged).
    """
    logger = logging.getLogger(skyline_app_logger)

    if not anomalous_ts:
        logger.error('error :: get_anoms :: no anomalous_ts')
        return []

    anomalies = []
    try:
        anomaly_ts_dict = dict(anomalous_ts)
        my_detector = AnomalyDetector(anomaly_ts_dict, score_threshold=1.5)
        anomalies = my_detector.get_anomalies()
    except Exception:
        # Best effort: log and fall through to the empty result. Catching
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        logger.error(traceback.format_exc())
        logger.error('error :: get_anoms :: AnomalyDetector')
    return anomalies
Ejemplo n.º 24
0
 def test_diff_percent_threshold_algorithm(self):
     """
     Test "diff percent threshold" algorithm with a threshold of 20%

     It must find at least one anomaly in self.s1 against the self.s2
     baseline, and must raise RequiredParametersNotPassed when no
     algorithm_params are given.
     """
     thresholds = {
         'percent_threshold_upper': 20,
         'percent_threshold_lower': -20
     }
     found = AnomalyDetector(self.s1,
                             baseline_time_series=self.s2,
                             algorithm_name='diff_percent_threshold',
                             algorithm_params=thresholds).get_anomalies()
     self.assertIsNotNone(found)
     self.assertGreater(len(found), 0)

     def build_without_params():
         return AnomalyDetector(self.s1,
                                baseline_time_series=self.s2,
                                algorithm_name='diff_percent_threshold')

     self.assertRaises(exceptions.RequiredParametersNotPassed,
                       build_without_params)
Ejemplo n.º 25
0
 def __init__(self, metrix, related_metrices):
   """
   Load the primary series, detect its anomalies and run the analysis.

   :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
   :param list related_metrices: a list of time series.
   """
   self.metrix = self._load(metrix)
   # The detector receives the raw argument, not the loaded self.metrix.
   detector = AnomalyDetector(metrix)
   self.anomaly_detector = detector
   self.related_metrices = related_metrices
   self.anomalies = detector.get_anomalies()
   self._analyze()
Ejemplo n.º 26
0
def cleanData(df, index_name="15分钟段", var_name="实际功率", limit=0.5):
    """Drop outlying ``var_name`` rows inside each ``index_name`` group.

    For every group only the rows strictly between the 5th and 95th
    percentile are kept. The surviving values are z-score normalised and
    scored with the ``exp_avg_detector`` algorithm; only rows whose score is
    below ``limit`` are kept. Groups whose standard deviation is not greater
    than zero are kept without scoring.

    :param df: DataFrame containing the ``index_name`` and ``var_name`` columns.
    :param index_name: column used to group the rows.
    :param var_name: column holding the values to clean.
    :param limit: rows with a score at or above this value are dropped.
    :return: a new DataFrame with a fresh integer index; an empty frame with
        the columns of ``df`` when ``df`` yields no groups.
    """
    kept = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        limit_low, limit_up = np.percentile(temp[var_name], [5, 95])
        inside = (temp[var_name] < limit_up) & (temp[var_name] > limit_low)
        temp = temp[inside].reset_index(drop=True)

        values = temp[var_name]
        spread = np.std(values)
        if spread > 0:
            # Normalise only once the spread is known to be non-zero, so a
            # flat (or empty) group never triggers a division by zero.
            normalised = (values - np.mean(values)) / spread
            my_detector = AnomalyDetector(normalised.to_dict(),
                                          algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            kept.append(temp[np.array(score.values) < limit])
        else:
            kept.append(temp)

    if not kept:
        # pd.concat raises ValueError on an empty list; return an empty frame.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(kept, ignore_index=True)
Ejemplo n.º 27
0
def pointsOfCorrelation(ts1, ts2, thresholdVal):
    """Correlate ts1 with ts2 over every anomalous window found in ts2.

    Anomalies are detected on ``ts2`` with the ``exp_avg_detector`` algorithm
    and ``thresholdVal`` as score threshold. For each anomaly window a
    ``Correlator(ts1, ts2, window)`` is built, and windows reported as
    correlated at threshold 0.8 are collected.

    :param ts1: first time series, passed to ``Correlator``.
    :param ts2: time series on which anomaly detection runs.
    :param thresholdVal: ``score_threshold`` handed to ``AnomalyDetector``.
    :return: list of ``[(start, end), coefficient, anomaly_score]`` entries,
        where start/end are local-time strings ('%Y-%m-%d %H:%M:%S') and both
        numbers are rounded to two decimals.
    """
    corrPoints = []

    detector = AnomalyDetector(ts2,
                               score_threshold=thresholdVal,
                               algorithm_name="exp_avg_detector")
    anomalies = detector.get_anomalies()

    # For each anomalous window of ts2, record the window, the correlation
    # coefficient and the anomaly score when the two series are correlated.
    for a in anomalies:
        time_period = a.get_time_window()

        try:
            my_correlator = Correlator(ts1, ts2, time_period)
            if not my_correlator.is_correlated(threshold=0.8):
                continue

            correlatorResultObj = my_correlator.get_correlation_result()

            # Change time period to human readable format
            start = strftime('%Y-%m-%d %H:%M:%S',
                             localtime(a.start_timestamp))
            end = strftime('%Y-%m-%d %H:%M:%S', localtime(a.end_timestamp))

            corrPoints.append([
                (start, end),
                round(correlatorResultObj.coefficient, 2),
                round(a.anomaly_score, 2)
            ])
        except Exception:
            # Best effort: skip windows that cannot be correlated. Catching
            # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
            continue

    return corrPoints
Ejemplo n.º 28
0
 def __init__(self, X_tr_red, X_te_red, shift_detector, level):
     """
     Score the test data with AnomalyDetector, using the train data as baseline.

     Each row of the inputs is reduced to the negated row-wise maximum; the
     reduced test values are scored position by position and handed to the
     parent initializer together with both reduced lists.

     :param X_tr_red: training data, reduced along axis 1.
     :param X_te_red: test data, reduced along axis 1.
     :param shift_detector: forwarded to the parent initializer.
     :param level: forwarded to the parent initializer.
     """
     logger.info("Run Luminol")
     X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
     X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()

     # Key both series by position, as the detector takes dict input here.
     test_series = dict(enumerate(X_te_odim))
     train_series = dict(enumerate(X_tr_odim))
     detector_params = {
         'precision': 10,
         'lag_window_size': 0.1,
         'future_window_size': 0.1,
         'chunk_size': 2
     }
     all_scores = AnomalyDetector(test_series,
                                  baseline_time_series=train_series,
                                  algorithm_params=detector_params).get_all_scores()

     score = [all_scores[i] for i in range(len(X_te_odim))]
     assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))
     super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
Ejemplo n.º 29
0
    def f1_metrics(self):
        """
        Score ``self.y`` and pick the score quantile that maximises F1.

        Reads ``self.y``, ``self.ts`` and ``self.abn_pt``; stores the ground
        truth vector on ``self.y_true``.

        :return: ``(ap, y_true, y_score)`` - the average precision of the raw
            scores, the 0/1 ground truth built from ``self.abn_pt``, and the
            0/1 predictions at the quantile threshold with the best F1.
        """
        y = self.y
        ts = self.ts
        abn_pt = self.abn_pt
        series = pd.Series(y, index=ts)
        scores = AnomalyDetector(series.to_dict()).get_all_scores()

        y_true = np.zeros(y.size)
        for position in abn_pt:
            y_true[position] = 1
        self.y_true = y_true

        np_score = [item[1] for item in scores.iteritems()]
        score_series = pd.Series(data=np_score)

        ap = average_precision_score(y_true, np_score)

        def flag_above(quantile):
            # 0/1 vector marking the positions whose score exceeds the quantile.
            threshold = np.quantile(np_score, quantile)
            flagged = score_series[score_series.values > threshold].index.values
            marks = np.zeros(y.size)
            for position in flagged:
                marks[position] = 1
            return marks

        # Candidate quantiles: log10 of 1.0 .. 9.9, i.e. values in [0, 1).
        quantiles = np.log10(np.arange(0, 9, .1) + 1)
        f1 = [f1_score(y_true, flag_above(q)) for q in quantiles]

        y_score = flag_above(quantiles[np.argmax(f1)])
        return ap, y_true, y_score
Ejemplo n.º 30
0
  def test_sign_test_algorithm_interface(self):
    """
    Test "sign test" algorithm with a threshold of 0% and an offset of 2

    Each case sets only one of the two percent thresholds and expects
    exactly one anomaly.
    """
    bs = {t: 1 for t in range(1, 30)}

    def detect(series, params):
      return AnomalyDetector(series, baseline_time_series=bs, algorithm_name='sign_test',
                             algorithm_params=params).get_anomalies()

    # upper bound only
    algorithm_params = {'percent_threshold_upper': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # 3.1 is just above baseline (1) + offset (2)
    ts.update((t, 3.1) for t in range(1, 21))

    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    # assertEqual replaces the assertEquals alias, removed in Python 3.12
    self.assertEqual(len(anomalies), 1)

    # try lower bound
    algorithm_params = {'percent_threshold_lower': 0,
                        'offset': 2,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # 2.9 is just below baseline (1) + offset (2)
    ts.update((t, 2.9) for t in range(1, 25))

    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
Ejemplo n.º 31
0
 def test_score_only(self):
     """
     With score_only=True the detector must report zero anomalies, while the
     same algorithm without that flag returns a non-None result.
     """
     algorithm = 'derivative_detector'
     scores_only = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name=algorithm)
     regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
     self.assertIsNotNone(regular.get_anomalies())
     self.assertEqual(len(scores_only.get_anomalies()), 0)
Ejemplo n.º 32
0
    def anomalies(self, metrics):
        """
        Score a mapping of date strings to values.

        :param metrics: mapping whose keys are '%Y-%m-%d %H:%M:%S' strings.
        :return: dict mapping each original date string to its anomaly score,
            or False when ``metrics`` is empty or the detector returns a
            falsy score object.
        """
        series_by_epoch = {}
        label_by_epoch = {}
        for label in metrics:
            parsed = datetime.datetime.strptime(label, "%Y-%m-%d %H:%M:%S")
            epoch = int(parsed.timestamp())
            series_by_epoch[epoch] = metrics[label]
            label_by_epoch[epoch] = label

        if not series_by_epoch:
            return False

        all_scores = AnomalyDetector(series_by_epoch).get_all_scores()
        if not all_scores:
            return False

        # Translate the integer timestamps back to the caller's date strings.
        return {label_by_epoch[epoch]: value
                for epoch, value in all_scores.iteritems()}
Ejemplo n.º 33
0
 def test_score_only(self):
     """
     With score_only=True the detector must report zero anomalies, while the
     same algorithm without that flag returns a non-None result.
     """
     algorithm = 'derivative_detector'
     scores_only = AnomalyDetector(self.s1,
                                   score_only=True,
                                   algorithm_name=algorithm)
     regular = AnomalyDetector(self.s1, algorithm_name=algorithm)
     self.assertIsNotNone(regular.get_anomalies())
     self.assertEqual(len(scores_only.get_anomalies()), 0)
Ejemplo n.º 34
0
 def test_anomaly_threshold(self):
     """
     With the same score_percent_threshold (0.1), exp_avg_detector and
     derivative_detector must not report equal anomalies.
     """
     shared = {'score_percent_threshold': 0.1}
     exp_avg = AnomalyDetector(self.s1,
                               algorithm_name='exp_avg_detector',
                               **shared)
     derivative = AnomalyDetector(self.s1,
                                  algorithm_name='derivative_detector',
                                  **shared)
     self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
Ejemplo n.º 35
0
    def test_sign_test_algorithm_interface(self):
        """
        Test "sign test" algorithm with a threshold of 0% and an offset of 2

        Runs one case per percent threshold (upper, then lower); each case
        expects exactly one anomaly.
        """
        bs = {t: 1 for t in range(1, 30)}

        # (threshold key, value written into the series, timestamps overwritten)
        cases = [
            ('percent_threshold_upper', 3.1, range(1, 21)),
            ('percent_threshold_lower', 2.9, range(1, 25)),
        ]
        for threshold_key, level, span in cases:
            algorithm_params = {
                threshold_key: 0,
                'offset': 2,
                'scan_window': 24,
                'confidence': 0.01
            }
            ts = dict(bs)
            ts.update((t, level) for t in span)

            anomalies = AnomalyDetector(ts,
                                        baseline_time_series=bs,
                                        algorithm_name='sign_test',
                                        algorithm_params=algorithm_params).get_anomalies()

            self.assertTrue(anomalies is not None)
            self.assertEqual(len(anomalies), 1)
Ejemplo n.º 36
0
  def test_sign_test_algorithm_with_shift(self):
    """
    Test "sign test" algorithm with the 'shift' parameter
    (upper threshold 10% with shift 1, then lower threshold -20% with shift -0.1)
    """
    bs = {t: 1 for t in range(1, 100)}

    def detect(series, params):
      return AnomalyDetector(series, baseline_time_series=bs, algorithm_name='sign_test',
                             algorithm_params=params).get_anomalies()

    def check_single_anomaly(anomalies):
      # assertEqual replaces the assertEquals alias, removed in Python 3.12
      self.assertEqual(len(anomalies), 1)
      anomaly = anomalies[0]
      s, e = anomaly.get_time_window()
      # note the anomaly is larger than scan window
      self.assertEqual(s, 4)
      self.assertEqual(e, 39)
      # score should be roughly 98.5
      self.assertGreater(anomaly.anomaly_score, 98)
      self.assertLess(anomaly.anomaly_score, 99)

    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # bigger than 10 percent but below bias
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    check_single_anomaly(anomalies)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})
    check_single_anomaly(detect(ts, algorithm_params))
Ejemplo n.º 37
0
class TestAnomalyDetector(unittest.TestCase):

  def setUp(self):
    """Build two nine-point series keyed 0..8 and a default detector for each."""
    self.s1 = dict(enumerate([0, 0, 0, 0, 1, 2, 2, 2, 0]))
    self.s2 = dict(enumerate([0, 1, 2, 2, 2, 0, 0, 0, 0]))

    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_custom_algorithm(self):
    """
    A detector built with algorithm_class=CustomAlgo must return at least
    one anomaly for self.s1 against the self.s2 baseline.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_class=CustomAlgo,
                            algorithm_params=thresholds).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%

    It must find at least one anomaly in self.s1 against the self.s2
    baseline, and must raise RequiredParametersNotPassed when no
    algorithm_params are given.
    """
    thresholds = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1, baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold',
                            algorithm_params=thresholds).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

    def build_without_params():
      return AnomalyDetector(self.s1, baseline_time_series=self.s2,
                             algorithm_name='diff_percent_threshold')

    self.assertRaises(exceptions.RequiredParametersNotPassed, build_without_params)

  def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%

    Covers: missing parameters, no anomaly, one anomaly above and one below
    the baseline, two anomalies separated by a big gap, and two excursions
    separated by a small gap that are reported as a single anomaly.
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='sign_test'))

    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    def detect():
      # ts is only mutated in place below, so every call sees the current series.
      return AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                             algorithm_params=algorithm_params).get_anomalies()

    def check_window(anomaly, start, end):
      # assertEqual replaces the assertEquals alias, removed in Python 3.12
      s, e = anomaly.get_time_window()
      self.assertEqual(s, start)
      self.assertEqual(e, end)

    def check_score_between_98_and_99(anomaly):
      # score should be roughly 98.5
      self.assertGreater(anomaly.anomaly_score, 98)
      self.assertLess(anomaly.anomaly_score, 99)

    # first no anomalies
    anomalies = detect()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = detect()
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 1)
    # note the anomaly is larger than scan window
    check_window(anomalies[0], 4, 39)
    check_score_between_98_and_99(anomalies[0])

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})
    anomalies = detect()
    self.assertEqual(len(anomalies), 1)
    check_window(anomalies[0], 4, 39)
    check_score_between_98_and_99(anomalies[0])

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = detect()
    self.assertEqual(len(anomalies), 2)
    check_window(anomalies[0], 1, 30)
    # score ~ 99.9
    self.assertGreater(anomalies[0].anomaly_score, 99)
    check_window(anomalies[1], 54, 89)
    check_score_between_98_and_99(anomalies[1])

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = detect()
    # just one
    self.assertEqual(len(anomalies), 1)
    check_window(anomalies[0], 1, 40)
    # score ~ 99.9
    self.assertGreater(anomalies[0].anomaly_score, 99)

  def test_sign_test_algorithm_with_shift(self):
    """
    Test "sign test" algorithm with the 'shift' parameter
    (upper threshold 10% with shift 1, then lower threshold -20% with shift -0.1)
    """
    bs = {t: 1 for t in range(1, 100)}

    def detect(series, params):
      return AnomalyDetector(series, baseline_time_series=bs, algorithm_name='sign_test',
                             algorithm_params=params).get_anomalies()

    def check_single_anomaly(anomalies):
      # assertEqual replaces the assertEquals alias, removed in Python 3.12
      self.assertEqual(len(anomalies), 1)
      anomaly = anomalies[0]
      s, e = anomaly.get_time_window()
      # note the anomaly is larger than scan window
      self.assertEqual(s, 4)
      self.assertEqual(e, 39)
      # score should be roughly 98.5
      self.assertGreater(anomaly.anomaly_score, 98)
      self.assertLess(anomaly.anomaly_score, 99)

    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # bigger than 10 percent but below bias
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    check_single_anomaly(anomalies)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    anomalies = detect(ts, algorithm_params)
    self.assertTrue(anomalies is not None)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})
    check_single_anomaly(detect(ts, algorithm_params))

  def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with a upper and lower threshold of 0.2

    Also checks that omitting the threshold parameters raises
    RequiredParametersNotPassed.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                 'absolute_threshold_value_lower': 0.2})
    anomalies = detector.get_anomalies()
    # Dedicated assertions report the offending value when they fail.
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

  def test_threshold(self):
    """
    Test score threshold=0: exactly one anomaly is expected for s1.
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomalies = detector.get_anomalies()
    # Check for None first; otherwise len() would fail before the None check could report anything.
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

  def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(detector2.get_anomalies())
    # With score_only the anomaly list is expected to be present but empty.
    self.assertEqual(len(detector.get_anomalies()), 0)

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.

    The scores must be a TimeSeries with one entry per input data point.
    """
    scores = self.detector1.get_all_scores()
    self.assertIsInstance(scores, TimeSeries)
    self.assertEqual(len(scores), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Naming 'default_detector' explicitly must give the same timestamps and
    scores as constructing the detector without an algorithm name.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    for attribute in ('timestamps', 'values'):
      explicit_side = getattr(explicit.get_all_scores(), attribute)
      implicit_side = getattr(self.detector1.get_all_scores(), attribute)
      self.assertEqual(explicit_side, implicit_side)

  def test_algorithm(self):
    """
    An unknown algorithm name must raise AlgorithmNotFound.
    """
    with self.assertRaises(exceptions.AlgorithmNotFound):
      AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

  def test_algorithm_params(self):
    """
    algorithm_params must be rejected with ValueError when it is a string, and a
    custom smoothing_factor must produce scores different from the default detector.
    """
    with self.assertRaises(ValueError):
      AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')

    tuned = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    default_values = self.detector1.get_all_scores().values
    tuned_values = tuned.get_all_scores().values
    self.assertNotEqual(default_values, tuned_values)

  def test_anomaly_threshold(self):
    """
    With the same score_percent_threshold, the exp_avg and derivative detectors
    must not report identical anomalies.
    """
    shared_kwargs = {'score_percent_threshold': 0.1}
    exp_avg = AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', **shared_kwargs)
    derivative = AnomalyDetector(self.s1, algorithm_name='derivative_detector', **shared_kwargs)
    self.assertNotEqual(derivative.get_anomalies(), exp_avg.get_anomalies())
Ejemplo n.º 38
0
class TestAnomalyDetector(unittest.TestCase):
  """Tests for AnomalyDetector over two small hand-written series (s1 and s2)."""

  def setUp(self):
    """Build the two fixture series and a default detector for each."""
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
    self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_custom_algorithm(self):
    """
    Test passing a custom algorithm class
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    # Omitting the thresholds must be rejected.
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='diff_percent_threshold'))

  def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
    """
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                 'absolute_threshold_value_lower': 0.2})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    # Omitting the thresholds must be rejected.
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

  def test_threshold(self):
    """
    Test score threshold=0
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomalies = detector.get_anomalies()
    # Check for None first; otherwise len() would fail before the None check could report anything.
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

  def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(detector2.get_anomalies())
    self.assertEqual(len(detector.get_anomalies()), 0)

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.
    """
    scores = self.detector1.get_all_scores()
    self.assertIsInstance(scores, TimeSeries)
    self.assertEqual(len(scores), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    self.assertRaises(exceptions.AlgorithmNotFound, lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

  def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    self.assertRaises(ValueError, lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0'))
    detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    Test if score_percent_threshold works as expected.
    """
    detector = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='exp_avg_detector')
    detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='derivative_detector')
    self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
Ejemplo n.º 39
0
from luminol.anomaly_detector import AnomalyDetector
import time

# Score every point of the Turbidity.csv series and print the points whose
# anomaly score is strictly positive, one "<local time>, <score>" line each.
my_detector = AnomalyDetector('Turbidity.csv')
score = my_detector.get_all_scores()

for epoch_seconds, anomaly_score in score.iteritems():
    local_time = time.localtime(epoch_seconds)
    formatted = time.strftime('%y-%m-%d %H:%M:%S', local_time)
    if anomaly_score > 0:
        print(f'{formatted}, {anomaly_score}')
Ejemplo n.º 40
0
 def test_score_only(self):
   """
   Test that score_only yields no anomalies (None) while the default mode does.
   """
   detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
   detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
   # Dedicated assertions report the offending value when they fail.
   self.assertIsNotNone(detector2.get_anomalies())
   self.assertIsNone(detector.get_anomalies())
Ejemplo n.º 41
0
  def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%

    Covers: missing parameters, no anomaly, one anomaly above and one below the
    baseline, two anomalies separated by a big gap, and two runs close enough
    to be reported as a single anomaly.
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='sign_test'))

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    # first no anomalies
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()

    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})

    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 30)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

    anomaly = anomalies[1]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 54)
    self.assertEqual(e, 89)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    anomalies = detector.get_anomalies()

    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 40)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)
Ejemplo n.º 42
0
def main(argv):
    """
    Scan a CANoe ASCII log (.asc) for anomalous CAN events and print them.

    Options are read from ``sys.argv`` (the ``argv`` parameter is accepted for
    interface compatibility but is not consulted):
      -i / --input FILE   log file to analyse (required)
      -v                  print every anomaly instead of only those scoring > 3.4
      -h / --help         print usage and exit

    Exits with status 2 on an invalid command line or when no input is given.
    """
    try:
        opts, _ = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)

    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            # Raised explicitly so the check survives ``python -O``.
            raise AssertionError("unhandled option")

    if input_path is None:
        # Without this guard open() would fail with an opaque TypeError.
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    with open(input_path) as log_file:
        log_text = log_file.read()

    # event_dict stores the values (timestamp -> CAN-ID) extracted from the logs
    event_dict = {}
    for match in re.finditer(regex, log_text):
        # group(2) is the time since engine start in seconds; scale it to microseconds
        timestamp = float(match.group(2)) * 1000000

        # match.group(20) is ID of CAN event in decimal
        event_dict[timestamp] = match.group(20)

    my_detector = AnomalyDetector(event_dict,
                                  algorithm_name="exp_avg_detector")

    # filter events in time series for anomalies
    anomalies = my_detector.get_anomalies()

    # blank separator line before the report
    print("")

    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue

        # with "-v" every anomaly is printed, otherwise only those with score > 3.4
        if verbose or attack.anomaly_score > 3.4:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
Ejemplo n.º 43
0
class RCA(object):
    """Detects anomalies in one metric and correlates each of them with a set of related metrics."""

    def __init__(self, metrix, related_metrices):
        """
        Load the metric, run anomaly detection on it and analyze the result right away.
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series to correlate against.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Normalize the input into a TimeSeries.
        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        raw_series = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(raw_series)

    def _analyze(self):
        """
        For every detected anomaly, correlate the metric with each related metric
        over a window extended around the anomaly.

        Results are stored in self.output (keyed by the anomaly's exact timestamp)
        and self.output_by_name (keyed by the related metric).
        """
        by_timestamp = defaultdict(list)
        by_metric = defaultdict(list)
        all_scores = self.anomaly_detector.get_all_scores()

        for anomaly in (self.anomalies or ()):
            window_start, window_end = anomaly.get_time_window()
            anomaly_t = anomaly.exact_timestamp

            # Extend the window by half its length on each side (30 when that is zero).
            margin = (window_end - window_start) / 2 or 30
            period_start = window_start - margin
            period_end = window_end + margin

            # Keep widening until the cropped scores hold at least two data points.
            while len(all_scores.crop(period_start, period_end)) < 2:
                period_start = period_start - margin
                period_end = period_end + margin

            # Correlate with other metrics
            for related in self.related_metrices:
                try:
                    correlation = Correlator(
                        self.metrix,
                        related,
                        time_period=(period_start, period_end),
                        use_anomaly_score=True).get_correlation_result()
                except exceptions.NotEnoughDataPoints:
                    continue
                by_timestamp[anomaly_t].append(
                    (period_start, period_end, correlation.__dict__, related))
                by_metric[related].append(
                    (period_start, period_end, correlation.__dict__))

        self.output = by_timestamp
        self.output_by_name = by_metric
Ejemplo n.º 44
0
class TestAnomalyDetector(unittest.TestCase):
  """Tests for AnomalyDetector over two small hand-written series (s1 and s2)."""

  def setUp(self):
    """Build the two fixture series and a default detector for each."""
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
    self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_threshold(self):
    """
    Test score threshold=0
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomalies = detector.get_anomalies()
    # Check for None first; otherwise len() would fail before the None check could report anything.
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

  def test_score_only(self):
    """
    Test that score_only yields no anomalies (None) while the default mode does.
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(detector2.get_anomalies())
    self.assertIsNone(detector.get_anomalies())

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.
    """
    scores = self.detector1.get_all_scores()
    self.assertIsInstance(scores, TimeSeries)
    self.assertEqual(len(scores), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    self.assertRaises(exceptions.AlgorithmNotFound, lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

  def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    self.assertRaises(exceptions.InvalidDataFormat, lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0'))
    detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    Test if score_percentile_threshold works as expected.
    """
    detector = AnomalyDetector(self.s1, score_percentile_threshold=0.1, algorithm_name='exp_avg_detector')
    detector1 = AnomalyDetector(self.s1, score_percentile_threshold=0.1, algorithm_name='derivative_detector')
    self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
Ejemplo n.º 45
0
class RCA(object):
  """Detects anomalies in one metric and correlates each of them with a set of related metrics."""

  def __init__(self, metrix, related_metrices):
    """
    Load the metric, run anomaly detection on it and analyze the result right away.
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
    :param list related_metrices: a list of time series to correlate against.
    """
    self.metrix = self._load(metrix)
    self.anomaly_detector = AnomalyDetector(metrix)
    self.related_metrices = related_metrices
    self.anomalies = self.anomaly_detector.get_anomalies()
    self._analyze()

  def _load(self, metrix):
    """
    Normalize the input into a TimeSeries.
    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
    :return TimeSeries: a TimeSeries object.
    """
    if isinstance(metrix, TimeSeries):
      return metrix
    raw_series = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
    return TimeSeries(raw_series)

  def _analyze(self):
    """
    For every detected anomaly, correlate the metric with each related metric
    over a window extended around the anomaly.

    Results are stored in self.output (keyed by the anomaly's exact timestamp)
    and self.output_by_name (keyed by the related metric).
    """
    by_timestamp = defaultdict(list)
    by_metric = defaultdict(list)
    all_scores = self.anomaly_detector.get_all_scores()

    for anomaly in (self.anomalies or ()):
      window_start, window_end = anomaly.get_time_window()
      anomaly_t = anomaly.exact_timestamp

      # Extend the window by half its length on each side (30 when that is zero).
      margin = (window_end - window_start) / 2 or 30
      period_start = window_start - margin
      period_end = window_end + margin

      # Keep widening until the cropped scores hold at least two data points.
      while len(all_scores.crop(period_start, period_end)) < 2:
        period_start = period_start - margin
        period_end = period_end + margin

      # Correlate with other metrics
      for related in self.related_metrices:
        try:
          correlation = Correlator(self.metrix, related, time_period=(period_start, period_end),
                                   use_anomaly_score=True).get_correlation_result()
        except exceptions.NotEnoughDataPoints:
          continue
        by_timestamp[anomaly_t].append((period_start, period_end, correlation.__dict__, related))
        by_metric[related].append((period_start, period_end, correlation.__dict__))

    self.output = by_timestamp
    self.output_by_name = by_metric
Ejemplo n.º 46
0
 def setUp(self):
   """Create two nine-point series keyed 0..8 and a default detector for each."""
   self.s1 = dict(enumerate([0, 0, 0, 0, 0.5, 1, 1, 1, 0]))
   self.s2 = dict(enumerate([0, 0.5, 1, 1, 1, 0, 0, 0, 0]))
   self.detector1, self.detector2 = AnomalyDetector(self.s1), AnomalyDetector(self.s2)
Ejemplo n.º 47
0
from luminol.anomaly_detector import AnomalyDetector
import matplotlib.pyplot as plt
import pandas as pd

# Download the NAB "Twitter volume FB" series, score it with the default
# luminol detector and plot the raw values followed by the anomaly scores.
df = pd.read_csv(
    "https://raw.githubusercontent.com/numenta/NAB/master/data/realTweets/Twitter_volume_FB.csv",
    header=0,
    index_col=0)

# Key the observations by their position: 0, 1, 2, ...
ts = dict(enumerate(df.value))

my_detector = AnomalyDetector(ts)
score = my_detector.get_all_scores()
anomalies_chart = [value for _, value in score.iteritems()]

list_values = list(ts.values())
plt.plot(list_values)
plt.show()
plt.plot(anomalies_chart, color='r')
plt.show()
Ejemplo n.º 48
0
 def setUp(self):
   """Create two nine-point series keyed 0..8 and a default detector for each."""
   self.s1 = dict(enumerate([0, 0, 0, 0, 1, 2, 2, 2, 0]))
   self.s2 = dict(enumerate([0, 1, 2, 2, 2, 0, 0, 0, 0]))
   self.detector1, self.detector2 = AnomalyDetector(self.s1), AnomalyDetector(self.s2)
Ejemplo n.º 49
0
def get_timeseries(
        phenomenon_time_range,
        num_time_slots,
        get_observations,
        detector_method='bitmap_detector',  # LinkedIn bitmap
        detector_params={
            "precision": 8,
            "lag_window_size": 20,
            "future_window_size": 20,
            "chunk_size": 2
        },
        anomaly_breaks=DEFAULT_ANOMALY_BREAKS,
        value_breaks=DEFAULT_VALUE_BREAKS,
        extend_range=True,
        baseline_time_range=None,
        shift=True,
        use_baseline=True):
    """Fetch observations and compute values, anomaly scores and percentiles.

    Args:
        phenomenon_time_range: Time range echoed back in the result (an empty
            ``DateTimeTZRange()`` is returned instead when there are no
            observations).
        num_time_slots: Number of slots kept from the fetched series,
            counted from the end of the leading extension.
        get_observations: Callable invoked as
            ``get_observations(lower_ext, upper_ext)``; it must return a list
            of objects exposing ``phenomenon_time_range.lower.timestamp()``
            and ``result``.
        detector_method: Passed to ``AnomalyDetector`` as ``algorithm_name``.
        detector_params: Passed to ``AnomalyDetector`` as
            ``algorithm_params``. Must contain ``"lag_window_size"`` and
            ``"future_window_size"``. The mapping is copied, never modified.
        anomaly_breaks: Breaks passed to ``percentiles`` for anomaly scores.
        value_breaks: Breaks passed to ``percentiles`` for property values.
        extend_range: When true, request extra observations before/after the
            range, sized from the detector windows.
        baseline_time_range: Only tested against ``None``: when it is
            ``None`` and ``use_baseline`` is true, the fetched observations
            double as the baseline. Loading a separate baseline for a given
            range is not implemented, so in that case detection runs without
            a baseline.
        shift: With ``use_baseline``, selects how the extension is split
            between the leading and trailing side.
        use_baseline: Whether to run the detector against a baseline series.

    Returns:
        dict with keys ``'phenomenon_time_range'``, ``'property_values'``,
        ``'property_value_percentiles'``, ``'property_anomaly_rates'`` and
        ``'property_anomaly_percentiles'``.

    Raises:
        Exception: If ``get_observations`` does not return a list.
    """
    # Work on a private copy: the window sizes may be shrunk below, and
    # writing into the shared default dict (or the caller's dict) would leak
    # those adjusted sizes into every later call.
    detector_params = dict(detector_params)

    lower_ext = 0
    upper_ext = 0

    if extend_range:
        lower_ext = detector_params["lag_window_size"]
        upper_ext = detector_params["future_window_size"]

        if use_baseline:
            if shift:
                upper_ext = 0
            else:
                lower_ext = int(upper_ext / 2)
                upper_ext -= lower_ext + 1

    observations = get_observations(lower_ext, upper_ext)

    if not isinstance(observations, list):
        raise Exception('property_values should be array')

    if not observations:
        return {
            'phenomenon_time_range': DateTimeTZRange(),
            'property_values': [],
            'property_value_percentiles': {},
            'property_anomaly_rates': [],
            'property_anomaly_percentiles': {},
        }

    property_values = observations_to_property_values(observations)

    valid_values_length = len(property_values) - property_values.count(None)

    if valid_values_length == 1:
        # The single valid value is not necessarily the first element (the
        # series may start with None entries), so pick it out explicitly.
        single_value = next(
            value for value in property_values if value is not None)
        # NOTE(review): this branch returns the full fetched value list, not
        # the num_time_slots window used by the other branches - confirm
        # whether that is intended.
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values,
            'property_value_percentiles': {
                50: single_value
            },
            'property_anomaly_rates': [0],
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    minimal_points_in_windows = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS

    if use_baseline:
        minimal_points_in_windows /= 2

    if use_baseline:
        window_length = detector_params["future_window_size"]
    else:
        window_length = (detector_params["future_window_size"] +
                         detector_params["lag_window_size"])

    # Too few valid points to fill the configured windows: shrink both
    # windows proportionally, but never below half the minimal point count.
    if minimal_points_in_windows < valid_values_length <= window_length:
        half_minimum = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2
        detector_params["future_window_size"] = int(
            max(half_minimum,
                valid_values_length *
                DEFAULT_BITMAP_MOD_LEADING_WINDOW_SIZE_PCT))
        detector_params["lag_window_size"] = int(
            max(half_minimum,
                valid_values_length *
                DEFAULT_BITMAP_MOD_LAGGING_WINDOW_SIZE_PCT))

    # Slots that belong to the requested range (skipping the leading
    # extension).
    requested = slice(lower_ext, lower_ext + num_time_slots)

    property_value_percentiles = percentiles(property_values[requested],
                                             value_breaks)

    obs_reduced = {
        obs.phenomenon_time_range.lower.timestamp(): obs.result
        for obs in observations
    }

    # None means "run the detector without a baseline".
    baseline_reduced = None
    if use_baseline and baseline_time_range is None:
        # The observations themselves serve as the baseline; keep it a
        # separate dict object from obs_reduced.
        baseline_reduced = dict(obs_reduced)

    if valid_values_length <= 1:
        # Only reachable with zero valid values (exactly one returned above).
        property_anomaly_rates = [
            0 if value is not None else value
            for value in property_values[requested]
        ]

        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values[requested],
            'property_value_percentiles': property_value_percentiles,
            'property_anomaly_rates': property_anomaly_rates,
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    if baseline_reduced is None:
        detector = AnomalyDetector(obs_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)
    else:
        detector = AnomalyDetector(obs_reduced,
                                   baseline_reduced,
                                   algorithm_name=detector_method,
                                   algorithm_params=detector_params,
                                   score_only=True)

    property_anomaly_rates = detector.get_all_scores().values

    # Percentiles are computed before the None placeholders are inserted.
    property_anomaly_percentiles = percentiles(
        property_anomaly_rates[requested], anomaly_breaks)

    # Re-insert None at every position where the property value is missing.
    for index, value in enumerate(property_values):
        if value is None:
            property_anomaly_rates.insert(index, None)

    return {
        'phenomenon_time_range': phenomenon_time_range,
        'property_values': property_values[requested],
        'property_value_percentiles': property_value_percentiles,
        'property_anomaly_rates': property_anomaly_rates[requested],
        'property_anomaly_percentiles': property_anomaly_percentiles,
    }
Ejemplo n.º 50
0
    if args.interpolate_period > 0:
        tf.set_interpolate(Interpolate(period={'count': args.interpolate_period, 'unit': TimeUnit.MINUTE},
                                       function=InterpolateFunction.LINEAR))

    query.set_transformation_filter(tf)

    series_list = svc.query(query)
    for series in series_list:
        metric_id = '- %s %s' % (series.metric, print_tags(series.tags))
        log('\t' + metric_id)
        # exclude empty series for specific tags
        if len(series.data) > 2:
            ts = {int(sample.t / 1000): sample.v for sample in series.data}

            detector = AnomalyDetector(ts, score_threshold=args.min_score)

            anomalies = []
            for anomaly in detector.get_anomalies():
                if time.mktime(now.timetuple()) - args.last_hours * 3600 <= anomaly.exact_timestamp:
                    anomalies.append(anomaly)

            if anomalies:
                message.append(metric_id)
                for anomaly in anomalies:
                    t_start, t_end = format_t(anomaly.start_timestamp), format_t(anomaly.end_timestamp)
                    t_exact = format_t(anomaly.exact_timestamp)
                    anomaly_msg = '\tAnomaly from %s to %s with score %s: %s, %s' % (
                        t_start, t_end, anomaly.anomaly_score, t_exact, ts[anomaly.exact_timestamp])
                    message.append(anomaly_msg)