Example #1
0
 def test_algorithm_DefaultDetector(self):
     """
     Naming 'default_detector' explicitly must reproduce self.detector1's scores.

     Both the score timestamps and the score values are compared.
     """
     explicit_scores = AnomalyDetector(
         self.s1, algorithm_name='default_detector').get_all_scores()
     reference_scores = self.detector1.get_all_scores()
     self.assertEqual(explicit_scores.timestamps, reference_scores.timestamps)
     self.assertEqual(explicit_scores.values, reference_scores.values)
Example #2
0
 def test_algorithm_params(self):
   """
   Check how the optional algorithm_params argument is handled.

   A string in place of the params mapping must raise InvalidDataFormat, and a
   custom smoothing_factor must change the scores relative to self.detector1.
   """
   with self.assertRaises(exceptions.InvalidDataFormat):
     AnomalyDetector(self.s1,
                     algorithm_name='exp_avg_detector',
                     algorithm_params='0')

   tuned = AnomalyDetector(self.s1,
                           algorithm_name="exp_avg_detector",
                           algorithm_params={'smoothing_factor': 0.3})
   baseline_values = self.detector1.get_all_scores().values
   self.assertNotEqual(baseline_values, tuned.get_all_scores().values)
Example #3
0
def clean_anomaly2(df,
                   index_name="15分钟段",
                   var_name="实际功率",
                   anomaly_limit=1.5,
                   base_p=0.3):
    """Remove outlying rows of ``var_name`` separately within each ``index_name`` group.

    For every group the rows are first trimmed to a percentile band
    (2nd-99th when the group's median exceeds ``base_p``, otherwise 1st-98th,
    bounds inclusive).  If the trimmed values still vary, they are z-scored,
    scored with the ``exp_avg_detector`` algorithm, and only rows whose score
    is below ``anomaly_limit`` are kept.  Groups with zero (or undefined)
    spread are kept as trimmed, without scoring.

    Args:
        df: DataFrame containing the ``index_name`` and ``var_name`` columns.
        index_name: Column to group by.
        var_name: Column holding the values to clean.
        anomaly_limit: Rows with an anomaly score at or above this are dropped.
        base_p: Median level that selects which percentile band is used.

    Returns:
        A new DataFrame with the surviving rows of all groups concatenated
        under a fresh index.  An empty input yields an empty frame with the
        same columns instead of raising from ``pd.concat``.
    """
    kept_groups = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        values = temp[var_name]
        if np.median(values) > base_p:
            limit_low, limit_up = np.percentile(values, [2, 99])
        else:
            limit_low, limit_up = np.percentile(values, [1, 98])
        temp = temp[(values <= limit_up)
                    & (values >= limit_low)].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only after the spread is known to be non-zero, so a
            # constant group never goes through a division by zero.
            ts = (ts - np.mean(ts)) / ts_std
            my_detector = AnomalyDetector(ts.to_dict(),
                                          algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            kept_groups.append(temp[np.array(score.values) < anomaly_limit])
        else:
            kept_groups.append(temp)

    if not kept_groups:
        # No groups at all (empty input): pd.concat would raise on an empty list.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(kept_groups, ignore_index=True)
Example #4
0
 def test_algorithm_DefaultDetector(self):
   """
   Selecting 'default_detector' by name must give the same scores as self.detector1.
   """
   named = AnomalyDetector(self.s1, algorithm_name='default_detector')
   named_scores = named.get_all_scores()
   reference_scores = self.detector1.get_all_scores()
   self.assertEqual(named_scores.timestamps, reference_scores.timestamps)
   self.assertEqual(named_scores.values, reference_scores.values)
Example #5
0
def get_anomaly_series(series: pd.Series,
                       algorithm: str = "bitmap_detector") -> pd.Series:
    """Score ``series`` with an ``AnomalyDetector`` and return the scores.

    Args:
        series: Input values; converted with ``to_dict()`` so its index is
            what the detector receives as timestamps.
        algorithm: One of ``"bitmap_detector"``, ``"derivative_detector"``
            or ``"exp_avg_detector"``; anything else fails the assertion.

    Returns:
        A new Series of anomaly scores, in the order the detector yields
        them, with a default positional index (the input index is not kept).
    """
    supported = ["bitmap_detector", "derivative_detector", "exp_avg_detector"]
    assert algorithm in supported

    all_scores = AnomalyDetector(series.to_dict(),
                                 algorithm_name=algorithm).get_all_scores()
    return pd.Series([value for _, value in all_scores.iteritems()])
    def detect(self, ts):
        """Score ``ts`` with a default AnomalyDetector and key the scores by local time.

        Returns a dict mapping each score's timestamp, formatted as
        '%d-%b-%Y %H:%M:%S' in local time, to its anomaly score.  Timestamps
        that format to the same string collapse to one entry holding the
        last score seen.
        """
        all_scores = AnomalyDetector(ts).get_all_scores()
        return {
            time.strftime('%d-%b-%Y %H:%M:%S', time.localtime(stamp)): value
            for (stamp, value) in all_scores.iteritems()
        }
Example #7
0
 def test_algorithm_params(self):
     """
     Check how the optional algorithm_params argument is handled.

     A string in place of the params mapping must raise ValueError, and a
     custom smoothing_factor must change the scores relative to self.detector1.
     """
     with self.assertRaises(ValueError):
         AnomalyDetector(self.s1,
                         algorithm_name='exp_avg_detector',
                         algorithm_params='0')

     tuned = AnomalyDetector(self.s1,
                             algorithm_name="exp_avg_detector",
                             algorithm_params={'smoothing_factor': 0.3})
     default_values = self.detector1.get_all_scores().values
     self.assertNotEqual(default_values, tuned.get_all_scores().values)
Example #8
0
def cleanData(df, index_name="15分钟段", var_name="实际功率", limit=0.5):
    """Remove outlying rows of ``var_name`` separately within each ``index_name`` group.

    Each group is first trimmed to the rows lying strictly between its 5th
    and 95th percentiles.  If the trimmed values still vary, they are
    z-scored, scored with the ``exp_avg_detector`` algorithm, and only rows
    whose score is below ``limit`` are kept.  Groups whose trimmed values
    have zero (or undefined) spread are kept as trimmed, without scoring.

    Args:
        df: DataFrame containing the ``index_name`` and ``var_name`` columns.
        index_name: Column to group by.
        var_name: Column holding the values to clean.
        limit: Rows with an anomaly score at or above this are dropped.

    Returns:
        A new DataFrame with the surviving rows of all groups concatenated
        under a fresh index.  An empty input yields an empty frame with the
        same columns instead of raising from ``pd.concat``.
    """
    kept_groups = []
    for _, group in df.groupby(index_name):
        temp = deepcopy(group).reset_index(drop=True)
        values = temp[var_name]
        limit_low, limit_up = np.percentile(values, [5, 95])
        temp = temp[(values < limit_up)
                    & (values > limit_low)].reset_index(drop=True)

        ts = temp[var_name]
        ts_std = np.std(ts)
        if ts_std > 0:
            # Normalise only after the spread is known to be non-zero, so a
            # constant or single-row group never divides by zero.
            ts = (ts - np.mean(ts)) / ts_std
            my_detector = AnomalyDetector(ts.to_dict(),
                                          algorithm_name='exp_avg_detector')
            score = my_detector.get_all_scores()
            kept_groups.append(temp[np.array(score.values) < limit])
        else:
            kept_groups.append(temp)

    if not kept_groups:
        # No groups at all (empty input): pd.concat would raise on an empty list.
        return df.iloc[0:0].reset_index(drop=True)
    return pd.concat(kept_groups, ignore_index=True)
Example #9
0
 def __init__(self, X_tr_red, X_te_red, shift_detector, level):
     """Score the test rows with AnomalyDetector, using the training rows as baseline.

     Each 2-D input is reduced to one value per row: the negated row-wise
     maximum.  The reduced test values, keyed by row position, form the
     series to score; the reduced training values form the baseline series.
     The per-row scores and both reduced lists are then handed to the
     parent initialiser together with ``shift_detector`` and ``level``.
     """
     logger.info("Run Luminol")
     X_tr_odim = (-np.amax(X_tr_red, axis=1)).tolist()
     X_te_odim = (-np.amax(X_te_red, axis=1)).tolist()

     detector_params = {
         'precision': 10,
         'lag_window_size': 0.1,
         'future_window_size': 0.1,
         'chunk_size': 2
     }
     test_series = dict(enumerate(X_te_odim))
     baseline_series = dict(enumerate(X_tr_odim))
     my_detector = AnomalyDetector(test_series,
                                   baseline_time_series=baseline_series,
                                   algorithm_params=detector_params)

     all_scores = my_detector.get_all_scores()
     # Pull the scores out by row position so they line up with X_te_odim.
     score = [all_scores[pos] for pos in range(len(X_te_odim))]
     assert len(score) == len(X_te_odim), (len(score), len(X_te_odim))
     super().__init__(X_tr_odim, X_te_odim, score, shift_detector, level)
Example #10
0
    def f1_metrics(self):
        """Score ``self.y`` and pick the score quantile that maximises F1.

        ``self.y`` indexed by ``self.ts`` is scored with a default
        AnomalyDetector.  Ground truth is a 0/1 vector with ones at the
        positions listed in ``self.abn_pt`` (also stored on ``self.y_true``).
        A grid of quantile levels is tried; for each, positions whose score
        is strictly above that quantile of the scores are flagged and the F1
        against the ground truth is computed.

        Returns:
            ``(ap, y_true, y_score)``: the average precision of the raw
            scores, the ground-truth vector, and the 0/1 prediction vector
            for the best-F1 quantile level.
        """
        y = self.y
        ts = self.ts
        abn_pt = self.abn_pt

        stamped = pd.Series(y, index=ts)
        scores = AnomalyDetector(stamped.to_dict()).get_all_scores()

        y_true = np.zeros(y.size)
        for position in abn_pt:
            y_true[position] = 1
        self.y_true = y_true

        np_score = [pair[1] for pair in scores.iteritems()]
        score_series = pd.Series(data=np_score)

        ap = average_precision_score(y_true, np_score)

        def flag_above(level):
            # 0/1 vector marking positions whose score exceeds the given quantile.
            threshold = np.quantile(np_score, level)
            flagged = np.zeros(y.size)
            hits = score_series[score_series.values > threshold].index.values
            for position in hits:
                flagged[position] = 1
            return flagged

        # Quantile levels log10(1.0), log10(1.1), ..., log10(9.9).
        levels = np.log10(np.arange(0, 9, .1) + 1)
        f1 = [f1_score(y_true, flag_above(level)) for level in levels]

        y_score = flag_above(levels[np.argmax(f1)])
        return ap, y_true, y_score
Example #11
0
    def anomalies(self, metrics):
        """Score a mapping of 'YYYY-MM-DD HH:MM:SS' strings to metric values.

        Each key is parsed, converted to an integer epoch timestamp and used
        as the detector's time axis.  Returns a dict mapping the original
        time strings to their anomaly scores, or ``False`` when ``metrics``
        is empty or the detector yields no scores.
        """
        series = {}
        label_by_epoch = {}
        for label in metrics:
            parsed = datetime.datetime.strptime(label, "%Y-%m-%d %H:%M:%S")
            epoch = int(parsed.timestamp())
            series[epoch] = metrics[label]
            label_by_epoch[epoch] = label

        if not series:
            return False

        all_scores = AnomalyDetector(series).get_all_scores()
        if not all_scores:
            return False

        return {
            label_by_epoch[epoch]: value
            for epoch, value in all_scores.iteritems()
        }
Example #12
0
def main(argv):
    """Scan a CANoe ASCII (.asc) log for anomalous CAN events and print them.

    Command-line options (read from ``sys.argv[1:]``; the ``argv`` parameter
    is accepted but, as before, not consulted):

        -i, --input FILE   log file to analyse (required)
        -v                 print every detected anomaly, not only strong ones
        -h, --help         show usage and exit

    Every log line matching the CANoe pattern contributes one event: its
    timestamp (seconds, scaled by 1e6) mapped to the decimal CAN ID.  The
    events are scored with the ``exp_avg_detector`` algorithm.  Without
    ``-v`` only anomalies with a score above 3.4 are printed.

    Exits with status 2 on an option error or when no input file is given.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hi:v", ["help", "input="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))
        usage()
        sys.exit(2)

    input_path = None
    verbose = False
    for o, a in opts:
        if o == "-v":
            verbose = True
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        else:
            assert False, "unhandled option"

    if input_path is None:
        # Previously this fell through to open(None) and died with a TypeError.
        print("no input file given (use -i/--input)")
        usage()
        sys.exit(2)

    # this is the regular expression used to parse CANoe logs in ASCII format (.asc)
    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    # Read the whole log and release the file handle right away.
    with open(input_path) as log_file:
        log_text = log_file.read()

    # event_dict stores the values (timestamp + CAN-ID) extracted from the logs
    event_dict = {}
    for match in re.finditer(regex, log_text):
        # group(2) is the time since measurement start in seconds; scale it by 1e6
        event_time = float(match.group(2)) * 1000000

        # match.group(20) is ID of CAN event in decimal
        # NOTE(review): the ID is stored as the matched string, not a number -
        # confirm the detector accepts string values.
        event_dict[event_time] = match.group(20)

    my_detector = AnomalyDetector(event_dict,
                                  algorithm_name=("exp_avg_detector"))

    # filter events in time series for anomalies
    anomalies = my_detector.get_anomalies()

    print("")

    for attack in anomalies:
        if attack.exact_timestamp not in event_dict:
            continue
        # with "-v" every anomaly is shown, otherwise only those scoring above 3.4
        if verbose or attack.anomaly_score > 3.4:
            print("{timestamp} - ID: {id} - Score: {value}".format(
                timestamp=attack.exact_timestamp,
                id=event_dict[attack.exact_timestamp],
                value=attack.anomaly_score))
Example #13
0
def job():
    """Count ADT_A31 messages per time bucket, score the counts and report anomalies.

    Reads ``../hl7-combined.db``, groups the ADT_A31 messages into buckets of
    ``INTERVAL`` seconds (newest first, at most ``LIMIT`` buckets) and feeds
    the per-bucket counts to an AnomalyDetector configured with
    ``ALGORITH_NAME`` and ``THRESHOLD``.  All scores and all detected
    anomalies are printed.  When an anomaly's score reaches ``THRESHOLD``
    the process is terminated via ``quit()`` after printing an alert line.

    Nothing beyond the start banner is printed when the query returns no
    rows.  The database connection is closed on every exit path.
    """
    print("Run start: " +
          datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'))

    # Connect to either the normal, anomaly or combined database. Note for SQLlite, the detect types
    # line converts from SQLlite datatypes(typically text) to Python native datatypes
    conn = sqlite3.connect('../hl7-combined.db',
                           detect_types=sqlite3.PARSE_DECLTYPES
                           | sqlite3.PARSE_COLNAMES)
    try:
        conn.row_factory = sqlite3.Row
        cur = conn.cursor()

        # Group messages into X second intervals.
        # INTERVAL and LIMIT are concatenated into the SQL text as strings,
        # so they must come from trusted configuration only.
        cur.execute(
            "select messages.MSGTYPE, datetime((strftime('%s', messages.QueueTime) /"
            + INTERVAL + ") * " + INTERVAL +
            ", 'unixepoch') interval, count(*)  count from messages"
            " where msgtype = 'ADT_A31' group by interval order by interval desc limit "
            + LIMIT)

        rows = cur.fetchall()
        if not rows:
            return

        data = {}
        for row in rows:
            # Luminol library requires a 2 column unix timestamp + count
            obs_timestamp = time.mktime(
                datetime.datetime.strptime(row["interval"],
                                           "%Y-%m-%d %H:%M:%S").timetuple())
            data[obs_timestamp] = row["count"]

        print(data)
        # DETECTOR TYPE - see https://github.com/linkedin/luminol/tree/master/src/luminol/algorithms/anomaly_detector_algorithms
        detector = AnomalyDetector(data,
                                   algorithm_name=ALGORITH_NAME,
                                   score_threshold=THRESHOLD)

        all_scores = detector.get_all_scores()
        anom_score = [[
            time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(timestamp)),
            value
        ] for (timestamp, value) in all_scores.iteritems()]

        print("----- ALL SCORES ----- ")
        for entry in anom_score:
            print(entry)

        anomalies = detector.get_anomalies()

        for anomaly in anomalies:
            # The score used to sit on its own line after a trailing comma,
            # so it was evaluated but never printed.
            print("       match: " + time.strftime(
                '%Y-%m-%d %H:%M:%S',
                time.localtime(anomaly.exact_timestamp)) + " " +
                  str(anomaly.anomaly_score))

            # NOTE(review): the original condition also required
            # `anomalies.count > int(LIMIT)`, which compares a bound method
            # with an int: a TypeError on Python 3 and, as far as CPython 2's
            # mixed-type ordering goes, always true. The clause is dropped to
            # keep the effective behaviour; confirm whether a count check
            # (e.g. on len(anomalies)) was intended.
            if anomaly.anomaly_score >= int(THRESHOLD):
                print("ANOMALY DETECTED - NOTIFYING ADMINISTRATOR / CALLING WEBSERVICE ETC")
                quit()

        print("Run End: " +
              datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S') + "\n")
    finally:
        conn.close()
class TestAnomalyDetector(unittest.TestCase):
    """Checks AnomalyDetector construction options against two small fixture series."""

    def setUp(self):
        """Create the fixture series and one default-configured detector per series."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 1, 2, 2, 2, 0]))
        self.s2 = dict(enumerate([0, 1, 2, 2, 2, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_diff_percent_threshold_algorithm(self):
        """
        "diff percent threshold" with +/-20% bounds finds anomalies; without
        its parameters the constructor raises RequiredParametersNotPassed.
        """
        bounds = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20
        }
        found = AnomalyDetector(self.s1,
                                baseline_time_series=self.s2,
                                algorithm_name='diff_percent_threshold',
                                algorithm_params=bounds).get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_absolute_threshold_algorithm(self):
        """
        "absolute threshold" with upper and lower values of 0.2 finds
        anomalies; without its parameters the constructor raises
        RequiredParametersNotPassed.
        """
        bounds = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        found = AnomalyDetector(self.s1,
                                algorithm_name='absolute_threshold',
                                algorithm_params=bounds).get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        With score_threshold=0 exactly one anomaly is reported for s1.
        """
        zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(zero_threshold.get_anomalies()), 1)
        self.assertIsNotNone(zero_threshold.get_anomalies())

    def test_score_only(self):
        """
        score_only=True must leave the anomaly list empty.
        """
        scoring_only = AnomalyDetector(self.s1,
                                       score_only=True,
                                       algorithm_name='derivative_detector')
        regular = AnomalyDetector(self.s1,
                                  algorithm_name='derivative_detector')
        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scoring_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        get_all_scores returns a TimeSeries as long as the input series.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        score_count = len(self.detector1.get_all_scores())
        self.assertEqual(score_count, len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        get_anomalies never returns None for the default detector.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Naming 'default_detector' explicitly reproduces the default detector's scores.
        """
        named_scores = AnomalyDetector(
            self.s1, algorithm_name='default_detector').get_all_scores()
        default_scores = self.detector1.get_all_scores()
        self.assertEqual(named_scores.timestamps, default_scores.timestamps)
        self.assertEqual(named_scores.values, default_scores.values)

    def test_algorithm(self):
        """
        An unknown algorithm name raises AlgorithmNotFound.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        A non-mapping algorithm_params raises ValueError; a custom
        smoothing_factor changes the scores relative to the default detector.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        default_values = self.detector1.get_all_scores().values
        self.assertNotEqual(default_values, tuned.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        With the same score_percent_threshold, two different algorithms
        report different anomalies.
        """
        shared = {'score_percent_threshold': 0.1}
        exp_avg = AnomalyDetector(self.s1,
                                  algorithm_name='exp_avg_detector',
                                  **shared)
        derivative = AnomalyDetector(self.s1,
                                     algorithm_name='derivative_detector',
                                     **shared)
        self.assertNotEqual(derivative.get_anomalies(),
                            exp_avg.get_anomalies())
Example #15
0
class TestAnomalyDetector(unittest.TestCase):
    """Checks AnomalyDetector construction options against two small fixture series."""

    def setUp(self):
        """Create the fixture series and one default-configured detector per series."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 0.5, 1, 1, 1, 0]))
        self.s2 = dict(enumerate([0, 0.5, 1, 1, 1, 0, 0, 0, 0]))
        self.detector1 = AnomalyDetector(self.s1)
        self.detector2 = AnomalyDetector(self.s2)

    def test_threshold(self):
        """
        With score_threshold=0 exactly one anomaly is reported for s1.
        """
        zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
        self.assertEqual(len(zero_threshold.get_anomalies()), 1)
        self.assertIsNotNone(zero_threshold.get_anomalies())

    def test_score_only(self):
        """
        score_only=True must leave the anomaly list empty.
        """
        scoring_only = AnomalyDetector(self.s1,
                                       score_only=True,
                                       algorithm_name='derivative_detector')
        regular = AnomalyDetector(self.s1,
                                  algorithm_name='derivative_detector')
        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scoring_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        get_all_scores returns a TimeSeries as long as the input series.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
        score_count = len(self.detector1.get_all_scores())
        self.assertEqual(score_count, len(self.detector1.time_series))

    def test_get_anomalies(self):
        """
        get_anomalies never returns None for the default detector.
        """
        self.assertIsNotNone(self.detector1.get_anomalies())

    def test_algorithm_DefaultDetector(self):
        """
        Naming 'default_detector' explicitly reproduces the default detector's scores.
        """
        named_scores = AnomalyDetector(
            self.s1, algorithm_name='default_detector').get_all_scores()
        default_scores = self.detector1.get_all_scores()
        self.assertEqual(named_scores.timestamps, default_scores.timestamps)
        self.assertEqual(named_scores.values, default_scores.values)

    def test_algorithm(self):
        """
        An unknown algorithm name raises AlgorithmNotFound.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        A non-mapping algorithm_params raises ValueError; a custom
        smoothing_factor changes the scores relative to the default detector.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')
        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        default_values = self.detector1.get_all_scores().values
        self.assertNotEqual(default_values, tuned.get_all_scores().values)

    def test_anomaly_threshold(self):
        """
        With the same score_percent_threshold, two different algorithms
        report different anomalies.
        """
        shared = {'score_percent_threshold': 0.1}
        exp_avg = AnomalyDetector(self.s1,
                                  algorithm_name='exp_avg_detector',
                                  **shared)
        derivative = AnomalyDetector(self.s1,
                                     algorithm_name='derivative_detector',
                                     **shared)
        self.assertNotEqual(derivative.get_anomalies(),
                            exp_avg.get_anomalies())
class TestAnomalyDetector(unittest.TestCase):
    def setUp(self):
        """Create the two fixture series and a default-configured detector for each."""
        self.s1 = dict(enumerate([0, 0, 0, 0, 1, 2, 2, 2, 0]))
        self.s2 = dict(enumerate([0, 1, 2, 2, 2, 0, 0, 0, 0]))

        self.detector1, self.detector2 = (AnomalyDetector(self.s1),
                                          AnomalyDetector(self.s2))

    def test_custom_algorithm(self):
        """
        A detector built from a custom algorithm class (CustomAlgo) must
        report at least one anomaly for s1 against the s2 baseline.
        """
        found = AnomalyDetector(
            self.s1,
            baseline_time_series=self.s2,
            algorithm_class=CustomAlgo,
            algorithm_params={
                'percent_threshold_upper': 20,
                'percent_threshold_lower': -20
            }).get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

    def test_diff_percent_threshold_algorithm(self):
        """
        "diff percent threshold" with +/-20% bounds finds anomalies; without
        its parameters the constructor raises RequiredParametersNotPassed.
        """
        found = AnomalyDetector(
            self.s1,
            baseline_time_series=self.s2,
            algorithm_name='diff_percent_threshold',
            algorithm_params={
                'percent_threshold_upper': 20,
                'percent_threshold_lower': -20
            }).get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1,
                            baseline_time_series=self.s2,
                            algorithm_name='diff_percent_threshold')

    def test_sign_test_algorithm(self):
        """
        Test "sign test" algorithm with a threshold of 20%

        The baseline ``bs`` is a flat series of 1s over timestamps 1..99 and
        ``ts`` starts as a copy of it.  ``ts`` is then modified step by step
        (each step builds on the state left by the previous one) and the
        number, time window and score of the reported anomalies are checked
        after every step.
        """
        bs = dict()
        bs.update((t, 1) for t in range(1, 100))
        ts = dict(bs)

        # test missing parameters
        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))
        # test over specified
        # NOTE(review): this dict is never passed to AnomalyDetector below, so
        # the assertion repeats the "missing parameters" check above rather
        # than exercising over-specified parameters - confirm the intent.
        algorithm_params = {
            'percent_threshold_upper': 20,
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }

        self.assertRaises(
            exceptions.RequiredParametersNotPassed,
            lambda: AnomalyDetector(self.s1,
                                    baseline_time_series=self.s2,
                                    algorithm_name='sign_test'))
        # Simple tests
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}

        # first no anomalies (ts is still identical to the baseline)
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()

        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 0)

        # Next one anomaly exactly equal to scan window
        # (24 points, t = 10..33, raised just over 20% above the baseline)
        ts.update((t, 1.200001) for t in range(10, 34))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()

        self.assertTrue(anomalies is not None)
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()

        # note the anomaly is larger than scan window
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)

        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomaly below baseline
        # (same 24 points, now lowered just over 20% below the baseline)
        algorithm_params = {
            'percent_threshold_lower': -20,
            'scan_window': 24,
            'confidence': 0.01
        }
        ts.update((t, 0.799999) for t in range(10, 34))

        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 4)
        self.assertEqual(e, 39)

        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by big gap
        # (ts is reset to the baseline first, then lowered on t = 1..24 and t = 60..83)
        ts.update(bs)
        ts.update((t, 0.799999) for t in range(1, 25))
        ts.update((t, 0.799999) for t in range(60, 84))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 2)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 30)

        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        anomaly = anomalies[1]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 54)
        self.assertEqual(e, 89)

        # score should be roughly 98.5
        self.assertGreater(anomaly.anomaly_score, 98)
        self.assertLess(anomaly.anomaly_score, 99)

        # anomalies separated by small gap
        # (raised on t = 1..24 and t = 30..39; expected to merge into one anomaly)
        algorithm_params = {'percent_threshold_upper': 20, 'scan_window': 24}
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.21) for t in range(30, 40))
        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()

        # just one
        self.assertEqual(len(anomalies), 1)
        anomaly = anomalies[0]
        s, e = anomaly.get_time_window()
        self.assertEqual(s, 1)
        self.assertEqual(e, 40)

        # score ~ 99.9
        self.assertGreater(anomaly.anomaly_score, 99)

        # try noisy data
        # (every 6th point of the raised run is set to 1.19, i.e. below the 20% threshold)
        ts.update(bs)
        ts.update((t, 1.21) for t in range(1, 25))
        ts.update((t, 1.19) for t in range(1, 25, 6))
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.01
        }

        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 1)

        # now decrease sensitivity
        # (same data, confidence lowered from 0.01 to 0.0001: no anomaly expected)
        algorithm_params = {
            'percent_threshold_upper': 20,
            'scan_window': 24,
            'confidence': 0.0001
        }

        detector = AnomalyDetector(ts,
                                   baseline_time_series=bs,
                                   algorithm_name='sign_test',
                                   algorithm_params=algorithm_params)
        anomalies = detector.get_anomalies()
        self.assertEqual(len(anomalies), 0)

    def test_sign_test_algorithm_interface(self):
        """
        Run "sign test" with a 0% threshold and an offset of 2, once with
        only an upper bound and once with only a lower bound.

        The baseline is a flat series of 1s over timestamps 1..29.  In both
        runs exactly one anomaly is expected.
        """
        bs = dict((t, 1) for t in range(1, 30))
        common_params = {'offset': 2, 'scan_window': 24, 'confidence': 0.01}

        cases = (
            # upper bound: 3.1 on t = 1..20 (above baseline 1 + offset 2)
            ('percent_threshold_upper', 3.1, range(1, 21)),
            # lower bound: 2.9 on t = 1..24 (below baseline 1 + offset 2)
            ('percent_threshold_lower', 2.9, range(1, 25)),
        )
        for bound_name, level, span in cases:
            algorithm_params = dict(common_params)
            algorithm_params[bound_name] = 0

            ts = dict(bs)
            ts.update((t, level) for t in span)

            detector = AnomalyDetector(ts,
                                       baseline_time_series=bs,
                                       algorithm_name='sign_test',
                                       algorithm_params=algorithm_params)
            anomalies = detector.get_anomalies()

            self.assertIsNotNone(anomalies)
            self.assertEqual(len(anomalies), 1)

    def test_sign_test_algorithm_with_shift(self):
        """
        Check how the ``offset`` parameter affects the "sign test" algorithm.

        Uses a 10% upper threshold with an offset of 1, then a -20% lower
        threshold with an offset of -0.1, both over a flat baseline of 1.
        """
        baseline = dict((t, 1) for t in range(1, 100))
        series = dict(baseline)

        def detect(params):
            # Reads the current contents of ``series`` on every call.
            return AnomalyDetector(series,
                                   baseline_time_series=baseline,
                                   algorithm_name='sign_test',
                                   algorithm_params=params).get_anomalies()

        def fill_window(value):
            # Overwrite points 10..33 (24 points, the same size as scan_window).
            series.update((t, value) for t in range(10, 34))

        upper_params = {
            'percent_threshold_upper': 10,
            'offset': 1,
            'scan_window': 24,
            'confidence': 0.01
        }

        # 1.2 over a baseline of 1: no anomaly is expected.
        fill_window(1.2)
        found = detect(upper_params)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 0)

        # 2.100001 over the same window: exactly one anomaly is expected.
        fill_window(2.100001)
        found = detect(upper_params)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 1)
        hit = found[0]
        window_start, window_end = hit.get_time_window()

        # The reported window is wider than the scan window.
        self.assertEqual(window_start, 4)
        self.assertEqual(window_end, 39)

        # The score is expected to be roughly 98.5.
        self.assertGreater(hit.anomaly_score, 98)
        self.assertLess(hit.anomaly_score, 99)

        # Lower threshold with a negative offset; 'confidence' is left unset.
        lower_params = {
            'percent_threshold_lower': -20,
            'offset': -0.1,
            'scan_window': 24
        }

        # 0.799999: below the baseline, but no anomaly is expected.
        fill_window(0.799999)
        found = detect(lower_params)
        self.assertIsNotNone(found)
        self.assertEqual(len(found), 0)

        # Lowering the window by a further 0.1 must produce one anomaly.
        fill_window(0.699999)
        found = detect(lower_params)
        self.assertEqual(len(found), 1)
        hit = found[0]
        window_start, window_end = hit.get_time_window()
        self.assertEqual(window_start, 4)
        self.assertEqual(window_end, 39)

        # The score is expected to be roughly 98.5.
        self.assertGreater(hit.anomaly_score, 98)
        self.assertLess(hit.anomaly_score, 99)

    def test_absolute_threshold_algorithm(self):
        """
        Run "absolute threshold" with upper and lower thresholds of 0.2 and
        check that omitting the thresholds raises RequiredParametersNotPassed.
        """
        thresholds = {
            'absolute_threshold_value_upper': 0.2,
            'absolute_threshold_value_lower': 0.2
        }
        found = AnomalyDetector(self.s1,
                                algorithm_name='absolute_threshold',
                                algorithm_params=thresholds).get_anomalies()
        self.assertIsNotNone(found)
        self.assertGreater(len(found), 0)

        # Without algorithm_params the constructor itself must fail.
        with self.assertRaises(exceptions.RequiredParametersNotPassed):
            AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

    def test_threshold(self):
        """
        Test score threshold=0: exactly one anomaly must be reported.
        """
        detector = AnomalyDetector(self.s1, score_threshold=0)
        anomalies = detector.get_anomalies()
        # Check for None before len() so a missing result fails with a clear
        # assertion message instead of a TypeError.
        self.assertIsNotNone(anomalies)
        self.assertEqual(len(anomalies), 1)

    def test_score_only(self):
        """
        With score_only=True the detector must report an empty anomaly list,
        while the same algorithm without it still returns a result.
        """
        shared = {'algorithm_name': 'derivative_detector'}
        scores_only = AnomalyDetector(self.s1, score_only=True, **shared)
        regular = AnomalyDetector(self.s1, **shared)

        self.assertIsNotNone(regular.get_anomalies())
        self.assertEqual(len(scores_only.get_anomalies()), 0)

    def test_get_all_scores(self):
        """
        get_all_scores must return a TimeSeries with one score per input point.
        """
        self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)

        score_count = len(self.detector1.get_all_scores())
        point_count = len(self.detector1.time_series)
        self.assertEqual(score_count, point_count)

    def test_get_anomalies(self):
        """
        get_anomalies on the default detector must not return None.
        """
        found = self.detector1.get_anomalies()
        self.assertIsNotNone(found)

    def test_algorithm_DefaultDetector(self):
        """
        Naming 'default_detector' explicitly must give the same scores as
        constructing the detector without an algorithm name.
        """
        explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')

        for field in ('timestamps', 'values'):
            explicit_scores = explicit.get_all_scores()
            implicit_scores = self.detector1.get_all_scores()
            self.assertEqual(getattr(explicit_scores, field),
                             getattr(implicit_scores, field))

    def test_algorithm(self):
        """
        An unknown algorithm name must raise AlgorithmNotFound.
        """
        with self.assertRaises(exceptions.AlgorithmNotFound):
            AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

    def test_algorithm_params(self):
        """
        algorithm_params must be rejected when it is a string, and a custom
        smoothing_factor must change the scores of 'exp_avg_detector'
        relative to the default detector.
        """
        with self.assertRaises(ValueError):
            AnomalyDetector(self.s1,
                            algorithm_name='exp_avg_detector',
                            algorithm_params='0')

        tuned = AnomalyDetector(self.s1,
                                algorithm_name="exp_avg_detector",
                                algorithm_params={'smoothing_factor': 0.3})
        default_values = self.detector1.get_all_scores().values
        tuned_values = tuned.get_all_scores().values
        self.assertNotEqual(default_values, tuned_values)

    def test_anomaly_threshold(self):
        """
        With the same score_percent_threshold, 'exp_avg_detector' and
        'derivative_detector' must report different anomalies.
        """
        exp_avg = AnomalyDetector(self.s1,
                                  score_percent_threshold=0.1,
                                  algorithm_name='exp_avg_detector')
        derivative = AnomalyDetector(self.s1,
                                     score_percent_threshold=0.1,
                                     algorithm_name='derivative_detector')

        derivative_anomalies = derivative.get_anomalies()
        exp_avg_anomalies = exp_avg.get_anomalies()
        self.assertNotEqual(derivative_anomalies, exp_avg_anomalies)
Example #17
0
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector's constructor options and accessors.
  """

  def setUp(self):
    """
    Build two short series and a default detector for each of them.
    """
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0.5, 5: 1, 6: 1, 7: 1, 8: 0}
    self.s2 = {0: 0, 1: 0.5, 2: 1, 3: 1, 4: 1, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_threshold(self):
    """
    With score_threshold=0 exactly one anomaly must be reported.
    """
    zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
    self.assertEqual(len(zero_threshold.get_anomalies()), 1)
    self.assertIsNotNone(zero_threshold.get_anomalies())

  def test_score_only(self):
    """
    With score_only=True get_anomalies must return None.
    """
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(regular.get_anomalies())
    self.assertIsNone(scores_only.get_anomalies())

  def test_get_all_scores(self):
    """
    get_all_scores must return a TimeSeries with one score per input point.
    """
    self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
    score_count = len(self.detector1.get_all_scores())
    self.assertEqual(score_count, len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    get_anomalies on the default detector must not return None.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Naming 'default_detector' explicitly must match the implicit default.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    for field in ('timestamps', 'values'):
      explicit_scores = explicit.get_all_scores()
      implicit_scores = self.detector1.get_all_scores()
      self.assertEqual(getattr(explicit_scores, field), getattr(implicit_scores, field))

  def test_algorithm(self):
    """
    An unknown algorithm name must raise AlgorithmNotFound.
    """
    with self.assertRaises(exceptions.AlgorithmNotFound):
      AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

  def test_algorithm_params(self):
    """
    A string algorithm_params must raise InvalidDataFormat, and a custom
    smoothing_factor must change the scores relative to the default detector.
    """
    with self.assertRaises(exceptions.InvalidDataFormat):
      AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
    tuned = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    default_values = self.detector1.get_all_scores().values
    self.assertNotEqual(default_values, tuned.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    With the same score_percentile_threshold, 'exp_avg_detector' and
    'derivative_detector' must report different anomalies.
    """
    exp_avg = AnomalyDetector(self.s1, score_percentile_threshold=0.1, algorithm_name='exp_avg_detector')
    derivative = AnomalyDetector(self.s1, score_percentile_threshold=0.1, algorithm_name='derivative_detector')
    derivative_anomalies = derivative.get_anomalies()
    self.assertNotEqual(derivative_anomalies, exp_avg.get_anomalies())
Example #18
0
def main(argv):
    """
    Detect anomalies in one capture file and report correlated periods.

    Reads the files given with ``-i/--input`` and ``-c/--correlate``,
    extracts a ``{time: ID}`` series from each with a regular expression,
    runs the 'exp_avg_detector' on the input series and prints every anomaly
    window in which the two series correlate (threshold 0.8).

    :param argv: not read by this function; options are always taken from
        ``sys.argv[1:]``. The parameter is kept so existing callers still work.
    """
    try:
        # Each long option must be its own list item: without the comma,
        # "input=" "correlate=" is concatenated into one bogus option name.
        opts, args = getopt.getopt(sys.argv[1:], "hi:c:v",
                                   ["help", "input=", "correlate="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err))  # will print something like "option -a not recognized"
        usage()
        sys.exit(2)

    input_path = None
    correlate_path = None
    for o, a in opts:
        if o == "-v":
            # Accepted for compatibility; nothing below reads a verbose flag.
            continue
        elif o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-i", "--input"):
            input_path = a
        elif o in ("-c", "--correlate"):
            correlate_path = a
        else:
            assert False, "unhandled option"

    # Both files are required; fail with the usage text rather than with a
    # TypeError from open(None).
    if input_path is None or correlate_path is None:
        usage()
        sys.exit(2)

    regex = r"(\s+)([+-]?\d*\.\d+)(?![-+0-9\.])(\s+)(\d+)(\s+)(\w+)(\s+)(\w+)(\s+)([a-z])(\s+)(\d+)(?:(?!X).)*(Length = )(\d+)(\s+)(BitCount = )(\d+)(\s+)(ID = )(\d+)"

    pattern = re.compile(regex, re.UNICODE)

    def read_series(path):
        # Map each matched time (group 2, scaled by 1e6) to the captured
        # "ID = " value (group 20).
        with open(path) as handle:
            text = handle.read()
        series = {}
        for match in pattern.finditer(text):
            series[float(match.group(2)) * 1000000] = match.group(20)
        return series

    mydict1 = read_series(input_path)
    mydict2 = read_series(correlate_path)

    my_detector1 = AnomalyDetector(mydict1,
                                   algorithm_name=("exp_avg_detector"))

    anomalies = my_detector1.get_anomalies()
    for a in anomalies:
        time_period = a.get_time_window()
        my_correlator = Correlator(mydict1, mydict2, time_period)

        if my_correlator.is_correlated(threshold=0.8):
            print("mydict2 correlate with mydict at time period (%d, %d)" % time_period)
Example #19
0
from luminol.anomaly_detector import AnomalyDetector
import time

# Score the series named by 'Turbidity.csv' with AnomalyDetector's default
# settings (presumably a path to a CSV file -- confirm against the library).
my_detector = AnomalyDetector('Turbidity.csv')
score = my_detector.get_all_scores()

# Print one "<local time>, <score>" line for every point with a positive score.
for (timestamp, value) in score.iteritems():
    # NOTE(review): assumes timestamps are epoch seconds, which is what
    # time.localtime expects -- confirm.
    t_str = time.strftime('%y-%m-%d %H:%M:%S', time.localtime(timestamp))
    if value > 0:
        print(f'{t_str}, {value}')
Example #20
0
class RCA(object):
    """
    Detect anomalies in one metric and correlate each anomalous period
    against a list of related metrics.
    """

    def __init__(self, metrix, related_metrices):
        """
        Load the metric, run anomaly detection on it and analyze the result.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
        :param list related_metrices: a list of time series.
        """
        self.metrix = self._load(metrix)
        self.anomaly_detector = AnomalyDetector(metrix)
        self.related_metrices = related_metrices
        self.anomalies = self.anomaly_detector.get_anomalies()
        self._analyze()

    def _load(self, metrix):
        """
        Load time series.

        :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
        :return TimeSeries: a TimeSeries object.
        """
        if isinstance(metrix, TimeSeries):
            return metrix
        raw = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
        return TimeSeries(raw)

    def _analyze(self):
        """
        Correlate every detected anomaly with each related metric.

        Results are stored on ``self.output`` (keyed by the anomaly's exact
        timestamp) and ``self.output_by_name`` (keyed by the related metric).
        """
        by_timestamp = defaultdict(list)
        by_metric = defaultdict(list)
        scores = self.anomaly_detector.get_all_scores()

        for anomaly in self.anomalies or []:
            start_t, end_t = anomaly.get_time_window()
            anchor = anomaly.exact_timestamp

            # Pad the window by half its length on each side, or by 30 when
            # that padding would be zero.
            room = (end_t - start_t) / 2 or 30
            window_start = start_t - room
            window_end = end_t + room
            cropped = scores.crop(window_start, window_end)

            # Keep widening until at least two score points are covered.
            while len(cropped) < 2:
                window_start -= room
                window_end += room
                cropped = scores.crop(window_start, window_end)

            # Correlate with other metrics
            for entry in self.related_metrices:
                try:
                    correlation = Correlator(
                        self.metrix,
                        entry,
                        time_period=(window_start, window_end),
                        use_anomaly_score=True).get_correlation_result()
                    details = correlation.__dict__
                    by_timestamp[anchor].append(
                        (window_start, window_end, details, entry))
                    by_metric[entry].append(
                        (window_start, window_end, details))
                except exceptions.NotEnoughDataPoints:
                    # Entries without enough data in the window are skipped.
                    pass

        self.output = by_timestamp
        self.output_by_name = by_metric
Example #21
0
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector covering the built-in algorithms, a custom
  algorithm class and the constructor's optional parameters.
  """

  def setUp(self):
    """
    Build two short series and a default detector for each of them.
    """
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
    self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}

    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def _sign_test_anomalies(self, ts, bs, algorithm_params):
    """
    Run the 'sign_test' algorithm on ts against baseline bs and return the
    anomalies reported by the detector.
    """
    detector = AnomalyDetector(ts, baseline_time_series=bs, algorithm_name='sign_test',
                               algorithm_params=algorithm_params)
    return detector.get_anomalies()

  def test_custom_algorithm(self):
    """
    Test passing a custom algorithm class
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    Test "diff percent threshold" algorithm with a threshold of 20%
    """
    detector = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                               algorithm_params={'percent_threshold_upper': 20, 'percent_threshold_lower': -20})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='diff_percent_threshold'))

  def test_sign_test_algorithm(self):
    """
    Test "sign test" algorithm with a threshold of 20%
    """
    bs = {t: 1 for t in range(1, 100)}
    ts = dict(bs)

    # test missing parameters
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, baseline_time_series=self.s2,
                                              algorithm_name='sign_test'))

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 20,
                        'percent_threshold_lower': -20,
                        'scan_window': 24,
                        'confidence': 0.01}

    # first no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    ts.update({t: 1.200001 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()

    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline
    ts.update({t: 0.799999 for t in range(10, 34)})

    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by big gap
    ts.update(bs)
    ts.update({t: 0.799999 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(60, 84)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 2)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 30)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

    anomaly = anomalies[1]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 54)
    self.assertEqual(e, 89)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomalies separated by small gap
    ts.update(bs)
    ts.update({t: 1.21 for t in range(1, 25)})
    ts.update({t: 1.21 for t in range(30, 40)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    # just one
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 1)
    self.assertEqual(e, 40)

    # score ~ 99.9
    self.assertGreater(anomaly.anomaly_score, 99)

  def test_sign_test_algorithm_with_shift(self):
    """
    Test "sign test" algorithm with the 'shift' parameter: a 10% upper
    threshold with a shift of 1, then a -20% lower threshold with a shift
    of -0.1.
    """
    bs = {t: 1 for t in range(1, 100)}

    # Simple tests
    algorithm_params = {'percent_threshold_upper': 10,
                        'shift': 1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts = dict(bs)
    # 1.2 over a baseline of 1: no anomaly is expected with the shift applied
    ts.update({t: 1.2 for t in range(10, 34)})

    # first no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # Next one anomaly exactly equal to scan window
    # uses bias
    ts.update({t: 2.100001 for t in range(10, 34)})
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()

    # note the anomaly is larger than scan window
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

    # anomaly below baseline but not below baseline with shift
    algorithm_params = {'percent_threshold_lower': -20,
                        'shift': -0.1,
                        'scan_window': 24,
                        'confidence': 0.01}
    ts.update({t: 0.799999 for t in range(10, 34)})
    # no anomalies
    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)

    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 0)

    # lower the time series by 0.1
    ts.update({t: 0.699999 for t in range(10, 34)})

    anomalies = self._sign_test_anomalies(ts, bs, algorithm_params)
    self.assertEqual(len(anomalies), 1)
    anomaly = anomalies[0]
    s, e = anomaly.get_time_window()
    self.assertEqual(s, 4)
    self.assertEqual(e, 39)

    # score should be roughly 98.5
    self.assertGreater(anomaly.anomaly_score, 98)
    self.assertLess(anomaly.anomaly_score, 99)

  def test_absolute_threshold_algorithm(self):
    """
    Test "absolute threshold" algorithm with a upper and lower threshold of 0.2
    """
    detector = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                               algorithm_params={'absolute_threshold_value_upper': 0.2,
                                                 'absolute_threshold_value_lower': 0.2})
    anomalies = detector.get_anomalies()
    self.assertIsNotNone(anomalies)
    self.assertGreater(len(anomalies), 0)
    self.assertRaises(exceptions.RequiredParametersNotPassed,
                      lambda: AnomalyDetector(self.s1, algorithm_name='absolute_threshold'))

  def test_threshold(self):
    """
    Test score threshold=0
    """
    detector = AnomalyDetector(self.s1, score_threshold=0)
    anomalies = detector.get_anomalies()
    # Check for None before len() so a missing result fails clearly.
    self.assertIsNotNone(anomalies)
    self.assertEqual(len(anomalies), 1)

  def test_score_only(self):
    """
    Test that score_only parameter doesn't give anomalies
    """
    detector = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    detector2 = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(detector2.get_anomalies())
    self.assertEqual(len(detector.get_anomalies()), 0)

  def test_get_all_scores(self):
    """
    Test if function get_all_scores works as expected.
    """
    self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
    self.assertEqual(len(self.detector1.get_all_scores()), len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    Test if anomaly is found as expected.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Test if optional parameter algorithm works as expected.
    """
    detector = AnomalyDetector(self.s1, algorithm_name='default_detector')
    self.assertEqual(detector.get_all_scores().timestamps, self.detector1.get_all_scores().timestamps)
    self.assertEqual(detector.get_all_scores().values, self.detector1.get_all_scores().values)

  def test_algorithm(self):
    """
    Test if exception AlgorithmNotFound is raised as expected.
    """
    self.assertRaises(exceptions.AlgorithmNotFound, lambda: AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm'))

  def test_algorithm_params(self):
    """
    Test if optional parameter algorithm_params works as expected.
    """
    self.assertRaises(ValueError, lambda: AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0'))
    detector = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    self.assertNotEqual(self.detector1.get_all_scores().values, detector.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    Test if score_percent_threshold works as expected.
    """
    detector = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='exp_avg_detector')
    detector1 = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='derivative_detector')
    self.assertNotEqual(detector1.get_anomalies(), detector.get_anomalies())
Example #22
0
class RCA(object):
  """
  Detect anomalies in one metric and correlate each anomalous period
  against a list of related metrics.
  """

  def __init__(self, metrix, related_metrices):
    """
    Load the metric, run anomaly detection on it and analyze the result.

    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str)
    :param list related_metrices: a list of time series.
    """
    self.metrix = self._load(metrix)
    self.anomaly_detector = AnomalyDetector(metrix)
    self.related_metrices = related_metrices
    self.anomalies = self.anomaly_detector.get_anomalies()
    self._analyze()

  def _load(self, metrix):
    """
    Load time series.

    :param metrix: a TimeSeries, a dictionary or a path to a csv file(str).
    :return TimeSeries: a TimeSeries object.
    """
    if isinstance(metrix, TimeSeries):
      return metrix
    raw = metrix if isinstance(metrix, dict) else utils.read_csv(metrix)
    return TimeSeries(raw)

  def _analyze(self):
    """
    Correlate every detected anomaly with each related metric.

    Results are stored on ``self.output`` (keyed by the anomaly's exact
    timestamp) and ``self.output_by_name`` (keyed by the related metric).
    """
    by_timestamp = defaultdict(list)
    by_metric = defaultdict(list)
    scores = self.anomaly_detector.get_all_scores()

    for anomaly in self.anomalies or []:
      start_t, end_t = anomaly.get_time_window()
      anchor = anomaly.exact_timestamp

      # Pad the window by half its length on each side, or by 30 when that
      # padding would be zero.
      room = (end_t - start_t) / 2 or 30
      window_start = start_t - room
      window_end = end_t + room
      cropped = scores.crop(window_start, window_end)

      # Keep widening until at least two score points are covered.
      while len(cropped) < 2:
        window_start -= room
        window_end += room
        cropped = scores.crop(window_start, window_end)

      # Correlate with other metrics
      for entry in self.related_metrices:
        try:
          correlation = Correlator(self.metrix, entry, time_period=(window_start, window_end),
                                   use_anomaly_score=True).get_correlation_result()
          details = correlation.__dict__
          by_timestamp[anchor].append((window_start, window_end, details, entry))
          by_metric[entry].append((window_start, window_end, details))
        except exceptions.NotEnoughDataPoints:
          # Entries without enough data in the window are skipped.
          pass

    self.output = by_timestamp
    self.output_by_name = by_metric
class TestAnomalyDetector(unittest.TestCase):
  """
  Tests for AnomalyDetector: threshold-based algorithms, a custom algorithm
  class and the constructor's optional parameters.
  """

  def setUp(self):
    """
    Build two short series and a default detector for each of them.
    """
    self.s1 = {0: 0, 1: 0, 2: 0, 3: 0, 4: 1, 5: 2, 6: 2, 7: 2, 8: 0}
    self.s2 = {0: 0, 1: 1, 2: 2, 3: 2, 4: 2, 5: 0, 6: 0, 7: 0, 8: 0}
    self.detector1 = AnomalyDetector(self.s1)
    self.detector2 = AnomalyDetector(self.s2)

  def test_custom_algorithm(self):
    """
    A custom algorithm class passed via algorithm_class must find anomalies.
    """
    percent_limits = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_class=CustomAlgo,
                            algorithm_params=percent_limits).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)

  def test_diff_percent_threshold_algorithm(self):
    """
    "diff percent threshold" with +/-20% must find anomalies, and must raise
    RequiredParametersNotPassed when the thresholds are omitted.
    """
    percent_limits = {'percent_threshold_upper': 20, 'percent_threshold_lower': -20}
    found = AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold',
                            algorithm_params=percent_limits).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
      AnomalyDetector(self.s1, baseline_time_series=self.s2, algorithm_name='diff_percent_threshold')

  def test_absolute_threshold_algorithm(self):
    """
    "absolute threshold" with upper and lower values of 0.2 must find
    anomalies, and must raise RequiredParametersNotPassed without them.
    """
    absolute_limits = {'absolute_threshold_value_upper': 0.2,
                       'absolute_threshold_value_lower': 0.2}
    found = AnomalyDetector(self.s1, algorithm_name='absolute_threshold',
                            algorithm_params=absolute_limits).get_anomalies()
    self.assertIsNotNone(found)
    self.assertGreater(len(found), 0)
    with self.assertRaises(exceptions.RequiredParametersNotPassed):
      AnomalyDetector(self.s1, algorithm_name='absolute_threshold')

  def test_threshold(self):
    """
    With score_threshold=0 exactly one anomaly must be reported.
    """
    zero_threshold = AnomalyDetector(self.s1, score_threshold=0)
    self.assertEqual(len(zero_threshold.get_anomalies()), 1)
    self.assertIsNotNone(zero_threshold.get_anomalies())

  def test_score_only(self):
    """
    With score_only=True the detector must report an empty anomaly list.
    """
    scores_only = AnomalyDetector(self.s1, score_only=True, algorithm_name='derivative_detector')
    regular = AnomalyDetector(self.s1, algorithm_name='derivative_detector')
    self.assertIsNotNone(regular.get_anomalies())
    self.assertEqual(len(scores_only.get_anomalies()), 0)

  def test_get_all_scores(self):
    """
    get_all_scores must return a TimeSeries with one score per input point.
    """
    self.assertIsInstance(self.detector1.get_all_scores(), TimeSeries)
    score_count = len(self.detector1.get_all_scores())
    self.assertEqual(score_count, len(self.detector1.time_series))

  def test_get_anomalies(self):
    """
    get_anomalies on the default detector must not return None.
    """
    self.assertIsNotNone(self.detector1.get_anomalies())

  def test_algorithm_DefaultDetector(self):
    """
    Naming 'default_detector' explicitly must match the implicit default.
    """
    explicit = AnomalyDetector(self.s1, algorithm_name='default_detector')
    for field in ('timestamps', 'values'):
      explicit_scores = explicit.get_all_scores()
      implicit_scores = self.detector1.get_all_scores()
      self.assertEqual(getattr(explicit_scores, field), getattr(implicit_scores, field))

  def test_algorithm(self):
    """
    An unknown algorithm name must raise AlgorithmNotFound.
    """
    with self.assertRaises(exceptions.AlgorithmNotFound):
      AnomalyDetector(self.s1, algorithm_name='NotValidAlgorithm')

  def test_algorithm_params(self):
    """
    A string algorithm_params must raise ValueError, and a custom
    smoothing_factor must change the scores relative to the default detector.
    """
    with self.assertRaises(ValueError):
      AnomalyDetector(self.s1, algorithm_name='exp_avg_detector', algorithm_params='0')
    tuned = AnomalyDetector(self.s1, algorithm_name="exp_avg_detector", algorithm_params={'smoothing_factor': 0.3})
    default_values = self.detector1.get_all_scores().values
    self.assertNotEqual(default_values, tuned.get_all_scores().values)

  def test_anomaly_threshold(self):
    """
    With the same score_percent_threshold, 'exp_avg_detector' and
    'derivative_detector' must report different anomalies.
    """
    exp_avg = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='exp_avg_detector')
    derivative = AnomalyDetector(self.s1, score_percent_threshold=0.1, algorithm_name='derivative_detector')
    derivative_anomalies = derivative.get_anomalies()
    self.assertNotEqual(derivative_anomalies, exp_avg.get_anomalies())
Example #24
0
def get_timeseries(
        phenomenon_time_range,
        num_time_slots,
        get_observations,
        detector_method='bitmap_detector',  # LinkedIn bitmap
        detector_params=None,
        anomaly_breaks=DEFAULT_ANOMALY_BREAKS,
        value_breaks=DEFAULT_VALUE_BREAKS,
        extend_range=True,
        baseline_time_range=None,
        shift=True,
        use_baseline=True):
    """Build the value series and anomaly scores for a time range.

    Args:
        phenomenon_time_range: Returned unchanged under the
            'phenomenon_time_range' key (except when there are no
            observations, where an empty DateTimeTZRange is returned).
        num_time_slots: Number of entries kept from the (possibly extended)
            series, starting at the lower extension offset.
        get_observations: Callable invoked as
            ``get_observations(lower_ext, upper_ext)``; must return a list of
            observations exposing ``phenomenon_time_range.lower.timestamp()``
            and ``result``.
        detector_method: Algorithm name handed to AnomalyDetector.
        detector_params: Algorithm parameters handed to AnomalyDetector. The
            keys "lag_window_size" and "future_window_size" are read here.
            Defaults to precision=8, lag_window_size=20,
            future_window_size=20, chunk_size=2. The dict passed in is never
            modified.
        anomaly_breaks: Breaks passed to ``percentiles`` for anomaly scores.
        value_breaks: Breaks passed to ``percentiles`` for property values.
        extend_range: When true, extra observations are requested around the
            range, sized from the detector window parameters.
        baseline_time_range: Only tested against None. A baseline is built
            (from the observations themselves) only when this is None; when
            a range is given, the detector currently runs without baseline.
        shift: With ``use_baseline``, selects how the extension is split
            before/after the range.
        use_baseline: Whether to feed a baseline series to the detector.

    Returns:
        dict with the keys 'phenomenon_time_range', 'property_values',
        'property_value_percentiles', 'property_anomaly_rates' and
        'property_anomaly_percentiles'.

    Raises:
        Exception: If ``get_observations`` does not return a list.
    """
    if detector_params is None:
        detector_params = {
            "precision": 8,
            "lag_window_size": 20,
            "future_window_size": 20,
            "chunk_size": 2
        }
    else:
        # Work on a private copy: the window sizes may be adjusted below and
        # that must not leak into the caller's dict or into later calls.
        detector_params = dict(detector_params)

    lower_ext = 0
    upper_ext = 0

    if extend_range:
        lower_ext = detector_params["lag_window_size"]
        upper_ext = detector_params["future_window_size"]

        if use_baseline:
            if shift:
                upper_ext = 0
            else:
                lower_ext = int(upper_ext / 2)
                upper_ext -= lower_ext + 1

    observations = get_observations(lower_ext, upper_ext)

    if not isinstance(observations, list):
        raise Exception('property_values should be array')

    if not observations:
        return {
            'phenomenon_time_range': DateTimeTZRange(),
            'property_values': [],
            'property_value_percentiles': {},
            'property_anomaly_rates': [],
            'property_anomaly_percentiles': {},
        }

    property_values = observations_to_property_values(observations)

    valid_values = [value for value in property_values if value is not None]
    valid_count = len(valid_values)

    if valid_count == 1:
        # NOTE(review): unlike the other return paths, this one returns the
        # unsliced property_values and a single-element rates list - confirm
        # that callers expect this shape.
        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values,
            'property_value_percentiles': {
                # The median of a single valid value is that value, wherever
                # it sits in the series (index 0 may hold None).
                50: valid_values[0]
            },
            'property_anomaly_rates': [0],
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    minimal_points_in_windows = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS

    if use_baseline:
        minimal_points_in_windows /= 2

    window_length = detector_params["future_window_size"]
    if not use_baseline:
        window_length += detector_params["lag_window_size"]

    # Series with more than the minimum of points but not more than the
    # window length get window sizes derived from the valid point count.
    if minimal_points_in_windows < valid_count <= window_length:
        smallest_window = DEFAULT_BITMAP_MOD_MINIMAL_POINTS_IN_WINDOWS / 2
        detector_params["future_window_size"] = int(
            max(smallest_window,
                valid_count * DEFAULT_BITMAP_MOD_LEADING_WINDOW_SIZE_PCT))
        detector_params["lag_window_size"] = int(
            max(smallest_window,
                valid_count * DEFAULT_BITMAP_MOD_LAGGING_WINDOW_SIZE_PCT))

    # Part of the series that is reported back to the caller.
    time_slots = slice(lower_ext, lower_ext + num_time_slots)

    property_value_percentiles = percentiles(property_values[time_slots],
                                             value_breaks)

    # A single valid value has already been returned above, so this branch
    # is reached only when no valid value exists.
    if valid_count <= 1:
        property_anomaly_rates = [
            None if value is None else 0
            for value in property_values[time_slots]
        ]

        return {
            'phenomenon_time_range': phenomenon_time_range,
            'property_values': property_values[time_slots],
            'property_value_percentiles': property_value_percentiles,
            'property_anomaly_rates': property_anomaly_rates,
            'property_anomaly_percentiles': {
                0: 0
            },
        }

    obs_reduced = {
        obs.phenomenon_time_range.lower.timestamp(): obs.result
        for obs in observations
    }

    detector_series = [obs_reduced]
    if use_baseline and baseline_time_range is None:
        # The baseline is the observed series itself (as a separate dict).
        detector_series.append(dict(obs_reduced))

    detector = AnomalyDetector(*detector_series,
                               algorithm_name=detector_method,
                               algorithm_params=detector_params,
                               score_only=True)

    property_anomaly_rates = detector.get_all_scores().values

    # NOTE(review): the anomaly percentiles are computed before the None
    # placeholders are inserted below, i.e. the slice is taken on the
    # not-yet-aligned score list - confirm this is intended.
    property_anomaly_percentiles = percentiles(
        property_anomaly_rates[time_slots], anomaly_breaks)

    # Re-insert None at every position without a property value so that the
    # scores line up index-by-index with property_values.
    for index, value in enumerate(property_values):
        if value is None:
            property_anomaly_rates.insert(index, None)

    return {
        'phenomenon_time_range': phenomenon_time_range,
        'property_values': property_values[time_slots],
        'property_value_percentiles': property_value_percentiles,
        'property_anomaly_rates': property_anomaly_rates[time_slots],
        'property_anomaly_percentiles': property_anomaly_percentiles,
    }
Example #25
0
        ts = ts.loc[
            ts['stage_parallel'] ==
            'Puding Mixing #1']  # 'Puding Mixing #1', 'Sterilization #111', 'Storage tank #1'
        value_col = 'sensor_value'
        # TODO: ADDING ZEROS JUST FOR PLAYING. REMOVE LATER
        ts[value_col] = ts[value_col].transform(
            lambda x: x.fillna(x.median(), inplace=False))
    else:
        raise Exception('Unknown example.')

    # run anomaly detection algorithm
    keys = ts['epoch']
    values = ts[value_col]
    ts_dict = dict(zip(keys, values))

    algorithm_name = 'exp_avg_detector'

    anomaly_detector = AnomalyDetector(ts_dict, algorithm_name=algorithm_name)
    anomalies = anomaly_detector.get_anomalies()
    anomaly_scores = anomaly_detector.get_all_scores()

    # plot results
    plot_ts_and_anomalies(ts,
                          value_col,
                          anomalies,
                          anomaly_scores,
                          ts_only=False,
                          dir='/Users/yuval/Desktop/',
                          show=True,
                          plotly=True)