def test_strangeness_median_fit(self): strg = Strangeness(measure="median", k=10) X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) strg.fit(X) self.assertTrue(strg.h.med is not None) self.assertTrue(np.allclose(strg.h.med, [4, 5, 6])) self.assertEqual(strg.get([1, 2, 3]), 27**0.5)
def test_strangeness_predict_input_wrong(self): strg = Strangeness(measure="median", k=10) X = [[1, 2, 3], [4, 5, 6], [7, 8, 9]] strg.fit(X) with self.assertRaises(InputValidationError): strg.get([]) with self.assertRaises(InputValidationError): strg.get("foo")
def __init__(self, w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6): utils.validate_individual_deviation_params(w_martingale, non_conformity, k, dev_threshold) self.w_martingale = w_martingale self.non_conformity = non_conformity self.k = k self.dev_threshold = dev_threshold self.strg = Strangeness(non_conformity, k) self.scores = [] self.T, self.S, self.P, self.M = [], [], [], [] self.mart = 0 self.marts = [0, 0, 0]
def test_strangeness_fit_input_wrong(self): strg = Strangeness(measure="median", k=10) with self.assertRaises(InputValidationError): strg.fit([]) with self.assertRaises(InputValidationError): strg.fit("foo")
def test_strangeness_knn_fit(self): strg = Strangeness(measure="knn", k=2) X = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) strg.fit(X) self.assertTrue(strg.h.X is not None) self.assertEqual(strg.get([7, 8, 9]), 27**0.5 / 2)
def test_strangeness_knn_not_fitted(self): with self.assertRaises(NotFittedError): strg = Strangeness(measure="knn", k=10) strg.get([1, 2, 3])
def test_strangeness_wrong_measure(self): with self.assertRaises(InputValidationError): strg = Strangeness(measure="foo", k=10)
def test_strangeness_knn_wrong_k(self): with self.assertRaises(InputValidationError): strg = Strangeness(measure="knn", k=0)
def test_strangeness_knn_general(self): strg = Strangeness(measure="knn", k=10) self.assertEqual(strg.h.X, None) self.assertFalse(strg.is_fitted())
def test_strangeness_median_general(self): strg = Strangeness(measure="median", k=10) self.assertEqual(strg.h.med, None) self.assertFalse(strg.is_fitted())
class IndividualDeviation: '''Deviation detection for a single/individual unit Parameters: ---------- w_martingale : int Window used to compute the deviation level based on the last w_martingale samples. non_conformity : string Strangeness (or non-conformity) measure used to compute the deviation level. It must be either "median" or "knn" k : int Parameter used for k-nearest neighbours, when non_conformity is set to "knn" dev_threshold : float Threshold in [0,1] on the deviation level ''' def __init__(self, w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6): utils.validate_individual_deviation_params(w_martingale, non_conformity, k, dev_threshold) self.w_martingale = w_martingale self.non_conformity = non_conformity self.k = k self.dev_threshold = dev_threshold self.strg = Strangeness(non_conformity, k) self.scores = [] self.T, self.S, self.P, self.M = [], [], [], [] self.mart = 0 self.marts = [0, 0, 0] # =========================================== def fit(self, X): '''Fit the anomaly detector to the data X (assumed to be normal) Parameters: ----------- X : array-like, shape (n_samples, n_features) Samples assumed to be not deviating from normal Returns: -------- self : object ''' self.strg.fit(X) self.scores = self.strg.get_fit_scores() return self # =========================================== def predict(self, dtime, x): '''Update the deviation level based on the new test sample x Parameters: ----------- dtime : datetime datetime corresponding to the sample x x : array-like, shape (n_features,) Sample for which the strangeness, p-value and deviation level are computed Returns: -------- strangeness : float Strangeness of x with respect to samples in Xref pval : float, in [0, 1] p-value that represents the proportion of samples in Xref that are stranger than x. deviation : float, in [0, 1] Normalized deviation level updated based on the last w_martingale steps ''' self.T.append(dtime) # TODO: this is not necessarily required strangeness = self.strg.get(x) self.S.append(strangeness) pval = pvalue(strangeness, self.scores) self.P.append(pval) deviation = self._update_martingale(pval) self.M.append(deviation) is_deviating = deviation > self.dev_threshold return DeviationContext(strangeness, pval, deviation, is_deviating) # =========================================== def _update_martingale(self, pval): '''Incremental additive martingale over the last w_martingale steps. Parameters: ----------- pval : int, in [0, 1] The most recent p-value Returns: -------- normalized_one_sided_mart : float, in [0, 1] Deviation level. A normalized version of the current martingale value. ''' betting = lambda p: -p + .5 self.mart += betting(pval) self.marts.append(self.mart) w = min(self.w_martingale, len(self.marts)) mat_in_window = self.mart - self.marts[-w] normalized_mart = (mat_in_window) / (.5 * w) normalized_one_sided_mart = max(normalized_mart, 0) return normalized_one_sided_mart # =========================================== def plot_deviations(self): '''Plots the p-value and deviation level over time. ''' plt.scatter(self.T, self.P, marker=".") plt.plot(self.T, self.M) plt.axhline(y=self.dev_threshold, color='r', linestyle='--') plt.show()