def train_cosmo(self, data, w_martingale=15, non_conformity="median", k=20):
    """Create an inductive anomaly detector and fit it on ``data``.

    The fitted detector is stored on ``self.model``; nothing is returned.

    Args:
        data: Object exposing ``to_numpy()`` (e.g. a pandas DataFrame).
            All of its rows are used for fitting.
        w_martingale: Forwarded to ``IndividualAnomalyInductive``.
        non_conformity: Forwarded to ``IndividualAnomalyInductive``.
        k: Forwarded to ``IndividualAnomalyInductive``.
    """
    self.model = IndividualAnomalyInductive(
        w_martingale=w_martingale,
        non_conformity=non_conformity,
        k=k,
    )
    # The detector is fitted on the full array, not on a subset of it.
    self.model.fit(data.to_numpy())
def __init__(self, nb_units, ids_target_units, w_ref_group="7days", w_martingale=15, non_conformity="median", k=20,
             dev_threshold=.6, transform=False, w_transform=20):
    """Store the configuration and allocate the per-unit state.

    Args:
        nb_units: Number of units; one data frame pair, one detector and
            one transformer are created per unit.
        ids_target_units: Stored as-is on the instance.
        w_ref_group: Passed to ``PeerGrouping``.
        w_martingale, non_conformity, k, dev_threshold: Passed positionally,
            in this order, to every ``IndividualAnomalyInductive``.
        transform: Stored as-is; not otherwise used by this constructor.
        w_transform: Passed as ``w`` to every ``Transformer``.
    """
    def _empty_frame():
        return pd.DataFrame(data=[], index=[])

    # Plain configuration attributes.
    self.nb_units = nb_units
    self.ids_target_units = ids_target_units
    self.w_ref_group = w_ref_group
    self.w_martingale = w_martingale
    self.non_conformity = non_conformity
    self.k = k
    self.dev_threshold = dev_threshold
    self.transform = transform
    self.w_transform = w_transform

    # Two independent lists of empty frames, one frame per unit in each.
    unit_slots = range(nb_units)
    self.dfs_original = [_empty_frame() for _ in unit_slots]
    self.dfs = [_empty_frame() for _ in unit_slots]

    self.pg = PeerGrouping(self.w_ref_group)

    # One detector and one transformer per unit.
    self.detectors = [
        IndividualAnomalyInductive(w_martingale, non_conformity, k, dev_threshold)
        for _ in unit_slots
    ]
    self.transformers = [Transformer(w=w_transform) for _ in unit_slots]
def test_predict_input_wrong(self):
    # A fitted detector must raise InputValidationError when predict()
    # receives an empty list or a string as the data point.
    detector = IndividualAnomalyInductive(w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6)
    training_points = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
    detector.fit(training_points)

    for bad_point in ([], "foo"):
        with self.assertRaises(InputValidationError):
            detector.predict(None, bad_point)
def test_init(self):
    # Every configuration below is expected to be rejected by the
    # constructor with InputValidationError. Cases are checked in order
    # and the first failure aborts the test.
    rejected_configs = (
        dict(w_martingale=0, non_conformity="median", k=20, dev_threshold=0.6),
        dict(w_martingale=-1, non_conformity="median", k=20, dev_threshold=0.6),
        dict(w_martingale=15, non_conformity="foo", k=20, dev_threshold=0.6),
        dict(w_martingale=15, non_conformity="knn", k=0, dev_threshold=0.6),
        dict(w_martingale=15, non_conformity="knn", k=-1, dev_threshold=0.6),
        dict(w_martingale=15, non_conformity="median", k=20, dev_threshold=-1),
        dict(w_martingale=15, non_conformity="median", k=20, dev_threshold=2),
    )
    for config in rejected_configs:
        with self.assertRaises(InputValidationError):
            IndividualAnomalyInductive(**config)
def test_predict_not_fitted(self):
    # Calling predict() before fit() is expected to raise NotFittedError.
    settings = dict(w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6)
    unfitted = IndividualAnomalyInductive(**settings)
    sample = [1, 1, 1]
    with self.assertRaises(NotFittedError):
        unfitted.predict(None, sample)
def test_predict_median_w3(self):
    # Fit on three points with the "median" measure and a martingale window
    # of 3, then check the full DeviationContext returned for [1, 1, 1].
    detector = IndividualAnomalyInductive(w_martingale=3, non_conformity="median", k=20, dev_threshold=0.6)
    detector.fit(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

    observed = detector.predict(None, [1, 1, 1])

    # First field is the Euclidean norm of (3, 4, 5).
    first_field = (3**2 + 4**2 + 5**2) ** 0.5
    expected = DeviationContext(first_field, 0, 1 / 3, False)
    self.assertEqual(observed, expected)
def test_fit_knn_k2(self):
    # After fitting with the "knn" measure and k=2, the per-sample scores
    # must match the hand-computed values below.
    detector = IndividualAnomalyInductive(w_martingale=15, non_conformity="knn", k=2, dev_threshold=0.6)
    detector.fit(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

    step = 3 ** (3 / 2)
    expected = [(0 + step) / 2, (step + 0) / 2, (step + 0) / 2]

    # NOTE(review): the exact-equality check is stricter than the allclose
    # check that follows it; confirm both are intended.
    self.assertEqual(detector.scores, expected)
    self.assertTrue(np.allclose(detector.scores, expected))
def test_fit_median(self):
    # After fitting with the "median" measure, the per-sample scores must
    # match the hand-computed values below.
    detector = IndividualAnomalyInductive(w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6)
    detector.fit(np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]]))

    outer = 3 ** (3 / 2)
    expected = [outer, 0, outer]

    # NOTE(review): the exact-equality check is stricter than the allclose
    # check that follows it; confirm both are intended.
    self.assertEqual(detector.scores, expected)
    self.assertTrue(np.allclose(detector.scores, expected))
def test_fit_input_empty(self):
    # Fitting on an empty list is expected to raise InputValidationError.
    settings = dict(w_martingale=15, non_conformity="median", k=20, dev_threshold=0.6)
    detector = IndividualAnomalyInductive(**settings)
    no_samples = []
    with self.assertRaises(InputValidationError):
        detector.fit(no_samples)
generator = bb.generate_samples( cam=args.cam_id, rs=args.resample_time ) # Choose between the Transductive or Inductive version. ref_groups_list = args.ref_group.split(",") if args.type == "T": indev = IndividualAnomalyTransductive(w_martingale=args.martingale, # Window size for computing the deviation level non_conformity=args.measure, # Strangeness measure: "median","knn","lof" k=15, # Used if non_conformity is "knn" dev_threshold=args.dev_threshold, # Threshold on the deviation level ref_group=ref_groups_list, #ref_group="external", ) # reference group construction: "week", "month", "season", "external" else: indev = IndividualAnomalyInductive( w_martingale=args.martingale,# Window size for computing the deviation level non_conformity=args.measure, # Strangeness measure: "median" or "knn" or "lof" k=50, # Used if non_conformity is "knn" dev_threshold=args.dev_threshold) # Train # Training consists of pushing one week of data through the algorithm. # (Pretending we are "live") # Can be switched off. # if args.do_train: print( "--------" ) print( "Training" ) print( "--------" ) inside = False processed = 0 training = [] sequence = 0
class AnomalyDetection:
    """Anomaly scoring helpers: z-score banding plus conformal detectors.

    ``train_cosmo`` / ``test_cosmo`` wrap ``IndividualAnomalyInductive`` and
    ``nonstationary_AD_cosmo`` wraps ``IndividualAnomalyTransductive``.
    """

    def __init__(self):
        pass

    def deviation_detection(self, data, mu, sigma, l1=4, l2=8, l3=12):
        """Compute z-scores and scatter-plot them in four threshold bands.

        Args:
            data, mu, sigma: Forwarded to ``self.zscore``.
            l1, l2, l3: Multipliers applied to ``sigma`` to obtain the three
                band thresholds.

        Returns:
            Tuple ``(z_s, sigma)``: the (first-column) z-scores and the
            ``sigma`` that was passed in.
        """
        # NOTE(review): ``zscore`` is not defined in this class as shown here;
        # it must be provided elsewhere (subclass/mixin) - confirm.
        z_s = self.zscore(data, mu, sigma)
        if len(z_s.shape) > 1:
            # Only the first column is scored when several are returned.
            z_s = z_s[:, 0]
        t = np.linspace(0, len(z_s) - 1, len(z_s))

        thres1 = l1 * sigma
        thres2 = l2 * sigma
        thres3 = l3 * sigma

        # Each band: (selection mask, marker colour, legend label).
        bands = (
            (z_s <= thres1, 'y', 'Normal'),
            ((z_s > thres1) & (z_s <= thres2), 'b', 'L1 Threshold'),
            ((z_s > thres2) & (z_s <= thres3), 'g', 'L2 Threshold'),
            (z_s > thres3, 'r', 'Anomalous points'),
        )
        for mask, color, label in bands:
            selected = np.where(mask)
            plt.scatter(t[selected], z_s[selected], color=color, label=label,
                        alpha=0.3, edgecolors='none')

        plt.xlabel('Observation Signal (in samples)')
        plt.ylabel('Anomaly Score')
        plt.title('Anomaly Score Estimation')
        plt.legend()
        return z_s, sigma

    def train_cosmo(self, data, w_martingale=15, non_conformity="median", k=20):
        """Create an inductive detector, fit it on ``data``, store it on ``self.model``.

        Args:
            data: Object exposing ``to_numpy()`` (e.g. a pandas DataFrame);
                all of its rows are used for fitting.
            w_martingale, non_conformity, k: Forwarded to
                ``IndividualAnomalyInductive``.
        """
        self.model = IndividualAnomalyInductive(w_martingale=w_martingale,
                                                non_conformity=non_conformity,
                                                k=k)
        self.model.fit(data.to_numpy())

    def test_cosmo(self, data):
        """Score every row of ``data`` with the detector in ``self.model``.

        Args:
            data: Object exposing ``index`` and ``values`` (e.g. a DataFrame);
                rows are fed to ``self.model.predict`` in order.

        Returns:
            Tuple ``(strangeness, pvalues)`` of numpy arrays, one entry per row.
        """
        cols = ['Strangeness', 'P-Values', 'Deviation']
        lst_dict = []
        for t, x in zip(data.index, data.values):
            info = self.model.predict(t, x)
            lst_dict.append({
                'Strangeness': info.strangeness,
                'P-Values': info.pvalue,
                'Deviation': info.deviation
            })
        df1 = pd.DataFrame(lst_dict, columns=cols)
        return df1['Strangeness'].to_numpy(), df1['P-Values'].to_numpy()

    def nonstationary_AD_cosmo(self, data, n, w_martingale, k, non_conformity="median", ref_group=None):
        """Score ``data`` row by row with a transductive detector.

        Args:
            data: Object exposing ``index`` and ``values``. If ``None``, the
                legacy ``self.data`` attribute is used instead.
            n: Unused; kept so existing call sites keep working.
            w_martingale: Window size for computing the deviation level.
            k: Used if ``non_conformity`` is "knn".
            non_conformity: Strangeness measure, "median" or "knn".
            ref_group: Criteria for reference group construction; defaults to
                ``["hour-of-day"]`` when omitted.

        Returns:
            Tuple ``(df1, gr)``: a DataFrame with columns 'Strangeness',
            'P-Values' and 'Deviation', and whatever
            ``model.plot_deviations`` returns.
        """
        # A fresh list per call avoids sharing one mutable default object.
        if ref_group is None:
            ref_group = ["hour-of-day"]

        # The original read ``self.data`` (never set by this class) and
        # ignored ``data``; prefer the argument, keep the attribute as fallback.
        df = data if data is not None else self.data

        cols = ['Strangeness', 'P-Values', 'Deviation']
        lst_dict = []

        model = IndividualAnomalyTransductive(
            w_martingale=w_martingale,
            non_conformity=non_conformity,
            k=k,
            ref_group=ref_group
        )

        for t, x in zip(df.index, df.values):
            info = model.predict(t, x)
            lst_dict.append({
                'Strangeness': info.strangeness,
                'P-Values': info.pvalue,
                'Deviation': info.deviation
            })

        # Plot strangeness and deviation level over time
        gr = model.plot_deviations(figsize=(2000, 2000))

        df1 = pd.DataFrame(lst_dict, columns=cols)
        return df1, gr
from grand.datasets import load_vehicles, load_artificial_toy
from grand import IndividualAnomalyInductive

if __name__ == '__main__':
    # Load the artificial toy dataset (load_vehicles() is the alternative source).
    dataset = load_artificial_toy(0)

    # Unit number 1 provides the training samples; its timestamps are dropped.
    training_samples = [sample for _, sample in dataset.stream_unit(1)]

    # Configure the inductive detector.
    detector = IndividualAnomalyInductive(
        w_martingale=15,          # Window size for computing the deviation level
        non_conformity="median",  # Strangeness measure: "median" or "knn" or "lof"
        k=50,                     # Used if non_conformity is "knn"
        dev_threshold=.6,         # Threshold on the deviation level
    )

    # Fit the detector to the samples of unit number 1.
    detector.fit(training_samples)

    # Stream unit number 0 through the fitted detector, one data point per time step.
    for timestamp, sample in dataset.stream_unit(0):
        context = detector.predict(timestamp, sample)
        report = "Time: {} ==> strangeness: {}, p-value: {}, deviation: {} ({})".format(
            timestamp,
            context.strangeness,
            context.pvalue,
            context.deviation,
            "high" if context.is_deviating else "low",
        )
        print(report)

    # Plot p-values and deviation level over time
    detector.plot_deviations()