def test1():
    """Visual smoke test of a windowed GaussianCusum on an unchanged stream.

    Fits the detector on 10000 three-dimensional samples drawn uniformly
    from [100, 500), runs it on 200000 further samples drawn from the same
    distribution, and plots the cumulative sums together with the detection
    threshold. Nothing is asserted and nothing is returned; the figure is
    meant to be inspected by eye.
    """
    import matplotlib.pyplot as plt

    x = np.random.uniform(100, 500, size=(10000, 3))
    x_test = np.random.uniform(100, 500, size=(200000, 3))

    cdt = GaussianCusum(arl=100, window_size=100)
    cdt.fit(x, estimate_threshold=True, len_simulation=1e3)

    # Only the cumulative sums are plotted, so the alarm flags are discarded.
    _, cum_sum = cdt.predict(x_test, reset=True)

    plt.plot(cum_sum)
    plt.axhline(cdt.threshold)
    plt.show()
def precomp_threshold(dof, len_sim=1e5, beta=.75):
    """
    Precompute, once and for all, the thresholds for some common dimensions.

    For every entry of ``dof`` a fresh GaussianCusum (``arl=None``) is fitted
    on a single all-zero sample of that dimension, with threshold estimation,
    verbose output and ``precompute_thresholds=True`` switched on.
    :param dof: list of degrees of freedom.
    :param len_sim: length of the simulated sequence (default is 1e5).
    :param beta: sensitivity parameter (default is .75)
    """
    fit_options = dict(estimate_threshold=True,
                       len_simulation=len_sim,
                       verbose=True,
                       precompute_thresholds=True)
    for n_dims in dof:
        detector = GaussianCusum(arl=None, beta=beta)
        # A single zero vector: only its dimension n_dims varies per fit.
        placeholder = np.zeros((1, n_dims))
        detector.fit(x=placeholder, **fit_options)
def test3():
    """Run the pipeline Delaunay graphs -> MDS embedding -> GaussianCusum.

    Generates 500 graphs of class 0 and 50 graphs of class 8, converts them
    to the 'nx' format, fits a 2-dimensional multidimensional scaling on the
    first 50 graphs using a graph edit distance, embeds the remaining graphs
    and runs a windowed GaussianCusum on the embedded sequence. Nothing is
    asserted or returned.
    """
    from cdg.graph import DelaunayGraphs, convert
    from cdg.graph.distance import GraphEditDistanceNX
    from cdg.embedding import MultiDimensionalScaling
    from cdg.changedetection import GaussianCusum

    # Graph generation: class id -> number of graphs of that class.
    n_seed_points = 5
    graphs_per_class = {0: 500, 8: 50}
    generator = DelaunayGraphs()
    graphs = generator.get(seed_points=n_seed_points,
                           classes=list(graphs_per_class),
                           no_graphs=graphs_per_class,
                           sigma=.3,
                           include_seed_graph=False)

    distance = GraphEditDistanceNX(node_cost='euclidean', n_jobs=2)

    # Class-0 graphs first, then class-8 graphs; the first 50 are for training.
    sequence = convert(graphs[0] + graphs[8], format_in='cdg', format_out='nx')
    train_graphs = sequence[:50]
    test_graphs = sequence[50:]

    # Embedding of the graphs into a 2-dimensional vector space.
    embedding = MultiDimensionalScaling(emb_dim=2, nprot=5)
    embedding.fit(graphs=train_graphs,
                  dist_fun=distance.get_measure_fun(verbose=True))
    x = embedding.transform(test_graphs)

    # Change detection on the embedded sequence.
    detector = GaussianCusum(window_size=5, arl=20)
    detector.fit(x[:100])
    y, g = detector.predict(x, reset=False)
def test_cusum_alarm_curve():
    """Compare the default threshold of GaussianCusum with the 'data' one.

    Two independent Gaussian sequences of 10000 three-dimensional samples
    are drawn with a random mean and a random linear mixing. On the first
    one the detector is fitted with its default settings and the threshold
    is stored; on the second one it is refitted with ``gamma_type='data'``
    and ``threshold_type='data'`` and both thresholds are printed. After
    each fit the detector is run on the sequence it was fitted on and the
    sum of the predictions divided by the sequence length is printed.

    Nothing is asserted or returned; the printed values are meant to be
    compared by eye.
    """
    import numpy as np
    from cdg.changedetection import GaussianCusum

    dim = 3
    n_samples = 10000
    arl = 30

    # Random mean and random mixing matrix of the nominal distribution.
    mu = np.random.randn(dim)
    sigma = np.eye(dim) + np.random.rand(dim, dim)

    cdt = GaussianCusum(arl=arl, window_size=10)
    th_true = None
    for use_data_driven in (False, True):
        x = mu + np.dot(np.random.randn(n_samples, dim), sigma.transpose())

        if not use_data_driven:
            # Reference threshold obtained with the default settings.
            cdt.fit(x, estimate_threshold=True)
            th_true = cdt.threshold
        else:
            cdt.fit(x, estimate_threshold=True,
                    gamma_type='data', threshold_type='data')
            print('thresholds:\ttrue={}\test={}'.format(th_true, cdt.threshold))

        # No change is injected in x, which is the sequence just used to fit.
        y_predict, _ = cdt.predict(x, reset=True, verbose=False)
        print(np.sum(y_predict)/n_samples)
def test2():
    """Minimal usage example of GaussianCusum, batch and one sample at a time.

    A univariate standard-normal sequence of 400 samples is generated and a
    shift of +1 is added from sample 320 onwards. The detector is fitted on
    the first 100 samples and applied to the whole sequence in one call;
    then it is reset, refitted and fed one sample at a time, printing the
    time step of every alarm and resetting the detector after each of them.
    """
    import numpy as np
    from cdg.changedetection import GaussianCusum

    n_total, n_train, change_at = 400, 100, 320
    alpha = 0.01

    # Stream with a mean shift starting at `change_at`.
    x = np.random.normal(size=(n_total, 1))
    x[change_at:] += 1.
    training = x[:n_train]

    # Batch mode.
    cdt = GaussianCusum(arl=round(1. / alpha))
    cdt.fit(training)
    y, g = cdt.predict(x, reset=False)

    cdt.reset()
    print(cdt.threshold)

    # Online mode: each sample is passed as a slice of length one.
    cdt.fit(training)
    for t in range(n_total):
        sample = x[t:t + 1]
        alarm, _ = cdt.iterate(sample)
        if not alarm:
            continue
        print("An alarm is raised at time {}".format(t))
        cdt.reset()
def demo():
    """Plot the behaviour of the available CUSUM detectors on synthetic data.

    A training stream of 1000 samples is drawn from a bivariate Gaussian.
    The test stream holds 4000 samples from the same Gaussian followed by
    1000 samples from a second one with a different mean and covariance.
    Both streams are also mapped onto a spherical and a hyperbolic manifold
    (``exp_map`` around a randomly generated point), and their first
    coordinate is used as a univariate stream.

    Every detector is fitted on its training data and applied to its test
    data. One subplot per detector (3x4 grid, two slots left empty) shows
    the alarms, the statistic ``g`` and the threshold ``h``. The random seed
    is fixed to 123. Nothing is returned.
    """
    import matplotlib.pyplot as plt
    from scipy.stats import multivariate_normal
    import cdg.embedding
    # The manifolds are accessed as ``cdg.geometry.<Class>``: import the
    # submodule explicitly instead of relying on some other module having
    # already loaded it as a side effect.
    import cdg.geometry

    np.random.seed(123)

    # setup
    sample_size = 5000
    arl = 100
    win_size = 10

    # create two multivariate distributions
    rv1 = multivariate_normal(mean=[0., 0.], cov=[[1., 0.], [0., .2]])
    rv2 = multivariate_normal(mean=[0., 0.2], cov=[[1., 0.], [0., 1.]])
    training_stream = rv1.rvs(size=1000)
    x1 = rv1.rvs(size=int(sample_size / 5 * 4))
    x2 = rv2.rvs(size=int(sample_size / 5))
    test_stream = np.concatenate((x1, x2), axis=0)

    # univariate
    training_stream_uni = training_stream[:, :1]
    test_stream_uni = test_stream[:, :1]

    # spherical data
    man_sph = cdg.geometry.SphericalManifold(man_dim=2, radius=3)
    tmp = np.random.rand(1, 3) * 5  # wlog generate a mean
    true_mean = man_sph.clip(X_mat=tmp, radius=man_sph.radius)
    stream_sph_tr = man_sph.exp_map(x0_mat=true_mean, Nu_mat=training_stream)
    stream_sph_te = man_sph.exp_map(x0_mat=true_mean, Nu_mat=test_stream)

    # hyperbolic data
    man_hyp = cdg.geometry.HyperbolicManifold(man_dim=2, radius=3)
    tmp = np.random.rand(1, 3) * 5  # wlog generate a mean
    true_mean = man_hyp.clip(X_mat=tmp, radius=man_hyp.radius)
    stream_hyp_tr = man_hyp.exp_map(x0_mat=true_mean, Nu_mat=training_stream)
    stream_hyp_te = man_hyp.exp_map(x0_mat=true_mean, Nu_mat=test_stream)

    # One entry per subplot, in plotting order: (detector, training data,
    # test data), or None for a slot of the grid that is left empty.
    panels = []

    # gaussian no window
    panels.append((GaussianCusum(arl=arl), training_stream, test_stream))

    # gaussian windowed
    panels.append((GaussianCusum(arl=arl, window_size=win_size),
                   training_stream, test_stream))

    # two empty slots complete the first row of the grid
    panels.extend([None, None])

    # lower
    panels.append((LowerCusum(arl=arl), training_stream_uni, test_stream_uni))

    # greater
    panels.append((GreaterCusum(arl=arl), training_stream_uni, test_stream_uni))

    # two-sided
    panels.append((TwoSidedCusum(arl=arl), training_stream_uni, test_stream_uni))

    # bonferroni on different cusum
    bonf_cusum = BonferroniCusum(arl=arl,
                                 cusum_list=[LowerCusum(arl=arl),
                                             TwoSidedCusum(arl=arl)])
    panels.append((bonf_cusum, training_stream_uni, test_stream_uni))

    # euclidean windowed
    cusum_euc = GaussianCusum(arl=arl, window_size=win_size)
    panels.append((cusum_euc, training_stream, test_stream))

    # spherical windowed
    cusum_sph = ManifoldCLTCusum(arl=arl, manifold=man_sph, window_size=win_size)
    panels.append((cusum_sph, stream_sph_tr, stream_sph_te))

    # hyperbolic windowed
    cusum_hyp = ManifoldCLTCusum(arl=arl, manifold=man_hyp, window_size=win_size)
    panels.append((cusum_hyp, stream_hyp_tr, stream_hyp_te))

    # bonferroni on different cusum; it wraps the very same three detector
    # instances that are also fitted and plotted individually above.
    bonf_cusum = BonferroniCusum(arl=arl,
                                 cusum_list=[cusum_euc, cusum_sph, cusum_hyp])
    panels.append((bonf_cusum,
                   [training_stream, stream_sph_tr, stream_hyp_tr],
                   [test_stream, stream_sph_te, stream_hyp_te]))

    fig1 = plt.figure()
    for slot, panel in enumerate(panels, start=1):
        if panel is None:
            continue
        detector, train, test = panel
        detector.fit(train, estimate_threshold=True, len_simulation=1000)
        y_pred, gg = detector.predict(test, reset=False)
        gg = np.mean(gg, axis=1)  # only necessary for bonferroni
        sp = fig1.add_subplot(3, 4, slot)
        # Alarms are drawn at the height of the largest statistic value.
        sp.plot(y_pred*max(gg), '+k')
        sp.plot(gg, label='g')
        sp.plot([detector.threshold] * len(gg), label='h')
        sp.grid(True)
        sp.set_title(str(type(detector))[-20:])
    plt.show()
def _d_cdt(_path, _c):
    """Evaluate the distance-based change-detection test (D-CDT) on a dataset.

    Each run loads the dataset, keeps the samples labelled 0 (nominal) and
    ``_c`` (relabelled to 1), and places the nominal samples first. For
    every entry of ``P['radius']`` it computes the distance of each sample
    from the mean of the nominal data on the matching slice of
    ``P['latent_space']`` columns: spherical for a positive radius,
    hyperbolic for a negative one, Euclidean for zero. A windowed
    GaussianCusum is then fitted on the stacked nominal distances and
    applied to the live ones. Predictions and labels are reduced to one
    value per window before TPR, FPR and the detection score are computed.

    Runs are repeated until ``P['N_RUNS']`` of them give a positive score.
    The loop gives up once at least 100 runs were skipped and they make up
    90% or more of all attempts.

    :param _path: path of the dataset; its second-to-last '/'-separated
        component is used as identifier of the experiment.
    :param _c: label of the class that plays the role of the change.
    :return: tuple ``(id, _c, mean TPR, std TPR, mean FPR, std FPR,
        mean AUC, std AUC)``; the six statistics are NaN if no run succeeded.
    """
    _id = _path.split('/')[-2]
    tpr_avg = []
    fpr_avg = []
    auc_avg = []
    run = 0
    skipped = 0

    while run < P['N_RUNS'] and (skipped < 100 or skipped / (run + skipped) < 0.9):
        # Read data: either (nominal, live, labels) or just (live, labels),
        # in which case the nominal set is taken from the live data.
        data = dataset_load(_path)
        try:
            nominal, live, labels = data
        except ValueError:
            # Unpacking a 2-element result into three names raises ValueError.
            live, labels = data
            nominal = live[labels == 0].copy()

        # Keep the nominal class and the requested one, as a 0/1 problem.
        selected = (labels == 0) | (labels == _c)
        live = live[selected]
        labels = labels[selected]
        labels[labels != 0] = 1

        # Truncate both sets to a whole number of windows.
        CUSUM_WINDOW_SIZE = int(nominal.shape[0] * P['CUSUM_WINDOW_RATIO'])
        cut = CUSUM_WINDOW_SIZE * (nominal.shape[0] // CUSUM_WINDOW_SIZE)
        nominal = nominal[:cut]
        cut = CUSUM_WINDOW_SIZE * (labels.shape[0] // CUSUM_WINDOW_SIZE)
        live = live[:cut]
        labels = labels[:cut]

        # Nominal samples first, change samples last. The labels are
        # reordered together with the data so that y_true stays aligned with
        # the predictions even when the loaded stream is not sorted by label.
        is_nominal = labels == 0
        is_change = labels == 1
        live = np.vstack((live[is_nominal], live[is_change]))
        labels = np.concatenate((labels[is_nominal], labels[is_change]))

        # Compute distances
        distances_nom = []
        distances_test = []
        try:
            for i_, r_ in enumerate(P['radius']):
                start = i_ * P['latent_space']
                stop = start + P['latent_space']
                nominal_part = nominal[:, start:stop]
                live_part = live[:, start:stop]
                if r_ > 0.:
                    # Spherical
                    s_mean = SphericalManifold.sample_mean(nominal_part, radius=r_)
                    d_nom = SphericalManifold.distance(nominal_part, s_mean, radius=r_)
                    d_test = SphericalManifold.distance(live_part, s_mean, radius=r_)
                elif r_ < 0.:
                    # Hyperbolic: the radius is stored with a negative sign.
                    s_mean = HyperbolicManifold.sample_mean(nominal_part, radius=-r_)
                    d_nom = HyperbolicManifold.distance(nominal_part, s_mean, radius=-r_)
                    d_test = HyperbolicManifold.distance(live_part, s_mean, radius=-r_)
                else:
                    # Euclidean; a trailing axis is added so that the result
                    # can be concatenated along the last axis below.
                    s_mean = np.mean(nominal_part, 0)
                    d_nom = np.linalg.norm(nominal_part - s_mean, axis=-1)[..., None]
                    d_test = np.linalg.norm(live_part - s_mean, axis=-1)[..., None]
                distances_nom.append(d_nom)
                distances_test.append(d_test)
        except FloatingPointError:
            print('D-CDT: FloatingPointError')
            skipped += 1
            continue

        # Combined
        distances_nom = np.concatenate(distances_nom, -1)
        distances_test = np.concatenate(distances_test, -1)

        # Change detection
        cdt = GaussianCusum(arl=P['CUSUM_ARL'], window_size=CUSUM_WINDOW_SIZE)
        cdt.fit(distances_nom, estimate_threshold=True,
                len_simulation=P['CUSUM_SIM_LEN'])
        pred, cum_sum = cdt.predict(distances_test, reset=True)
        pred = np.array(pred).astype(int)

        # One value per window: rounded mean of the labels / predictions.
        y_true = labels.reshape(-1, CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        y_pred = pred.reshape(-1, CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        auc, _ = detection_score(y_pred, y_true)

        if auc > 0.:
            tpr_avg.append(tpr)
            fpr_avg.append(fpr)
            auc_avg.append(auc)
            run += 1
        else:
            print('No true positive predictions')
            skipped += 1

    # No successful run, or a NaN score among them: report a crash.
    if not auc_avg or np.isnan(np.mean(auc_avg)):
        print('Done: {} {} - {}'.format(_id, _c, 'crashed'))
        return _id, _c, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    result_str = 'TPR: {:.5f} FPR: {:.5f} - AUC: {:.3f}'.format(
        np.mean(tpr_avg), np.mean(fpr_avg), np.mean(auc_avg))
    print('Done: {} {} - {}'.format(_id, _c, result_str))
    return (_id, _c,
            np.mean(tpr_avg), np.std(tpr_avg),
            np.mean(fpr_avg), np.std(fpr_avg),
            np.mean(auc_avg), np.std(auc_avg))
def _r_cdt(_path, _c):
    """Evaluate the Bonferroni-combined change-detection test (R-CDT).

    Each run loads the dataset, keeps the samples labelled 0 (nominal) and
    ``_c`` (relabelled to 1), and places the nominal samples first. One
    windowed detector is built for every entry of ``P['radius']``: a
    ManifoldCLTCusum on a hyperbolic manifold for a negative radius, on a
    spherical manifold for a positive one, and a GaussianCusum for zero.
    Each detector works on its own slice of ``P['latent_space']`` columns,
    and all of them are combined in a BonferroniCusum. Predictions and
    labels are reduced to one value per window before TPR, FPR and the
    detection score are computed.

    Runs are repeated until ``P['N_RUNS']`` of them give a positive score.
    The loop gives up once at least 100 runs were skipped and they make up
    90% or more of all attempts.

    :param _path: path of the dataset; its second-to-last '/'-separated
        component is used as identifier of the experiment.
    :param _c: label of the class that plays the role of the change.
    :return: tuple ``(id, _c, mean TPR, std TPR, mean FPR, std FPR,
        mean AUC, std AUC)``; the six statistics are NaN if no run succeeded.
    """
    _id = _path.split('/')[-2]
    tpr_avg = []
    fpr_avg = []
    auc_avg = []
    run = 0
    skipped = 0

    while run < P['N_RUNS'] and (skipped < 100 or skipped / (run + skipped) < 0.9):
        # Read data: either (nominal, live, labels) or just (live, labels),
        # in which case the nominal set is taken from the live data.
        data = dataset_load(_path)
        try:
            nominal, live, labels = data
        except ValueError:
            # Unpacking a 2-element result into three names raises ValueError.
            live, labels = data
            nominal = live[labels == 0].copy()

        # Keep the nominal class and the requested one, as a 0/1 problem.
        selected = (labels == 0) | (labels == _c)
        live = live[selected]
        labels = labels[selected]
        labels[labels != 0] = 1

        # Truncate both sets to a whole number of windows.
        CUSUM_WINDOW_SIZE = int(nominal.shape[0] * P['CUSUM_WINDOW_RATIO'])
        cut = CUSUM_WINDOW_SIZE * (nominal.shape[0] // CUSUM_WINDOW_SIZE)
        nominal = nominal[:cut]
        cut = CUSUM_WINDOW_SIZE * (labels.shape[0] // CUSUM_WINDOW_SIZE)
        live = live[:cut]
        labels = labels[:cut]

        # Nominal samples first, change samples last. The labels are
        # reordered together with the data so that y_true stays aligned with
        # the predictions even when the loaded stream is not sorted by label.
        is_nominal = labels == 0
        is_change = labels == 1
        live = np.vstack((live[is_nominal], live[is_change]))
        labels = np.concatenate((labels[is_nominal], labels[is_change]))

        # Change detection: one detector per radius, each on its own columns.
        cusum_list = []
        indices = []
        for i_, r_ in enumerate(P['radius']):
            start = i_ * P['latent_space']
            stop = start + P['latent_space']
            indices.append((start, stop))
            if r_ < 0.:
                # Hyperbolic: the radius is stored with a negative sign.
                man_tmp = HyperbolicManifold(radius=-r_)
            elif r_ > 0.:
                # Spherical
                man_tmp = SphericalManifold(radius=r_)
            else:
                # Euclidean
                man_tmp = None

            if man_tmp is None:
                detector = GaussianCusum(arl=P['CUSUM_ARL'],
                                         window_size=CUSUM_WINDOW_SIZE)
            else:
                detector = ManifoldCLTCusum(arl=P['CUSUM_ARL'],
                                            manifold=man_tmp,
                                            window_size=CUSUM_WINDOW_SIZE)
            cusum_list.append(detector)

        # Bonferroni on different detectors
        cdt = BonferroniCusum(cusum_list=cusum_list,
                              arl=P['CUSUM_ARL'] // len(P['radius']))
        try:
            cdt.fit([nominal[..., lo:hi] for lo, hi in indices],
                    estimate_threshold=True,
                    len_simulation=P['CUSUM_SIM_LEN'],
                    radia=P['radius'])
        except FloatingPointError:
            print('R-CDT: FloatingPointError')
            skipped += 1
            continue

        pred, cum_sum = cdt.predict([live[..., lo:hi] for lo, hi in indices],
                                    reset=True)
        pred = np.array(pred).astype(int)

        # One value per window: rounded mean of the labels / predictions.
        y_true = labels.reshape(-1, CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        y_pred = pred.reshape(-1, CUSUM_WINDOW_SIZE).mean(-1).round().reshape(-1)
        tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
        tpr = tp / (tp + fn)
        fpr = fp / (fp + tn)
        auc, _ = detection_score(y_pred, y_true)

        if auc > 0.:
            tpr_avg.append(tpr)
            fpr_avg.append(fpr)
            auc_avg.append(auc)
            run += 1
        else:
            print('No true positive predictions')
            skipped += 1

    # No successful run, or a NaN score among them: report a crash.
    if not auc_avg or np.isnan(np.mean(auc_avg)):
        print('Done: {} {} - {}'.format(_id, _c, 'crashed'))
        return _id, _c, np.nan, np.nan, np.nan, np.nan, np.nan, np.nan

    result_str = 'TPR: {:.5f} FPR: {:.5f} - AUC: {:.3f}'.format(
        np.mean(tpr_avg), np.mean(fpr_avg), np.mean(auc_avg))
    print('Done: {} {} - {}'.format(_id, _c, result_str))
    return (_id, _c,
            np.mean(tpr_avg), np.std(tpr_avg),
            np.mean(fpr_avg), np.std(fpr_avg),
            np.mean(auc_avg), np.std(auc_avg))