def test_reference_window(test_path):
    import os

    from pyod.models.iforest import IForest
    from sklearn.utils import shuffle

    from pysad.evaluation import AUROCMetric
    from pysad.models.integrations import ReferenceWindowModel
    from pysad.utils import ArrayStreamer, Data

    # Load and shuffle the Arrhythmia data set.
    data = Data(os.path.join(test_path, "../../examples/data"))
    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    # Wrap PyOD's IForest in a sliding reference window, fitted initially to the first 100 instances.
    model = ReferenceWindowModel(
        model_cls=IForest,
        window_size=240,
        sliding_size=30,
        initial_window_X=X_all[:100])

    iterator = ArrayStreamer(shuffle=False)
    auroc = AUROCMetric()

    y_pred = []
    for X, y in iterator.iter(X_all[100:], y_all[100:]):
        model.fit_partial(X)
        score = model.score_partial(X)

        y_pred.append(score)
        auroc.update(y, score)

    print("AUROC: ", auroc.get())
def test_one_fit(test_path):
    import os

    from pyod.models.iforest import IForest
    from sklearn.utils import shuffle

    from pysad.evaluation import AUROCMetric
    from pysad.models.integrations.one_fit_model import OneFitModel
    from pysad.utils import ArrayStreamer, Data

    # Load and shuffle the Arrhythmia data set.
    data = Data(os.path.join(test_path, "../../examples/data"))
    X_all, y_all = data.get_data("arrhythmia.mat")
    print(X_all, y_all)
    X_all, y_all = shuffle(X_all, y_all)

    # Fit PyOD's IForest once to the first 100 instances; afterwards the model is only scored.
    model = OneFitModel(model_cls=IForest, initial_X=X_all[:100])

    iterator = ArrayStreamer(shuffle=False)
    auroc = AUROCMetric()

    y_pred = []
    for X, y in iterator.iter(X_all[100:], y_all[100:]):
        model.fit_partial(X)
        score = model.score_partial(X)

        y_pred.append(score)
        auroc.update(y, score)

    print("AUROC: ", auroc.get())
def ensembler_usage_example():
    import numpy as np
    from sklearn.utils import shuffle
    from tqdm import tqdm

    from pysad.evaluation import AUROCMetric
    from pysad.models import LODA, xStream
    from pysad.transform.ensemble import AverageScoreEnsembler
    from pysad.utils import ArrayStreamer, Data

    np.random.seed(61)  # Fix random seed.

    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")  # Load Arrhythmia data.
    X_all, y_all = shuffle(X_all, y_all)  # Shuffle data.

    iterator = ArrayStreamer(shuffle=False)  # Create streamer to simulate streaming data.
    auroc = AUROCMetric()  # Tracker of area under receiver-operating characteristics curve metric.

    models = [xStream(), LODA()]  # Models to be ensembled.
    ensembler = AverageScoreEnsembler()  # Ensembler module.

    for X, y in tqdm(iterator.iter(X_all, y_all)):  # Iterate over examples.
        model_scores = np.empty(len(models), dtype=np.float64)

        # Fit & score each model.
        for i, model in enumerate(models):
            model.fit_partial(X)
            model_scores[i] = model.score_partial(X)

        score = ensembler.fit_transform_partial(model_scores)  # Fit the ensembler and get the ensembled score.

        auroc.update(y, score)  # Update AUROC metric.

    # Output score.
    print("AUROC: {}.".format(auroc.get()))
def full_usage_example():
    import numpy as np
    from sklearn.utils import shuffle
    from tqdm import tqdm

    from pysad.evaluation import AUROCMetric
    from pysad.models import xStream
    from pysad.transform.postprocessing import RunningAveragePostprocessor
    from pysad.transform.preprocessing import InstanceUnitNormScaler
    from pysad.utils import ArrayStreamer, Data

    np.random.seed(61)  # Fix random seed.

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

    model = xStream()  # Init xStream anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under receiver-operating characteristics curve metric.

    for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):  # Stream data.
        X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

        auroc.update(y, score)  # Update AUROC metric.

    # Output the resulting AUROC metric.
    print("AUROC: {}.".format(auroc.get()))
def PyOD_integration_example():
    import numpy as np
    from pyod.models.iforest import IForest
    from sklearn.utils import shuffle
    from tqdm import tqdm

    from pysad.evaluation import AUROCMetric
    from pysad.models.integrations import ReferenceWindowModel
    from pysad.utils import ArrayStreamer, Data

    np.random.seed(61)  # Fix seed.

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    iterator = ArrayStreamer(shuffle=False)

    # Fit the reference window integration to the first 100 instances initially.
    model = ReferenceWindowModel(
        model_cls=IForest,
        window_size=240,
        sliding_size=30,
        initial_window_X=X_all[:100])

    auroc = AUROCMetric()  # Init area under receiver-operating characteristics curve metric tracker.

    for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):
        model.fit_partial(X)  # Fit to the instance.
        score = model.score_partial(X)  # Score the instance.

        auroc.update(y, score)  # Update the metric.

    # Output the AUROC metric.
    print("AUROC: {}.".format(auroc.get()))
# Import modules.
from pysad.evaluation import AUROCMetric
from pysad.models import LODA
from pysad.utils import Data

model = LODA()  # Init model.
metric = AUROCMetric()  # Init area under receiver-operating characteristics curve metric.
streaming_data = Data().get_iterator("arrhythmia.mat")  # Get data streamer.

for x, y_true in streaming_data:  # Stream data.
    anomaly_score = model.fit_score_partial(x)  # Fit the instance to the model and score the instance.

    metric.update(y_true, anomaly_score)  # Update the AUROC metric.

# Output the resulting AUROC metric.
print(f"Area under ROC metric is {metric.get()}.")
import numpy as np
from sklearn.utils import shuffle
from tqdm import tqdm

from pysad.evaluation import AUROCMetric
from pysad.models import xStream
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import ArrayStreamer, Data

if __name__ == "__main__":
    np.random.seed(61)  # Fix random seed.

    # Get data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    iterator = ArrayStreamer(shuffle=False)  # Init streamer to simulate streaming data.

    model = xStream()  # Init xStream anomaly detection model.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.
    auroc = AUROCMetric()  # Init area under receiver-operating characteristics curve metric.

    for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):  # Stream data.
        X = preprocessor.fit_transform_partial(X)  # Fit preprocessor to and transform the instance.

        score = model.fit_score_partial(X)  # Fit model to and score the instance.
        score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.

        auroc.update(y, score)  # Update AUROC metric.

    # Output the resulting AUROC metric.
    print("AUROC: ", auroc.get())
from tqdm import tqdm

from pysad.evaluation import AUROCMetric
from pysad.models import KitNet
from pysad.transform.postprocessing import RunningAveragePostprocessor
from pysad.transform.preprocessing import InstanceUnitNormScaler
from pysad.utils import ArrayStreamer

# X is the packet feature matrix, assumed to be loaded beforehand.

# iterator = PandasStreamer(shuffle=False)
iterator = ArrayStreamer(shuffle=False)

# model = xStream()  # Init xStream anomaly detection model.
# model = IForestASD(initial_window_X=df[:4096], window_size=2048)
model = KitNet()
model.fit(X[:5000])

# We need our own preprocessing, as hexadecimals don't need zero variance but packet vs. checksum length might.
preprocessor = InstanceUnitNormScaler()  # Init normalizer.
postprocessor = RunningAveragePostprocessor(window_size=5)  # Init running average postprocessor.

# Davies-Bouldin Index, Calinski-Harabasz Index and Silhouette Coefficient exist for clustering
# (wrapped in BaseSKLearnMetric), but none for anomaly detection.
# No metrics I can see; AUC would be perfect if the dataset were labelled. Maybe something else?
auroc = AUROCMetric()  # Init area under receiver-operating characteristics curve metric.

for x in tqdm(iterator.iter(X[5000:])):  # Stream data.
    score = model.score_partial(x)
    print(score)

    # score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
    # auroc.update(y, score)  # Update AUROC metric.

# Output the resulting AUROC metric.
# print("AUROC: ", auroc.get())
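# The notes above call for custom preprocessing (scale length-like fields, leave hexadecimal
# fields untouched). Below is a minimal, standalone sketch of such a selective scaler; it is
# NOT part of pysad. It only mimics the fit_transform_partial(x) call pattern used in the
# examples above, and the class name and column indices are hypothetical placeholders.
import numpy as np


class SelectiveUnitNormScaler:
    """Sketch: unit-norm scale only the selected columns of each streaming instance."""

    def __init__(self, scaled_indices):
        self.scaled_indices = np.asarray(scaled_indices)

    def fit_transform_partial(self, x):
        x = np.asarray(x, dtype=np.float64).copy()
        selected = x[self.scaled_indices]
        norm = np.linalg.norm(selected)
        if norm > 0.0:  # Avoid division by zero for all-zero instances.
            x[self.scaled_indices] = selected / norm
        return x


# Hypothetical usage: scale only the first two (length-like) fields of a packet instance.
scaler = SelectiveUnitNormScaler(scaled_indices=[0, 1])
print(scaler.fit_transform_partial([3.0, 4.0, 255.0]))  # -> [0.6, 0.8, 255.0]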