コード例 #1
0
def test_reference_window(test_path):
    """Smoke-test ``ReferenceWindowModel`` built around PyOD's ``IForest``.

    Loads ``arrhythmia.mat`` from the examples data directory resolved
    relative to ``test_path``, shuffles it, initialises the model with the
    first 100 instances and streams the remaining ones through
    ``fit_partial`` / ``score_partial`` while tracking AUROC. The resulting
    AUROC is printed; the test makes no assertion on its value.

    Args:
        test_path: Directory against which ``../../examples/data`` is
            resolved.
    """
    from sklearn.utils import shuffle
    from pysad.models.integrations import ReferenceWindowModel
    from pysad.utils import Data
    from pysad.evaluation import AUROCMetric
    from pysad.utils import ArrayStreamer
    import os
    from pyod.models.iforest import IForest

    data = Data(os.path.join(test_path, "../../examples/data"))

    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    # The first 100 instances seed the initial reference window.
    model = ReferenceWindowModel(model_cls=IForest, window_size=240, sliding_size=30,
                                 initial_window_X=X_all[:100])

    iterator = ArrayStreamer(shuffle=False)

    auroc = AUROCMetric()

    # Stream everything after the initial window; each instance is fitted
    # first and scored afterwards. Scores are fed straight into the metric,
    # so no separate list of predictions is kept.
    for X, y in iterator.iter(X_all[100:], y_all[100:]):
        model.fit_partial(X)
        score = model.score_partial(X)

        auroc.update(y, score)

    print("AUROC: ", auroc.get())
コード例 #2
0
def test_one_fit(test_path):
    """Smoke-test ``OneFitModel`` built around PyOD's ``IForest``.

    Loads ``arrhythmia.mat`` from the examples data directory resolved
    relative to ``test_path``, shuffles it, initialises the model with the
    first 100 instances and streams the remaining ones through
    ``fit_partial`` / ``score_partial`` while tracking AUROC. The resulting
    AUROC is printed; the test makes no assertion on its value.

    Args:
        test_path: Directory against which ``../../examples/data`` is
            resolved.
    """
    from sklearn.utils import shuffle
    from pysad.utils import Data
    from pysad.evaluation import AUROCMetric
    from pysad.utils import ArrayStreamer
    import os
    from pyod.models.iforest import IForest
    from pysad.models.integrations.one_fit_model import OneFitModel

    data = Data(os.path.join(test_path, "../../examples/data"))

    X_all, y_all = data.get_data("arrhythmia.mat")
    # NOTE(review): dumps the raw data set to stdout; looks like leftover
    # debugging output - confirm whether it is still wanted.
    print(X_all, y_all)
    X_all, y_all = shuffle(X_all, y_all)

    # The first 100 instances are handed to the model as its initial data.
    model = OneFitModel(model_cls=IForest, initial_X=X_all[:100])

    iterator = ArrayStreamer(shuffle=False)

    auroc = AUROCMetric()

    # Stream everything after the initial 100 instances. Scores are fed
    # straight into the metric, so no separate list of predictions is kept.
    for X, y in iterator.iter(X_all[100:], y_all[100:]):
        model.fit_partial(X)
        score = model.score_partial(X)

        auroc.update(y, score)

    print("AUROC: ", auroc.get())
コード例 #3
0
def ensembler_usage_example():
    """Ensemble two streaming detectors and report the AUROC of the result.

    The arrhythmia data is loaded from the ``data`` directory, shuffled and
    streamed. For every streamed instance each model (xStream and LODA) is
    fitted and then asked for a score; the per-model scores are passed to an
    ``AverageScoreEnsembler`` and the ensembled score updates the AUROC
    tracker. The final AUROC is printed.
    """
    np.random.seed(61)  # Fix NumPy's random seed.

    # Load the arrhythmia data and shuffle it.
    dataset = Data("data")
    features, labels = dataset.get_data("arrhythmia.mat")
    features, labels = shuffle(features, labels)

    # Streamer used to simulate streaming data.
    streamer = ArrayStreamer(shuffle=False)

    # Tracks the area under the receiver-operating-characteristic curve.
    auroc_tracker = AUROCMetric()

    # The models being ensembled, and the module that combines their scores.
    detectors = [xStream(), LODA()]
    combiner = AverageScoreEnsembler()
    detector_count = len(detectors)

    for instance, label in tqdm(streamer.iter(features, labels)):
        # One slot per model, filled in the order of ``detectors``.
        per_model = np.empty(detector_count, dtype=np.float64)
        for slot, detector in enumerate(detectors):
            detector.fit_partial(instance)
            per_model[slot] = detector.score_partial(instance)

        # Fit the ensembler on these scores and obtain the ensembled score.
        combined = combiner.fit_transform_partial(per_model)

        auroc_tracker.update(label, combined)

    # Report the metric.
    print("AUROC: {}.".format(auroc_tracker.get()))
コード例 #4
0
def full_usage_example():
    """Run a preprocess -> xStream -> postprocess pipeline and print its AUROC.

    The arrhythmia data is loaded from the ``data`` directory and shuffled;
    the instances from index 100 onwards are streamed. Each instance is
    passed through an ``InstanceUnitNormScaler``, fitted and scored by
    xStream, and the score goes through a ``RunningAveragePostprocessor``
    with ``window_size=5`` before updating the AUROC metric.
    """
    np.random.seed(61)  # Fix NumPy's random seed.

    # Data to stream.
    dataset = Data("data")
    features, labels = dataset.get_data("arrhythmia.mat")
    features, labels = shuffle(features, labels)

    # Streamer used to simulate streaming data.
    streamer = ArrayStreamer(shuffle=False)

    detector = xStream()  # Anomaly detection model.
    normalizer = InstanceUnitNormScaler()  # Normalizer.
    smoother = RunningAveragePostprocessor(window_size=5)  # Running average.
    auroc_metric = AUROCMetric()  # Area under the ROC curve.

    stream = streamer.iter(features[100:], labels[100:])
    for instance, label in tqdm(stream):
        # Fit the preprocessor to the instance and transform it.
        normalized = normalizer.fit_transform_partial(instance)

        # Fit the model to the instance and score it.
        raw_score = detector.fit_score_partial(normalized)

        # Apply running averaging to the score.
        smoothed = smoother.fit_transform_partial(raw_score)

        auroc_metric.update(label, smoothed)

    # Report the resulting AUROC.
    print("AUROC: {}.".format(auroc_metric.get()))
コード例 #5
0
def PyOD_integration_example():
    """Use a PyOD ``IForest`` on a stream via ``ReferenceWindowModel``.

    The arrhythmia data is loaded from the ``data`` directory and shuffled.
    The reference-window model is initialised with the first 100 instances;
    every later instance is fitted, then scored, and the score updates the
    AUROC metric, which is printed at the end.
    """
    np.random.seed(61)  # Fix NumPy's random seed.

    # Data to stream.
    dataset = Data("data")
    features, labels = dataset.get_data("arrhythmia.mat")
    features, labels = shuffle(features, labels)
    streamer = ArrayStreamer(shuffle=False)

    # Reference window integration, initially given the first 100 instances.
    window_settings = dict(
        model_cls=IForest,
        window_size=240,
        sliding_size=30,
        initial_window_X=features[:100],
    )
    detector = ReferenceWindowModel(**window_settings)

    # Tracker for the area under the receiver-operating-characteristic curve.
    auroc_metric = AUROCMetric()

    stream = streamer.iter(features[100:], labels[100:])
    for instance, label in tqdm(stream):
        detector.fit_partial(instance)  # Fit to the instance.
        instance_score = detector.score_partial(instance)  # Then score it.

        auroc_metric.update(label, instance_score)

    # Report the AUROC metric.
    print("AUROC: {}.".format(auroc_metric.get()))
コード例 #6
0
# Import modules.
from pysad.evaluation import AUROCMetric
from pysad.models import LODA
from pysad.utils import Data

# Minimal streaming loop: LODA scores each instance of the arrhythmia data
# while an AUROC metric is updated with the true label and the score.

# Anomaly detection model.
model = LODA()

# Area under the receiver-operating-characteristic curve metric.
metric = AUROCMetric()

# Iterator over the data that yields (instance, true label) pairs.
streaming_data = Data().get_iterator("arrhythmia.mat")

for x, y_true in streaming_data:
    # Fit the model to the instance and score that same instance.
    anomaly_score = model.fit_score_partial(x)

    # Feed the label and the score to the metric.
    metric.update(y_true, anomaly_score)

# Report the resulting metric value.
print(f"Area under ROC metric is {metric.get()}.")
コード例 #7
0
if __name__ == "__main__":
    # Script entry point: stream the arrhythmia data through a
    # normalizer -> xStream -> running-average pipeline and print the AUROC.

    # Fix the random seed.
    np.random.seed(61)

    # Data to stream.
    data = Data("data")
    X_all, y_all = data.get_data("arrhythmia.mat")
    X_all, y_all = shuffle(X_all, y_all)

    # Streamer used to simulate streaming data.
    iterator = ArrayStreamer(shuffle=False)

    # xStream anomaly detection model.
    model = xStream()
    # Normalizer.
    preprocessor = InstanceUnitNormScaler()
    # Running average postprocessor.
    postprocessor = RunningAveragePostprocessor(window_size=5)
    # Area under the receiver-operating-characteristic curve metric.
    auroc = AUROCMetric()

    # Only the instances from index 100 onwards are streamed.
    for X, y in tqdm(iterator.iter(X_all[100:], y_all[100:])):
        # Fit the preprocessor to the instance and transform it.
        X = preprocessor.fit_transform_partial(X)

        # Fit the model to the instance and score it, then apply running
        # averaging to that score.
        score = postprocessor.fit_transform_partial(model.fit_score_partial(X))

        # Update the AUROC metric.
        auroc.update(y, score)

    # Report the resulting AUROC.
    print("AUROC: ", auroc.get())
コード例 #8
0
    # NOTE(review): this is the body of a definition whose header is not
    # visible here; X is presumably the full data set loaded earlier - confirm.
    # iterator = PandasStreamer(shuffle=False)
    iterator = ArrayStreamer(shuffle=False)

    # model = xStream()  # Init xStream anomaly detection model.

    # model = IForestASD(initial_window_X=df[:4096], window_size=2048)
    # Fit KitNet on the first 5000 entries of X; the loop below only scores
    # the entries after them.
    model = KitNet()
    model.fit(X[:5000])

    # Need our own preprocessing, as hexadecimals don't need zero variance but
    # packet vs. checksum length might.
    # NOTE(review): preprocessor and postprocessor are created but not used in
    # the visible part of this snippet.
    preprocessor = InstanceUnitNormScaler()  # Init normalizer.
    postprocessor = RunningAveragePostprocessor(
        window_size=5)  # Init running average postprocessor.

    # Davies-Bouldin Index, Calinski-Harabasz Index and Silhouette Coefficient
    # exist for clustering, none for anomaly detection
    # (wrapped in BaseSKLearnMetric).
    # No suitable metric found so far; AUC would be perfect if the data set
    # were labelled. Maybe something else?
    # NOTE(review): auroc is created but its update/get calls below are
    # commented out, so it is never used in the visible part of this snippet.
    auroc = AUROCMetric(
    )  # Init area under receiver-operating-characteristic curve metric.

    # The loop variable rebinds X: inside and after the loop X is a streamed
    # item rather than the full data set.
    for X in tqdm(iterator.iter(X[5000:])):  # Stream data.

        # Score only (no fitting inside the loop) and print each score.
        score = model.score_partial(X)
        print(score)
        # score = postprocessor.fit_transform_partial(score)  # Apply running averaging to the score.
        #
        # auroc.update(y, score)  # Update AUROC metric.

    # Output resulting AUROC metric.
    # print("AUROC: ", auroc.get())