Beispiel #1
0
def test_timestamp_api():
    """Exercise the time-series detection path end to end.

    Connects to the server at 127.0.0.1, inserts the demo data into
    database ``db`` / table ``t``, queries a one-month window with
    ``time_serie=True``, then fits, predicts and scores every algorithm
    listed in ``algorithm`` and reports its performance.

    The connection is closed even if any step raises.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = ['luminol']
    # Seed both the explicit RandomState and numpy's global generator.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=True,
                                        ground_truth_flag=True)
        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            start_time = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            output_performance(alg, ground_truth, prediction_result,
                               time.perf_counter() - start_time, outlierness)
    finally:
        # Release the connection on failure as well as on success.
        conn.close()
Beispiel #2
0
def test_static_api():
    """Exercise the static (non time-series) detection path end to end.

    Connects to the server at 127.0.0.1, inserts the demo data into
    database ``db`` / table ``t``, queries a one-month window with
    ``time_serie=False``, then fits, predicts and scores every algorithm
    listed in ``algorithm``.  Afterwards ``query_data`` is called twice more
    with no time bounds (with and without ground truth) purely to exercise
    those argument combinations; the results are not inspected.

    The connection is closed even if any step raises.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = [
        'iforest', 'ocsvm', 'lof', 'robustcovariance', 'cblof', 'knn', 'hbos',
        'sod', 'pca', 'dagmm', 'autoencoder', 'lstm_ad', 'lstm_ed',
        'staticautoencoder'
    ]
    # Seed both the explicit RandomState and numpy's global generator.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)

        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            start_time = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            output_performance(alg, ground_truth, prediction_result,
                               time.perf_counter() - start_time, outlierness)

        # Unbounded query (no start/end time), with ground truth requested.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=True)
        # Same unbounded query, this time with ground_truth_flag disabled.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=False)
    finally:
        # Release the connection on failure as well as on success.
        conn.close()
Beispiel #3
0
def test_io_static():
    """Exercise querying, detection and plotting for the static path.

    Connects to the server at 127.0.0.1, inserts the demo data into
    database ``db`` / table ``t``, queries a one-month window with
    ``time_serie=False``, runs the ``iforest`` algorithm on it, reports its
    performance and calls the three visualization helpers.  Finally
    ``query_data`` is called with only an end bound and then with only a
    start bound to exercise half-open time ranges; those results are not
    inspected.

    The connection is closed when the test finishes, including on failure.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    alg = 'iforest'

    # Seed both the explicit RandomState and numpy's global generator.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)

        clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
        print('Start processing:')
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start_time = time.perf_counter()
        clf.fit(data)
        prediction_result = clf.predict(data)
        outlierness = clf.decision_function(data)
        output_performance(alg, ground_truth, prediction_result,
                           time.perf_counter() - start_time, outlierness)

        visualize_distribution_static(data, prediction_result, outlierness)
        visualize_distribution(data, prediction_result, outlierness)
        visualize_outlierscore(outlierness, prediction_result, contamination=0.1)

        # Only an upper time bound.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)
        # Only a lower time bound.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=True)
    finally:
        # The sibling tests close their connection; do the same here so the
        # connection is not leaked.
        conn.close()
Beispiel #4
0
                          args.start_time,
                          args.end_time,
                          args.time_serie_name,
                          time_serie=args.time_stamp,
                          ground_truth_flag=args.ground_truth)

    print('Loading cost: %.6f seconds' % (time.clock() - start_time))
    print('Load data successful')

    #Algorithm Selection
    if args.ground_truth:
        alg_selector = Cash(data, ground_truth)
        clf = alg_selector.model_selector(max_evals=2)
    else:
        clf = algorithm_selection(args.algorithm,
                                  random_state=rng,
                                  contamination=args.contamination)

    start_time = time.clock()
    clf.fit(data)
    prediction_result = clf.predict(data)
    outlierness = clf.decision_function(data)
    anomaly_scores = clf.anomaly_likelihood(data)

    if args.ground_truth:
        output_performance(args.algorithm, ground_truth, prediction_result,
                           time.clock() - start_time, anomaly_scores)

    if args.visualize_distribution and args.ground_truth:
        if not args.time_stamp:
            visualize_distribution_static(data, prediction_result, outlierness,
                      ])

    from IPython.display import display, HTML
    #display(df)

    return df


# Extract features from the training files; the detector below is fit on them.
training_features = extract_features(training_FileList)

from pyodds.utils.importAlgorithm import algorithm_selection
from pyodds.utils import utilities
from pyodds.utils.plotUtils import visualize_distribution_static, visualize_distribution_time_serie, visualize_outlierscore, visualize_distribution

# Build the 'lof' detector and fit it on the training features.
clf = algorithm_selection('lof', 1, 0.3)
clf.fit(training_features)

print("Extracting test_features")
test_features = []
# Score each test file that has an entry in file_name_col, one file at a time.
for filepath in test_FileList:
    base_name = os.path.basename(filepath)
    if base_name not in file_name_col:
        # No label row for this file; skip it.
        continue
    # Index of the first row whose file name matches.
    matching_rows = np.where(file_name_col == base_name)[0]
    label_index = matching_rows[0]
    gtruth = labels[label_index][1].astype(int)
    test_features = extract_features([filepath])
    prediction_result = clf.predict(test_features)
    outlierness_score = clf.decision_function(test_features)
    print(gtruth, prediction_result)
	def __init__(self, features):
		"""Create the 'lof' detector and fit it on ``features``.

		Args:
			features: Training data passed straight to ``self.clf.fit``.
		"""
		# NOTE(review): the positional 1 and 0.3 are presumably random_state
		# and contamination -- confirm against algorithm_selection's signature.
		self.clf = algorithm_selection('lof', 1, 0.3)
		self.clf.fit(features)
		# Starts empty; nothing in the visible part of this method fills it.
		self.test_features = []