def test_timestamp_api():
    """Exercise the time-series detection pipeline against the demo table.

    Connects to the local server, inserts the demo data into ``db.t``,
    queries the 2019-07-20 .. 2019-08-20 window with ``time_serie=True``
    and, for every entry in ``algorithm``, fits the detector, predicts,
    scores and reports through ``output_performance``.

    The connection is closed even when one of the steps raises.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = ['luminol']

    # Seed both the explicit generator and NumPy's global state.
    rng = np.random.RandomState(42)
    np.random.seed(42)

    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(
            conn, cursor, 'db', 't', ground_truth_flag=True)

        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time='2019-07-20 00:00:00',
            end_time='2019-08-20 00:00:00',
            time_serie=True,
            ground_truth_flag=True)

        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')

            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            started = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            elapsed = time.perf_counter() - started

            output_performance(alg, ground_truth, prediction_result,
                               elapsed, outlierness)
    finally:
        # Release the connection on failure as well as on success.
        conn.close()
def test_static_api():
    """Exercise every static detector against the demo table.

    Connects to the local server, inserts the demo data into ``db.t``,
    queries the 2019-07-20 .. 2019-08-20 window with ``time_serie=False``
    and, for every entry in ``algorithm``, fits the detector, predicts,
    scores and reports through ``output_performance``.

    Afterwards ``query_data`` is called twice more without time bounds,
    once with and once without ``ground_truth_flag``.

    The connection is closed even when one of the steps raises.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = [
        'iforest',
        'ocsvm',
        'lof',
        'robustcovariance',
        'cblof',
        'knn',
        'hbos',
        'sod',
        'pca',
        'dagmm',
        'autoencoder',
        'lstm_ad',
        'lstm_ed',
        'staticautoencoder',
    ]

    # Seed both the explicit generator and NumPy's global state.
    rng = np.random.RandomState(42)
    np.random.seed(42)

    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(
            conn, cursor, 'db', 't', ground_truth_flag=True)

        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time='2019-07-20 00:00:00',
            end_time='2019-08-20 00:00:00',
            time_serie=False,
            ground_truth_flag=True)

        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')

            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            started = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            elapsed = time.perf_counter() - started

            output_performance(alg, ground_truth, prediction_result,
                               elapsed, outlierness)

        # NOTE(review): assumed to run once after the loop rather than once
        # per algorithm -- confirm against the intended layout.
        # Unbounded query, ground truth requested.
        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time=None,
            end_time=None,
            time_serie=False,
            ground_truth_flag=True)

        # Unbounded query, ground truth not requested.
        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time=None,
            end_time=None,
            time_serie=False,
            ground_truth_flag=False)
    finally:
        # Release the connection on failure as well as on success.
        conn.close()
def test_io_static():
    """Exercise the static I/O and plotting helpers with ``iforest``.

    Connects to the local server, inserts the demo data into ``db.t``,
    queries the 2019-07-20 .. 2019-08-20 window with ``time_serie=False``,
    fits the detector, reports through ``output_performance`` and calls the
    three visualisation helpers.  ``query_data`` is then called twice more,
    once with only an end bound and once with only a start bound.

    The connection is closed when the test finishes, including on failure.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    alg = 'iforest'

    # Seed both the explicit generator and NumPy's global state.
    rng = np.random.RandomState(42)
    np.random.seed(42)

    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(
            conn, cursor, 'db', 't', ground_truth_flag=True)

        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time='2019-07-20 00:00:00',
            end_time='2019-08-20 00:00:00',
            time_serie=False,
            ground_truth_flag=True)

        clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
        print('Start processing:')

        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        started = time.perf_counter()
        clf.fit(data)
        prediction_result = clf.predict(data)
        outlierness = clf.decision_function(data)
        elapsed = time.perf_counter() - started

        output_performance(alg, ground_truth, prediction_result,
                           elapsed, outlierness)

        visualize_distribution_static(data, prediction_result, outlierness)
        visualize_distribution(data, prediction_result, outlierness)
        visualize_outlierscore(outlierness, prediction_result,
                               contamination=0.1)

        # Only an upper time bound.
        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time=None,
            end_time='2019-08-20 00:00:00',
            time_serie=False,
            ground_truth_flag=True)

        # Only a lower time bound.
        data, ground_truth = query_data(
            conn, cursor, 'db', 't',
            time_serie_name='ts',
            ground_truth=ground_truth_whole,
            start_time='2019-07-20 00:00:00',
            end_time=None,
            time_serie=False,
            ground_truth_flag=True)
    finally:
        # The original never closed the connection; release it here so the
        # test does not leak it, on failure as well as on success.
        conn.close()
args.start_time, args.end_time, args.time_serie_name, time_serie=args.time_stamp, ground_truth_flag=args.ground_truth) print('Loading cost: %.6f seconds' % (time.clock() - start_time)) print('Load data successful') #Algorithm Selection if args.ground_truth: alg_selector = Cash(data, ground_truth) clf = alg_selector.model_selector(max_evals=2) else: clf = algorithm_selection(args.algorithm, random_state=rng, contamination=args.contamination) start_time = time.clock() clf.fit(data) prediction_result = clf.predict(data) outlierness = clf.decision_function(data) anomaly_scores = clf.anomaly_likelihood(data) if args.ground_truth: output_performance(args.algorithm, ground_truth, prediction_result, time.clock() - start_time, anomaly_scores) if args.visualize_distribution and args.ground_truth: if not args.time_stamp: visualize_distribution_static(data, prediction_result, outlierness,
]) from IPython.display import display, HTML #display(df) return df training_features = extract_features(training_FileList) from pyodds.utils.importAlgorithm import algorithm_selection from pyodds.utils import utilities from pyodds.utils.plotUtils import visualize_distribution_static, visualize_distribution_time_serie, visualize_outlierscore, visualize_distribution #import ipdb; ipdb.set_trace() clf = algorithm_selection('lof', 1, 0.3) clf.fit(training_features) #import pandas as pd print("Extracting test_features") test_features = [] #import ipdb; ipdb.set_trace() for filepath in test_FileList: if os.path.basename(filepath) in file_name_col: label_index = np.where( file_name_col == os.path.basename(filepath))[0][0] gtruth = labels[label_index][1].astype(int) test_features = extract_features([filepath]) prediction_result = clf.predict(test_features) outlierness_score = clf.decision_function(test_features) print(gtruth, prediction_result)
def __init__(self, features):
    """Build an ``'lof'`` detector and fit it on ``features``.

    Stores the fitted detector on ``self.clf`` and starts
    ``self.test_features`` as an empty list.
    """
    # Arguments are passed positionally, exactly as before: 'lof', 1, 0.3.
    detector = algorithm_selection('lof', 1, 0.3)
    self.clf = detector
    detector.fit(features)

    self.test_features = list()