Ejemplo n.º 1
0
def test_timestamp_api():
    """Exercise the time-series detection pipeline end to end.

    Connects to the server on localhost, calls ``insert_demo_data`` for table
    ``t`` of database ``db``, queries the 2019-07-20 .. 2019-08-20 window with
    ``time_serie=True`` and then, for every algorithm in the list, fits the
    detector, predicts on the same data and reports the result through
    ``output_performance``.

    The connection is closed even when a step raises, so a failing run does
    not leak the server connection.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = ['luminol']
    # Seed both a dedicated RandomState (handed to the detector) and the
    # global NumPy generator so repeated runs are reproducible.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=True,
                                        ground_truth_flag=True)
        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            start_time = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            output_performance(alg, ground_truth, prediction_result,
                               time.perf_counter() - start_time, outlierness)
    finally:
        conn.close()
Ejemplo n.º 2
0
def test_static_api():
    """Exercise every static (non time-series) detector end to end.

    Connects to the server on localhost, calls ``insert_demo_data`` for table
    ``t`` of database ``db`` and queries the 2019-07-20 .. 2019-08-20 window
    with ``time_serie=False``. Each algorithm in the list is then fitted,
    used to predict on the same data and reported via ``output_performance``.

    Afterwards ``query_data`` is called twice more without any time bounds,
    once with and once without ground truth, to exercise those argument
    combinations.

    The connection is closed even when a step raises, so a failing run does
    not leak the server connection.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    algorithm = [
        'iforest', 'ocsvm', 'lof', 'robustcovariance', 'cblof', 'knn', 'hbos',
        'sod', 'pca', 'dagmm', 'autoencoder', 'lstm_ad', 'lstm_ed',
        'staticautoencoder'
    ]
    # Seed both a dedicated RandomState (handed to the detectors) and the
    # global NumPy generator so repeated runs are reproducible.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)

        for alg in algorithm:
            clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
            print('Start processing:')
            # time.clock() was removed in Python 3.8; perf_counter() is the
            # documented replacement for measuring elapsed time.
            start_time = time.perf_counter()
            clf.fit(data)
            prediction_result = clf.predict(data)
            outlierness = clf.decision_function(data)
            output_performance(alg, ground_truth, prediction_result,
                               time.perf_counter() - start_time, outlierness)

        # The two queries below only check that query_data copes with
        # unbounded time ranges; the tuple unpacking is kept so a change in
        # the returned shape still fails loudly.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=False)
    finally:
        conn.close()
Ejemplo n.º 3
0
def test_io_static():
    """Exercise the static pipeline together with its visualisation helpers.

    Connects to the server on localhost, calls ``insert_demo_data`` for table
    ``t`` of database ``db``, queries the 2019-07-20 .. 2019-08-20 window with
    ``time_serie=False``, fits the ``iforest`` detector and reports the
    result via ``output_performance``. The three static visualisation helpers
    are then called on the prediction, and ``query_data`` is called twice
    more with only one of the two time bounds set.

    The connection is closed when the test finishes or fails; the original
    code never released it.
    """
    host = '127.0.0.1'
    user = '******'
    password = '******'
    alg = 'iforest'

    # Seed both a dedicated RandomState (handed to the detector) and the
    # global NumPy generator so repeated runs are reproducible.
    rng = np.random.RandomState(42)
    np.random.seed(42)
    conn, cursor = connect_server(host, user, password)
    try:
        ground_truth_whole = insert_demo_data(conn,
                                              cursor,
                                              'db',
                                              't',
                                              ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)

        clf = algorithm_selection(alg, random_state=rng, contamination=0.1)
        print('Start processing:')
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for measuring elapsed time.
        start_time = time.perf_counter()
        clf.fit(data)
        prediction_result = clf.predict(data)
        outlierness = clf.decision_function(data)
        output_performance(alg, ground_truth, prediction_result,
                           time.perf_counter() - start_time, outlierness)

        visualize_distribution_static(data, prediction_result, outlierness)
        visualize_distribution(data, prediction_result, outlierness)
        visualize_outlierscore(outlierness, prediction_result,
                               contamination=0.1)

        # The two queries below only check that query_data copes with a
        # half-open time range (missing start, then missing end); the tuple
        # unpacking is kept so a change in the returned shape still fails.
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time=None,
                                        end_time='2019-08-20 00:00:00',
                                        time_serie=False,
                                        ground_truth_flag=True)
        data, ground_truth = query_data(conn,
                                        cursor,
                                        'db',
                                        't',
                                        time_serie_name='ts',
                                        ground_truth=ground_truth_whole,
                                        start_time='2019-07-20 00:00:00',
                                        end_time=None,
                                        time_serie=False,
                                        ground_truth_flag=True)
    finally:
        conn.close()
Ejemplo n.º 4
0
    # Pick the detector: with ground truth available, Cash searches for a
    # model (two evaluations); otherwise build the one requested on the
    # command line.
    if args.ground_truth:
        alg_selector = Cash(data, ground_truth)
        clf = alg_selector.model_selector(max_evals=2)
    else:
        clf = algorithm_selection(args.algorithm,
                                  random_state=rng,
                                  contamination=args.contamination)

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    clf.fit(data)
    prediction_result = clf.predict(data)
    outlierness = clf.decision_function(data)
    anomaly_scores = clf.anomaly_likelihood(data)

    # The reported duration covers fit, predict and both scoring calls.
    if args.ground_truth:
        output_performance(args.algorithm, ground_truth, prediction_result,
                           time.perf_counter() - start_time, anomaly_scores)

    # Plots are only produced when ground truth is available.
    if args.visualize_distribution and args.ground_truth:
        if not args.time_stamp:
            visualize_distribution_static(data, prediction_result, outlierness,
                                          args.saving_path)
            visualize_distribution(data, prediction_result, outlierness,
                                   args.saving_path)
            visualize_outlierscore(outlierness, prediction_result,
                                   args.contamination, args.saving_path)
        else:
            visualize_distribution_time_serie(clf.ts, data, args.saving_path)
            visualize_outlierscore(outlierness, prediction_result,
                                   args.contamination, args.saving_path)

    conn.close()
Ejemplo n.º 5
0
    # Report how long the data-loading step took; start_time is assigned
    # earlier, outside this excerpt.
    # NOTE(review): time.clock() was removed in Python 3.8 -- switch this
    # call and the matching start_time assignment to time.perf_counter()
    # together, so both readings come from the same clock.
    print('Loading cost: %.6f seconds' % (time.clock() - start_time))
    print('Load data successful')

    #algorithm

    # Build the detector requested on the command line.
    clf = algorithm_selection(args.algorithm,
                              random_state=rng,
                              contamination=args.contamination)
    print('Start processing:')
    # time.clock() was removed in Python 3.8; perf_counter() is the
    # documented replacement for measuring elapsed time.
    start_time = time.perf_counter()
    clf.fit(data)
    prediction_result = clf.predict(data)
    outlierness = clf.decision_function(data)

    # The reported duration covers fit, predict and decision_function.
    if args.ground_truth:
        output_performance(args.algorithm, ground_truth, prediction_result,
                           time.perf_counter() - start_time, outlierness)

    # Plots are only produced when ground truth is available.
    if args.visualize_distribution and args.ground_truth:
        if not args.time_stamp:
            visualize_distribution_static(data, prediction_result, outlierness,
                                          args.saving_path)
            visualize_distribution(data, prediction_result, outlierness,
                                   args.saving_path)
            # Pass saving_path here as well: every other visualiser call in
            # this if/else already honours it, and this one silently
            # ignored the user's choice.
            visualize_outlierscore(outlierness, prediction_result,
                                   args.contamination, args.saving_path)
        else:
            visualize_distribution_time_serie(clf.ts, data, args.saving_path)
            visualize_outlierscore(outlierness, prediction_result,
                                   args.contamination, args.saving_path)

    conn.close()