예제 #1
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_lambda_values(X_n140_outliers) -> None:
    """
    Verify that the mean outlier probability shrinks as ``extent`` grows.

    One model is built per extent value (1, 2 and 3) on the same data; the
    averages of the resulting probabilities must be strictly decreasing.
    :param X_n140_outliers: A pytest Fixture that generates 140 observations.
    :return: None
    """
    # One estimator per extent (lambda) value, all on the same observations
    models = [
        loop.LocalOutlierProbability(X_n140_outliers,
                                     extent=extent,
                                     use_numba=NUMBA)
        for extent in (1, 2, 3)
    ]

    # Fit each model and reduce its scores to a single average
    mean_scores = [
        np.mean(model.fit().local_outlier_probabilities) for model in models
    ]

    # A larger extent must give a strictly smaller average score
    assert_greater(mean_scores[0], mean_scores[1])
    assert_greater(mean_scores[1], mean_scores[2])
예제 #2
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_stream_performance(X_n140_outliers) -> None:
    """
    Compare streamed scores against a classical fit over the full data.

    A model fit on the first 100 observations scores the remaining 40 one at
    a time through ``stream``; together with the training scores these must
    stay within an RMSE of 0.35 of the scores from a fit on all observations.
    :param X_n140_outliers: A pytest Fixture that generates 140 observations.
    :return: None
    """
    train = X_n140_outliers[0:100]
    holdout = X_n140_outliers[100:140]

    # Reference scores: classical fit over every observation
    batch_model = loop.LocalOutlierProbability(X_n140_outliers,
                                               use_numba=NUMBA).fit()
    batch_scores = batch_model.local_outlier_probabilities

    # Streaming scores: fit on the training slice, then stream the rest
    stream_model = loop.LocalOutlierProbability(train, use_numba=NUMBA)
    stream_model.fit()
    train_scores = stream_model.local_outlier_probabilities
    holdout_scores = np.array(
        [stream_model.stream(holdout[row])
         for row in range(holdout.shape[0])])

    combined_scores = np.hstack((train_scores, holdout_scores))

    # Root-mean-square error between the two approaches must be below 0.35
    squared_error = (batch_scores - combined_scores) ** 2
    rmse = np.sqrt(squared_error.mean(axis=None))
    assert_greater(0.35, rmse)
예제 #3
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_loop(X_n8) -> None:
    """
    Check that the last two of the eight observations are flagged as outliers.

    The same assertions are run twice: once on the fixture data as provided
    and once on that data wrapped in a pandas DataFrame.
    :param X_n8: A pytest Fixture that generates the 8 observations.
    :return: None
    """
    share_outlier = 2. / 8.
    expected = 6 * [1] + 2 * [-1]

    for data in (X_n8, pd.DataFrame(X_n8)):
        clf = loop.LocalOutlierProbability(data,
                                           n_neighbors=5,
                                           use_numba=NUMBA)
        score = clf.fit().local_outlier_probabilities

        # Scores above the outlier share are labelled -1, the rest 1
        predictions = [-1 if s > share_outlier else 1 for s in score]
        assert_array_equal(predictions, expected)

        # Every outlier score must exceed every inlier score
        lowest_outlier = np.min(score[-2:])
        highest_inlier = np.max(score[:-2])
        assert_greater(lowest_outlier, highest_inlier)
예제 #4
0
def test_lambda_values():
    """
    Verify that the mean outlier probability shrinks as ``extent`` grows.

    Uses 120 scaled normal draws plus 20 uniform draws from [-4, 4) as data,
    generated from a random state seeded with 2.
    """
    rng = check_random_state(2)
    inliers = 0.3 * rng.randn(120, 2)
    outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[inliers, outliers]

    # One estimator per extent (lambda) value
    models = [
        loop.LocalOutlierProbability(X_test, extent=extent)
        for extent in (1, 2, 3)
    ]

    # Fit each model and reduce its scores to a single average
    mean_scores = [
        np.mean(model.fit().local_outlier_probabilities) for model in models
    ]

    # A larger extent must give a strictly smaller average score
    assert_greater(mean_scores[0], mean_scores[1])
    assert_greater(mean_scores[1], mean_scores[2])
예제 #5
0
def test_stream_performance():
    """
    Compare streamed scores against a classical fit over the full data.

    Data is 120 scaled normal draws followed by 20 uniform draws, generated
    from a random state seeded with 2. The first 100 rows train the
    streaming model and the remaining 40 are streamed one at a time.
    """
    rng = check_random_state(2)
    inliers = 0.3 * rng.randn(120, 2)
    outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X = np.r_[inliers, outliers]

    train, holdout = X[0:100], X[100:140]

    # Reference scores: classical fit over every observation
    batch_scores = loop.LocalOutlierProbability(
        X).fit().local_outlier_probabilities

    # Streaming scores: fit on the training slice, then stream the rest
    stream_model = loop.LocalOutlierProbability(train)
    stream_model.fit()
    train_scores = stream_model.local_outlier_probabilities
    holdout_scores = np.array(
        [stream_model.stream(holdout[row])
         for row in range(holdout.shape[0])])

    combined_scores = np.hstack((train_scores, holdout_scores))

    # Root-mean-square error between the two approaches must be below 0.35
    squared_error = (batch_scores - combined_scores) ** 2
    rmse = np.sqrt(squared_error.mean(axis=None))
    assert_greater(0.35, rmse)
예제 #6
0
def test_stream_distance(X_n140_outliers) -> None:
    """
    Check that streaming through a distance-matrix model agrees with
    streaming through a model fit on the raw data.

    Both models are built from the first 100 observations; the remaining 40
    are streamed through each and the RMSE between the two score vectors
    must be below 0.075.
    :param X_n140_outliers: A pytest Fixture that generates 140 observations.
    :return: None
    """
    X_train = X_n140_outliers[0:100]
    X_test = X_n140_outliers[100:140]
    n_test = X_test.shape[0]

    # Distances and neighbor indices of the training data (10 neighbors)
    knn = NearestNeighbors(metric='euclidean')
    knn.fit(X_train)
    distances, indices = knn.kneighbors(X_train,
                                        n_neighbors=10,
                                        return_distance=True)

    # One model from raw data, one from the precomputed matrices
    data_model = loop.LocalOutlierProbability(X_train).fit()
    dist_model = loop.LocalOutlierProbability(
        distance_matrix=distances, neighbor_matrix=indices).fit()

    # Stream every test observation through the data model
    data_scores = np.array(
        [data_model.stream(np.array(X_test[row])) for row in range(n_test)])

    # The distance model is streamed the mean neighbor distance instead
    dist_scores = []
    for row in range(n_test):
        row_distances, _ = knn.kneighbors(np.array([X_test[row]]),
                                          return_distance=True)
        dist_scores.append(dist_model.stream(np.mean(row_distances)))
    dist_scores = np.array(dist_scores)

    # Root-mean-square error between both score sets must be below 0.075
    squared_error = (data_scores - dist_scores) ** 2
    rmse = np.sqrt(squared_error.mean(axis=None))
    assert_greater(0.075, rmse)
예제 #7
0
def test_n_neighbors():
    """
    Check that an ``n_neighbors`` of 500 is reduced to the number of
    observations minus one after fitting, and that fitting issues a
    UserWarning.
    """
    X = iris.data
    largest_allowed = X.shape[0] - 1

    # Value after a direct fit
    fitted = loop.LocalOutlierProbability(X, n_neighbors=500).fit()
    assert_equal(fitted.n_neighbors, largest_allowed)

    # Same again, this time asserting that fit() warns
    pending = loop.LocalOutlierProbability(X, n_neighbors=500)
    assert_warns(UserWarning, pending.fit)
    assert_equal(pending.n_neighbors, largest_allowed)
예제 #8
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_n_neighbors() -> None:
    """
    Check the handling of an ``n_neighbors`` value larger than the number of
    observations: after fitting it must equal the number of observations
    minus one, and fitting must issue a UserWarning.
    :return: None
    """
    X = iris.data
    largest_allowed = X.shape[0] - 1

    # Value after a direct fit
    fitted = loop.LocalOutlierProbability(
        X, n_neighbors=500, use_numba=NUMBA).fit()
    assert_equal(fitted.n_neighbors, largest_allowed)

    # Same again, this time asserting that fit() warns
    pending = loop.LocalOutlierProbability(
        X, n_neighbors=500, use_numba=NUMBA)
    assert_warns(UserWarning, pending.fit)
    assert_equal(pending.n_neighbors, largest_allowed)
예제 #9
0
def test_loop_dist_matrix(X_n120) -> None:
    """
    Check that fitting from precomputed distance and neighbor matrices gives
    scores that agree with fitting from the raw data to one decimal place.
    :param X_n120: A pytest Fixture supplying the observations.
    :return: None
    """
    # Distances and neighbor indices for 10 neighbors
    knn = NearestNeighbors(metric='euclidean')
    knn.fit(X_n120)
    distances, indices = knn.kneighbors(X_n120,
                                        n_neighbors=10,
                                        return_distance=True)

    # One model from raw data, one from the precomputed matrices
    from_data = loop.LocalOutlierProbability(X_n120)
    from_matrices = loop.LocalOutlierProbability(distance_matrix=distances,
                                                 neighbor_matrix=indices)

    data_scores = from_data.fit().local_outlier_probabilities
    matrix_scores = from_matrices.fit().local_outlier_probabilities

    # Both routes must agree to one decimal place
    assert_almost_equal(data_scores, matrix_scores, decimal=1)
예제 #10
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_small_cluster_size(X_n140_outliers) -> None:
    """
    Check that fitting exits with a single UserWarning when ``n_neighbors``
    (50) exceeds the size of the smallest cluster (18 labels).
    :param X_n140_outliers: A pytest Fixture that generates 140 observations.
    :return: None
    """
    expected_message = (
        "Number of neighbors specified larger than smallest "
        "cluster. Specify a number of neighbors smaller than "
        "the smallest cluster size (observations in smallest "
        "cluster minus one).")

    # 120 labels for cluster 0 followed by 18 labels for cluster 1
    cluster_labels = [0] * 120 + [1] * 18

    clf = loop.LocalOutlierProbability(X_n140_outliers,
                                       n_neighbors=50,
                                       cluster_labels=cluster_labels,
                                       use_numba=NUMBA)

    # fit() must both warn and terminate
    with pytest.raises(SystemExit) as exit_info, \
            pytest.warns(UserWarning) as warning_log:
        clf.fit()

    assert exit_info.type is SystemExit

    # Exactly one warning, carrying the expected text
    assert len(warning_log) == 1
    assert warning_log[0].message.args[0] == expected_message
예제 #11
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_loop_performance(X_n120) -> None:
    """
    Using a set of known anomalies (labels), tests the performance (using
    ROC / AUC score) of the software and ensures it is able to capture most
    anomalies under this basic scenario.
    :param X_n120: A pytest Fixture that generates the 120 observations.
    :return: None
    """
    # Generate some abnormal novel observations.
    # NOTE: ``rng`` is not defined in this function; it is expected to be a
    # random state available at module level.
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X_test = np.r_[X_n120, X_outliers]

    # Ground truth: 1 for the fixture observations, -1 for the added outliers
    X_labels = np.r_[np.repeat(1, X_n120.shape[0]),
                     np.repeat(-1, X_outliers.shape[0])]

    # fit the model
    clf = loop.LocalOutlierProbability(
        X_test,
        n_neighbors=X_test.shape[0] - 1,
        # test the progress bar
        progress_bar=True,
        use_numba=NUMBA)

    # predict scores (the lower, the more normal)
    score = clf.fit().local_outlier_probabilities

    # Scores above the share of injected outliers are labelled -1
    share_outlier = X_outliers.shape[0] / X_test.shape[0]
    X_pred = [-1 if s > share_outlier else 1 for s in score]

    # check that roc_auc is good; roc_auc_score takes (y_true, y_score), so
    # the ground-truth labels go first and the predictions second
    assert_greater(roc_auc_score(X_labels, X_pred), .98)
예제 #12
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_input_too_many(X_n120) -> None:
    """
    Check that constructing the model with both raw data and a distance
    matrix issues exactly one UserWarning with the expected message.
    :param X_n120: A pytest Fixture that generates 120 observations.
    :return: None
    """
    expected_message = (
        "Only one of the following may be provided: data or a "
        "distance matrix (not both).")

    # Distances and neighbor indices for 10 neighbors
    knn = NearestNeighbors(metric='euclidean')
    knn.fit(X_n120)
    distances, indices = knn.kneighbors(X_n120,
                                        n_neighbors=10,
                                        return_distance=True)

    with pytest.warns(UserWarning) as caught:
        # Supplying data together with the matrices is the invalid input
        loop.LocalOutlierProbability(X_n120,
                                     distance_matrix=distances,
                                     neighbor_matrix=indices,
                                     use_numba=NUMBA)

    # Exactly one warning, carrying the expected text
    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message
예제 #13
0
def test_input_neighbor_mismatch():
    """
    Test to ensure that the proper warning is issued if the supplied distance
    (and neighbor) matrix and specified number of neighbors do not match.

    The library call is made inside the ``pytest.warns`` context so that the
    recorded warning is the one produced by the library rather than one
    emitted by the test itself.
    :return: None
    """
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # generate distance and neighbor indices for 5 neighbors
    neigh = NearestNeighbors(n_neighbors=5, metric='euclidean')
    neigh.fit(X)
    d, idx = neigh.kneighbors(X, return_distance=True)

    with pytest.warns(UserWarning) as record:
        # attempt to construct loop with a neighbor size mismatch:
        # 5-neighbor matrices while 10 neighbors are requested
        loop.LocalOutlierProbability(distance_matrix=d,
                                     neighbor_matrix=idx,
                                     n_neighbors=10)

    # check that only one warning was raised
    assert len(record) == 1
    # check that the message matches
    assert record[0].message.args[
               0] == "The shape of the distance or " \
                     "neighbor index matrix does not " \
                     "match the number of neighbors " \
                     "specified."
예제 #14
0
def test_input_shape_mismatch():
    """
    Test to ensure that the proper warning is issued if there is a mismatch
    between the shape of the provided distance and neighbor matrices.

    The library call is made inside the ``pytest.warns`` context so that the
    recorded warning is the one produced by the library rather than one
    emitted by the test itself.
    :return: None
    """
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # generate distance and neighbor indices for 10 neighbors
    neigh = NearestNeighbors(n_neighbors=10, metric='euclidean')
    neigh.fit(X)
    d, idx = neigh.kneighbors(X, return_distance=True)

    # generate neighbor indices of a different shape (5 neighbors); the
    # second estimator must be the one queried, otherwise both index
    # matrices have the same shape and no mismatch exists
    neigh_2 = NearestNeighbors(n_neighbors=5, metric='euclidean')
    neigh_2.fit(X)
    _, idx_2 = neigh_2.kneighbors(X, return_distance=True)

    with pytest.warns(UserWarning) as record:
        # attempt to construct loop with a mismatch in shapes
        loop.LocalOutlierProbability(distance_matrix=d,
                                     neighbor_matrix=idx_2,
                                     n_neighbors=5)

    # check that only one warning was raised
    assert len(record) == 1
    # check that the message matches
    assert record[0].message.args[
               0] == "The shape of the distance and neighbor " \
                     "index matrices must match."
예제 #15
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_distance_neighbor_shape_mismatch(X_n120) -> None:
    """
    Test to ensure that the proper warning is issued if there is a mismatch
    between the shape of the provided distance and neighbor matrices.
    :param X_n120: A pytest Fixture that generates 120 observations.
    :return: None
    """
    # generate distance and neighbor indices for 10 neighbors
    neigh = NearestNeighbors(metric='euclidean')
    neigh.fit(X_n120)
    d, idx = neigh.kneighbors(X_n120, n_neighbors=10, return_distance=True)

    # generate neighbor indices of a different shape (5 neighbors), querying
    # the second estimator that is fit for this purpose
    neigh_2 = NearestNeighbors(metric='euclidean')
    neigh_2.fit(X_n120)
    _, idx_2 = neigh_2.kneighbors(X_n120,
                                  n_neighbors=5,
                                  return_distance=True)

    with pytest.warns(UserWarning) as record:
        # attempt to fit loop with a mismatch in shapes
        loop.LocalOutlierProbability(distance_matrix=d,
                                     neighbor_matrix=idx_2,
                                     n_neighbors=5,
                                     use_numba=NUMBA)

    # check that only one warning was raised
    assert len(record) == 1
    # check that the message matches
    assert record[0].message.args[
               0] == "The shape of the distance and neighbor " \
                     "index matrices must match."
예제 #16
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_input_neighbor_mismatch(X_n120) -> None:
    """
    Check that constructing the model from 5-neighbor matrices while asking
    for 10 neighbors issues exactly one UserWarning with the expected
    message.
    :param X_n120: A pytest Fixture that generates 120 observations.
    :return: None
    """
    expected_message = (
        "The shape of the distance or "
        "neighbor index matrix does not "
        "match the number of neighbors "
        "specified.")

    # Distances and neighbor indices for 5 neighbors
    knn = NearestNeighbors(metric='euclidean')
    knn.fit(X_n120)
    distances, indices = knn.kneighbors(X_n120,
                                        n_neighbors=5,
                                        return_distance=True)

    with pytest.warns(UserWarning) as caught:
        # Requesting 10 neighbors contradicts the 5-column matrices
        loop.LocalOutlierProbability(distance_matrix=distances,
                                     neighbor_matrix=indices,
                                     n_neighbors=10,
                                     use_numba=NUMBA)

    # Exactly one warning, carrying the expected text
    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message
예제 #17
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_stream_cluster(X_n140_outliers) -> None:
    """
    Check that calling ``stream`` on a model fit with cluster labels issues
    exactly one UserWarning with the expected message.
    :param X_n140_outliers: A pytest Fixture that generates 140 observations.
    :return: None
    """
    expected_message = (
        "Stream approach does not support clustered data. "
        "Automatically refit using single cluster of points.")

    # 120 labels for cluster 0 followed by 18 labels for cluster 1
    cluster_labels = [0] * 120 + [1] * 18

    # Fit on the first 138 observations; observation 139 is streamed
    history = X_n140_outliers[0:138]
    new_observation = X_n140_outliers[139]
    model = loop.LocalOutlierProbability(history,
                                         cluster_labels=cluster_labels,
                                         use_numba=NUMBA).fit()

    with pytest.warns(UserWarning) as caught:
        model.stream(new_observation)

    # Exactly one warning, carrying the expected text
    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message
예제 #18
0
def test_stream_fit():
    """
    Test to ensure that a UserWarning with the expected message is issued
    when ``stream`` is called on a model that has not been fit.

    ``stream`` is called inside the ``pytest.warns`` context so that the
    recorded warnings come from the library rather than from the test
    itself. The message is checked by membership instead of by an exact
    warning count, because nothing here establishes whether ``stream``
    emits additional warnings.
    :return: None
    """
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X = np.r_[X, X_outliers]

    # Build the model but deliberately do not call fit()
    X_train = X[0:138]
    X_test = X[139]
    clf = loop.LocalOutlierProbability(X_train)

    with pytest.warns(UserWarning) as record:
        clf.stream(X_test)

    # check that the expected message is among the recorded warnings
    messages = [warning.message.args[0] for warning in record]
    assert "Must fit on historical data by calling fit() prior to " \
           "calling stream(x)." in messages
예제 #19
0
def get_TOS_loop(X, y, k_list, feature_list):
    """
    Score ``X`` with LoOP once per neighborhood size in ``k_list``.

    For each ``k`` the rounded ROC AUC and the rounded value returned by
    ``get_precn`` are printed and collected, and the name ``'loop_<k>'`` is
    appended to ``feature_list`` (which is mutated in place).

    :param X: Observations; wrapped in a pandas DataFrame before fitting.
    :param y: Labels passed to ``roc_auc_score`` and ``get_precn``.
    :param k_list: Sequence of ``n_neighbors`` values to evaluate.
    :param feature_list: List extended in place with one name per ``k``.
    :return: Tuple ``(feature_list, roc_loop, prec_loop, result_loop)``;
        ``result_loop`` holds one column of scores per entry of ``k_list``.
    """
    # only compatible with pandas
    frame = pd.DataFrame(X)

    result_loop = np.zeros([X.shape[0], len(k_list)])
    roc_loop, prec_loop = [], []
    report = 'LoOP @ {k} - ROC: {roc} Precision@n: {pren}'

    for column, k in enumerate(k_list):
        model = loop.LocalOutlierProbability(frame, n_neighbors=k).fit()
        probabilities = model.local_outlier_probabilities.astype(float)

        # Both metrics are rounded to four decimals before being reported
        roc = np.round(roc_auc_score(y, probabilities), decimals=4)
        prec_n = np.round(get_precn(y, probabilities), decimals=4)

        print(report.format(k=k, roc=roc, pren=prec_n))

        feature_list.append('loop_' + str(k))
        roc_loop.append(roc)
        prec_loop.append(prec_n)
        result_loop[:, column] = probabilities
    print()
    return feature_list, roc_loop, prec_loop, result_loop
예제 #20
0
def test_small_cluster_size():
    """
    Test to ensure that fitting exits and issues the proper warning when the
    specified number of neighbors (50) is larger than the smallest cluster
    (18 labels).

    ``fit`` is called inside the ``pytest.raises`` / ``pytest.warns``
    contexts so that the recorded warning comes from the library rather
    than from the test itself.
    :return: None
    """
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X = np.r_[X, X_outliers]

    # Generate cluster labels
    a = [0] * 120
    b = [1] * 18
    cluster_labels = a + b

    clf = loop.LocalOutlierProbability(X,
                                       n_neighbors=50,
                                       cluster_labels=cluster_labels)

    with pytest.raises(SystemExit) as record_a, pytest.warns(
            UserWarning) as record_b:
        clf.fit()

    assert record_a.type == SystemExit

    # check that only one warning was raised
    assert len(record_b) == 1
    # check that the message matches
    assert record_b[0].message.args[
               0] == "Number of neighbors specified larger than smallest " \
                     "cluster. Specify a number of neighbors smaller than " \
                     "the smallest cluster size (observations in smallest " \
                     "cluster minus one)."
예제 #21
0
def test_small_cluster_size(X_n140_outliers) -> None:
    """
    Check that fitting exits with a single UserWarning when ``n_neighbors``
    (50) exceeds the size of the smallest cluster (18 labels).
    :param X_n140_outliers: A pytest Fixture supplying the observations.
    :return: None
    """
    expected_message = (
        "Number of neighbors specified larger than smallest "
        "cluster. Specify a number of neighbors smaller than "
        "the smallest cluster size (observations in smallest "
        "cluster minus one).")

    # 120 labels for cluster 0 followed by 18 labels for cluster 1
    cluster_labels = [0] * 120 + [1] * 18

    clf = loop.LocalOutlierProbability(X_n140_outliers,
                                       n_neighbors=50,
                                       cluster_labels=cluster_labels)

    # fit() must both warn and terminate
    with pytest.raises(SystemExit) as exit_info, \
            pytest.warns(UserWarning) as warning_log:
        clf.fit()

    assert exit_info.type is SystemExit

    # Exactly one warning, carrying the expected text
    assert len(warning_log) == 1
    assert warning_log[0].message.args[0] == expected_message
예제 #22
0
def get_anomalies_by_LOop(graph, k_nn, threshold):
    """
    Return the edges of ``graph`` that join two anomalous nodes.

    Each node is represented by its row of the adjacency matrix. Nearest
    neighbors between rows are found with the Hamming metric, LoOP scores
    are computed from the resulting distance and neighbor matrices, and
    nodes scoring above ``threshold`` are treated as anomalies.

    :param graph: A NetworkX graph.
    :param k_nn: Number of nearest neighbors used for the search and LoOP.
    :param threshold: Score above which a node counts as an anomaly.
    :return: List of ``(u, v)`` tuples, one per pair of adjacent anomalous
        nodes, with ``u`` preceding ``v`` in ``graph.nodes`` order.
    """
    # to_numpy_array yields the same adjacency values as the former
    # to_numpy_matrix (removed in NetworkX 3.0), as a plain ndarray
    adjacency = nx.to_numpy_array(graph)
    neigh = NearestNeighbors(n_neighbors=k_nn, metric='hamming')
    neigh.fit(adjacency)
    d, idx = neigh.kneighbors(adjacency, return_distance=True)
    m = loop.LocalOutlierProbability(distance_matrix=d,
                                     neighbor_matrix=idx,
                                     n_neighbors=k_nn).fit()
    scores = m.local_outlier_probabilities

    # Score i belongs to the i-th node in graph.nodes order
    anomalies = [node for i, node in enumerate(graph.nodes)
                 if scores[i] > threshold]

    # all_neighbors is symmetric (predecessors and successors), so every
    # adjacent pair is reported once, keyed by its earlier node; the
    # neighbor set is built once per node instead of once per pair
    anomalies_edges = []
    for position, source in enumerate(anomalies):
        adjacent = set(nx.all_neighbors(graph, source))
        for target in anomalies[position + 1:]:
            if target in adjacent:
                anomalies_edges.append((source, target))
    return anomalies_edges
예제 #23
0
def test_stream_cluster():
    """
    Test to ensure that the proper warning is issued if the streaming approach
    is called on a model that was fit with cluster labels.

    ``stream`` is called inside the ``pytest.warns`` context so that the
    recorded warning comes from the library rather than from the test
    itself.
    :return: None
    """
    # Generate train/test data
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # Generate some abnormal novel observations
    X_outliers = rng.uniform(low=-4, high=4, size=(20, 2))
    X = np.r_[X, X_outliers]

    # Generate cluster labels
    a = [0] * 120
    b = [1] * 18
    cluster_labels = a + b

    # Fit the model
    X_train = X[0:138]
    X_test = X[139]
    clf = loop.LocalOutlierProbability(X_train,
                                       cluster_labels=cluster_labels).fit()

    with pytest.warns(UserWarning) as record:
        clf.stream(X_test)

    # check that only one warning was raised
    assert len(record) == 1
    # check that the message matches
    assert record[0].message.args[
               0] == "Stream approach does not support clustered data. " \
                     "Automatically refit using single cluster of points."
예제 #24
0
def loOP(S, n_neighbours):
    """
    Fit LoOP on ``S``, print every score on its own line and return them.

    :param S: Observations; converted with ``np.array`` before fitting.
    :param n_neighbours: Value passed as ``n_neighbors`` to the model.
    :return: The fitted model's ``local_outlier_probabilities``.
    """
    data = np.array(S)
    model = loop.LocalOutlierProbability(
        data, extent=0.95, n_neighbors=n_neighbours)
    scores = model.fit().local_outlier_probabilities

    # Echo each score to stdout before handing them back
    for score in scores:
        print(score)
    return scores
예제 #25
0
def test_missing_values():
    """
    Check that fitting data containing a NaN terminates with SystemExit.
    """
    values_with_nan = np.array([1.3, 1.1, 0.9, 1.4, 1.5, np.nan, 3.2])
    model = loop.LocalOutlierProbability(values_with_nan, n_neighbors=3)

    with pytest.raises(SystemExit) as exit_info:
        model.fit()

    assert exit_info.type is SystemExit
예제 #26
0
def test_loop_dist_matrix():
    """
    Check that fitting from precomputed distance and neighbor matrices gives
    scores that agree with fitting from the raw data to one decimal place.

    Data is 120 scaled normal draws from a random state seeded with 2.
    """
    rng = check_random_state(2)
    X = 0.3 * rng.randn(120, 2)

    # Distances and neighbor indices for 10 neighbors
    knn = NearestNeighbors(n_neighbors=10, metric='euclidean')
    knn.fit(X)
    distances, indices = knn.kneighbors(X, return_distance=True)

    # One model from raw data, one from the precomputed matrices
    from_data = loop.LocalOutlierProbability(X)
    from_matrices = loop.LocalOutlierProbability(distance_matrix=distances,
                                                 neighbor_matrix=indices)

    data_scores = from_data.fit().local_outlier_probabilities
    matrix_scores = from_matrices.fit().local_outlier_probabilities

    # Both routes must agree to one decimal place
    assert_almost_equal(data_scores, matrix_scores, decimal=1)
예제 #27
0
def loOP(train, extent=2, n=20):
    """
    Fit LoOP on ``train`` and return its scores as a single column.

    :param train: Training data; must expose ``shape``.
    :param extent: Value passed as ``extent`` to the model.
    :param n: Value passed as ``n_neighbors`` to the model.
    :return: The outlier probabilities reshaped to ``(train.shape[0], 1)``.
    """
    from PyNomaly import loop

    model = loop.LocalOutlierProbability(train,
                                         extent=extent,
                                         n_neighbors=n)
    probabilities = model.fit().local_outlier_probabilities

    # One row per training observation, one column of scores
    n_rows = train.shape[0]
    return probabilities.reshape(n_rows, 1)
예제 #28
0
파일: test_loop.py 프로젝트: P1ZZ4/PyNomaly
def test_data_format() -> None:
    """
    Check that fitting a model built from a flat Python list of values
    issues a UserWarning.
    :return: None
    """
    # Six plain floats in a flat list rather than an explicit 2-D structure
    flat_values = [1.3, 1.1, 0.9, 1.4, 1.5, 3.2]
    model = loop.LocalOutlierProbability(flat_values,
                                         n_neighbors=3,
                                         use_numba=NUMBA)
    assert_warns(UserWarning, model.fit)
예제 #29
0
def test_lambda_values(X_n140_outliers) -> None:
    """
    Verify that the mean outlier probability shrinks as ``extent`` grows.

    One model is built per extent value (1, 2 and 3) on the same data; the
    averages of the resulting probabilities must be strictly decreasing.
    :param X_n140_outliers: A pytest Fixture supplying the observations.
    :return: None
    """
    # One estimator per extent (lambda) value
    models = [
        loop.LocalOutlierProbability(X_n140_outliers, extent=extent)
        for extent in (1, 2, 3)
    ]

    # Fit each model and reduce its scores to a single average
    mean_scores = [
        np.mean(model.fit().local_outlier_probabilities) for model in models
    ]

    # A larger extent must give a strictly smaller average score
    assert_greater(mean_scores[0], mean_scores[1])
    assert_greater(mean_scores[1], mean_scores[2])
예제 #30
0
def test_input_nodata(X_n140_outliers) -> None:
    """
    Check that constructing the model with neither data nor a distance
    matrix issues exactly one UserWarning with the expected message.
    :param X_n140_outliers: A pytest Fixture; only its row count is used.
    :return: None
    """
    expected_message = "Data or a distance matrix must be provided."
    neighbor_count = X_n140_outliers.shape[0] - 1

    with pytest.warns(UserWarning) as caught:
        # No data and no distance matrix are supplied
        loop.LocalOutlierProbability(n_neighbors=neighbor_count)

    # Exactly one warning, carrying the expected text
    assert len(caught) == 1
    assert caught[0].message.args[0] == expected_message