Example #1
0
    def test__compute_perc_cutoff(self):
        """Check ``_compute_perc_cutoff`` against a hand-computed value.

        The fitted model's ``outlier_score_`` is overwritten with the
        integers 1..100, and the value returned for ``cutoff`` (cast to int)
        is expected to equal ``100 - cutoff + 1``.
        """
        X = self._gen_data()

        z = np.arange(1, 101).astype(int)
        cutoff = 10
        perc = 100 - cutoff + 1
        tof = TOF().fit(X)

        # Replace the fitted scores with known values so the expected
        # cutoff can be stated without depending on the generated data.
        tof.outlier_score_ = z
        perc_cutoff = tof._compute_perc_cutoff(cutoff).astype(int)
        # assertEqual reports both operands on failure, which
        # assertTrue(a == b) does not.
        self.assertEqual(perc_cutoff, perc)
Example #2
0
    def test__find_nearest_neighbors(self):
        """Smoke-test ``_find_nearest_neighbors`` with and without ``k``.

        A freshly fitted model is used for each call; the test only checks
        that the calls complete without raising.
        """
        data = self._gen_data()
        # First pass relies on the method's own default, second passes k=7.
        for extra_kwargs in ({}, {"k": 7}):
            fitted = TOF().fit(data)
            fitted._find_nearest_neighbors(data, **extra_kwargs)
Example #3
0
 def test__compute_tof(self):
     """Check ``_compute_tof`` against a reference computed with numpy.

     The reference score is the root of the mean squared difference
     between each row of neighbour indices and the row's own index.
     """
     X = self._gen_data()
     nn_ids = np.array([[1, 3], [0, 2], [3, 2], [0, 1]])
     ids = np.arange(4).reshape([4, 1])
     score = np.mean((nn_ids - ids)**2, axis=1)**(1 / 2)
     calcscore = TOF().fit(X)._compute_tof(nn_ids, ids)
     # The scores are floating-point square roots; compare with a
     # tolerance so a last-bit rounding difference between two equivalent
     # formulations does not fail the test.
     self.assertTrue(np.allclose(score, calcscore))
Example #4
0
def detect_outlier(
    time_series,
    cutoff_n=1.0,
    k=None,
    in_percent=False,
    embedding_dimension=3,
    embedding_delay=1,
    **other_method_kwargs
):
    """Detect outliers in a time series with TOF.

    Only the first column of ``time_series`` is analysed. The series is
    time-delay embedded, a TOF model is fitted on the embedding, and the
    resulting scores and predictions are joined back onto the input frame.

    :param pandas.DataFrame time_series: pandas dataframe with the time series
    :param float cutoff_n: the threshold for the detector
                            (max event length, or % of #datapoints)
    :param int k: number of neighbors to use
                  (default is embedding_dimension + 1)
    :param bool in_percent: if True, the threshold is drawn at the given
                            percentage instead of at an event length
    :param int embedding_dimension: embedding dimension value (>=1) [default: 3]
    :param int embedding_delay: embedding delay (>=1) [default: 1]
    :param other_method_kwargs: extra keyword arguments forwarded to ``TOF``
    :return: ``time_series`` concatenated column-wise with the result DataFrame
    :rtype: pandas.DataFrame
    """
    # Both transformers share the same embedding parameters.
    embedding_params = {"d": embedding_dimension, "tau": embedding_delay}

    # Embed the raw values (first column only) as a numpy array.
    raw_values = time_series.values[:, 0]
    embedded = TimeDelayEmbedder(**embedding_params).fit_transform(raw_values)

    # Transform the index with the same parameters to get the new time axis.
    time_axis = TransformYTrue(**embedding_params).fit_transform(
        time_series.index
    )
    time_axis = pd.DataFrame(time_axis).values

    # Fit the detector; optionally replace its cutoff by a percentage-based one.
    detector = TOF(cutoff_n=cutoff_n, k=k, **other_method_kwargs).fit(embedded)
    if in_percent:
        detector.cutoff_ = detector._compute_perc_cutoff(cutoff_n)
    labels = detector.predict(embedded)

    # Combine the scores and predictions into the result frame.
    result = _make_result_df(
        time_axis, detector.outlier_score_, labels, inv_it=True, prefix="TOF"
    )
    return pd.concat([time_series, result], axis=1, sort=False)
Example #5
0
from uniqed.models.tof import TOF
from uniqed.data.gen_logmap import generate_logmapdata
from uniqed.transformers.transformers import TimeDelayEmbedder
import matplotlib.pyplot as plt

# Build the example dataset with a fixed seed
data = generate_logmapdata(rseed=231)
values = data['value'].values
timestamps = data.index.values

# Embed both the values and the time axis (default embedder settings)
embedded_values = TimeDelayEmbedder().fit_transform(values)
embedded_times = TimeDelayEmbedder().fit_transform(timestamps)

# Fit TOF on the embedded values and get the anomaly predictions
detector = TOF(cutoff_n=100)
labels = detector.fit_predict(embedded_values)
# The plotted score is the reciprocal of the model's outlier score
tof_score = 1.0 / detector.outlier_score_

# Upper panel: the raw data frame
plt.figure()
plt.subplot(2, 1, 1)
plt.plot(data)
plt.legend(['time series', 'anomaly'], loc='upper left')
plt.ylabel('values')
plt.xlim(0, 2000)

# Lower panel: TOF score over embedded time, coloured by the prediction
plt.subplot(2, 1, 2)
plt.scatter(embedded_times[:, 0], tof_score, c=labels)
plt.ylabel("TOF score")
plt.xlabel("t")
Example #6
0
 def test__compute_outlier_score(self):
     """Smoke-test ``_compute_outlier_score`` on a fitted model."""
     data = self._gen_data()
     fitted = TOF().fit(data)
     # Only checks that the call completes without raising.
     fitted._compute_outlier_score(data)
Example #7
0
 def test__compute_p_value(self):
     """Check ``_compute_p_value`` on 0..99 against 0.01..1.00.

     Both sides are rounded to two decimals before the comparison.
     """
     expected = np.round(np.arange(0.01, 1.01, 0.01), 2)
     computed = np.round(TOF()._compute_p_value(np.arange(100)), 2)
     matches = expected == computed
     self.assertTrue(np.all(matches))
Example #8
0
 def test__compute_cutoff2(self):
     """Smoke-test ``_compute_cutoff`` on a model fitted with ``k=21``."""
     data = self._gen_data()
     # Only checks that the call completes without raising.
     TOF(k=21).fit(data)._compute_cutoff(cutoff_n=100)
Example #9
0
 def test__compute_cutoff(self):
     """Check ``_compute_cutoff`` with a numeric and a string ``cutoff_n``.

     A numeric ``cutoff_n`` must be accepted; a string must raise
     ``ValueError``.
     """
     fitted = TOF().fit(self._gen_data())
     fitted._compute_cutoff(cutoff_n=100)
     # Callable form of assertRaises: same check as the context manager.
     self.assertRaises(
         ValueError, fitted._compute_cutoff, cutoff_n='goosebump'
     )
Example #10
0
 def test__get_outliers_inds(self):
     """Smoke-test ``_get_outliers_inds`` on the output of ``predict``."""
     data = self._gen_data()
     # The helper is called on a separate, unfitted instance.
     unfitted = TOF()
     predictions = TOF().fit(data).predict(data)
     unfitted._get_outliers_inds(predictions)
Example #11
0
 def test_predict(self):
     """Smoke-test ``fit`` + ``predict`` with default and custom ``cutoff_n``."""
     data = self._gen_data()
     # First pass uses the constructor defaults, second sets cutoff_n=70.
     for init_kwargs in ({}, {"cutoff_n": 70}):
         TOF(**init_kwargs).fit(data).predict(data)
Example #12
0
 def test_fit(self):
     """Smoke-test ``fit`` with the default constructor arguments."""
     data = self._gen_data()
     model = TOF()
     # Only checks that fitting completes without raising.
     model.fit(data)