예제 #1
0
def get_anom(magnetic, axis):
    """
    Detect positive-direction anomalies along one axis of the magnetic data.

    The 'Date' column and the requested axis column are selected from
    ``magnetic``, relabelled ``timestamp`` / ``value`` and handed to
    ``pyc.detect_ts``. A progress message and the elapsed
    ``process_time()`` are printed to stdout.

    :param magnetic: (dataframe) magnetic data holding a 'Date' column and
        one column per axis
    :param axis: name of the axis column to analyse ('X', 'Y' or 'Z')

    :return: the ``'anoms'`` entry of the detection result
    """
    print("Detecting anomalies for", axis, "axis", end='')
    t0 = process_time()

    # Two-column frame in the (timestamp, value) layout given to detect_ts.
    series_frame = magnetic[['Date', axis]]
    series_frame.columns = ["timestamp", "value"]

    # TODO: mess around with maximum_anomalies and alpha to improve resulting plots
    detector_settings = dict(maximum_anomalies=0.025, direction='pos', alpha=0.05)
    detection = pyc.detect_ts(series_frame, **detector_settings)

    elapsed = round(process_time() - t0, 2)
    print(" --- took", elapsed, " s")
    return detection['anoms']
예제 #2
0
    def transform(self, X):
        """
        Flag anomalies in every data column of ``X`` using ``detect_ts``.

        ``X`` is coerced into a pandas DataFrame if it is not one already
        (a DataFrame argument is copied, so the caller's object is never
        modified). A helper column named ``'_index'`` carries the time axis:

        * if ``self.datetimestr_col`` is set, that column is parsed with
          ``to_datetime`` and used;
        * otherwise, if ``self.index_col`` is set, that column is used as is;
        * otherwise the frame's own index values are used.

        Each remaining column is passed to ``detect_ts`` together with
        ``'_index'`` and then overwritten with 1 where its ``'_index'`` value
        appears among the reported anomaly timestamps and 0 elsewhere.

        :param X: DataFrame, or anything accepted by the ``DataFrame``
            constructor.
        :return: DataFrame of 0/1 anomaly flags. The index column keeps its
            original name when one was configured; the temporary
            ``'_index'`` column is removed otherwise.
        """
        # Work on a private frame: the renames and overwrites below must not
        # leak into the object the caller handed in.
        X = X.copy() if isinstance(X, DataFrame) else DataFrame(X)

        if self.datetimestr_col is not None:
            X[self.datetimestr_col] = to_datetime(X[self.datetimestr_col])
            X.rename(columns={self.datetimestr_col: '_index'}, inplace=True)
        elif self.index_col is not None:
            X.rename(columns={self.index_col: '_index'}, inplace=True)
        else:
            X['_index'] = X.index.values

        # Snapshot the labels first; columns are reassigned inside the loop.
        for col in list(X.columns):
            # Compare by value: identity (`is`) against a string literal is
            # not guaranteed to hold for equal strings.
            if col == '_index':
                continue
            df_col = X.reindex(columns=['_index', col])
            out = detect_ts(df_col, max_anoms=self.max_anoms, alpha=self.alpha,
                            direction=self.direction, only_last=None)
            X[col] = 0
            X.loc[X['_index'].isin(out['anoms']['timestamp'].values), col] = 1

        if self.datetimestr_col is not None:
            X.rename(columns={'_index': self.datetimestr_col}, inplace=True)
        elif self.index_col is not None:
            X.rename(columns={'_index': self.index_col}, inplace=True)
        else:
            # axis=1 is required: drop() targets row labels by default and
            # would raise KeyError for the helper column.
            X.drop(labels=['_index'], axis=1, inplace=True)

        return X
예제 #3
0
 def test_both_directions_e_value_threshold_med_max(self):
     # Both directions, "med_max" threshold and e_value enabled:
     # the anomaly table must have 3 columns and 4 rows.
     anoms = detect_ts(
         self.raw_data,
         maximum_anomalies=0.02,
         direction='both',
         threshold="med_max",
         e_value=True,
     )['anoms']
     second_column = anoms.iloc[:, 1]
     eq_(len(anoms.columns), 3)
     eq_(len(second_column), 4)
예제 #4
0
 def test_both_directions_e_value_longterm(self):
     # Both directions with long_term and e_value enabled (plotting off):
     # the anomaly table must have 3 columns and 131 rows.
     anoms = detect_ts(
         self.raw_data,
         maximum_anomalies=0.02,
         direction='both',
         long_term=True,
         plot=False,
         e_value=True,
     )['anoms']
     second_column = anoms.iloc[:, 1]
     eq_(len(anoms.columns), 3)
     eq_(len(second_column), 131)
예제 #5
0
 def test_both_directions_with_plot(self):
     # Both directions restricted to the last day: the anomaly table must
     # have 2 columns and 25 rows.
     # NOTE(review): despite the test name, plot=False is passed - confirm
     # whether plotting was meant to be exercised here.
     anoms = detect_ts(
         self.raw_data,
         maximum_anomalies=0.02,
         direction='both',
         only_last='day',
         plot=False,
     )['anoms']
     second_column = anoms.iloc[:, 1]
     eq_(len(anoms.columns), 2)
     eq_(len(second_column), 25)
예제 #6
0
     Rosner, B., (May 1983), "Percentage Points for a Generalized ESD
     Many-Outlier Procedure" , Technometrics, 25(2), pp. 165-172.

'''

# First prepare data from truncated series.
# detect_ts is given a two-column frame; the labels previously pointed at the
# wrong data ('timestamp' held ts.values, 'observation' held ts.index) and the
# column order depended on how the installed pandas orders dict keys.
# Assumes `ts` is a pandas Series whose index is the time axis - TODO confirm.
my_df = pd.DataFrame({'timestamp': ts.index, 'observation': ts.values},
                     columns=['timestamp', 'observation'])

# NOTE(review): the title here says 'IQR Method' while the figure title set
# later in this script says 'GES' - confirm which wording is intended.
results = detect_ts(df=my_df,
                    max_anoms=0.1,
                    direction="pos",
                    alpha=0.05,
                    only_last=None,
                    threshold=None,
                    e_value=False,
                    longterm=False,
                    piecewise_median_period_weeks=2,
                    plot=False,
                    y_log=False,
                    xlabel=XLABEL,
                    ylabel=YLABEL,
                    title='Google Trends Data - Twitter + IQR Method',
                    verbose=False)

# Title, axis labels and layout for the current figure
# (`plt` is presumably matplotlib.pyplot, imported earlier in the script).
figure_title = KEYWORD + ' - Google Trends Data - Twitter + GES'
plt.title(figure_title)
#plt.subtitle('United States search volume')

# Axis captions, with the x tick labels rotated by -45 degrees.
plt.xlabel(XLABEL)
plt.ylabel(YLABEL)
plt.tick_params(axis='x', rotation=-45)

# Layout adjustments run last, after all labels are in place.
plt.tight_layout()
plt.autoscale()