def do_ad(df, alpha=0.005, max_anoms=0.1, only_last=None, longterm=False, e_value=False, direction='both'):
    """
    This method performs the actual anomaly detection.  Expecting the a dataframe with multiple sensors,
    and a specification of which sensor to use for anomaly detection.

    :param df: a dataframe with a timestamp column and one more columns with telemetry data
    :param column: name of the column on which to perform AD
    :param alpha: see pyculiarity documentation for the meaning of these parameters
    :param max_anoms:
    :param only_last:
    :param longterm:
    :param e_value:
    :param direction:
    :return: a pd.Series containing anomalies.  If not an anomaly, entry will be NaN, otherwise the sensor reading
    """


    results = detect_ts(df,
                        max_anoms=max_anoms,
                        alpha=alpha,
                        direction=direction,
                        e_value=e_value,
                        longterm=longterm,
                        only_last=only_last)

    return results['anoms']['timestamp'].values
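A minimal invocation sketch for do_ad (hedged: the dataframe below is synthetic; the epoch-second conversion mirrors the idiom used in the later examples):

# Sketch only: call do_ad on a synthetic two-column series.
import numpy as np
import pandas as pd

df = pd.DataFrame({'timestamp': pd.date_range('2020-01-01', periods=1000, freq='H').astype(np.int64) // 10**9,
                   'value': np.random.randn(1000)})
anom_timestamps = do_ad(df, alpha=0.05, max_anoms=0.02)
print(anom_timestamps)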
Example #2
def detect_anomaly():
    twitter_example_data = pd.read_csv('1.csv', usecols=['timestamp', 'count'])
    results = detect_ts(twitter_example_data,
                        max_anoms=0.02,
                        direction='both',
                        only_last='day')
    print(results['anoms'].iloc[:, 1])
Example #3
def twitter_anomaly_detection(data_path):
    data = get_data(data_path)
    # anomaly detection
    data['date'] = data['timestamp'].apply(lambda i: datetime.fromtimestamp(i))
    anomalies = detect_ts(data[['date', 'value']],
                          max_anoms=0.001,
                          direction='both')
    print(anomalies)

    # time conversion

    plt.plot(pd.to_datetime(data['date']), data['value'], '-')

    # scatter plot of the actual (labeled) anomalies
    date = data.loc[data['label'] == 1]['date']
    value = data.loc[data['label'] == 1]['value']
    plt.scatter(pd.to_datetime(date), value, c='b', linewidths=3)

    # plot the detection results
    anoms_date = anomalies['anoms']['timestamp']
    plt.plot(pd.to_datetime(anoms_date),
             anomalies['anoms']['anoms'], 'ro')

    plt.grid(True)
    # save the figure
    plt.savefig(img_path + "twitter_anomaly_detection.png", dpi=1000)
    plt.show()
Example #4
 def detect_bw(cell):
     try:
         example_data = df_concat_kpi_bw_T[['timestamp', cell]]
         example_data.loc[:, cell] = example_data[cell].ffill().bfill()
         results = detect_ts(example_data,
                             max_anoms=0.09,
                             alpha=0.001,
                             direction='both',
                             only_last=None)
         results['anoms']['cell'] = cell
         return results['anoms'].reset_index(drop=True)
     except Exception:
         example_data = df_concat_kpi_bw_T[['timestamp', cell]]
         example_data.loc[:, cell] = example_data[cell].ffill().bfill()
         results = sesd.seasonal_esd(example_data[cell],
                                     periodicity=20,
                                     hybrid=True,
                                     max_anomalies=int(
                                         len(example_data[cell]) * 0.05))
         tmp = example_data.loc[results]
         tmp.columns = ['timestamp', 'anoms']
         tmp['cell'] = cell
         return tmp
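A hedged sketch of how detect_bw might be applied across every cell and the per-cell results stacked into one frame (df_concat_kpi_bw_T is assumed to exist as in the snippet above, with a 'timestamp' column plus one column per cell):

# Sketch only: run detect_bw over each cell column and concatenate the results.
cells = [c for c in df_concat_kpi_bw_T.columns if c != 'timestamp']
all_anoms = pd.concat([detect_bw(c) for c in cells], ignore_index=True)
print(all_anoms.groupby('cell').size())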
Example #5
    def test_handling_of_leading_trailing_nas(self):
        for i in list(range(10)) + [len(self.raw_data) - 1]:
            self.raw_data.at[i, 'count'] = np.nan

        results = detect_ts(self.raw_data, max_anoms=0.02,
                            direction='both', plot=False)
        eq_(len(results['anoms'].columns), 2)
        eq_(len(results['anoms'].iloc[:,1]), 131)
Example #6
 def test_both_directions_with_plot(self):
     results = detect_ts(self.raw_data,
                         max_anoms=0.02,
                         direction='both',
                         only_last='day',
                         plot=False)
     eq_(len(results['anoms'].columns), 2)
     eq_(len(results['anoms'].iloc[:, 1]), 25)
Example #7
 def test_both_directions_e_value_threshold_med_max(self):
     results = detect_ts(self.raw_data,
                         max_anoms=0.02,
                         direction='both',
                         threshold="med_max",
                         e_value=True)
     eq_(len(results['anoms'].columns), 3)
     eq_(len(results['anoms'].iloc[:, 1]), 4)
Example #8
 def test_both_directions_with_plot(self):
     results = detect_ts(
         self.raw_data,
         max_anoms=0.02,
         direction='both',
         only_last='day',
         plot=False)
     eq_(len(results['anoms'].columns), 2)
     eq_(len(results['anoms'].iloc[:, 1]), 21)
Example #9
 def test_both_directions_e_value_threshold_med_max(self):
     results = detect_ts(
         self.raw_data,
         max_anoms=0.02,
         direction='both',
         threshold="med_max",
         e_value=True)
     eq_(len(results['anoms'].columns), 3)
     eq_(len(results['anoms'].iloc[:, 1]), 4)
Example #10
 def test_both_directions_e_value_longterm(self):
     results = detect_ts(self.raw_data,
                         max_anoms=0.02,
                         direction='both',
                         longterm=True,
                         plot=False,
                         e_value=True)
     eq_(len(results['anoms'].columns), 3)
     eq_(len(results['anoms'].iloc[:, 1]), 131)
Example #11
    def test_handling_of_leading_trailing_nas(self):
        for i in list(range(10)) + [len(self.raw_data) - 1]:
            self.raw_data.at[i, 'count'] = np.nan

        results = detect_ts(self.raw_data,
                            max_anoms=0.02,
                            direction='both',
                            plot=False)
        eq_(len(results['anoms'].columns), 2)
        eq_(len(results['anoms'].iloc[:, 1]), 131)
Example #12
 def test_both_directions_e_value_longterm(self):
     results = detect_ts(
         self.raw_data,
         max_anoms=0.02,
         direction='both',
         longterm=True,
         plot=False,
         e_value=True)
     eq_(len(results['anoms'].columns), 3)
     eq_(len(results['anoms'].iloc[:, 1]), 114)
Example #13
 def predict(self, data=None):
     results = detect_ts(data, max_anoms=self.max_anoms,
                         direction=self.direction, alpha=self.alpha, only_last=self.only_last,
                         threshold=self.threshold, e_value=self.e_value, longterm=self.longterm,
                         piecewise_median_period=self.piecewise_median_period, custom_period=self.custom_period,
                         use_period=self.use_period)
     anoms = results['anoms']
     self.anomaly_idx = anoms.index
     self.anom_val = anoms['anoms']
     return anoms
Example #14
    def test_check_midnight_date_format(self):
        data = pd.read_csv(os.path.join(self.path,
                                        'midnight_test_data.csv'),
                           usecols=['date', 'value'])

        data.date = date_format(data.date, "%Y-%m-%d %H:%M:%S")
        results = detect_ts(data, max_anoms=0.2, threshold=None,
                            direction='both', plot=False,
                            only_last="day",
                            e_value=True)
        eq_(len(results['anoms'].anoms), len(results['anoms'].expected_value))
Example #15
def filter_outliers(data):
    """ 异常值过滤 """
    from pyculiarity import detect_ts
    results = detect_ts(data,
                        max_anoms=0.10,
                        alpha=0.05,  # significance level for the ESD test
                        direction='both',
                        only_last=None)["anoms"]
    outliers_arr = results["timestamp"].array
    data = data[data["timestamp"].apply(lambda ele: ele not in outliers_arr)]
    data = data.rename(columns={"value": "y", "timestamp": "ds"})
    return data
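A hedged usage sketch for filter_outliers (the input frame is illustrative; the ds/y renaming matches the column convention expected by downstream forecasting tools such as Prophet):

# Sketch only: feed a two-column frame through filter_outliers.
import numpy as np
import pandas as pd

raw = pd.DataFrame({'timestamp': pd.date_range('2021-01-01', periods=720, freq='H').astype(np.int64) // 10**9,
                    'value': np.random.randn(720)})
clean = filter_outliers(raw)
print(clean.columns.tolist())  # ['ds', 'y'] after the rename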
Example #16
def detect_anoms(dataframe):
    """
    Run anomaly detection.
    :param dataframe: dataframe with 'timestamp' and 'pred_price' columns
    :return: list of timestamps
    """
    df = dataframe[['timestamp', 'pred_price']].reset_index(drop=True)
    results = detect_ts(df,
                        max_anoms=0.3,
                        alpha=0.001,
                        direction='both',
                        only_last=None,
                        longterm=True,
                        verbose=True,
                        piecewise_median_period_weeks=3)
    return results['anoms']
Example #17
 def detect_ts_online(df_smooth, window_size, stop):
     is_anomaly = False
     run_time = 9999
     start_index = max(0, stop - window_size)
     df_win = df_smooth.iloc[start_index:stop, :]
     start_time = time.time()
     results = detect_ts(df_win,
                         alpha=0.05,
                         max_anoms=0.02,
                         only_last=None,
                         longterm=False,
                         e_value=False,
                         direction='both')
     run_time = time.time() - start_time
     if results['anoms'].shape[0] > 0:
         timestamp = df_win['timestamp'].tail(1).values[0]
         if timestamp == results['anoms'].tail(1)['timestamp'].values[0]:
             is_anomaly = True
     return is_anomaly, run_time
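A sketch of driving detect_ts_online over a growing series (df_smooth is a hypothetical smoothed dataframe with 'timestamp' and 'value' columns, as the function above assumes):

# Sketch only: slide the online detector across the tail of the series.
window_size = 14 * 24
for stop in range(window_size, len(df_smooth) + 1):
    is_anomaly, run_time = detect_ts_online(df_smooth, window_size, stop)
    if is_anomaly:
        print(df_smooth['timestamp'].iloc[stop - 1], 'flagged in %.2fs' % run_time)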
Example #18
    def detect_outlier_peculiarity(self, frame, yColumn, max_anoms = 0.05, alpha = 0.001, direction='both', printFigure=True):        

        def plotOutliers(data, results, columnName):
            # parse the timestamps for plotting
            data['timestamp'] = pd.to_datetime(data['timestamp'])

            # make a nice plot
            f, ax = plt.subplots(2, 1, sharex=True)
            ax[0].plot(data['timestamp'], data[columnName], 'b')
            ax[0].plot(results['anoms'].index, results['anoms']['anoms'], 'ro')
            ax[0].set_title('Detected Anomalies')
            ax[1].set_xlabel('Time Stamp')
            ax[0].set_ylabel(columnName)
            ax[1].plot(results['anoms'].index, results['anoms']['anoms'], 'b')
            ax[1].set_ylabel('Anomaly Magnitude')
            figTitle = columnName + " - Outliers using TwitterDetector"
            plt.savefig(figTitle+".png")
            plt.show()

        frame = frame.copy()
        
        frame['timestamp'] = (frame['date'] - datetime(1970,1,1)).dt.total_seconds()

        twoColumnsFrame = frame[['timestamp', yColumn]]

        #s = twoColumnsFrame.set_index('date')[yColumn]
        #results = detts.anomaly_detect_ts(s, max_anoms=0.05, alpha=0.001, direction='both')        
        try:
            results = detect_ts(twoColumnsFrame, max_anoms=0.05, alpha=0.001, direction='both')        
        except Exception as e:
            return []

        if printFigure:
            plotOutliers(twoColumnsFrame, results, yColumn)                

        return results
Example #19
def get_anomolies(data):
    return detect_ts(data, max_anoms=0.01, alpha=0.01, direction='pos', only_last=None, longterm=True)
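A trivial usage sketch (data is assumed to be a two-column timestamp/value frame as in the other examples; direction='pos' flags only upward spikes):

# Sketch only: unpack the anomalies frame from the result dict.
anoms = get_anomolies(data)['anoms']
print(anoms.head())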
Example #20
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

plt.style.use('ggplot')

__author__ = 'Raj Shanmuganathan'

if __name__ == '__main__':
    rawdata = pd.read_csv('/Users/rshanm200/Workbench/Anamoly_detection/data/newrawdata1.csv', usecols=['datetime','online'])
    rawdata['timestamp'] = pd.to_datetime(rawdata['datetime'],format='%Y-%m-%d %H:%M:%S')
    rawdata['timestamp'] = rawdata['timestamp'].astype(np.int64) // 10**9
    rawdata['value'] = rawdata['online'].apply(lambda x: 0 if pd.isna(x) else x)
    rawdata = rawdata.drop(['datetime','online'],axis=1)
    print(rawdata)

    results = detect_ts(rawdata, max_anoms=0.01, alpha=0.05, direction='both',piecewise_median_period_weeks=10,granularity='hr')
    print(results)
    
    # format the data nicely for plotting
    rawdata['timestamp'] = pd.to_datetime(rawdata['timestamp'], unit='s')

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(rawdata['timestamp'], rawdata['value'], 'b')
    ax[0].plot(results['anoms'].index, results['anoms']['anoms'], 'ro')
    ax[0].set_title('Detected Anomalies')
    ax[1].set_xlabel('Time Stamp')
    ax[0].set_ylabel('Count')
    ax[1].plot(results['anoms'].index, results['anoms']['anoms'], 'b')
    ax[1].set_ylabel('Anomaly Magnitude')
Example #21
def run(data, window=14 * 24):
    """

    :param data:
    :param window:
    :return:
    """

    # set some parameters for the AD algorithm
    alpha = 0.1
    max_anoms = 0.05
    only_last = None  # alternatively, this can be set to 'hr' or 'day'

    data = pd.read_json(json.loads(data)['data'])
    # return json.dumps(data.columns)

    sensors = ['volt','pressure','vibration', 'rotate']  # list(data.columns[2:])

    # load dataframe
    df = load_df(data)

    # add current sensor readings to data frame, also adds fields for anomaly detection results
    df = append_data(df, data, sensors)

    # calculate running averages
    running_avgs(df, sensors)

    # note the timestamp so that we can update the correct row of the dataframe later
    timestamp = data['timestamp'].values[0]

    # we get a copy of the current (also last) row of the dataframe
    current_row = df.loc[df['timestamp'] == timestamp, :]

    
    # determine how many sensor readings we already have
    rows = df.shape[0]

    # if the data frame doesn't have enough rows for our sliding window size, we just return
    # (reporting no anomalies)
    if rows < window:
        save_df(df)
        json_data = current_row.to_json()
        
        return json.dumps(json_data)

    
    # determine the first row of the data frame that falls into the sliding window
    start_row = rows - window

    # a flag to indicate whether we detected an anomaly in any of the sensors after this reading
    detected_an_anomaly = False

    # we loop over the sensor columns
    for column in sensors:
        df_s = df.iloc[start_row:rows][['timestamp', column + "_avg"]].copy()

        # pyculiarity expects two columns with particular names
        df_s.columns = ['timestamp', 'value']

        # we reset the timestamps, so that the current measurement is the last within the sliding time window
        # df_s = reset_time(df_s)

        # calculate the median value within each time sliding window
        # values = df_s.groupby(df_s.index.date)['value'].median()

        # create dataframe with median values etc.
        # df_agg = pd.DataFrame(data={'timestamp': pd.to_datetime(values.index), 'value': values})

        # find anomalies
        results = detect_ts(df_s, max_anoms=max_anoms,
                            alpha=alpha,
                            direction='both',
                            e_value=False,
                            only_last=only_last)

        # create a data frame where we mark for each day whether it was an anomaly
        df_s = df_s.merge(results['anoms'], on='timestamp', how='left')

        # mark the current sensor reading as an anomaly. Specifically, if we get an anomaly in the sliding window
        # leading up to (and including) the current sensor reading, we mark the current sensor reading as an anomaly.
        # Note: alternatively, one could mark all the sensor readings that fall within the sliding window as
        # anomalies. However, we prefer our approach, because without the current sensor reading the other sensor
        # readings in this sliding window may not have been anomalies.
        # current_row[column + '_an'] = not np.isnan(df_agg.tail(1)['anoms'].iloc[0])
        if not np.isnan(df_s.tail(1)['anoms'].iloc[0]):
            current_row.loc[current_row.index[0], column + '_an'] = True
            detected_an_anomaly = True

    # It's only necessary to update the current row in the data frame if we detected an anomaly
    if detected_an_anomaly:
        df.loc[df['timestamp'] == timestamp, :] = current_row
    save_df(df)
    
    json_data = current_row.to_json()
    
    return json.dumps(json_data)
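For reference, a hedged sketch of a payload that run() would accept (the field layout is inferred from the pd.read_json(json.loads(data)['data']) call above; load_df, append_data, running_avgs, and save_df are assumed to come from the surrounding module):

# Sketch only: build the kind of JSON payload run() expects.
import json
import pandas as pd

reading = pd.DataFrame({'timestamp': [pd.Timestamp('2020-01-01 00:00:00')],
                        'machineID': [1],
                        'volt': [168.0], 'pressure': [98.0],
                        'vibration': [40.0], 'rotate': [450.0]})
payload = json.dumps({'data': reading.to_json()})
# result = run(payload)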
Example #22
def run(rawdata, window=14 * 24):
    """

    :param data:
    :param window:
    :return:
    """

    try:
        # set some parameters for the AD algorithm
        alpha = 0.1
        max_anoms = 0.05
        only_last = None  # alternatively, this can be set to 'hr' or 'day'
        
        json_data = json.loads(rawdata)['data']

        # this is the beginning of anomaly detection code
        # TODO: the anomaly detection service expected one row of a pd.DataFrame w/ a timestamp and machine id, but here we only get a list of values
        # we therefore create a time stamp ourselves
        # and create a data frame that the anomaly detection code can understand
        # eventually, we want this to be harmonized!
        timestamp = time.strftime("%m/%d/%Y %H:%M:%S", time.localtime())
        machineID = 1 # TODO scipy.random.choice(100)
        telemetry_data = json_data[0][8:16:2]
        sensors = ['volt','pressure','vibration', 'rotate']
        
        data_dict = {}
        data_dict['timestamp'] = [timestamp]
        data_dict['machineID'] = [machineID]
        
        for i in range(0,4):
            data_dict[sensors[i]] = [telemetry_data[i]]
            
        telemetry_df = pd.DataFrame(data=data_dict)
        telemetry_df['timestamp'] = pd.to_datetime(telemetry_df['timestamp'])
    
        # load dataframe
        df = load_df(telemetry_df)
        
        # add current sensor readings to data frame, also adds fields for anomaly detection results
        df = append_data(df, telemetry_df, sensors)
        
        # # calculate running averages (no need to do this here, because we are already sending preprocessed data)
        # # TODO: this is disabled for now, because we are dealing with pre-processed data
        # running_avgs(df, sensors, only_copy=True)
        
        # note the timestamp so that we can update the correct row of the dataframe later
        timestamp = df['timestamp'].max()
        
        # we get a copy of the current (also last) row of the dataframe
        current_row = df.loc[df['timestamp'] == timestamp, :]
    
        
        # determine how many sensor readings we already have
        rows = df.shape[0]
        
        # if the data frame doesn't have enough rows for our sliding window size, we just return
        # (reporting no anomalies)
        if rows < window:
            save_df(df)
            json_data = current_row.to_json()
            
            return json.dumps({"result": [0]})

        # determine the first row of the data frame that falls into the sliding window
        start_row = rows - window
    
        # a flag to indicate whether we detected an anomaly in any of the sensors after this reading
        detected_an_anomaly = False
    
        anom_list = []
        # we loop over the sensor columns
        for column in sensors:
            df_s = df.iloc[start_row:rows][['timestamp', column + "_avg"]].copy()
        
            # pyculiarity expects two columns with particular names
            df_s.columns = ['timestamp', 'value']

            # we reset the timestamps, so that the current measurement is the last within the sliding time window
            # df_s = reset_time(df_s)
        
            # calculate the median value within each time sliding window
            # values = df_s.groupby(df_s.index.date)['value'].median()
        
            # create dataframe with median values etc.
            # df_agg = pd.DataFrame(data={'timestamp': pd.to_datetime(values.index), 'value': values})
        
            # find anomalies
            results = detect_ts(df_s, max_anoms=max_anoms,
                                alpha=alpha,
                                direction='both',
                                e_value=False,
                                only_last=only_last)

            # create a data frame where we mark for each day whether it was an anomaly
            df_s = df_s.merge(results['anoms'], on='timestamp', how='left')
        
            # mark the current sensor reading as an anomaly. Specifically, if we get an anomaly in the sliding window
            # leading up to (and including) the current sensor reading, we mark the current sensor reading as an
            # anomaly. Note: alternatively, one could mark all the sensor readings that fall within the sliding
            # window as anomalies. However, we prefer our approach, because without the current sensor reading the
            # other sensor readings in this sliding window may not have been anomalies.
            # current_row[column + '_an'] = not np.isnan(df_agg.tail(1)['anoms'].iloc[0])
            if not np.isnan(df_s.tail(1)['anoms'].iloc[0]):
                current_row.loc[current_row.index[0], column + '_an'] = True
                detected_an_anomaly = True
                anom_list.append(1.0)
            else:   
                anom_list.append(0.0)

        # It's only necessary to update the current row in the data frame if we detected an anomaly
        if detected_an_anomaly:
            df.loc[df['timestamp'] == timestamp, :] = current_row
        save_df(df)

        json_data[0][8:16:2] = anom_list
    
        # # this is the end of anomaly detection code
        
        data = np.array(json_data)
        result = model.predict(data)
        prediction_dc.collect(result)
        print ("saving prediction data" + time.strftime("%H:%M:%S"))
    except Exception as e:
        result = str(e)
        return json.dumps({"error": result})

    return json.dumps({"result":result.tolist()})
Example #23
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import datetime

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # first run the models
    example_data = pd.read_csv('db_test_data.csv',
                               usecols=['time_stamp', 'temp'])

    results = detect_ts(example_data,
                        max_anoms=0.05,
                        alpha=0.001,
                        granularity='day',
                        direction='both')

    # parse the timestamps for plotting
    example_data['time_stamp'] = pd.to_datetime(example_data['time_stamp'])

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(example_data['time_stamp'], example_data['temp'], 'b')
    ax[0].plot(results['anoms'].index, results['anoms']['anoms'], 'ro')
    ax[0].set_title('Detected Anomalies')
    ax[1].set_xlabel('Time Stamp')
    ax[0].set_ylabel('Temp')
    ax[1].plot(results['anoms'].index, results['anoms']['anoms'], 'b')
Example #24
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import datetime

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # first run the models
    twitter_example_data = pd.read_csv('../tests/raw_data.csv',
                                       usecols=['timestamp', 'count'])
    print(twitter_example_data['timestamp'].values[:10])
    twitter_example_data['timestamp'] = twitter_example_data['timestamp'].map(
        lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp(
        ))
    print(twitter_example_data['timestamp'].values[:10])
    results = detect_ts(twitter_example_data,
                        max_anoms=0.05,
                        alpha=0.001,
                        direction='both',
                        verbose=True)
    print(results['anoms']['timestamp'].values[:10])

    # format the twitter data nicely
    twitter_example_data['timestamp'] = pd.to_datetime(
        twitter_example_data['timestamp'], unit='s')

    twitter_example_data.to_csv('raw.csv', index=False)
    results['anoms'].to_csv('results.csv', index=False)

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(twitter_example_data['timestamp'],
               twitter_example_data['count'], 'b')
Example #25
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import datetime

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # first run the models
    example_data = pd.read_csv('db_test_data.csv', usecols=['time_stamp', 'temp'])

    results = detect_ts(example_data, max_anoms=0.05, alpha=0.001, granularity='day', direction='both')

    # parse the timestamps for plotting
    example_data['time_stamp'] = pd.to_datetime(example_data['time_stamp'])

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(example_data['time_stamp'], example_data['temp'], 'b')
    ax[0].plot(results['anoms'].index, results['anoms']['anoms'], 'ro')
    ax[0].set_title('Detected Anomalies')
    ax[1].set_xlabel('Time Stamp')
    ax[0].set_ylabel('Temp')
    ax[1].plot(results['anoms'].index, results['anoms']['anoms'], 'b')
    ax[1].set_ylabel('Anomaly Magnitude')
    plt.show()
Example #26
 def test_handling_of_middle_nas(self):
     self.raw_data.at[len(self.raw_data) // 2, 'count'] = np.nan
     detect_ts(self.raw_data, max_anoms=0.02, direction='both')
Example #27
 def test_handling_of_middle_nas(self):
     self.raw_data.at[len(self.raw_data) // 2, 'count'] = np.nan
     detect_ts(self.raw_data, max_anoms=0.02, direction='both')
Example #29
import sys
import os
curPath = os.path.abspath(os.path.dirname(__file__))
rootPath = os.path.split(curPath)[0]
sys.path.append(rootPath)

from pyculiarity import detect_ts
import pandas as pd
twitter_example_data = pd.read_csv(
    '/Users/mac/IdeaProjects/AIOps/src/python/pyculiarity/raw_data.csv',
    usecols=['timestamp', 'count'])
results = detect_ts(twitter_example_data,
                    max_anoms=0.02,
                    direction='both',
                    only_last='day')
print(str(results['anoms']['anoms']))
with open("/Users/mac/IdeaProjects/AIOps/src/python/pyculiarity/result.csv",
          'a') as resultfile:
    resultfile.write(str(results['anoms']['anoms']))
Example #30
from pyculiarity import detect_ts
import matplotlib.pyplot as plt
import pandas as pd
import matplotlib
import datetime

matplotlib.style.use('ggplot')

__author__ = 'willmcginnis'

if __name__ == '__main__':
    # first run the models
    twitter_example_data = pd.read_csv('../tests/raw_data.csv', usecols=['timestamp', 'count'])
    print(twitter_example_data['timestamp'].values[:10])
    twitter_example_data['timestamp'] = twitter_example_data['timestamp'].map(lambda x: datetime.datetime.strptime(x, '%Y-%m-%d %H:%M:%S').timestamp())
    print(twitter_example_data['timestamp'].values[:10])
    results = detect_ts(twitter_example_data, max_anoms=0.05, alpha=0.001, direction='both', verbose=True)
    print(results['anoms']['timestamp'].values[:10])

    # format the twitter data nicely
    twitter_example_data['timestamp'] = pd.to_datetime(twitter_example_data['timestamp'], unit='s')

    twitter_example_data.to_csv('raw.csv', index=False)
    results['anoms'].to_csv('results.csv', index=False)

    # make a nice plot
    f, ax = plt.subplots(2, 1, sharex=True)
    ax[0].plot(twitter_example_data['timestamp'], twitter_example_data['count'], 'b')
    ax[0].plot(results['anoms'].index, results['anoms']['anoms'], 'ro')
    ax[0].set_title('Detected Anomalies')
    ax[1].set_xlabel('Time Stamp')