def analysis():
    """ A simple API endpoint to compare data from two sensors
        Example: http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname&hours=24
    """

    if 'wotkit_token' in session:

        a = request.args.get('a')
        b = request.args.get('b')
        hours = request.args.get('hours', type=int)  # None if missing or non-numeric; int() would raise instead

        if (a and b and hours):

            msph = 3600000  #milliseconds per hour
            result = defaultdict(dict)

            sensoraDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(b, msph * hours))

            # Labels object
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object
            sensoraDailyMeans = sensoraDataSeries.resample('H', how='mean')
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensoraDailyMeans, 5))
            result['a']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensoraDailyMeans, 5))
            result['a']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensoraDailyMeans, 5))
            result['a']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensoraDailyMeans, 5))

            #Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H', how='mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensorbDailyMeans, 5))
            result['b']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensorbDailyMeans, 5))
            result['b']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensorbDailyMeans, 5))
            result['b']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensorbDailyMeans, 5))

            #Comparison object
            result['comparison']['correlation'] = SeriesToList(
                pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5))
            result['comparison']['covariance'] = SeriesToList(
                pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5))

            json_response = json.dumps(result)

            return Response(json_response, content_type='application/json')
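Note: every pd.rolling_* call on this page uses the old function-based rolling API, which was deprecated in pandas 0.18 and removed in 0.23. A minimal sketch of the same statistics under the modern Rolling API (the random sensor series here are illustrative placeholders, not the WoTKit data):

import numpy as np
import pandas as pd

# Placeholder hourly sensor series standing in for the WoTKit data.
idx = pd.date_range('2024-01-01', periods=48, freq='H')
hourly_a = pd.Series(np.random.randn(48), index=idx).resample('H').mean()
hourly_b = pd.Series(np.random.randn(48), index=idx).resample('H').mean()

roll_a = hourly_a.rolling(window=5)
result = {
    'mean': roll_a.mean(),                              # was pd.rolling_mean(s, 5)
    'rolling_stdev': roll_a.std(),                      # was pd.rolling_std(s, 5)
    'rolling_skewness': roll_a.skew(),                  # was pd.rolling_skew(s, 5)
    'rolling_kurtosis': roll_a.kurt(),                  # was pd.rolling_kurt(s, 5)
    'correlation': hourly_a.rolling(5).corr(hourly_b),  # was pd.rolling_corr
    'covariance': hourly_a.rolling(5).cov(hourly_b),    # was pd.rolling_cov
}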
Example #2
def plot_rolling_functions(series, window_size=128):
    pd.rolling_median(series,window_size).plot(label='median')
    pd.rolling_mean(series,window_size).plot(label='mean')
    pd.rolling_std(series,window_size).plot(label='std')
    pd.rolling_skew(series,window_size).plot(label='skew')
    pd.rolling_kurt(series,window_size).plot(label='kurt')
    pd.rolling_min(series,window_size).plot(label='min')
    pd.rolling_max(series,window_size).plot(label='max')
    plt.title('Various rolling window functions, window size %s' % (window_size))
    plt.legend()
    plt.show()
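The same plot under the modern method-based API, as a sketch (it assumes matplotlib.pyplot is imported as plt, as above):

def plot_rolling_functions_modern(series, window_size=128):
    roll = series.rolling(window_size)
    for name in ('median', 'mean', 'std', 'skew', 'kurt', 'min', 'max'):
        getattr(roll, name)().plot(label=name)  # e.g. roll.kurt().plot(label='kurt')
    plt.title('Various rolling window functions, window size %s' % window_size)
    plt.legend()
    plt.show()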
Example #3
def Calc(df):
    """
    Compute the 250-day rolling kurtosis of log returns.
    """
    ret = np.log(df["price_adj"]) - np.log(df["price_adj"].shift(1))
    res = pd.rolling_kurt(ret, 250).to_frame("Kurtosis250d")
    return res
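Under the modern API the same feature is a single chain (np.log(x).diff() equals the log-difference above):

def Calc_modern(df):
    """Compute the 250-day rolling kurtosis of log returns."""
    return np.log(df["price_adj"]).diff().rolling(250).kurt().to_frame("Kurtosis250d")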
Example #4
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
              dd.rolling_window(d, 3, win_type='boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
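For comparison, a sketch of the kurtosis assertion in the method-based form, which works for both the pandas object p and the dask object d (reusing this file's assert_eq helper):

assert_eq(p.rolling(3).kurt(), d.rolling(3).kurt(), check_less_precise=True)
assert_eq(p.rolling(3, min_periods=3).sum(), d.rolling(3, min_periods=3).sum())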
Example #5
def visualize_sequential_relationships(training_data, plot_size, smooth=None, window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data frame index is time series.
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(training_data.columns) else len(training_data.columns)
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1

    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    if smooth == 'mean':
                        training_data.iloc[:, index] = pd.rolling_mean(training_data.iloc[:, index], window)
                    elif smooth == 'var':
                        training_data.iloc[:, index] = pd.rolling_var(training_data.iloc[:, index], window)
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = pd.rolling_skew(training_data.iloc[:, index], window)
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = pd.rolling_kurt(training_data.iloc[:, index], window)

                    training_data.iloc[:, index].plot(ax=ax[j // 4, j % 4], kind='line', legend=False,
                                                      title=training_data.columns[index])
        fig.tight_layout()
Example #6
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3),
              dd.rolling_skew(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3),
              dd.rolling_kurt(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'),
                  dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
Example #8
def get_estimator(ticker, start, end, window=30, clean=True):
    
    prices = data.get_data(ticker, start, end)
    
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)

    result = pandas.rolling_kurt(log_return, window=window)
    
    result[:window-1] = np.nan
    
    if clean:
        return result.dropna()
    else:
        return result
Example #10
def get_heartbeat(data, col):
    '''
    Featurizes arrhythmia data to indicate individual heartbeats.

    Args:
        data (DataFrame): mitdb DataFrame
        col (str): column of mitdb DataFrame to base heartbeat feature on

    Returns:
        heartbeats (list): temporal list of heartbeat probabilities
    '''
    x1 = data.index.astype(int).tolist()
    y1 = data[col]
    y2 = pd.rolling_kurt(y1, 100)
    y3 = pd.rolling_std(y1 - pd.rolling_mean(y1, 10), 10)
    # note: on Python 3, reduce must be imported from functools
    return reduce(lambda x, y: x * y, [y1, y2, y3])
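A Python 3 sketch of the same featurization with the modern rolling methods:

from functools import reduce

def get_heartbeat_modern(data, col):
    y1 = data[col]
    y2 = y1.rolling(100).kurt()                          # was pd.rolling_kurt(y1, 100)
    y3 = (y1 - y1.rolling(10).mean()).rolling(10).std()  # std of the detrended signal
    return reduce(lambda x, y: x * y, [y1, y2, y3])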
Example #11
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
Example #12
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
Example #14
def test_ts_kurt(self):
    self.env.add_operator('ts_kurt', {
        'operator': OperatorTSKurt,
        'arg1': {
            'value': [3, 5]
        },
    })
    string1 = 'ts_kurt(2, open1)'
    gene1 = self.env.parse_string(string1)
    self.assertFalse(gene1.validate())
    string2 = 'ts_kurt(5, open1)'
    gene2 = self.env.parse_string(string2)
    self.assertTrue(gene2.validate())
    self.assertEqual(gene2.dimension, '')
    self.assertRaises(IndexError, gene2.eval, self.env, self.date1,
                      self.date2)
    date1 = self.env.shift_date(self.date1, 4)
    df = pd.rolling_kurt(self.env.get_data_value('open1'), 5).iloc[4:]
    self.assertTrue((gene2.eval(self.env, date1,
                                self.date2) == df).values.all())
    self.assertTrue(
        frame_equal(gene2.eval(self.env, date1, self.date2), df))
Example #16
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, 'boxcar'),
           dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3,
                                                           min_periods=3))
Example #17
def visualize_sequential_relationships(training_data,
                                       plot_size,
                                       smooth=None,
                                       window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data frame index is time series.
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(
        training_data.columns) else len(training_data.columns)
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1

    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    if smooth == 'mean':
                        training_data.iloc[:, index] = pd.rolling_mean(
                            training_data.iloc[:, index], window)
                    elif smooth == 'var':
                        training_data.iloc[:, index] = pd.rolling_var(
                            training_data.iloc[:, index], window)
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = pd.rolling_skew(
                            training_data.iloc[:, index], window)
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = pd.rolling_kurt(
                            training_data.iloc[:, index], window)

                    training_data.iloc[:, index].plot(
                        ax=ax[j // 4, j % 4],
                        kind='line',
                        legend=False,
                        title=training_data.columns[index])
        fig.tight_layout()
Example #18
def ts_kurt(self, x, n):
    return pd.rolling_kurt(x, n)
Example #19
def ts_kurtFn(arr, min_periods, max_periods):
    if not max_periods:
        max_periods = len(arr)
    return pd.rolling_kurt(arr, max_periods, min_periods=min_periods)
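A modern sketch of this helper; min_periods moves into the rolling() call:

def ts_kurt_fn_modern(arr, min_periods, max_periods=None):
    if not max_periods:
        max_periods = len(arr)
    return arr.rolling(window=max_periods, min_periods=min_periods).kurt()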
Example #20
def rolling_smoother(self, data, stype='rolling_mean', win_size=10, win_type='boxcar', center=False, std=0.1,
                     beta=0.1,
                     power=1, width=1):
    """
    Perform a rolling smoothing on the data. For complete help refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data:
    :param stype:
    :param win_size:
    :param win_type:
    :param center:
    :param std:
    :param beta:
    :param power:
    :param width:
    :smoothing types:
        ROLLING :
            rolling_count	Number of non-null observations
            rolling_sum	Sum of values
            rolling_mean	Mean of values
            rolling_median	Arithmetic median of values
            rolling_min	Minimum
            rolling_max	Maximum
            rolling_std	Unbiased standard deviation
            rolling_var	Unbiased variance
            rolling_skew	Unbiased skewness (3rd moment)
            rolling_kurt	Unbiased kurtosis (4th moment)
            rolling_window	Moving window function
                window types:
                    boxcar
                    triang
                    blackman
                    hamming
                    bartlett
                    parzen
                    bohman
                    blackmanharris
                    nuttall
                    barthann
                    kaiser (needs beta)
                    gaussian (needs std)
                    general_gaussian (needs power, width)
                    slepian (needs width)
    """
    if stype == 'count':
        newy = pd.rolling_count(data, win_size)
    elif stype == 'sum':
        newy = pd.rolling_sum(data, win_size)
    elif stype == 'mean':
        newy = pd.rolling_mean(data, win_size)
    elif stype == 'median':
        newy = pd.rolling_median(data, win_size)
    elif stype == 'min':
        newy = pd.rolling_min(data, win_size)
    elif stype == 'max':
        newy = pd.rolling_max(data, win_size)
    elif stype == 'std':
        newy = pd.rolling_std(data, win_size)
    elif stype == 'var':
        newy = pd.rolling_var(data, win_size)
    elif stype == 'skew':
        newy = pd.rolling_skew(data, win_size)
    elif stype == 'kurt':
        newy = pd.rolling_kurt(data, win_size)
    elif stype == 'window':
        # elif chain here so the plain-window branch no longer overwrites the
        # kaiser/gaussian results (the original's dangling else did exactly that)
        if win_type == 'kaiser':
            newy = pd.rolling_window(data, win_size, win_type, center=center, beta=beta)
        elif win_type == 'gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center, std=std)
        elif win_type == 'general_gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center, power=power, width=width)
        else:
            newy = pd.rolling_window(data, win_size, win_type, center=center)
    return newy
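Under the modern API the window types move into Series.rolling(win_type=...) and the per-window parameters into the aggregation call; a minimal sketch covering a few of the cases above:

def rolling_smoother_modern(data, stype='mean', win_size=10, win_type='boxcar', std=0.1):
    if stype == 'kurt':
        return data.rolling(win_size).kurt()
    if stype == 'window':
        win = data.rolling(win_size, win_type=win_type)
        # gaussian windows take their parameter in the aggregation call
        return win.mean(std=std) if win_type == 'gaussian' else win.mean()
    return getattr(data.rolling(win_size), stype)()  # count, sum, mean, std, skew, ...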
Example #21
def ts_operation(df, n):
    return pd.rolling_kurt(df, n)
Example #22
def calculate_features(data: pd.DataFrame,
                       normalization=False,
                       train_data: list = None,
                       start=None,
                       end=None):
    Open = data['Open'].values
    High = data['High'].values
    Low = data['Low'].values
    Close = data['Close'].values
    Volume = data['Volume'].values
    data['ret'] = data['Close'].pct_change() * 100.0
    data['ret_2'] = data['Close'].pct_change().shift() * 100.0
    data['ret_3'] = data['Close'].pct_change().shift(2) * 100.0
    data['ret_4'] = data['Close'].pct_change().shift(3) * 100.0
    data['ret_5'] = data['Close'].pct_change().shift(4) * 100.0
    data['ret_ratio'] = (data['ret'] / data['ret_5'] - 1) * 100.0
    data['log_ret'] = (np.log(data['Close'])).diff() * 100.0
    data['gap'] = ((data['Open'] - data['Close'].shift()) / data['Open'] *
                   100.0)
    data['gap2'] = ((data['Open'] - data['Close'].shift()) / data['Open'] *
                    100.0).shift()
    data['gap3'] = ((data['Open'] - data['Close'].shift()) / data['Open'] *
                    100.0).shift(2)
    data['gap4'] = ((data['Open'] - data['Close'].shift()) / data['Open'] *
                    100.0).shift(3)
    data['gap5'] = ((data['Open'] - data['Close'].shift()) / data['Open'] *
                    100.0).shift(4)
    data['hl'] = ((data['High'] - data['Low']) / data['Open'] * 100.0)
    data['hl2'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift()
    data['hl3'] = ((data['High'] - data['Low']) / data['Open'] *
                   100.0).shift(2)
    data['hl4'] = ((data['High'] - data['Low']) / data['Open'] *
                   100.0).shift(3)
    data['hl5'] = ((data['High'] - data['Low']) / data['Open'] *
                   100.0).shift(4)
    data['oc'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0)
    data['oc2'] = ((data['Close'] - data['Open']) / data['Open'] *
                   100.0).shift()
    data['oc3'] = ((data['Close'] - data['Open']) / data['Open'] *
                   100.0).shift(2)
    data['oc4'] = ((data['Close'] - data['Open']) / data['Open'] *
                   100.0).shift(3)
    data['oc5'] = ((data['Close'] - data['Open']) / data['Open'] *
                   100.0).shift(4)
    data['MA_short'] = talib.EMA(data['Close'].values, 10)
    data['MA_long'] = talib.EMA(data['Close'].values, 120)
    data['MA_ratio'] = (data['MA_short'] / data['MA_long'] - 1) * 100.0
    data['MA2_short'] = talib.EMA(data['Close'].values, 10)
    data['MA2_long'] = talib.EMA(data['Close'].values, 60)
    data['MA2_ratio'] = (data['MA2_short'] / data['MA2_long'] - 1) * 100.0
    data['vol_long'] = pd.rolling_std(data['Close'], 30)
    data['vol_short'] = pd.rolling_std(data['Close'], 15)
    data['vol_ratio'] = (data['vol_short'] / data['vol_long'] - 1) * 100.0
    data['EMA'] = (Close / talib.EMA(Close, 5) - 1) * 100.0
    data['EMA_long'] = (Close / talib.EMA(Close, 60) - 1) * 100.0
    data['RSI'] = talib.RSI(data['Close'].values) / 100.0
    data['MOM'] = talib.MOM(data['Close'].values, timeperiod=14) / 100.0
    data['MACD_vfast'], data['MACD_signal_vfast'], data['MACD_hist'] = \
        talib.MACD(data['Close'].values, fastperiod=4, slowperiod=9, signalperiod=3)
    data['MACD_fast'], data['MACD_signal_fast'], _ = \
        talib.MACD(data['Close'].values, fastperiod=12, slowperiod=26, signalperiod=9)
    data['MACD_slow'], _, _ = talib.MACD(data['Close'].values,
                                         fastperiod=25,
                                         slowperiod=50)
    data['MACD'], data['MACD_signal'], data['MACD_hist'] = talib.MACD(
        data['Close'].values, fastperiod=30, slowperiod=65, signalperiod=22)
    data['ATR'] = talib.ATR(High, Low, Close, timeperiod=28)
    data['ADX_vlong'] = talib.ADX(High, Low, Close, timeperiod=120)
    data['ADX_long'] = talib.ADX(High, Low, Close, timeperiod=28)
    data['ADX_short'] = talib.ADX(High, Low, Close, timeperiod=14)
    data['TSF_short'] = talib.TSF(data['Close'].values, timeperiod=14)
    data['TSF_long'] = talib.TSF(data['Close'].values, timeperiod=28)
    data['TSF_ratio'] = (data['TSF_short'] / data['TSF_long'] - 1) * 100.0
    data['BBand_up'], data['BBand_mid'], data['BBand_low'] = talib.BBANDS(
        data['Close'].values, timeperiod=20)
    data['BBand_width'] = (data['BBand_up'] / data['BBand_low'] - 1) * 100.0
    data['HMA_short'] = HMA(data['Close'].values, timeperiod=9)
    data['HMA_long'] = HMA(data['Close'].values, timeperiod=60)
    data['HMA_ratio'] = (data['HMA_short'] / data['HMA_long'] - 1) * 100.0
    data['HMA_ret'] = HMA(data['Close'].values, 100)
    # data['HMA_ret'] = data['HMA_ret'].pct_change()
    data['OBV'] = talib.OBV(Close, Volume)
    data['mean'] = pd.rolling_mean(data['ret'], 10)
    data['std'] = pd.rolling_std(data['ret'], 10)
    data['skewness'] = pd.rolling_skew(data['ret'], 10)
    # note: pandas' rolling kurtosis is already excess (Fisher) kurtosis,
    # so subtracting 3 here applies the normal-distribution correction twice
    data['kurtosis'] = (pd.rolling_kurt(data['ret'], 10) - 3)
    data['STOCHk'], data['STOCHd'] = talib.STOCH(High,
                                                 Low,
                                                 Close,
                                                 fastk_period=28,
                                                 slowk_period=3,
                                                 slowd_period=3)
    data['STOCHRSId'], data['STOCHRSIk'] = talib.STOCHRSI(Close)
    data['Chaikin_vol'] = Chaikin_vol(High, Low)
    data['Chaikin_oscillator'] = Chaikin_oscillator(High, Low, Close, Volume)
    data['PDI'] = talib.PLUS_DI(High, Low, Close, timeperiod=14)
    data['MDI'] = talib.MINUS_DI(High, Low, Close, timeperiod=14)
    data['DI'] = data['ADX_short'] - data['PDI'] + data['MDI']
    # train_data  = ['ret', 'ret_2', 'ret_3', 'ret_4', 'ret_5', 'vol_ratio', 'hl', 'oc', 'gap']
    # 'ret_2', 'ret_3', 'ret_4', 'ret_5']
    # data = include_VIX(data)
    data.replace(np.nan, 0, inplace=True)
    if normalization is True:
        for feature in data.columns:

            if feature not in [
                    'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                    'Product', 'log_ret', 'ret', 'ret_2', 'ret_3', 'ret_4',
                    'ret_5', 'Date'
            ]:
                data[feature] = (normalize(data[feature], start=start,
                                           end=end))

    if train_data is None:
        # train_data = ['MACD_vfast', 'vol_ratio', 'oc', 'hl', 'ret', 'ADX_short', 'MA_ratio', 'MA2_ratio',
        #               'RSI', 'skewness', 'kurtosis', 'mean', 'std']
        train_data = ['oc', 'vol_ratio', 'hl', 'ret']
        # train_data = ['MACD_vfast', 'vol_ratio', 'oc', 'hl', 'gap', 'ret', 'ADX_short', 'BBand_width', 'MA_ratio',
    #                   'RSI', 'skewness', 'kurtosis', 'mean', 'std'] # most original
    # train_data = ['MACD_vfast', 'vol_ratio', 'oc', 'hl', 'gap', 'ret',
    #               'ADX_short', 'BBand_width', 'MA_ratio', 'RSI', 'skewness', 'kurtosis', 'mean', 'std']
    data = feature_analysis(data,
                            feature=train_data,
                            pca_components=len(train_data),
                            start=start,
                            end=end)

    return data
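The four rolling moment features near the end ('mean' through 'kurtosis') map onto one Rolling object in the modern API; a sketch (pandas' kurt already returns excess kurtosis, so no -3 adjustment is needed):

roll_ret = data['ret'].rolling(10)
data['mean'] = roll_ret.mean()
data['std'] = roll_ret.std()
data['skewness'] = roll_ret.skew()
data['kurtosis'] = roll_ret.kurt()  # Fisher definition: normal distribution -> 0.0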
Example #23
#newstock.plot()

grouped = newstock.groupby('TSYMBOL')

#plottest = grouped.get_group('BAC')

#plottest.plot(x='date', y='PRC')



fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(9,9))


newstock['STD'] = pd.rolling_std(newstock['PRC'],25,min_periods=1)

newstock['KURTOSIS'] = pd.rolling_kurt(newstock['PRC'],25,min_periods=1)
'''
for symbol in symbols:
    plottest = grouped.get_group(symbol)
    plottest.plot(x='date',y='sprtrn',ax=ax,label=symbol)
#    print('here-')
#    print(plottest)
       
#    print(newstock)
'''


for name, group in newstock.groupby('TSYMBOL'):
    #   print(newstock.date)
    #   print('here')
    #   print(group)
    pass  # loop body truncated in the original snippet
Example #25
    def sequential_relationships(self,
                                 time='index',
                                 smooth_method=None,
                                 window=1,
                                 grid_size=4):
        """
        Generates line plots to visualize sequential data.

        Parameters
        ----------
        time : string, optional, default 'index'
            Datetime input column to use for visualization.

        smooth_method : {'mean', 'var', 'skew', 'kurt', None}, optional, default None
            Apply a function to the time series to smooth out variations.

        window : int, optional, default 1
            Size of the moving window used to calculate the smoothing function.

        grid_size : int, optional, default 4
            Number of vertical/horizontal plots to display in a single window.
        """
        self.print_message('Generating sequential relationship plots...')

        if smooth_method not in ['mean', 'var', 'skew', 'kurt', None]:
            raise Exception('Invalid value for smooth_method.')

        data = self.data.fillna(0)

        if time != 'index':
            data = data.reset_index()
            data = data.set_index(time)

        data.index.name = None
        n_features = len(data.columns)
        plot_size = grid_size**2
        n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

        for i in range(n_plots):
            fig, ax = plt.subplots(grid_size,
                                   grid_size,
                                   sharex=True,
                                   figsize=(self.fig_size, self.fig_size / 2))
            for j in range(plot_size):
                index = (i * plot_size) + j
                if index < n_features:
                    if type(data.iloc[0, index]) is not str:
                        if smooth_method == 'mean':
                            data.iloc[:, index] = pd.rolling_mean(
                                data.iloc[:, index], window)
                        elif smooth_method == 'var':
                            data.iloc[:, index] = pd.rolling_var(
                                data.iloc[:, index], window)
                        elif smooth_method == 'skew':
                            data.iloc[:, index] = pd.rolling_skew(
                                data.iloc[:, index], window)
                        elif smooth_method == 'kurt':
                            data.iloc[:, index] = pd.rolling_kurt(
                                data.iloc[:, index], window)

                        data.iloc[:, index].plot(ax=ax[j // grid_size,
                                                       j % grid_size],
                                                 kind='line',
                                                 legend=False,
                                                 title=data.columns[index])
            fig.tight_layout()

        self.print_message('Plot generation complete.')
Example #26
def run_kurtosis(data, nfft, decimate_by, overlap_fraction, info="", whiten=False, save_plot=False, twosided=False):
    if whiten:
        #Apply an lpc filter to perform "pre-whitening"
        #See "The Application of Spectral Kurtosis to Bearing Diagnostics", N. Sawalhi and R. Randall, ACOUSTICS 2004
        coeffs = 100
        data = data - np.mean(data)

        #These two lines work, but are very, very slow on large datasets. Since we only need coeffs+1 correlations, why not do that?
        #acorr_data = np.correlate(data, data, mode='full')
        #r = acorr_data[data.size-1:data.size+coeffs]

        extended_data = np.hstack((data,data))
        acorr_data = np.asarray([np.convolve(extended_data[0+i:data.size+i], data[::-1].conj(), 'valid') for i in range(coeffs+1)])
        acorr_data.shape = (acorr_data.shape[0],)  # flatten the (coeffs+1, 1) array
        r = acorr_data

        #Equivalent
        #print np.correlate(data,data,'full')[data.size-1]
        #print np.convolve(data,data[::-1].conj(), 'valid')

        phi = np.dot(sp.linalg.inv(sp.linalg.toeplitz(r[:-1])), -r[1:])
        lpfilt = np.concatenate(([1.], phi))
        data = sg.lfilter(lpfilt, 1, data)
        #Remove filter transient
        data = data[coeffs+1:]

    #Heuristic window to get nice plots
    base_window_length = int(overlap_fraction*nfft)
    f, axarr = plot.subplots(2)
    if decimate_by > 1:
        data = filterbank.polyphase_single_filter(data, decimate_by, sg.firwin(200, 1./(decimate_by+.25)))
        window_length = base_window_length // decimate_by
    else:
        window_length = base_window_length
    overlapped = overlap_data_stream(data, chunk=nfft, overlap_percentage=overlap_fraction).T
    windowed_overlapped = np.apply_along_axis(lambda x: np.hanning(len(x))*x,0,overlapped)
    raw_spectrogram = np.fft.fftshift(np.fft.fft(windowed_overlapped, n=nfft, axis=0), axes=0)
    if twosided:
        spec_dframe = pd.DataFrame(np.abs(raw_spectrogram))
    else:
        spec_dframe = pd.DataFrame(np.abs(raw_spectrogram[:raw_spectrogram.shape[0] // 2, :]))
    fulltitle = "Spectrogram and spectral kurtosis" + (", prewhitened" if whiten else "") + "\n" + info + " $F_s=$" + str(44100 // decimate_by) + ", $O=$" + str(overlap_fraction) + ", $NFFT=$" + str(nfft if twosided else nfft // 2) + ",  $NWND=$" + str(base_window_length)
    f.suptitle(fulltitle)
    #axarr[0].specgram(data,
    #        NFFT=nfft,
    #        noverlap=int(overlap_fraction*nfft),
    #        cmap=cm.gray,
    #        origin='lower',
    #        interpolation='bicubic',
    #        sides='onesided',
    #        aspect='normal')
    log_spec = copy.copy(spec_dframe.values.flatten())
    log_spec = np.ma.log(log_spec)
    log_spec = np.reshape(log_spec, spec_dframe.values.shape)
    lower, upper = get_adjusted_lims(log_spec, num_bins=10000)
    specax = axarr[0].imshow(log_spec,
            cmap=cm.gray,
            vmin=lower,
            vmax=upper,
    #        cmap=cm.spectral,
    #        cmap=cm.gist_stern,
            interpolation='bicubic',
            origin='lower',
            aspect='normal')
    xaxislabel="Time (Overlapped Samples)"
    yaxislabel="Frequency (FFT Bins)"
    axarr[0].set_xlabel(xaxislabel)
    axarr[0].set_ylabel(yaxislabel)
    rolling_kurtosis = pd.rolling_kurt(spec_dframe, window_length, axis=1).fillna(0)  # fillna() with no arguments raises
    lower,upper = get_adjusted_lims(rolling_kurtosis, num_bins=10000)
    #Remove 0:nfft*overlap_fraction column values to adjust for plotting offest and get cleaner looking plots
    #kurtax = axarr[1].imshow(rolling_kurtosis.values[:, int(nfft*overlap_fraction):],
    kurtax = axarr[1].imshow(rolling_kurtosis,
            vmin=lower,
            vmax=upper,
            cmap=cm.gray,
            #cmap=cm.spectral,
            #cmap=cm.gist_stern,
            interpolation='bicubic',
            origin='lower',
            aspect='normal')
    axarr[1].set_xlabel(xaxislabel)
    axarr[1].set_ylabel(yaxislabel)
    speccblabel = "Amplitude (dB)"
    kurtcblabel = "Unbiased Kurtosis"
    f.subplots_adjust(right=0.8)
    speccbax = f.add_axes([.85,.53,.025,.35])
    kurtcbax = f.add_axes([.85,.1,.025,.35])
    speccb = f.colorbar(specax, cax=speccbax)
    speccb.set_label(speccblabel)
    kurtcb = f.colorbar(kurtax, cax=kurtcbax)
    kurtcb.set_label(kurtcblabel)

    if save_plot:
        plot.savefig("".join(fulltitle.split(" ")) + ".png")
        plot.close()
    else:
        plot.show()
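The row-wise (axis=1) rolling kurtosis above has no axis argument in current pandas; a transpose-based sketch of the equivalent:

rolling_kurtosis = spec_dframe.T.rolling(window_length).kurt().T.fillna(0)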
Example #28
def visualize_sequential_relationships(data,
                                       time='index',
                                       smooth_method=None,
                                       window=1,
                                       grid_size=4,
                                       fig_size=20):
    """
    Generates line plots to visualize sequential data.  Assumes the data frame index is time series.

    Parameters
    ----------
    data : array-like
        Pandas data frame containing the entire data set.

    time : string, optional, default 'index'
        Datetime input column to use for visualization.

    smooth_method : {'mean', 'var', 'skew', 'kurt'}, optional, default None
        Apply a function to the time series to smooth out variations.

    window : int, optional, default 1
        Size of the moving window used to calculate the smoothing function.

    grid_size : int, optional, default 4
        Number of vertical/horizontal plots to display in a single window.

    fig_size : int, optional, default 20
        Size of the plot.
    """
    # replace NaN values with 0 to prevent exceptions in the lower level API calls
    data = data.fillna(0)

    if time != 'index':
        data = data.reset_index()
        data = data.set_index(time)

    data.index.name = None
    n_features = len(data.columns)
    plot_size = grid_size**2
    n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

    for i in range(n_plots):
        fig, ax = plt.subplots(grid_size,
                               grid_size,
                               sharex=True,
                               figsize=(fig_size, fig_size / 2))
        for j in range(plot_size):
            index = (i * plot_size) + j
            if index < n_features:
                if type(data.iloc[0, index]) is not str:
                    if smooth_method == 'mean':
                        data.iloc[:, index] = pd.rolling_mean(
                            data.iloc[:, index], window)
                    elif smooth_method == 'var':
                        data.iloc[:, index] = pd.rolling_var(
                            data.iloc[:, index], window)
                    elif smooth_method == 'skew':
                        data.iloc[:, index] = pd.rolling_skew(
                            data.iloc[:, index], window)
                    elif smooth_method == 'kurt':
                        data.iloc[:, index] = pd.rolling_kurt(
                            data.iloc[:, index], window)

                    data.iloc[:, index].plot(ax=ax[j // grid_size,
                                                   j % grid_size],
                                             kind='line',
                                             legend=False,
                                             title=data.columns[index])
        fig.tight_layout()
Example #29
def evaluate(self, table):
    expr = self.expr
    val = None
    if expr is not None:
        val = expr.evaluate(table)
    return pd.rolling_kurt(val, self.window)
Example #30
def get_rolling_kurt(values, window):
    """Return rolling kurt of given values, using specified window size."""
    return pd.rolling_kurt(values.shift(1), window=window)
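The modern equivalent, keeping the one-period lag from shift(1):

def get_rolling_kurt_modern(values, window):
    """Return rolling kurtosis of values lagged by one period."""
    return values.shift(1).rolling(window=window).kurt()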