import json
from collections import defaultdict

import pandas as pd
from flask import Response, request, session

# WotKitDataToSeries, WoTKitgetSensorData and SeriesToList are
# project-specific helpers defined elsewhere in this application.


def analysis():
    """A simple API endpoint to compare data from two sensors.

    Example: http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname
    """
    if 'wotkit_token' in session:
        a = request.args.get('a')
        b = request.args.get('b')
        hours = int(request.args.get('hours'))
        if a and b and hours:
            msph = 3600000  # milliseconds per hour
            result = defaultdict(dict)
            sensoraDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(
                WoTKitgetSensorData(b, msph * hours))

            # Labels object (str() replaces the Python 2 backtick-repr syntax)
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object (note: the resample is hourly despite the
            # "DailyMeans" variable names)
            sensoraDailyMeans = sensoraDataSeries.resample('H', how='mean')
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensoraDailyMeans, 5))
            result['a']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensoraDailyMeans, 5))
            result['a']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensoraDailyMeans, 5))
            result['a']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensoraDailyMeans, 5))

            # Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H', how='mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(
                pd.rolling_mean(sensorbDailyMeans, 5))
            result['b']['rolling_stdev'] = SeriesToList(
                pd.rolling_std(sensorbDailyMeans, 5))
            result['b']['rolling_skewness'] = SeriesToList(
                pd.rolling_skew(sensorbDailyMeans, 5))
            result['b']['rolling_kurtosis'] = SeriesToList(
                pd.rolling_kurt(sensorbDailyMeans, 5))

            # Comparison object
            result['comparison']['correlation'] = SeriesToList(
                pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5))
            result['comparison']['covariance'] = SeriesToList(
                pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5))

            json_response = json.dumps(result)
            return Response(json_response, content_type='application/json')
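# The pd.rolling_* functions used above were deprecated in pandas 0.18 and
# later removed; the same per-sensor statistics come from the Rolling
# accessor. A minimal sketch (not part of the original endpoint), assuming
# `means` and `other` are hourly-resampled Series like
# sensoraDailyMeans/sensorbDailyMeans above:
def rolling_stats_modern(means, other, window=5):
    roll = means.rolling(window)
    return {
        'rolling_mean': roll.mean(),
        'rolling_stdev': roll.std(),
        'rolling_skewness': roll.skew(),
        'rolling_kurtosis': roll.kurt(),
        # pairwise statistics against the second sensor
        'correlation': means.rolling(window).corr(other),
        'covariance': means.rolling(window).cov(other),
    }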
import matplotlib.pyplot as plt
import pandas as pd


def plot_rolling_functions(series, window_size=128):
    pd.rolling_median(series, window_size).plot(label='median')
    pd.rolling_mean(series, window_size).plot(label='mean')
    pd.rolling_std(series, window_size).plot(label='std')
    pd.rolling_skew(series, window_size).plot(label='skew')
    pd.rolling_kurt(series, window_size).plot(label='kurt')
    pd.rolling_min(series, window_size).plot(label='min')
    pd.rolling_max(series, window_size).plot(label='max')
    plt.title('Various rolling window functions, window size %s' % window_size)
    plt.legend()
    plt.show()
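# A hypothetical usage sketch for plot_rolling_functions: the random-walk
# series below is invented purely for illustration.
import numpy as np

series = pd.Series(np.random.randn(1024).cumsum(),
                   index=pd.date_range('2015-01-01', periods=1024, freq='H'))
plot_rolling_functions(series, window_size=128)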
import numpy as np
import pandas as pd


def Calc(df):
    """Compute the 250-day rolling kurtosis of log returns."""
    ret = np.log(df["price_adj"]) - np.log(df["price_adj"].shift(1))
    res = pd.rolling_kurt(ret, 250).to_frame("Kurtosis250d")
    return res
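# The same computation under the modern pandas rolling API (a sketch,
# assuming the same "price_adj" column as Calc above; reuses np/pd imports):
def calc_modern(df):
    ret = np.log(df["price_adj"]).diff()  # identical log returns
    return ret.rolling(250).kurt().to_frame("Kurtosis250d")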
def rolling_functions_tests(p, d):
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3),
              check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    assert_eq(pd.rolling_window(p, 3, win_type='boxcar'),
              dd.rolling_window(d, 3, win_type='boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3),
              dd.rolling_sum(d, 3, min_periods=3))
def visualize_sequential_relationships(training_data, plot_size, smooth=None, window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data
    frame index is a time series.
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(training_data.columns) else len(training_data.columns)
    # integer division (//) replaces the Python 2 `/`
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1

    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    if smooth == 'mean':
                        training_data.iloc[:, index] = pd.rolling_mean(training_data.iloc[:, index], window)
                    elif smooth == 'var':
                        training_data.iloc[:, index] = pd.rolling_var(training_data.iloc[:, index], window)
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = pd.rolling_skew(training_data.iloc[:, index], window)
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = pd.rolling_kurt(training_data.iloc[:, index], window)
                    training_data.iloc[:, index].plot(ax=ax[j // 4, j % 4], kind='line',
                                                      legend=False, title=training_data.columns[index])
        fig.tight_layout()
import numpy as np
import pandas

# `data` is a project-local data-access module providing get_data().


def get_estimator(ticker, start, end, window=30, clean=True):
    prices = data.get_data(ticker, start, end)
    log_return = (prices['Adj Close'] / prices['Adj Close'].shift(1)).apply(np.log)

    result = pandas.rolling_kurt(log_return, window=window)
    result[:window - 1] = np.nan

    if clean:
        return result.dropna()
    else:
        return result
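# Modern-pandas equivalent of the estimator body (a sketch; `prices` is
# assumed to be the same DataFrame returned by data.get_data above):
log_return = np.log(prices['Adj Close'] / prices['Adj Close'].shift(1))
result = log_return.rolling(window=30).kurt()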
from functools import reduce

import pandas as pd


def get_heartbeat(data, col):
    '''
    Featurizes arrhythmia data to indicate individual heartbeats.

    Args:
        data (DataFrame): mitdb DataFrame
        col (str): column of mitdb DataFrame to base heartbeat feature on

    Returns:
        heartbeats (list): temporal list of heartbeat probabilities
    '''
    x1 = data.index.astype(int).tolist()  # note: currently unused
    y1 = data[col]
    y2 = pd.rolling_kurt(y1, 100)
    y3 = pd.rolling_std(y1 - pd.rolling_mean(y1, 10), 10)
    return reduce(lambda x, y: x * y, [y1, y2, y3])
def rolling_tests(p, d):
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
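# In current pandas/dask the rolling_* functions above no longer exist; the
# same comparisons are written with the method-style API. A sketch, reusing
# the test's `eq` helper, `p` (pandas object) and `d` (its dask counterpart):
def rolling_tests_modern(p, d):
    eq(p.rolling(3).kurt(), d.rolling(3).kurt())
    eq(p.rolling(3).skew(), d.rolling(3).skew())
    eq(p.rolling(3, min_periods=3).sum(), d.rolling(3, min_periods=3).sum())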
def test_ts_kurt(self):
    self.env.add_operator('ts_kurt', {
        'operator': OperatorTSKurt,
        'arg1': {'value': [3, 5]},
    })
    string1 = 'ts_kurt(2, open1)'
    gene1 = self.env.parse_string(string1)
    self.assertFalse(gene1.validate())
    string2 = 'ts_kurt(5, open1)'
    gene2 = self.env.parse_string(string2)
    self.assertTrue(gene2.validate())
    self.assertEqual(gene2.dimension, '')
    self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
    date1 = self.env.shift_date(self.date1, 4)
    df = pd.rolling_kurt(self.env.get_data_value('open1'), 5).iloc[4:]
    self.assertTrue((gene2.eval(self.env, date1, self.date2) == df).values.all())
    self.assertTrue(frame_equal(gene2.eval(self.env, date1, self.date2), df))
def ts_kurt(self, x, n):
    return pd.rolling_kurt(x, n)
def ts_kurtFn(arr, min_periods, max_periods):
    if not max_periods:
        max_periods = len(arr)
    return pd.rolling_kurt(arr, max_periods, min_periods=min_periods)
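# The same helper under the modern rolling API (a sketch; min_periods maps
# directly onto the Rolling constructor):
def ts_kurtFn_modern(arr, min_periods, max_periods):
    if not max_periods:
        max_periods = len(arr)
    return arr.rolling(max_periods, min_periods=min_periods).kurt()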
def rolling_smoother(self, data, stype='mean', win_size=10, win_type='boxcar',
                     center=False, std=0.1, beta=0.1, power=1, width=1):
    """
    Perform a rolling smoothing on the data.

    For complete documentation refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data:
    :param stype:
    :param win_size:
    :param win_type:
    :param center:
    :param std:
    :param beta:
    :param power:
    :param width:

    smoothing types (ROLLING):
        rolling_count   Number of non-null observations
        rolling_sum     Sum of values
        rolling_mean    Mean of values
        rolling_median  Arithmetic median of values
        rolling_min     Minimum
        rolling_max     Maximum
        rolling_std     Unbiased standard deviation
        rolling_var     Unbiased variance
        rolling_skew    Unbiased skewness (3rd moment)
        rolling_kurt    Unbiased kurtosis (4th moment)
        rolling_window  Moving window function

    window types:
        boxcar, triang, blackman, hamming, bartlett, parzen, bohman,
        blackmanharris, nuttall, barthann,
        kaiser (needs beta), gaussian (needs std),
        general_gaussian (needs power, width), slepian (needs width)
    """
    # The default stype is 'mean' (the original default 'rolling_mean'
    # matched no branch and left newy unbound).
    if stype == 'count':
        newy = pd.rolling_count(data, win_size)
    elif stype == 'sum':
        newy = pd.rolling_sum(data, win_size)
    elif stype == 'mean':
        newy = pd.rolling_mean(data, win_size)
    elif stype == 'median':
        newy = pd.rolling_median(data, win_size)
    elif stype == 'min':
        newy = pd.rolling_min(data, win_size)
    elif stype == 'max':
        newy = pd.rolling_max(data, win_size)
    elif stype == 'std':
        newy = pd.rolling_std(data, win_size)
    elif stype == 'var':
        newy = pd.rolling_var(data, win_size)
    elif stype == 'skew':
        newy = pd.rolling_skew(data, win_size)
    elif stype == 'kurt':
        newy = pd.rolling_kurt(data, win_size)
    elif stype == 'window':
        # elif chain fixes a bug in the original: the bare `else` paired with
        # the general_gaussian test, overwriting the kaiser/gaussian results
        if win_type == 'kaiser':
            newy = pd.rolling_window(data, win_size, win_type, center=center, beta=beta)
        elif win_type == 'gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center, std=std)
        elif win_type == 'general_gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center,
                                     power=power, width=width)
        else:
            newy = pd.rolling_window(data, win_size, win_type, center=center)
    return newy
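# Under the modern API, the win_type branch above maps onto Rolling with
# win_type (scipy required); window parameters such as std are passed to the
# aggregation itself, and only certain aggregations (e.g. mean/sum) accept
# them. A small sketch with an assumed Series `s`:
smoothed = s.rolling(10, win_type='gaussian', center=False).mean(std=0.1)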
def ts_operation(df, n):
    return pd.rolling_kurt(df, n)
def calculate_features(data: pd.DataFrame, normalization=False, train_data: list = None,
                       start=None, end=None):
    Open = data['Open'].values
    High = data['High'].values
    Low = data['Low'].values
    Close = data['Close'].values
    Volume = data['Volume'].values

    # Lagged returns
    data['ret'] = data['Close'].pct_change() * 100.0
    data['ret_2'] = data['Close'].pct_change().shift() * 100.0
    data['ret_3'] = data['Close'].pct_change().shift(2) * 100.0
    data['ret_4'] = data['Close'].pct_change().shift(3) * 100.0
    data['ret_5'] = data['Close'].pct_change().shift(4) * 100.0
    data['ret_ratio'] = (data['ret'] / data['ret_5'] - 1) * 100.0
    data['log_ret'] = (np.log(data['Close'])).diff() * 100.0

    # Overnight gaps
    data['gap'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0)
    data['gap2'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift()
    data['gap3'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(2)
    data['gap4'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(3)
    data['gap5'] = ((data['Open'] - data['Close'].shift()) / data['Open'] * 100.0).shift(4)

    # High-low ranges
    data['hl'] = ((data['High'] - data['Low']) / data['Open'] * 100.0)
    data['hl2'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift()
    data['hl3'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(2)
    data['hl4'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(3)
    data['hl5'] = ((data['High'] - data['Low']) / data['Open'] * 100.0).shift(4)

    # Open-close moves
    data['oc'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0)
    data['oc2'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift()
    data['oc3'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(2)
    data['oc4'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(3)
    data['oc5'] = ((data['Close'] - data['Open']) / data['Open'] * 100.0).shift(4)

    # Moving-average and volatility ratios
    data['MA_short'] = talib.EMA(data['Close'].values, 10)
    data['MA_long'] = talib.EMA(data['Close'].values, 120)
    data['MA_ratio'] = (data['MA_short'] / data['MA_long'] - 1) * 100.0
    data['MA2_short'] = talib.EMA(data['Close'].values, 10)
    data['MA2_long'] = talib.EMA(data['Close'].values, 60)
    data['MA2_ratio'] = (data['MA2_short'] / data['MA2_long'] - 1) * 100.0
    data['vol_long'] = pd.rolling_std(data['Close'], 30)
    data['vol_short'] = pd.rolling_std(data['Close'], 15)
    data['vol_ratio'] = (data['vol_short'] / data['vol_long'] - 1) * 100.0
    data['EMA'] = (Close / talib.EMA(Close, 5) - 1) * 100.0
    data['EMA_long'] = (Close / talib.EMA(Close, 60) - 1) * 100.0

    # Momentum and trend indicators
    data['RSI'] = talib.RSI(data['Close'].values) / 100.0
    data['MOM'] = talib.MOM(data['Close'].values, timeperiod=14) / 100.0
    data['MACD_vfast'], data['MACD_signal_vfast'], data['MACD_hist'] = \
        talib.MACD(data['Close'].values, fastperiod=4, slowperiod=9, signalperiod=3)
    data['MACD_fast'], data['MACD_signal_fast'], _ = \
        talib.MACD(data['Close'].values, fastperiod=12, slowperiod=26, signalperiod=9)
    data['MACD_slow'], _, _ = talib.MACD(data['Close'].values, fastperiod=25, slowperiod=50)
    data['MACD'], data['MACD_signal'], data['MACD_hist'] = talib.MACD(
        data['Close'].values, fastperiod=30, slowperiod=65, signalperiod=22)
    data['ATR'] = talib.ATR(High, Low, Close, timeperiod=28)
    data['ADX_vlong'] = talib.ADX(High, Low, Close, timeperiod=120)
    data['ADX_long'] = talib.ADX(High, Low, Close, timeperiod=28)
    data['ADX_short'] = talib.ADX(High, Low, Close, timeperiod=14)
    data['TSF_short'] = talib.TSF(data['Close'].values, timeperiod=14)
    data['TSF_long'] = talib.TSF(data['Close'].values, timeperiod=28)
    data['TSF_ratio'] = (data['TSF_short'] / data['TSF_long'] - 1) * 100.0
    data['BBand_up'], data['BBand_mid'], data['BBand_low'] = talib.BBANDS(
        data['Close'].values, timeperiod=20)
    data['BBand_width'] = (data['BBand_up'] / data['BBand_low'] - 1) * 100.0
    data['HMA_short'] = HMA(data['Close'].values, timeperiod=9)
    data['HMA_long'] = HMA(data['Close'].values, timeperiod=60)
    data['HMA_ratio'] = (data['HMA_short'] / data['HMA_long'] - 1) * 100.0
    data['HMA_ret'] = HMA(data['Close'].values, 100)
    # data['HMA_ret'] = data['HMA_ret'].pct_change()
    data['OBV'] = talib.OBV(Close, Volume)

    # Rolling moments of returns (note: pandas kurt is already excess
    # kurtosis, so a normal distribution yields 0 before the -3 adjustment)
    data['mean'] = pd.rolling_mean(data['ret'], 10)
    data['std'] = pd.rolling_std(data['ret'], 10)
    data['skewness'] = pd.rolling_skew(data['ret'], 10)
    data['kurtosis'] = (pd.rolling_kurt(data['ret'], 10) - 3)

    data['STOCHk'], data['STOCHd'] = talib.STOCH(High, Low, Close, fastk_period=28,
                                                 slowk_period=3, slowd_period=3)
    data['STOCHRSId'], data['STOCHRSIk'] = talib.STOCHRSI(Close)
    data['Chaikin_vol'] = Chaikin_vol(High, Low)
    data['Chaikin_oscillator'] = Chaikin_oscillator(High, Low, Close, Volume)
    data['PDI'] = talib.PLUS_DI(High, Low, Close, timeperiod=14)
    data['MDI'] = talib.MINUS_DI(High, Low, Close, timeperiod=14)
    data['DI'] = data['ADX_short'] - data['PDI'] + data['MDI']

    # data = include_VIX(data)
    data.replace(np.nan, 0, inplace=True)

    if normalization is True:
        for feature in data.columns:
            if feature not in ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',
                               'Product', 'log_ret', 'ret', 'ret_2', 'ret_3', 'ret_4',
                               'ret_5', 'Date']:
                data[feature] = normalize(data[feature], start=start, end=end)

    if train_data is None:
        # Alternative feature sets kept from development, e.g.:
        # train_data = ['MACD_vfast', 'vol_ratio', 'oc', 'hl', 'gap', 'ret', 'ADX_short',
        #               'BBand_width', 'MA_ratio', 'RSI', 'skewness', 'kurtosis', 'mean', 'std']
        train_data = ['oc', 'vol_ratio', 'hl', 'ret']

    data = feature_analysis(data, feature=train_data, pca_components=len(train_data),
                            start=start, end=end)
    return data
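# The four rolling moment features above in the modern pandas API (a sketch,
# operating on the same data['ret'] column as calculate_features):
roll = data['ret'].rolling(10)
data['mean'] = roll.mean()
data['std'] = roll.std()
data['skewness'] = roll.skew()
data['kurtosis'] = roll.kurt() - 3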
# newstock.plot()
grouped = newstock.groupby('TSYMBOL')
# plottest = grouped.get_group('BAC')
# plottest.plot(x='date', y='PRC')
fig, axes = plt.subplots(nrows=3, ncols=1, figsize=(9, 9))
newstock['STD'] = pd.rolling_std(newstock['PRC'], 25, min_periods=1)
newstock['KURTOSIS'] = pd.rolling_kurt(newstock['PRC'], 25, min_periods=1)
'''
for symbol in symbols:
    plottest = grouped.get_group(symbol)
    plottest.plot(x='date', y='sprtrn', ax=ax, label=symbol)
    # print(plottest)
    # print(newstock)
'''
for name, group in newstock.groupby('TSYMBOL'):
    # print(newstock.date)
    # print(group)
    pass  # loop body appears truncated in the original source
def sequential_relationships(self, time='index', smooth_method=None, window=1, grid_size=4):
    """
    Generates line plots to visualize sequential data.

    Parameters
    ----------
    time : string, optional, default 'index'
        Datetime input column to use for visualization.
    smooth_method : {'mean', 'var', 'skew', 'kurt', None}, optional, default None
        Apply a function to the time series to smooth out variations.
    window : int, optional, default 1
        Size of the moving window used to calculate the smoothing function.
    grid_size : int, optional, default 4
        Number of vertical/horizontal plots to display in a single window.
    """
    self.print_message('Generating sequential relationship plots...')

    if smooth_method not in ['mean', 'var', 'skew', 'kurt', None]:
        raise Exception('Invalid value for smooth_method.')

    data = self.data.fillna(0)

    # `!=` replaces the original identity test (`is not`) against a string literal
    if time != 'index':
        data = data.reset_index()
        data = data.set_index(time)
    data.index.name = None

    n_features = len(data.columns)
    plot_size = grid_size ** 2
    n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

    for i in range(n_plots):
        fig, ax = plt.subplots(grid_size, grid_size, sharex=True,
                               figsize=(self.fig_size, self.fig_size / 2))
        for j in range(plot_size):
            index = (i * plot_size) + j
            if index < n_features:
                if type(data.iloc[0, index]) is not str:
                    if smooth_method == 'mean':
                        data.iloc[:, index] = pd.rolling_mean(data.iloc[:, index], window)
                    elif smooth_method == 'var':
                        data.iloc[:, index] = pd.rolling_var(data.iloc[:, index], window)
                    elif smooth_method == 'skew':
                        data.iloc[:, index] = pd.rolling_skew(data.iloc[:, index], window)
                    elif smooth_method == 'kurt':
                        data.iloc[:, index] = pd.rolling_kurt(data.iloc[:, index], window)
                    data.iloc[:, index].plot(ax=ax[j // grid_size, j % grid_size], kind='line',
                                             legend=False, title=data.columns[index])
        fig.tight_layout()
    self.print_message('Plot generation complete.')
def run_kurtosis(data, nfft, decimate_by, overlap_fraction, info="", whiten=False,
                 save_plot=False, twosided=False):
    if whiten:
        # Apply an LPC filter to perform "pre-whitening".
        # See "The Application of Spectral Kurtosis to Bearing Diagnostics",
        # N. Sawalhi and R. Randall, ACOUSTICS 2004
        coeffs = 100
        data = data - np.mean(data)
        # These two lines work, but are very, very slow on large datasets.
        # Since we only need coeffs+1 correlations, why not do just that?
        # acorr_data = np.correlate(data, data, mode='full')
        # r = acorr_data[data.size - 1:data.size + coeffs]
        extended_data = np.hstack((data, data))
        acorr_data = np.asarray([np.convolve(extended_data[0 + i:data.size + i],
                                             data[::-1].conj(), 'valid')
                                 for i in range(coeffs + 1)])
        acorr_data.shape = (acorr_data.shape[0],)
        r = acorr_data
        # Equivalent:
        # np.correlate(data, data, 'full')[data.size - 1]
        # np.convolve(data, data[::-1].conj(), 'valid')
        phi = np.dot(sp.linalg.inv(sp.linalg.toeplitz(r[:-1])), -r[1:])
        lpfilt = np.concatenate(([1.], phi))
        data = sg.lfilter(lpfilt, 1, data)
        # Remove filter transient
        data = data[coeffs + 1:]

    # Heuristic window to get nice plots
    base_window_length = int(overlap_fraction * nfft)
    f, axarr = plot.subplots(2)
    if decimate_by > 1:
        data = filterbank.polyphase_single_filter(data, decimate_by,
                                                  sg.firwin(200, 1. / (decimate_by + .25)))
        window_length = base_window_length // decimate_by
    else:
        window_length = base_window_length

    overlapped = overlap_data_stream(data, chunk=nfft,
                                     overlap_percentage=overlap_fraction).T
    windowed_overlapped = np.apply_along_axis(lambda x: np.hanning(len(x)) * x, 0,
                                              overlapped)
    raw_spectrogram = np.fft.fftshift(np.fft.fft(windowed_overlapped, n=nfft, axis=0),
                                      axes=0)
    if twosided:
        spec_dframe = pd.DataFrame(np.abs(raw_spectrogram))
    else:
        spec_dframe = pd.DataFrame(np.abs(raw_spectrogram[:raw_spectrogram.shape[0] // 2, :]))

    # str() replaces the Python 2 backtick-repr syntax used in the original
    fulltitle = ("Spectrogram and spectral kurtosis"
                 + (", prewhitened" if whiten else "") + "\n" + info
                 + " $F_s=$" + str(44100 // decimate_by)
                 + ", $O=$" + str(overlap_fraction)
                 + ", $NFFT=$" + str(nfft if twosided else nfft // 2)
                 + ", $NWND=$" + str(base_window_length))
    f.suptitle(fulltitle)
    # axarr[0].specgram(data, NFFT=nfft, noverlap=int(overlap_fraction * nfft),
    #                   cmap=cm.gray, origin='lower', interpolation='bicubic',
    #                   sides='onesided', aspect='normal')

    log_spec = copy.copy(spec_dframe.values.flatten())
    log_spec = np.ma.log(log_spec)
    log_spec = np.reshape(log_spec, spec_dframe.values.shape)
    lower, upper = get_adjusted_lims(log_spec, num_bins=10000)
    specax = axarr[0].imshow(log_spec,
                             cmap=cm.gray,
                             vmin=lower, vmax=upper,
                             interpolation='bicubic',
                             origin='lower',
                             aspect='normal')
    xaxislabel = "Time (Overlapped Samples)"
    yaxislabel = "Frequency (FFT Bins)"
    axarr[0].set_xlabel(xaxislabel)
    axarr[0].set_ylabel(yaxislabel)

    # fillna() without arguments raises in pandas; fill the leading NaN
    # window with zeros instead
    rolling_kurtosis = pd.rolling_kurt(spec_dframe, window_length, axis=1).fillna(0)
    lower, upper = get_adjusted_lims(rolling_kurtosis, num_bins=10000)
    # Remove the first nfft*overlap_fraction columns to adjust for the
    # plotting offset and get cleaner-looking plots:
    # kurtax = axarr[1].imshow(rolling_kurtosis.values[:, int(nfft * overlap_fraction):], ...)
    kurtax = axarr[1].imshow(rolling_kurtosis,
                             vmin=lower, vmax=upper,
                             cmap=cm.gray,
                             interpolation='bicubic',
                             origin='lower',
                             aspect='normal')
    axarr[1].set_xlabel(xaxislabel)
    axarr[1].set_ylabel(yaxislabel)

    speccblabel = "Amplitude (dB)"
    kurtcblabel = "Unbiased Kurtosis"
    f.subplots_adjust(right=0.8)
    speccbax = f.add_axes([.85, .53, .025, .35])
    kurtcbax = f.add_axes([.85, .1, .025, .35])
    speccb = f.colorbar(specax, cax=speccbax)
    speccb.set_label(speccblabel)
    kurtcb = f.colorbar(kurtax, cax=kurtcbax)
    kurtcb.set_label(kurtcblabel)

    if save_plot:
        plot.savefig("".join(fulltitle.split(" ")) + ".png")
        plot.close()
    else:
        plot.show()
def visualize_sequential_relationships(data, time='index', smooth_method=None, window=1,
                                       grid_size=4, fig_size=20):
    """
    Generates line plots to visualize sequential data.  Assumes the data
    frame index is a time series.

    Parameters
    ----------
    data : array-like
        Pandas data frame containing the entire data set.
    time : string, optional, default 'index'
        Datetime input column to use for visualization.
    smooth_method : {'mean', 'var', 'skew', 'kurt'}, optional, default None
        Apply a function to the time series to smooth out variations.
    window : int, optional, default 1
        Size of the moving window used to calculate the smoothing function.
    grid_size : int, optional, default 4
        Number of vertical/horizontal plots to display in a single window.
    fig_size : int, optional, default 20
        Size of the plot.
    """
    # replace NaN values with 0 to prevent exceptions in the lower level API calls
    data = data.fillna(0)

    # `!=` replaces the original identity test (`is not`) against a string literal
    if time != 'index':
        data = data.reset_index()
        data = data.set_index(time)
    data.index.name = None

    n_features = len(data.columns)
    plot_size = grid_size ** 2
    # integer division (//) replaces the Python 2 `/`
    n_plots = n_features // plot_size if n_features % plot_size == 0 else n_features // plot_size + 1

    for i in range(n_plots):
        fig, ax = plt.subplots(grid_size, grid_size, sharex=True, figsize=(fig_size, fig_size / 2))
        for j in range(plot_size):
            index = (i * plot_size) + j
            if index < n_features:
                if type(data.iloc[0, index]) is not str:
                    if smooth_method == 'mean':
                        data.iloc[:, index] = pd.rolling_mean(data.iloc[:, index], window)
                    elif smooth_method == 'var':
                        data.iloc[:, index] = pd.rolling_var(data.iloc[:, index], window)
                    elif smooth_method == 'skew':
                        data.iloc[:, index] = pd.rolling_skew(data.iloc[:, index], window)
                    elif smooth_method == 'kurt':
                        data.iloc[:, index] = pd.rolling_kurt(data.iloc[:, index], window)
                    data.iloc[:, index].plot(ax=ax[j // grid_size, j % grid_size], kind='line',
                                             legend=False, title=data.columns[index])
        fig.tight_layout()
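# A hypothetical usage sketch for visualize_sequential_relationships; the
# synthetic frame below is invented purely for illustration.
import numpy as np
import pandas as pd

frame = pd.DataFrame(np.random.randn(365, 6).cumsum(axis=0),
                     index=pd.date_range('2015-01-01', periods=365),
                     columns=list('abcdef'))
visualize_sequential_relationships(frame, smooth_method='kurt', window=30)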
def evaluate(self, table):
    expr = self.expr
    val = None
    if expr is not None:
        val = expr.evaluate(table)
    return pd.rolling_kurt(val, self.window)
def get_rolling_kurt(values, window):
    """Return rolling kurtosis of given values, using specified window size."""
    return pd.rolling_kurt(values.shift(1), window=window)
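# Modern equivalent of get_rolling_kurt (a sketch): the shift(1) keeps the
# statistic free of look-ahead, exactly as above.
def get_rolling_kurt_modern(values, window):
    return values.shift(1).rolling(window=window).kurt()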