def get_dstat_snpwindow(af, quadruples, jackknife_window=2000, snp_window=None):
    # min fraction of snps to report value
    # (only makes a difference for right-most interval)
    min_observation_fraction = 0.75
    dstats = []
    jackknife_window_dstats = []
    snp_window_dstats = []
    for j, (h1, h2, h3, o) in enumerate(quadruples):
        dstat = pd.DataFrame(columns=['num', 'denom'])
        dstat['num'] = ((af[h1] - af[h2]) * (af[h3] - af[o])).dropna()
        dstat['denom'] = ((af[h1] + af[h2] - 2 * af[h1] * af[h2]) *
                          (af[h3] + af[o] - 2 * af[h3] * af[o])).dropna()
        # only use informative SNPs
        dstat = dstat[dstat['denom'] != 0]
        dstats.append([dstat['num'].sum(), dstat['denom'].sum()])
        jackknife_window_sum = pd.rolling_sum(
            dstat, jackknife_window,
            min_periods=int(min_observation_fraction * jackknife_window),
            center=True).iloc[jackknife_window // 2::jackknife_window].dropna()
        jackknife_window_dstats.append(
            jackknife_window_sum.reset_index(level=1).values.tolist())
        if snp_window is not None:
            snp_window_sum = pd.rolling_sum(
                dstat, snp_window, min_periods=0,  # what should this be?
                center=True).iloc[snp_window // 2::snp_window].dropna()
            snp_window_dstats.append(
                snp_window_sum.reset_index(level=1).values.tolist())
    return dstats, jackknife_window_dstats, snp_window_dstats
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4): M = df["Close"].diff(r1 - 1) N = df["Close"].shift(r1 - 1) ROC1 = M / N M = df["Close"].diff(r2 - 1) N = df["Close"].shift(r2 - 1) ROC2 = M / N M = df["Close"].diff(r3 - 1) N = df["Close"].shift(r3 - 1) ROC3 = M / N M = df["Close"].diff(r4 - 1) N = df["Close"].shift(r4 - 1) ROC4 = M / N KST = Series( rolling_sum(ROC1, n1) + rolling_sum(ROC2, n2) * 2 + rolling_sum(ROC3, n3) * 3 + rolling_sum(ROC4, n4) * 4, name="KST_" + str(r1) + "_" + str(r2) + "_" + str(r3) + "_" + str(r4) + "_" + str(n1) + "_" + str(n2) + "_" + str(n3) + "_" + str(n4), ) df = df.join(KST) return df
def mfi(prices, params={"window": 14}): """ 1. Typical Price = (High + Low + Close)/3 2. Raw Money Flow = Typical Price x Volume 3. Money Flow Ratio = (14-period Positive Money Flow)/(14-period Negative Money Flow) 4. Money Flow Index = 100 - 100/(1 + Money Flow Ratio) Parameters ---------- prices: DataFrame Includes the open, close, high, low and volume. params: dict Returns ---------- mfi_val: DataFrame """ window = params["window"] tp = __tp(prices) rmf = tp * prices['Volume'] close = prices["Close"] ret = close - close.shift(1) prmf = rmf.copy() nrmf = rmf.copy() prmf[ret < 0] = 0 nrmf[ret > 0] = 0 mfr = pd.rolling_sum(prmf, window)/pd.rolling_sum(nrmf, window) mfi_val = 100 - 100. / (1 + mfr) return pd.DataFrame(mfi_val.values, index=prices.index, columns=["MFI"])
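# A minimal modern-pandas sketch of the same money-flow ratio as mfi() above:
# pd.rolling_sum was deprecated in pandas 0.18 and later removed, so the surviving
# .rolling() method is used instead. The typical-price formula is taken from the
# docstring (the original delegates to a private __tp helper), and the
# 'High'/'Low'/'Close'/'Volume' column names are assumed to match the prices frame.
def mfi_modern(prices, window=14):
    tp = (prices['High'] + prices['Low'] + prices['Close']) / 3.0   # typical price
    rmf = tp * prices['Volume']                                     # raw money flow
    ret = prices['Close'].diff()
    prmf = rmf.copy()
    nrmf = rmf.copy()
    prmf[ret < 0] = 0   # keep only flows on up (or flat) periods
    nrmf[ret > 0] = 0   # keep only flows on down (or flat) periods
    mfr = prmf.rolling(window).sum() / nrmf.rolling(window).sum()
    return 100 - 100.0 / (1 + mfr)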
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4): """ KST Oscillator """ M = df['Close'].diff(r1 - 1) N = df['Close'].shift(r1 - 1) ROC1 = M / N M = df['Close'].diff(r2 - 1) N = df['Close'].shift(r2 - 1) ROC2 = M / N M = df['Close'].diff(r3 - 1) N = df['Close'].shift(r3 - 1) ROC3 = M / N M = df['Close'].diff(r4 - 1) N = df['Close'].shift(r4 - 1) ROC4 = M / N result = pd.Series( pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 + pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4, name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4) ) return out(SETTINGS, df, result)
def getDF(): rsite = sys.argv[1] fdate = sys.argv[2] tdate = sys.argv[3] # rsite = '1236' # set arbitrarily for now # fdate = "2015-04-25" # tdate = "2016-04-25" engine = create_engine( 'mysql+pymysql://updews:[email protected]/senslopedb') query = "select * from senslopedb.%s where timestamp between '%s' and '%s'" % ( rsite, fdate, tdate) df = pd.io.sql.read_sql(query, engine) df.columns = ['ts', 'cumm', 'rval'] df = df.set_index(['ts']) df = df["rval"].astype(float) df = df[df >= 0] df = df.resample('15Min', how="sum") dfs = pd.rolling_sum(df, 96, min_periods=1) dfs1 = pd.rolling_sum(df, 288, min_periods=1) dfs = dfs[dfs >= 0] dfs1 = dfs1[dfs1 >= 0] dfa = pd.DataFrame({"rval": df, "cumm": dfs, "hrs72": dfs1}) dfajson = dfa.reset_index().to_json(orient="records", date_format='iso') dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "") print dfajson
def ULTOSC(df, ksgn='close'):
    '''
    def ULTOSC(df, ksgn='close'):
    UOS, Ultimate Oscillator (UOS)
    Created by Larry Williams. He observed that the oscillators in common use are
    highly sensitive to the choice of period parameter: under different market
    conditions, the same oscillator with different parameters produces completely
    different results, so picking the best parameter combination becomes the most
    important step before using any oscillator.
    To harmonise the period parameters, Williams took three oscillators with
    different cycles, turned their periods into constant factors in inverse
    proportion, and combined the three weighted oscillators into the UOS indicator.
    After this tuning process, UOS adapts to varying market conditions better than
    any single-parameter oscillator.
    [Input]
        df, data source in pd.DataFrame format
        ksgn, column name, usually 'close' (closing price)
    [Output]
        df, data source in pd.DataFrame format, with one added column: uos
    '''
    i = 0
    TR_l = [0]
    BP_l = [0]
    xnam = 'uos'
    while i < len(df) - 1:
        TR = max(df['high'].iloc[i + 1], df[ksgn].iloc[i]) - min(df['low'].iloc[i + 1], df[ksgn].iloc[i])
        TR_l.append(TR)
        BP = df[ksgn].iloc[i + 1] - min(df['low'].iloc[i + 1], df[ksgn].iloc[i])
        BP_l.append(BP)
        i = i + 1
    UltO = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) +
                     (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) +
                     (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)),
                     name=xnam)
    UltO.index = df.index
    df[xnam] = UltO
    return df
def extract_windows(days):
    # Add categories to count types of crimes committed in time windows leading to date we're trying to predict
    for label in ['Violent', 'Severe', 'Minor', 'Petty']:
        days[label + ' Crimes in Last Week'] = pd.rolling_sum(days[label + ' Crimes'], 7)
        days[label + ' Crimes in Last Month'] = pd.rolling_sum(days[label + ' Crimes'], 30)
    # The rolling sums are undefined for the earliest 30 days of the time series. Remove those days.
    return days[30:]
def CumSum(r, offsetstart, end, tsn, data):
    ##DESCRIPTION:
    ##prints timestamp and instantaneous rainfall
    ##plots instantaneous rainfall data, 24-hr cumulative and 72-hr rainfall, and half of 2-yr max and 2-yr max rainfall for 10 days

    ##INPUT:
    ##r; string; site code
    ##offsetstart; datetime; starting point of interval with offset to account for moving window operations
    ##end; datetime; end of interval
    ##tsn; string; datetime format allowed in savefig

    rainfall = data
    rainfall = rainfall[(rainfall.index >= offsetstart)]
    rainfall = rainfall[(rainfall.index <= end)]
    rainfall = rainfall.resample('15min', how='sum')

    #getting the rolling sum for the last 24 hours
    rainfall2 = pd.rolling_sum(rainfall, 96, min_periods=1)
    rainfall2 = np.round(rainfall2, 4)

    #getting the rolling sum for the last 3 days
    rainfall3 = pd.rolling_sum(rainfall, 288, min_periods=1)
    rainfall3 = np.round(rainfall3, 4)

    return rainfall2, rainfall3
def find_capm_gap(df_prices, i_lookback, switch): # df_spread = pd.merge(df_prices, df_prices, left_index=True, right_index=True, how='outer') frames = [df_prices, df_prices] df_spread = pd.concat(frames, keys=ls_symbols) print "in" print "df_spread:::", df_spread df_capm_gap = np.NAN * copy.deepcopy(df_prices) ts_index = df_prices[ls_symbols[-1]] tsu.returnize0(ts_index) for s_symbol in ls_symbols[:len(ls_symbols)-1]: ts_price = df_prices[s_symbol] tsu.returnize0(ts_price) # print "returns", ts_price # print "index", ts_index ts_x_ret = pd.rolling_sum(ts_index, i_lookback) ts_y_ret = pd.rolling_sum(ts_price, i_lookback) beta = (1/pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback) alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback) df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret) # print "ind", ts_x_ret, "y", ts_y_ret, "a" , alpha, "b", beta, df_capm_gap[s_symbol] ldt_timestamps = df_capm_gap.index print df_capm_gap for i in range(1, len(ldt_timestamps)): df_capm_gap.ix[ldt_timestamps[i]]=scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]]) print df_spread.ix[[('AMZN',df_prices.index[i])]] return df_capm_gap
def Vortex(df, n): """ Vortex Indicator """ i = 0 TR = [0] while i < len(df) - 1: # df.index[-1]: Range = max( df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min( df.get_value(i + 1, 'Low'), df.get_value(i, 'Close') ) TR.append(Range) i = i + 1 i = 0 VM = [0] while i < len(df) - 1: # df.index[-1]: Range = abs( df.get_value(i + 1, 'High') - df.get_value(i, 'Low') ) - abs( df.get_value(i + 1, 'Low') - df.get_value(i, 'High') ) VM.append(Range) i = i + 1 result = pd.Series( pd.rolling_sum( pd.Series(VM), n ) / pd.rolling_sum(pd.Series(TR), n), name='Vortex_' + str(n) ) return out(SETTINGS, df, result)
def rolling_functions_tests(p, d): # Old-fashioned rolling API assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3)) assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3)) assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3)) assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3)) assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3)) assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3)) assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3)) assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3)) # see note around test_rolling_dataframe for logic concerning precision assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True) assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True) assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5)) assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad)) with ignoring(ImportError): assert_eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar')) # Test with edge-case window sizes assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0)) assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1)) # Test with kwargs assert_eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def calculate_signals(self, event): """ create signal events in asof, ticker, exchange, signal format """ if event.type == 'MARKET': for s in self.symbol_list: bars = self.bars.get_latest_bars(s, N = 201) df = pd.DataFrame(bars, columns = self.bars.bar_columns) df['closepctchg'] = df['adjclose'].pct_change(periods=1) df['lowpctchg'] = df['low'].pct_change(periods=1) df['highpctchg'] = df['high'].pct_change(periods=1) df['logreturn'] = np.log(df['adjclose'].pct_change(periods=1)+1) df['ma200'] = pd.rolling_mean(df['adjclose'], 200) df['ma5'] = pd.rolling_mean(df['adjclose'], 5) df['lowpctsign'] = np.sign(df['lowpctchg']) df['highpctsign'] = np.sign(df['highpctchg']) df['closepctsign'] = np.sign(df['closepctchg']) df['sumlowsign'] = pd.rolling_sum(df['lowpctsign'], 3) df['sumhighsign'] = pd.rolling_sum(df['highpctsign'], 3) df['sumclosesign'] = pd.rolling_sum(df['closepctsign'], 3) cv = df.tail(1) #print bars[0][0], bars[0][1], bars[0][2], cv['ma200'], cv['adjclose'], cv['lowpctsign'], cv['highpctsign'], cv['closepctsign'] if bars is not None and len(bars) == 201 and cv['ma200'] < cv['adjclose'] and cv['sumclosesign'] == -3 and cv['closepctchg'] < -0.015: if self.bought[s] == False: print 'SIGNAL: ', bars[-1][0], bars[-1][1], bars[-1][2], bars[-1][7], 'LONG' signal = SignalEvent(bars[-1][1], bars[-1][2], bars[-1][0], bars[-1][7], 'LONG') self.events.put(signal) self.bought[s] = True if self.bought[s] == True and cv['adjclose'] > cv['ma5']: print 'SIGNAL: ', bars[-1][0], bars[-1][1], bars[-1][2], bars[-1][7], 'EXIT' signal = SignalEvent(bars[-1][1], bars[-1][2], bars[-1][0], bars[-1][7], 'EXIT') self.events.put(signal) self.bought[s] = False
def get_estimator(ticker, start, end, window=30, clean=True): prices = data.get_data(ticker, start, end) log_ho = (prices['Adj High'] / prices['Adj Open']).apply(np.log) log_lo = (prices['Adj Low'] / prices['Adj Open']).apply(np.log) log_co = (prices['Adj Close'] / prices['Adj Open']).apply(np.log) log_oc = (prices['Adj Open'] / prices['Adj Close'].shift(1)).apply(np.log) log_oc_sq = log_oc**2 log_cc = (prices['Close'] / prices['Close'].shift(1)).apply(np.log) log_cc_sq = log_cc**2 rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co) close_vol = pandas.rolling_sum(log_cc_sq, window=window) * (1.0 / (window - 1.0)) open_vol = pandas.rolling_sum(log_oc_sq, window=window) * (1.0 / (window - 1.0)) window_rs = pandas.rolling_sum(rs, window=window) * (1.0 / (window - 1.0)) result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * math.sqrt(252) result[:window-1] = np.nan if clean: return result.dropna() else: return result
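# Usage sketch for get_estimator above. The rs term is the Rogers-Satchell
# estimator, blended with the close-to-close and open-close variance terms, and the
# result is annualized via sqrt(252). The ticker, date range, and datetime import
# here are illustrative assumptions.
from datetime import datetime

vol = get_estimator('SPY', datetime(2015, 1, 1), datetime(2016, 1, 1), window=30)
print vol.tail()   # one annualized volatility figure per completed 30-day window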
def rolling_proportion(df, date_col, value_col, value, window=30, dropna=True,
                       label=False, fig=None, ax=None, color=None):
    """
    Interpolated proportion of binary risk factor over time.

    df = pandas df
    date_col = name of column containing dates
    value_col = name of column to be tallied
    value = value to tally (e.g. 'Male')
    window = number of days to include. Default is 30.
    dropna = exclude rows where val is NaN. Default is true. False will include those rows.
    label = legend label
    fig, ax = matplotlib objects
    -----
    Returns Series of proportions with date index, fig, and ax.
    -----
    Example:
    rolling_proportion(df, 'dates', 'sex', 'Male')

    Note: If you are having trouble, ensure that your date_col is a datetime.
    """
    df = df[df[date_col].isnull() == False]
    df.index = df[date_col]
    if dropna == False:
        df[value_col] = df[value_col].fillna(False)
    else:
        df = df[df[value_col].isnull() == False]
    df['matches'] = df[value_col] == value
    df['matches'] = df['matches'].astype(np.int)
    df['ones'] = 1

    prop = pd.DataFrame()
    prop['numerator'] = df.matches.groupby(by=df.index).sum()
    prop['denom'] = df.ones.groupby(by=df.index).sum()
    prop['proportion'] = pd.rolling_sum(prop.numerator, window, 5) / pd.rolling_sum(prop.denom, window, 5)
    prop = prop.dropna(how='any')

    ts = pd.date_range(min(prop.index), max(prop.index))
    new_prop = prop['proportion']
    new_prop = new_prop.reindex(ts)
    new_prop = new_prop.fillna(method='pad')

    if fig is None and ax is None:
        fig, ax = plt.subplots()
    if color is None:
        color = 'b'
    ax.xaxis_date()
    new_prop.plot(ax=ax, label=label, color=color)
    fig.autofmt_xdate()
    ax.set_ylim(-0.05, 1.05)
    ax.set_xlabel('')
    if label != False:
        ax.legend()
    return new_prop, fig, ax
def _calculate_cmo_values(returns, periods): only_gains = returns.map(lambda x: 0 if x < 0 else 1) only_losses = returns.map(lambda x: 0 if x >= 0 else 1) sums_of_gains = pd.rolling_sum(only_gains, window=periods) sums_of_losses = pd.rolling_sum(only_losses, window=periods) result = pd.Series(index=sums_of_gains.index) for date, sog in sums_of_gains.items(): result[date] = ((sog - sums_of_losses[date]) / (sog + sums_of_losses[date])) return result
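# The per-date loop in _calculate_cmo_values can be collapsed into one vectorized
# expression; a sketch under the same inputs (a returns Series), using the modern
# .rolling() API. Note that this variant, like the original, counts up/down days;
# the textbook CMO instead sums gain/loss magnitudes and scales by 100.
def cmo_day_count(returns, periods):
    gains = (returns >= 0).astype(float)    # 1 on up (or flat) days
    losses = (returns < 0).astype(float)    # 1 on down days
    sog = gains.rolling(periods).sum()
    sol = losses.rolling(periods).sum()
    return (sog - sol) / (sog + sol)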
def rsi(data, periods=14):
    change = data['Adjusted Close'] - data['Adjusted Close'].shift(1)
    gains = change.apply(lambda x: 0 if x < 0 else x)
    losses = change.apply(lambda x: 0 if x >= 0 else -x)
    fag = pd.rolling_sum(gains, periods)    # rolling sum of gains over the lookback
    fal = pd.rolling_sum(losses, periods)   # rolling sum of losses over the lookback
    ag = (fag.shift(1) * (periods - 1) + gains) / periods
    al = (fal.shift(1) * (periods - 1) + losses) / periods
    rs = ag / al
    return 1 - 1 / (1 + rs)
def SensorPlot(r,offsetstart,end,tsn, data, halfmax, twoyrmax): ##INPUT: ##r; str; site ##offsetstart; datetime; starting point of interval with offset to account for moving window operations ##end; datetime; end of rainfall data ##tsn; str; time format acceptable as file name ##data; dataframe; rainfall data ##halfmax; float; half of 2yr max rainfall, one-day cumulative rainfall threshold ##twoyrmax; float; 2yr max rainfall, three-day cumulative rainfall threshold ##OUTPUT: ##rainfall2, rainfall3; dataframe containing one-day and three-day cumulative rainfall if PrintPlot: plt.xticks(rotation=70, size=5) #getting the rolling sum for the last24 hours rainfall2=pd.rolling_sum(data,48,min_periods=1) rainfall2=np.round(rainfall2,4) #getting the rolling sum for the last 3 days rainfall3=pd.rolling_sum(data,144,min_periods=1) rainfall3=np.round(rainfall3,4) if PrintPlot: #assigning the thresholds to their own columns for plotting sub=base sub['maxhalf'] = halfmax sub['max'] = twoyrmax #assigning df to plot variables (to avoid caveats ? expressed from Spyder) plot1=data.dropna() # instantaneous rainfall data plot2=rainfall2 # 24-hr cumulative rainfall plot3=rainfall3 # 72-hr cumulative rainfall plot4=sub['maxhalf'] # half of 2-yr max rainfall plot5=sub['max'] # 2-yr max rainfall #plots instantaneous rainfall data, 24-hr cumulative rainfall, 72-hr cumulative rainfall, #half of 2-yr max rainfall, 2-yr max rainfall plt.plot(plot1.index,plot1,color='#db4429', label = 'instantaneous rainfall') # instantaneous rainfall data plt.plot(plot2.index,plot2,color='#5ac126', label = '24hr cumulative rainfall') # 24-hr cumulative rainfall plt.plot(plot3.index,plot3,color='#0d90d0', label = '72hr cumulative rainfall') # 72-hr cumulative rainfall plt.plot(plot4.index,plot4,color="#fbb714", label = 'half of 2yr max rainfall') # half of 2-yr max rainfall plt.plot(plot5.index,plot5,color="#963bd6", label = '2yr max rainfall') # 2-yr max rainfall plt.legend(loc='upper left', fontsize = 8) plt.title(r) plt.savefig(RainfallPlotsPath+tsn+"_"+r, dpi=160, facecolor='w', edgecolor='w',orientation='landscape',mode='w') plt.close() return rainfall2, rainfall3
def getrain(rainsite,start,end): raindf = q.GetRawRainData(rainsite, fromTime=start, toTime=end) raindf = raindf.set_index('ts') raindf = raindf.resample('30min',how='sum') raindf['one_d'] = pd.rolling_sum(raindf.rain,48,min_periods=1) raindf['thr_d'] = pd.rolling_sum(raindf.rain,144,min_periods=1) raindf=raindf.reset_index() raindf['gts'] = raindf.ts.apply(datenum) return raindf
def ULTOSC(df): i = 0 TR_l = [0] BP_l = [0] while i < df.index[-1]: TR = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) TR_l.append(TR) BP = df.get_value(i + 1, 'Close') - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) BP_l.append(BP) i = i + 1 UltO = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) + (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) + (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)), name = 'Ultimate_Osc') df = df.join(UltO) return df
def prediction_augmented(df_train, col_names, df_day_avg_values, adjacency_list, df_model, prediction_model, window_size=10, do_rounding = False): staircaseA_nodes = ['S42', 'S46'] staircaseB_nodes = ['S34', 'S35'] staircaseC_nodes = ['S52', 'S53'] # Dataframe to store the model prediction df_model_lr = df_model.copy() # Building the moving sum for the features before/after for each neighbor model_curr_before = pd.rolling_sum(df_model.sort(ascending=False), window_size+1) - df_model model_curr_after = pd.rolling_sum(df_model, window_size+1) - df_model model_curr_before = model_curr_before.rename(columns={col:col+'before' for col in col_names}) model_curr_after = model_curr_after.rename(columns={col:col+'after' for col in col_names}) window_features = model_curr_after.join(model_curr_before[[col_+'before' for col_ in col_names]]) for col in col_names: # X will store the features and the outcome Y X = df_train.copy() X = X.rename(columns={col:'Y'}) X = pd.merge(X, df_day_avg_values[[col]], left_on='day_time', right_index=True) X = X.rename(columns={col:col+'avg'}) # Building the neighbors (from adjacency list) with missing values filled as in model neighbors_col = ['S'+str(n) for n in adjacency_list[int(col[1:])]] X = X[['Y']].join(df_model[neighbors_col]) X = X.join(window_features[[col_+'before' for col_ in neighbors_col] + [col_+'after' for col_ in neighbors_col]]) # Removing the first and last element impossible to compute given the window_size X = X.sort()[window_size: - window_size] # augment with staircase info X['sA'] = (col in staircaseA_nodes) * 1. X['sB'] = (col in staircaseB_nodes) * 1. X['sC'] = (col in staircaseC_nodes) * 1. X_train = X[X['Y'] != -1] X_test = X[X['Y'] == -1] test_indices = X[X['Y'] == -1].index col_values = df_model_lr[col] if len(X_test): # Models prediction_model = prediction_model.fit(X_train.drop('Y', axis=1), X_train.Y) col_values.ix[test_indices] = prediction_model.predict(X_test.drop('Y', axis=1)) # Filling the result with the current sensor prediction if do_rounding: df_model_lr[col] = np.round(col_values) else: df_model_lr[col] = col_values return df_model_lr
def VWAP_dataPre(qx, xnam0, ksgn0):
    '''
    vwap data pre-processing function for the VWAP strategy
    (volume-weighted average price)

    Args:
        qx (zwQuantX): zwQuantX data bundle
        xnam0 (str): function label
        ksgn0 (str): price column name, usually 'adj close'
    '''
    zwx.sta_dataPre0xtim(qx, xnam0)
    ksgn, qx.priceCalc = ksgn0, ksgn0   # 'adj close' / 'close'
    for xcod in zw.stkLibCode:
        d20 = zw.stkLib[xcod]
        #---------------dprice,kprice
        d20['dprice'] = d20[ksgn]
        d20['kprice'] = d20['open'].shift(-1)
        #vwap, volume-weighted average price:
        #   vwap = (prices * volume).sum(n) / volume.sum(n)
        #the sum functions skip NaN values automatically
        #strategy parameters are [vwapWindowSize, threshold], e.g. qx.staVarLst = [15, 0.01]
        nwin = qx.staVars[0]
        d20['vw_sum'] = pd.rolling_sum(d20['dprice'] * d20['volume'], nwin)
        d20['vw_vol'] = pd.rolling_sum(d20['volume'], nwin)
        d20['vwap'] = d20['vw_sum'] / d20['vw_vol']
        #---
        zw.stkLib[xcod] = d20
        if qx.debugMod > 0:
            print(d20.tail())
            fss = 'tmp\\' + qx.prjName + '_' + xcod + '.csv'
            d20.to_csv(fss)
def tdi(price, window=20, multiple=2): ''' Trend Detection Index ''' price = utils.safe_series(price) mom = price - price.shift(window) mom[np.isnan(mom)] = 0 di = pd.rolling_sum(mom, window) di_abs = di.abs() mom_2n_abs = pd.rolling_sum(mom.abs(), window*multiple) mom_1n_abs = pd.rolling_sum(mom.abs(), window) tdi_ = di_abs - (mom_2n_abs - mom_1n_abs) return pd.DataFrame(dict(tdi=tdi_, di=di), index=price.index)
def ULTOSC(df): """ Ultimate Oscillator """ i = 0 TR_l = [0] BP_l = [0] while i < len(df) - 1: #df.index[-1]: TR = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) TR_l.append(TR) BP = df.get_value(i + 1, 'Close') - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) BP_l.append(BP) i = i + 1 result = pd.Series((4 * pd.rolling_sum(pd.Series(BP_l), 7) / pd.rolling_sum(pd.Series(TR_l), 7)) + (2 * pd.rolling_sum(pd.Series(BP_l), 14) / pd.rolling_sum(pd.Series(TR_l), 14)) + (pd.rolling_sum(pd.Series(BP_l), 28) / pd.rolling_sum(pd.Series(TR_l), 28)), name = 'Ultimate_Osc') return out(SETTINGS, df, result)
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4): M = df['close'].diff(r1 - 1) N = df['close'].shift(r1 - 1) ROC1 = M / N M = df['close'].diff(r2 - 1) N = df['close'].shift(r2 - 1) ROC2 = M / N M = df['close'].diff(r3 - 1) N = df['close'].shift(r3 - 1) ROC3 = M / N M = df['close'].diff(r4 - 1) N = df['close'].shift(r4 - 1) ROC4 = M / N KST = pd.Series(pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 + pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4, name = 'KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4)) return KST
def mfi(hlc, volume, window=14): '''MFI''' high, low, close = utils.safe_hlc(hlc) volume = utils.safe_series(volume) / 1000 price = (high+low+close) * 1.0 / 3 mf = price * volume pmf = (mf > mf.shift(1)).astype(int) * mf nmf = (mf < mf.shift(1)).astype(int) * mf mr = pd.rolling_sum(pmf, window) / pd.rolling_sum(nmf, window) rval = 100 - (100/(1 + mr)) utils.safe_name(rval, name='MFI') rval.index = hlc.index return rval
def collection_freq(breath_df, win): print(breath_df.columns) for ds_type in ['ds', 'pl', 'pvt', 'ie']: breath_df['{0}_rolling'.format(ds_type)] = pd.rolling_sum(breath_df['analysis.' + ds_type], window = 60 * win, center = True, min_periods = 1) breath_df[ds_type + '_tot_rolling'] = pd.rolling_count(breath_df['analysis.' + ds_type], window = 60 * win, center = True) breath_df[ds_type + '_freq'] = breath_df[ds_type + '_rolling'] / breath_df[ds_type + '_tot_rolling'] # add rolling average for Fio2, PEEP, p_mean try: breath_df['peep_rolling'] = pd.rolling_mean(breath_df['vent_settings.PEEP'], window = 60 * win, center = True, min_periods = 1) except KeyError: pass try: breath_df['p_mean_rolling'] = pd.rolling_mean(breath_df['vent_settings.p_mean'], window = 60 * win, center = True, min_periods = 1) except KeyError: pass try: breath_df['fio2_rolling'] = pd.rolling_mean(breath_df['vent_settings.FiO2'], window = 60 * win, center = True, min_periods = 1) except KeyError: pass return breath_df
def calculateRewards(data, n): logging.info('Rewards: calculating {0}...'.format(n)) # get tick changes diffs = data['close'].diff(1) # print 'DIFFS' # print diffs # get rolling sum sums = pd.rolling_sum(diffs, n) # print 'SUMS' # print sums # shift rewards = sums.shift(-n) # print 'SHIFTS' # print rewards # label data rewards[rewards >= 0] = 'bull' rewards[rewards < 0] = 'bear' # print rewards logging.info('Rewards: calculated') return rewards
def Vortex(df, n): i = 0 TR = [0] while i < df.index[-1]: Range = max(df.get_value(i + 1, 'High'), df.get_value(i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) TR.append(Range) i = i + 1 i = 0 VM = [0] while i < df.index[-1]: Range = abs(df.get_value(i + 1, 'High') - df.get_value(i, 'Low')) - abs(df.get_value(i + 1, 'Low') - df.get_value(i, 'High')) VM.append(Range) i = i + 1 VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name = 'Vortex_' + str(n)) df = df.join(VI) return df
def aggregate_per_week(self, daily_values, last_week_ending, weeks): """ Aggregates daily values into weekly values. Args: daily_values: Pandas Series of daily values, indexed by date. All dates are assumed to be contiguous, though their values may be NaN. Dates do not have to cover the periods being sampled. last_week_ending: last day of last week. weeks: number of weeks to sample (including the last day) Returns: Pandas Series with weekly values, indexed by date of last day of week. Any day with NaN will result in the corresponding week also being NaN. As a consequence, any week requested that is not completely covered by the input daily_values will be NaN. """ # For each date in daily input, find sum of day's value with the previous # six days. week_window = pandas.rolling_sum(daily_values, window=7) # Pull out the requested end-of-week days. If requested week dates are # not in the range of the daily input, NaN values are returned. days = [last_week_ending - timedelta(i * 7) for i in reversed(xrange(weeks))] return week_window.loc[days]
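# A modern-pandas sketch of the same weekly aggregation: .rolling(7).sum() stays
# NaN until a full week of daily values is covered, matching the docstring's
# contract, and .loc picks out the requested week-ending days.
week_window = daily_values.rolling(window=7).sum()
days = [last_week_ending - timedelta(i * 7) for i in reversed(range(weeks))]
weekly = week_window.loc[days]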
def downloadRainfallNOAH(rsite, fdate, tdate): url = "http://weather.asti.dost.gov.ph/home/index.php/api/data/%s/from/%s/to/%s" % (rsite,fdate,tdate) r = requests.get(url) try: df = pd.DataFrame(r.json()["data"]) except TypeError: print " No device with id of %s" % rsite return pd.DataFrame() try: df = df.set_index(['dateTimeRead']) df.index = pd.to_datetime(df.index) df = df["rain_value"].astype(float) df = df.resample('15Min').fillna(0.00) dfs = pd.rolling_sum(df,96) dfa = pd.DataFrame({"rval":df,"cumm":dfs}) dfa = dfa.fillna(0) dfa = dfa[96:] #rename the "index" into "timestamp" dfa.index.names = ["timestamp"] return dfa except: return pd.DataFrame()
def vortex(df, n): i = 0 tr = [0] while i < df.index[-1]: Range = max(df.get_value(i + 1, 'High'), df.get_value( i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) tr.append(Range) i = i + 1 i = 0 vm = [0] while i < df.index[-1]: Range = abs(df.get_value(i + 1, 'High') - df.get_value(i, 'Low')) - abs( df.get_value(i + 1, 'Low') - df.get_value(i, 'High')) vm.append(Range) i = i + 1 vi = pd.Series(pd.rolling_sum(pd.Series(vm), n) / pd.rolling_sum(pd.Series(tr), n), name='Vortex_' + str(n)) df = df.join(vi) return df
def efficiency_ratio(self, periods=[30], sample_size=500): for period in periods: label = 'er' + str(period) for symbol, frame in self.series_set.items(): abs_daily_change = abs(frame.c - frame.shift(1).c) abs_period_change = abs(frame.c - frame.shift(period).c) sum_change = pd.rolling_sum(abs_daily_change, period) frame[label] = abs_period_change / sum_change frame['m' + label] = pd.rolling_mean(frame[label], sample_size) std = pd.rolling_std(frame[label], sample_size) frame['erz' + str(period)] = (frame[label] - frame['m' + label]) / std return self
def Vortex(df, n): i = 0 TR = [0] while i < df.index[-1]: Range = max(df.get_value(i + 1, 'High'), df.get_value( i, 'Close')) - min(df.get_value(i + 1, 'Low'), df.get_value(i, 'Close')) TR.append(Range) i = i + 1 i = 0 VM = [0] while i < df.index[-1]: Range = abs(df.get_value(i + 1, 'High') - df.get_value(i, 'Low')) - abs( df.get_value(i + 1, 'Low') - df.get_value(i, 'High')) VM.append(Range) i = i + 1 VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name='Vortex_' + str(n)) df = df.join(VI) return df
def mass_index(df): """Calculate the Mass Index for given data. :param df: pandas.DataFrame :return: pandas.DataFrame """ Range = df['High'] - df['Low'] EX1 = pd.ewma(Range, span = 9, min_periods = 8) EX2 = pd.ewma(EX1, span = 9, min_periods = 8) Mass = EX1 / EX2 MassI = pd.Series(pd.rolling_sum(Mass, 25), name = 'Mass Index') df = df.join(MassI) return df
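# pd.ewma was removed from pandas along with the rolling_* functions; a sketch of
# the same Mass Index computation with the surviving .ewm()/.rolling() methods:
rng = df['High'] - df['Low']
ex1 = rng.ewm(span=9, min_periods=8).mean()
ex2 = ex1.ewm(span=9, min_periods=8).mean()
mass_index_25 = (ex1 / ex2).rolling(25).sum().rename('Mass Index')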
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4): """ KST Oscillator """ M = df['close'].diff(r1 - 1) N = df['close'].shift(r1 - 1) ROC1 = M / N M = df['close'].diff(r2 - 1) N = df['close'].shift(r2 - 1) ROC2 = M / N M = df['close'].diff(r3 - 1) N = df['close'].shift(r3 - 1) ROC3 = M / N M = df['close'].diff(r4 - 1) N = df['close'].shift(r4 - 1) ROC4 = M / N result = pd.Series( pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 + pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4, name='KST_' + str(r1) + '_' + str(r2) + '_' + str(r3) + '_' + str(r4) + '_' + str(n1) + '_' + str(n2) + '_' + str(n3) + '_' + str(n4)) return out(SETTINGS, df, result)
def getDF(): rsite = sys.argv[1] fdate = sys.argv[2] tdate = sys.argv[3] engine = create_engine( 'mysql+pymysql://updews:[email protected]/senslopedb') query = "select timestamp, rain from senslopedb.%s where timestamp between '%s' and '%s'" % ( rsite, fdate, tdate) df = pd.io.sql.read_sql(query, engine) df.columns = ['ts', 'rain'] df = df.set_index(['ts']) df = df["rain"].astype(float) df = df.resample('15Min', how="sum") dfs = pd.rolling_sum(df, 96, min_periods=1) dfs1 = pd.rolling_sum(df, 288, min_periods=1) dfs = dfs[dfs >= 0] dfs1 = dfs1[dfs1 >= 0] dfa = pd.DataFrame({"rval": df, "hrs24": dfs, "hrs72": dfs1}) dfajson = dfa.reset_index().to_json(orient="records", date_format='iso') dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "") print dfajson
def getDF(): rsite = sys.argv[1] fdate = sys.argv[2].replace("%20", " ") tdate = sys.argv[3].replace("%20", " ") # rsite = "1069" # fdate = "2014-04-25" # tdate = "2017-04-25" engine = create_engine( 'mysql+pymysql://updews:[email protected]/senslopedb') query = "select timestamp, rval from senslopedb.rain_noah_%s " % rsite query += "where timestamp between '%s' and '%s'" % (pd.to_datetime(fdate) - td(3), tdate) df = pd.io.sql.read_sql(query, engine) df.columns = ['ts', 'rain'] df = df[df.rain >= 0] df = df.set_index(['ts']) df = df.resample('30Min').sum() df_inst = df.resample('30Min').sum() if max(df_inst.index) < pd.to_datetime(tdate): new_data = pd.DataFrame({'ts': [pd.to_datetime(tdate)], 'rain': [0]}) new_data = new_data.set_index(['ts']) df = df.append(new_data) df = df.resample('30Min').sum() df1 = pd.rolling_sum(df, 48, min_periods=1) df3 = pd.rolling_sum(df, 144, min_periods=1) df['rval'] = df_inst df['hrs24'] = df1 df['hrs72'] = df3 df = df[(df.index >= fdate) & (df.index <= tdate)] dfajson = df.reset_index().to_json(orient="records", date_format='iso') dfajson = dfajson.replace("T", " ").replace("Z", "").replace(".000", "") print dfajson
def get_df(self, query_obj=None):
    form_data = self.form_data
    df = super(NVD3TimeSeriesViz, self).get_df(query_obj)
    df = df.fillna(0)
    if form_data.get("granularity") == "all":
        raise Exception("Pick a time granularity for your time series")

    df = df.pivot_table(
        index="timestamp",
        columns=form_data.get('groupby'),
        values=form_data.get('metrics'))

    fm = form_data.get("resample_fillmethod")
    if not fm:
        fm = None
    how = form_data.get("resample_how")
    rule = form_data.get("resample_rule")
    if how and rule:
        df = df.resample(rule, how=how, fill_method=fm)
        if not fm:
            df = df.fillna(0)

    if self.sort_series:
        dfs = df.sum()
        dfs.sort(ascending=False)
        df = df[dfs.index]

    if form_data.get("contribution"):
        dft = df.T
        df = (dft / dft.sum()).T

    num_period_compare = form_data.get("num_period_compare")
    if num_period_compare:
        num_period_compare = int(num_period_compare)
        df = (df / df.shift(num_period_compare)) - 1
        df = df[num_period_compare:]

    rolling_periods = form_data.get("rolling_periods")
    rolling_type = form_data.get("rolling_type")
    if rolling_type in ('mean', 'std', 'sum') and rolling_periods:
        if rolling_type == 'mean':
            df = pd.rolling_mean(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'std':
            df = pd.rolling_std(df, int(rolling_periods), min_periods=0)
        elif rolling_type == 'sum':
            df = pd.rolling_sum(df, int(rolling_periods), min_periods=0)
    elif rolling_type == 'cumsum':
        df = df.cumsum()
    return df
def VORTEX(df, n):
    '''
    def VORTEX(df, n):
    Vortex Indicator
    See http://www.vortexindicator.com/VFX_VORTEX.PDF
    [Input]
        df, data source in pd.DataFrame format
        n, period length
    [Output]
        df, data source in pd.DataFrame format, with one added column: vortex_{n}
    '''
    xnam = 'vortex_{n}'.format(n=n)
    i = 0
    TR = [0]
    while i < len(df) - 1:
        Range = max(df['high'].iloc[i + 1], df['close'].iloc[i]) - min(df['low'].iloc[i + 1], df['close'].iloc[i])
        TR.append(Range)
        i = i + 1
    i = 0
    VM = [0]
    while i < len(df) - 1:
        Range = abs(df['high'].iloc[i + 1] - df['low'].iloc[i]) - abs(df['low'].iloc[i + 1] - df['high'].iloc[i])
        VM.append(Range)
        i = i + 1
    ds = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name=xnam)
    ds.index = df.index
    df[xnam] = ds
    return df
def count_by_loc_time(data): temp_time = data.groupby(['zipcode', 'time', 'year_month']).agg({'vcrime': 'sum'}).reset_index() temp_time['order_within_group'] = temp_time.groupby('zipcode').cumcount() temp_time['count_1m_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime'] .apply(lambda x: pd.rolling_sum(x, window=1, min_periods=0) .shift() .fillna(0))) # counting the robberies in the previous 6 months for each zipcode and time range temp_time['count_6m_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime'] .apply(lambda x: pd.rolling_sum(x, window=6, min_periods=0) .shift() .fillna(0))) temp_time['count_2y_loc_time'] = (temp_time.groupby(['zipcode', 'time'])['vcrime'] .apply(lambda x: pd.rolling_sum(x, window=24, min_periods=0) .shift() .fillna(0))) # droping columns temp_time = temp_time.drop(['vcrime', 'order_within_group'], axis=1) return temp_time
def getRSI(close): ''' calculate RSI value :param DataFrame close: close price :return: DataFrame RSI: RSI value ''' n = 3 # calculate increment of close price of two succeeding days close_increment = close.diff() close_increment.dropna(inplace=True) close_increment.index = range(close_increment.shape[0]) close_pos = close_increment.copy() close_pos[close_pos < 0] = 0 close_abs = np.abs(close_increment) sum_pos = pd.rolling_sum(close_pos, n) sum_pos.dropna(inplace=True) sum_pos.index = range(sum_pos.shape[0]) sum_abs = pd.rolling_sum(close_abs, n) sum_abs.dropna(inplace=True) sum_abs.index = range(sum_abs.shape[0]) RSI = sum_pos / sum_abs RSI.replace([np.nan, np.inf, -np.inf], 0, inplace=True) return RSI
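# Equivalent of getRSI without the manual index re-numbering, using the modern
# rolling API. Like the original, it returns values in [0, 1] over a 3-period
# window; the textbook RSI multiplies by 100 and typically uses n = 14 with
# Wilder smoothing.
def rsi_simple(close, n=3):
    delta = close.diff().dropna()
    up = delta.clip(lower=0)   # keep positive increments only
    rsi = up.rolling(n).sum() / delta.abs().rolling(n).sum()
    return rsi.replace([np.nan, np.inf, -np.inf], 0)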
def calc_log_vols(ser):
    stk = ser.values
    daily = stk[-period:]
    print daily
    daily = np.std(daily, ddof=1) * np.sqrt(252.0)
    weekly = pd.rolling_sum(stk[-(period + 5):], 5)   # gets the rolling sum for every 5 days
    print weekly
    weekly = weekly[-period:]    # gets that number of days back
    weekly = weekly[4::5]        # starting from the 5th value, take every 5th (non-overlapping weekly sums)
    print weekly
    weekly = np.std(weekly, ddof=1) * np.sqrt(52)
    return pd.Series([daily, weekly], index=['daily', 'weekly'])
def KAMA(df, n):
    """ Kaufman Adaptive Moving Average """
    PP = (df['High'] + df['Low']) / 2
    Directions = PP.diff(n).abs()                     # net change over the window
    Volatility = pd.rolling_sum(PP.diff().abs(), n)   # sum of absolute one-step changes
    ER = Directions / Volatility                      # efficiency ratio in [0, 1]
    # smoothing constant; the conventional fast/slow periods of 2 and 30 are assumed here
    SC = (ER * (2.0 / (2 + 1) - 2.0 / (30 + 1)) + 2.0 / (30 + 1)) ** 2
    kama = PP.copy()
    for i in range(n + 1, len(PP)):
        kama.iloc[i] = kama.iloc[i - 1] + SC.iloc[i] * (PP.iloc[i] - kama.iloc[i - 1])
    return pd.Series(kama, name='KAMA_' + str(n))
def rolling_functions_tests(p, d): # Old-fashioned rolling API eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3)) eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3)) eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3)) eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3)) eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3)) eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3)) eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3)) eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3)) eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3)) eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3)) eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5)) eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad)) with ignoring(ImportError): eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar')) # Test with edge-case window sizes eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0)) eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1)) # Test with kwargs eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_tests(p, d): eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3)) eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3)) eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3)) eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3)) eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3)) eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3)) eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3)) eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3)) eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3)) eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3)) eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5)) mad = lambda x: np.fabs(x - x.mean()).mean() eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad)) with ignoring(ImportError): eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar')) # Test with edge-case window sizes eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0)) eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1)) # Test with kwargs eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def estimateVolatility(ohlc, N=10, algo='YangZhang'): """ Volatility estimation Possible algorithms: ['YangZhang', 'CC'] """ cc = np.log(ohlc.close / ohlc.close.shift(1)) if algo == 'YangZhang': # Yang-zhang volatility ho = np.log(ohlc.high / ohlc.open) lo = np.log(ohlc.low / ohlc.open) co = np.log(ohlc.close / ohlc.open) oc = np.log(ohlc.open / ohlc.close.shift(1)) oc_sq = oc**2 cc_sq = cc**2 rs = ho * (ho - co) + lo * (lo - co) close_vol = pd.rolling_sum(cc_sq, window=N) * (1.0 / (N - 1.0)) open_vol = pd.rolling_sum(oc_sq, window=N) * (1.0 / (N - 1.0)) window_rs = pd.rolling_sum(rs, window=N) * (1.0 / (N - 1.0)) result = (open_vol + 0.164333 * close_vol + 0.835667 * window_rs).apply(np.sqrt) * np.sqrt(252) result[:N - 1] = np.nan elif algo == 'CC': # standard close-close estimator result = np.sqrt(252) * np.sqrt(((pd.rolling_sum(cc**2, N)) / N)) else: raise ValueError('Unknown algo type.') return result * 100
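# Usage sketch for estimateVolatility; ohlc is assumed to be a DataFrame with
# lower-case open/high/low/close columns, as accessed above, and the result is in
# percent because of the final * 100.
yz = estimateVolatility(ohlc, N=10, algo='YangZhang')   # Yang-Zhang estimator
cc = estimateVolatility(ohlc, N=10, algo='CC')          # close-to-close estimator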
def KST(df, r1, r2, r3, r4, n1, n2, n3, n4, ksgn='close'):
    '''
    def KST(df, r1, r2, r3, r4, n1, n2, n3, n4, ksgn='close'):
    #KST Oscillator
    The KST ("Know Sure Thing") indicator draws on long-, medium- and short-term
    rate-of-change (ROC) series to capture how different time cycles affect the
    market. Several ROC periods are weighted and re-smoothed into long and short
    curves; its strength is judging the trend from the combined price-change
    profile and pinpointing turning points for buying and selling.
    tst: (r1, r2, r3, r4, n1, n2, n3, n4) = (1, 2, 3, 4, 6, 7, 9, 9)

    [Input]
        df, data source in pd.DataFrame format
        r1..r4, n1..n4, period lengths
        ksgn, column name, usually 'close' (closing price)
    [Output]
        df, data source in pd.DataFrame format, with one added column: kst
    '''
    xnam = 'kst'
    M = df[ksgn].diff(r1 - 1)
    N = df[ksgn].shift(r1 - 1)
    ROC1 = M / N
    M = df[ksgn].diff(r2 - 1)
    N = df[ksgn].shift(r2 - 1)
    ROC2 = M / N
    M = df[ksgn].diff(r3 - 1)
    N = df[ksgn].shift(r3 - 1)
    ROC3 = M / N
    M = df[ksgn].diff(r4 - 1)
    N = df[ksgn].shift(r4 - 1)
    ROC4 = M / N
    KST = pd.Series(pd.rolling_sum(ROC1, n1) + pd.rolling_sum(ROC2, n2) * 2 +
                    pd.rolling_sum(ROC3, n3) * 3 + pd.rolling_sum(ROC4, n4) * 4,
                    name=xnam)
    df = df.join(KST)
    return df
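# Usage sketch for the KST function above. The (10, 15, 20, 30, 10, 10, 10, 15)
# parameter set is Martin Pring's commonly quoted daily configuration; it is an
# assumption for illustration, since the docstring itself only demonstrates
# (1, 2, 3, 4, 6, 7, 9, 9).
df = KST(df, 10, 15, 20, 30, 10, 10, 10, 15)   # adds a 'kst' column to df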
def CCI(data, n=20, m=7):
    data['cci'] = ta.CCI(np.array(data.high), np.array(data.low), np.array(data.close), n)
    signal = pd.DataFrame(index=data.index)
    #strategy 1
    """
    When CCI crosses above 100, buy: the signal is 1.
    When CCI crosses below -100, sell short: the signal is -1.
    Parameter n = 20.
    """
    signal['1'] = ((data['cci'] > 100) & (data['cci'].shift(1) < 100)) * 1 + (
        (data['cci'] < -100) & (data['cci'].shift(1) > -100)) * (-1)
    signal['1'] = signal['1'][signal['1'].isin([1, -1])].reindex(data.index, method='ffill')
    #strategy 2
    """
    When CCI crosses above 100, buy: the signal is 1.
    When CCI falls back below 100 within m days of that upward cross, sell: the
    signal is -1. Otherwise the signal stays unchanged until CCI crosses below
    -100, which also triggers the sell.
    The cross below -100 is handled symmetrically.
    The measured optimal parameters are n=20, m=8.
    """
    signal['2'] = ((data['cci'] > 100) & (data['cci'].shift(1) < 100)) * 1 + (
        (data['cci'] < -100) & (data['cci'].shift(1) > -100)) * (-1)
    signal['2'] = signal['2'] + (
        ((data['cci'] < 100) & (data['cci'].shift(1) > 100)) &
        (pd.rolling_sum(signal['2'], m) > 0)) * (-1) + (
        ((data['cci'] > -100) & (data['cci'].shift(1) < -100)) &
        (pd.rolling_sum(signal['2'], m) < 0)) * 1
    signal['2'] = signal['2'][signal['2'].isin([1, -1])].reindex(data.index, method='ffill')
    signal = signal.fillna(0)
    return signal
def stop(trip_data):
    """ Algorithm for detecting stops """
    df = trip_data.copy()
    n = 10
    df['temp'] = df.apply(lambda x: 1 if x.v < 0.5 else 0, axis=1)
    df['flag'] = pd.rolling_sum(df.temp, n) \
        .apply(lambda x: 1 if x == n else 0) \
        .shift(-(n // 2) + 1)
    return np.where(df.flag > 0, 1, 0)
def money_flow_index(df, col_volume='Volume_BTC', n=14): """ Money Flow Index http://stockcharts.com/school/doku.php?id=chart_school:technical_indicators:money_flow_index_mfi https://en.wikipedia.org/wiki/Money_flow_index """ # 1 typical price tp = (df['High'] + df['Low'] + df['Close']) / 3.0 # 2 money flow mf = tp * df[col_volume] # 3 positive and negative money flow with n periods df['1_Period_Positive_Money_Flow'] = 0.0 df.loc[df['Up_or_Down'] == 1, '1_Period_Positive_Money_Flow'] = mf df['1_Period_Negative_Money_Flow'] = 0.0 df.loc[df['Up_or_Down'] == 2, '1_Period_Negative_Money_Flow'] = mf n_positive_mf = pd.rolling_sum(df['1_Period_Positive_Money_Flow'], n) n_negative_mf = pd.rolling_sum(df['1_Period_Negative_Money_Flow'], n) # 4 money flow index mr = n_positive_mf / n_negative_mf # delete intermediate columns df.drop('1_Period_Positive_Money_Flow', axis=1, inplace=True) df.drop('1_Period_Negative_Money_Flow', axis=1, inplace=True) return (100 - (100 / (1 + mr)))
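# money_flow_index above assumes an 'Up_or_Down' column (1 when price moved up,
# 2 when it moved down). A sketch of how such a column could be derived from the
# typical price, purely as an assumption about the caller's preprocessing:
tp = (df['High'] + df['Low'] + df['Close']) / 3.0
df['Up_or_Down'] = 0
df.loc[tp > tp.shift(1), 'Up_or_Down'] = 1
df.loc[tp < tp.shift(1), 'Up_or_Down'] = 2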
def boll(self, start, stop): close = [] for i in range(start, stop): close.append(self.stocks[i].close) closepd = pandas.Series(close) ma = pandas.rolling_sum(closepd, 20) / 20 md = pandas.rolling_std(closepd, 20) up = ma + 2 * md dn = ma - 2 * md for i in range(start, stop): self.stocks[i].boll = ma[i - start] self.stocks[i].up = up[i - start] self.stocks[i].dn = dn[i - start]
def vortex_indicator(df, n): """Calculate the Vortex Indicator for given data. Vortex Indicator described here: http://www.vortexindicator.com/VFX_VORTEX.PDF :param df: pandas.DataFrame :param n: :return: pandas.DataFrame """ i = 0 TR = [0] while i < df.index[-1]: Range = max(df.at[i + 1, 'High'], df.at[i, 'Close']) - min(df.at[i + 1, 'Low'], df.at[i, 'Close']) TR.append(Range) i = i + 1 i = 0 VM = [0] while i < df.index[-1]: Range = abs(df.at[i + 1, 'High'] - df.at[i, 'Low']) - abs(df.at[i + 1, 'Low'] - df.at[i, 'High']) VM.append(Range) i = i + 1 VI = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name = 'Vortex_' + str(n)) df = df.join(VI) return df
def rolling_functions_tests(p, d): # Old-fashioned rolling API eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3)) eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3)) eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3)) eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3)) eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3)) eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3)) eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3)) eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3)) # see note around test_rolling_dataframe for logic concerning precision eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True) eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True) eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5)) eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad)) with ignoring(ImportError): eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar')) # Test with edge-case window sizes eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0)) eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1)) # Test with kwargs eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def rolling_sum(self, data_frame, periods): """Calculates the rolling sum Parameters ---------- data_frame : DataFrame contains time series periods : int period for rolling sum Returns ------- DataFrame """ return pandas.rolling_sum(data_frame, periods)
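# pandas.rolling_sum was deprecated in pandas 0.18 and removed in later releases,
# so a thin wrapper like the one above is the natural seam for migrating; a sketch
# of the drop-in modern body:
def rolling_sum(self, data_frame, periods):
    """Calculates the rolling sum with the modern .rolling() API"""
    return data_frame.rolling(window=periods).sum()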
def Vortex(df, n): """ Vortex Indicator """ i = 0 TR = [0] while i < len(df) - 1: # df.index[-1]: Range = max(df.get_value(i + 1, 'high'), df.get_value( i, 'close')) - min(df.get_value(i + 1, 'low'), df.get_value(i, 'close')) TR.append(Range) i = i + 1 i = 0 VM = [0] while i < len(df) - 1: # df.index[-1]: Range = abs(df.get_value(i + 1, 'high') - df.get_value(i, 'low')) - abs( df.get_value(i + 1, 'low') - df.get_value(i, 'high')) VM.append(Range) i = i + 1 result = pd.Series(pd.rolling_sum(pd.Series(VM), n) / pd.rolling_sum(pd.Series(TR), n), name='Vortex_' + str(n)) return out(SETTINGS, df, result)
def ULTOSC(df): TR_l = TR(df) BP_l = df['close'] - pd.concat([df['low'], df['close'].shift(1)], axis=1).min(axis=1) UltO = pd.Series( (4 * pd.rolling_sum(BP_l, 7) / pd.rolling_sum(TR_l, 7)) + (2 * pd.rolling_sum(BP_l, 14) / pd.rolling_sum(TR_l, 14)) + (pd.rolling_sum(BP_l, 28) / pd.rolling_sum(TR_l, 28)), name='Ultimate_Osc') return UltO
def net_matrix_build(data, station_data): '''This function adds the information about the total bikes at station to our net matrix, which currently only contains small integers that represent the net change in bikes over each hour.''' for column in data.columns: try: #we add the values from the Available Docks Guess to #the first row of our net change matrix (which represents #the first hour of July 1, 2013) for i in range(793): data.ix[24*i, column] += station_data.ix[int(column), 'Available Docks Guess'] except: continue #finally, we compute a rolling sum over the columns of the net change matrix data = pd.rolling_sum(data.fillna(False), window = 24, min_periods = 1) return data
def load_we_dist(self): with h5py.File('../we/pdist.h5', 'r') as f: p = f['histograms'][:] b = f['midpoints_0'][:] pcum = np.zeros_like(p) pcum = pd.rolling_sum(p, window=40, min_periods=0, axis=0) #pcum[:50,:] = pd.rolling_sum(p[:50,:], window=10, min_periods=0, axis=0) #pcum[50:150,:] = pd.rolling_sum(p[50:150,:], window=25, min_periods=0, axis=0) #pcum[150:,:] = pd.rolling_sum(p[150:,:], window=100, min_periods=0, axis=0) #pcum = np.cumsum(p, axis=0) pcum /= pcum.sum(axis=1)[:, np.newaxis] return pcum, b
def _roll_std(self, sample):
    calculator = lambda x: (x['rate_square'] - x['rate'] * x['rate'] / x['trade_days']) \
        / (x['trade_days'] - (x['trade_days'] > 1))
    ts = (lambda x: pd.DataFrame(
        dict(rate=x['rate'],
             rate_square=(x['rate'] * x['rate']),
             trade_days=x['trade_days'])).resample('MS', how='sum')
          )(sample)
    result = DataFrameExtended([], index=ts.index.rename('time'))
    for key, value in self._column_names['M'].items():
        # XXX taking the square root halves the precision, so floating-point errors
        # from the preceding arithmetic must be dealt with before this step
        result[value[0]] = _deal_float_error(
            pd.rolling_sum(ts, key).apply(calculator, axis=1)) ** 0.5
    result.total = (lambda x: int(abs(x) > FLOAT_ERR) * x)(calculator(ts.sum())) ** 0.5
    return _deal_float_error(result)
def get_last_changepoint(resid, max_days_over=5, plot=False):
    # Poor man's changepoint detection
    m_rngs = np.abs(np.diff(resid))
    # 3.267 is the standard control-chart constant (D4) for a moving range of two
    # observations, so ucl is the upper control limit of an XmR moving-range chart
    ucl = 3.267 * np.nanmean(m_rngs)
    if plot:
        plt.plot(m_rngs)
        plt.axhline(y=ucl)
    days_over = np.nan_to_num(pd.rolling_sum(m_rngs > ucl, 30))
    max_arg_over = 0
    args_where_over = np.argwhere(days_over > max_days_over)
    if len(args_where_over) > 0:
        max_arg_over = np.max(args_where_over) + 1
    return max_arg_over