def turb_depth(panel1, panel2, sep1, sep2, probet, adcpt):
    """Plot turbulence-intensity vs. depth profiles for model (FVCOM) and measured (ADCP) data.

    panel1/panel2: pandas Panels of velocity data (model, measured) -- TODO confirm
    sep1/sep2:     rolling-window lengths applied to each panel
    probet/adcpt:  timestamps at which each intensity profile is sampled
    """
    # one entry per depth bin along the panel's minor axis
    a = panel1.minor_axis[:]
    probeturbint = []
    adcpturbint = []
    for i, j in enumerate(a):
        print j  # Python 2 print statement: current depth bin
        height1 = panel1.minor_xs(j)
        height2 = panel2.minor_xs(j)
        mean1 = pd.rolling_mean(height1, sep1)
        mean2 = pd.rolling_mean(height2, sep2)
        var1 = pd.rolling_var(height1, sep1)
        var2 = pd.rolling_var(height2, sep2)
        # collapse per-column stats to single series via project helpers
        var1t = var1.apply(variance, axis=1)
        var2t = var2.apply(variance, axis=1)
        mean1t = mean1.apply(mean_vel, axis=1)
        mean2t = mean2.apply(mean_vel, axis=1)
        # turbulence intensity = variance measure / mean speed
        t_int1 = var1t/mean1t
        t_int2 = var2t/mean2t
        # sample each intensity series at the requested timestamp
        ptime = t_int1[probet]
        atime = t_int2[adcpt]
        print ptime
        print atime
        probeturbint.append(ptime)
        adcpturbint.append(atime)
    fig, ax = plt.subplots()
    ax.plot(probeturbint, a, label='FVCOM')
    ax.plot(adcpturbint, a, label='ADCP')
    ax.xaxis.grid()
    ax.yaxis.grid()
    ax.set_xlabel('Turbulence Intensity')
    ax.set_ylabel('Depth (m)')
    ax.set_title('Turbulence Intensity by Depth')
    plt.legend()
    plt.show()
def plot_wind_comp_var(self, data, xaxis):
    """Plot the u/v/w wind components with their 60-sample centered rolling
    variance on a twin y-axis (one stacked subplot per component)."""
    met = data[['wdir', 'wspd', 'wvert']]
    ''' wind components and their variance'''
    u_comp, v_comp = get_wind_components(met.wspd, met.wdir)
    w_comp = met.wvert
    u_var = pd.rolling_var(u_comp, 60, center=True)
    v_var = pd.rolling_var(v_comp, 60, center=True)
    w_var = pd.rolling_var(w_comp, 60, center=True)
    fig, (ax1, ax2, ax3) = plt.subplots(3, sharex=True)
    fs = 14
    # common label positions (axes fraction)
    xpos = 0.02
    ypos = 0.9
    ''' u '''
    ax1.plot(xaxis, u_comp)
    add_text_to(ax1, xpos, ypos, 'u-comp', fontsize=fs, color='b')
    add_text_to(ax1, 0.95-xpos, ypos, 'u-var', fontsize=fs, color='r')
    add_second_y_in(ax1, u_var, xaxis=xaxis)
    ''' v '''
    ax2.plot(xaxis, v_comp)
    add_text_to(ax2, xpos, ypos, 'v-comp', fontsize=fs, color='b')
    add_text_to(ax2, 0.95-xpos, ypos, 'v-var', fontsize=fs, color='r')
    add_second_y_in(ax2, v_var, xaxis=xaxis)
    ''' w '''
    ax3.plot(xaxis, w_comp)
    add_text_to(ax3, xpos, ypos, 'w-comp', fontsize=fs, color='b')
    add_text_to(ax3, 0.95-xpos, ypos, 'w-var', fontsize=fs, color='r')
    add_second_y_in(ax3, w_var, xaxis=xaxis)
    fig.subplots_adjust(hspace=0.1)
    plt.draw()
def buyAll(self, d_bond, d_equity, r_bond, r_equity):
    '''
    #this function solve the weight of spx in a buy all index to optimize a
    #backward looking sharpe ratio defined by total carry / vol in which
    #correlation is taken as last 100 days observation
    # d_equity: %ret of equity
    # d_bond: %ret of bond
    # r_equity - spx carry (dividend yield)
    # r_bond - ty carry (yield)
    # v_equity - spx variance
    # v_bond - ty variance
    # p - spx/ty correlation
    #result
    # x_IR - weight for maximizing IR
    # x_P - weight for minimizing variance assuming -50% constant correlation
    # x - average of the 2 above
    '''
    t = 200     # lookback for rolling correlation / variance
    t_s = 30    # ewma halflife used to smooth the correlation
    p = pd.rolling_corr(d_equity, d_bond, t)
    p = pd.ewma(p, halflife=t_s)
    # constant -50% correlation scenario for the min-variance weight
    p2 = pd.Series(-0.5, index=p.index)
    v_equity = pd.rolling_var(d_equity, t)
    v_bond = pd.rolling_var(d_bond, t)
    m = len(p)
    x_IR = p.copy()
    x_P = x_IR.copy()
    for i in range(0, m):
        # negative of the carry/vol ratio so fminbound (a minimizer) maximizes it
        # the *16 factor presumably annualises the variance -- TODO confirm
        f = lambda x, : -(x*r_equity[i]+(1-x)*r_bond[i])/np.sqrt((x**2*v_equity[i]+(1-x)**2*v_bond[i]+2*x*(1-x)*np.sqrt(v_equity[i]*v_bond[i])*p[i])*16)
        #fitting the data with fmin
        x0 = 0.2  # initial parameter value (unused by fminbound)
        x1 = op.fminbound(f, 0.1, 0.8, maxfun=100)
        x_IR[i] = x1
        #portfolio optimisation assuming a constant correlation of -50%
        f = lambda x, : -(x*r_equity[i]+(1-x)*r_bond[i])/np.sqrt((x**2*v_equity[i]+(1-x)**2*v_bond[i]+2*x*(1-x)*np.sqrt(v_equity[i]*v_bond[i])*p2[i])*16)
        # fitting the data with fmin
        x0 = 0.2  # initial parameter value (unused by fminbound)
        x2 = op.fminbound(f, 0.1, 0.8, maxfun=100)
        x_P[i] = x2
    # final weight: average of IR-maximizing and min-variance weights
    w = (x_P+x_IR)/2
    return w
def visualize_sequential_relationships(training_data, plot_size, smooth=None, window=1):
    """
    Generates line plots to visualize sequential data.  Assumes the data frame
    index is a time series.

    training_data : DataFrame to plot (smoothed columns are written back in place)
    plot_size     : maximum number of columns to plot
    smooth        : optional rolling transform: 'mean', 'var', 'skew' or 'kurt'
    window        : rolling window length used when smoothing
    """
    training_data.index.name = None
    num_features = plot_size if plot_size < len(training_data.columns) else len(training_data.columns)
    # BUG FIX: use floor division (//) so the counts stay integers under
    # Python 3 as well; plain / would make num_plots a float and break range()
    # and the ax[...] subplot indexing below.
    num_plots = num_features // 16 if num_features % 16 == 0 else num_features // 16 + 1
    for i in range(num_plots):
        fig, ax = plt.subplots(4, 4, sharex=True, figsize=(20, 10))
        for j in range(16):
            index = (i * 16) + j
            if index < num_features:
                if index != 3:  # this column is all 0s in the bike set
                    if smooth == 'mean':
                        training_data.iloc[:, index] = pd.rolling_mean(training_data.iloc[:, index], window)
                    elif smooth == 'var':
                        training_data.iloc[:, index] = pd.rolling_var(training_data.iloc[:, index], window)
                    elif smooth == 'skew':
                        training_data.iloc[:, index] = pd.rolling_skew(training_data.iloc[:, index], window)
                    elif smooth == 'kurt':
                        training_data.iloc[:, index] = pd.rolling_kurt(training_data.iloc[:, index], window)
                # BUG FIX: floor division for the 4x4 grid coordinates
                training_data.iloc[:, index].plot(ax=ax[j // 4, j % 4], kind='line',
                                                  legend=False, title=training_data.columns[index])
    fig.tight_layout()
def find_capm_gap(df_prices, i_lookback, switch):
    """Cross-sectionally rank symbols by their gap vs. the index.

    switch=1 -> raw return spread vs. index; switch=0 -> CAPM residual gap.
    NOTE(review): relies on module-level globals ls_symbols (last entry is
    treated as the market index) and tsu -- verify they are in scope.
    """
    # df_spread = pd.merge(df_prices, df_prices, left_index=True, right_index=True, how='outer')
    frames = [df_prices, df_prices]
    df_spread = pd.concat(frames, keys=ls_symbols)
    print "in"
    print "df_spread:::", df_spread
    # start from an all-NaN copy shaped like the price frame
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]
    tsu.returnize0(ts_index)  # in-place conversion to daily returns
    for s_symbol in ls_symbols[:len(ls_symbols)-1]:
        ts_price = df_prices[s_symbol]
        tsu.returnize0(ts_price)
        # print "returns", ts_price
        # print "index", ts_index
        ts_x_ret = pd.rolling_sum(ts_index, i_lookback)
        ts_y_ret = pd.rolling_sum(ts_price, i_lookback)
        # beta = cov(index, stock) / var(index); alpha from rolling means
        beta = (1/pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback)
        alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback)
        df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret)
        # print "ind", ts_x_ret, "y", ts_y_ret, "a" , alpha, "b", beta, df_capm_gap[s_symbol]
    ldt_timestamps = df_capm_gap.index
    print df_capm_gap
    for i in range(1, len(ldt_timestamps)):
        # replace each row with its cross-sectional ranks
        df_capm_gap.ix[ldt_timestamps[i]] = scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
        print df_spread.ix[[('AMZN', df_prices.index[i])]]
    return df_capm_gap
def test_rollingvar_ucc_1d(self):
    """variance_ucc on a random 1-D series must match pandas' rolling_var of
    the log returns (comparison starts at index ww-1, where the window is full)."""
    op = np.random.rand(512)
    ww = 128  # rolling window length
    des = npp.variance_ucc(op, ww)
    # log returns of consecutive samples
    ret = np.vectorize(log)(op[1:] / op[:-1])
    diff = des - ps.rolling_var(ps.Series(ret), ww).values[ww - 1:]
    # mean absolute error must stay below tolerance
    assert(abs(diff).sum() / op.shape[0] < EPSILON)
def rolling_functions_tests(p, d):
    """Check every old-style pd.rolling_* function against dask's equivalent
    on a pandas object p and its dask counterpart d."""
    # Old-fashioned rolling API
    assert_eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    assert_eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    assert_eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    assert_eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    assert_eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    assert_eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    assert_eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    assert_eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    # see note around test_rolling_dataframe for logic concerning precision
    assert_eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3), check_less_precise=True)
    assert_eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    assert_eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    # rolling_window needs scipy; skip silently when unavailable
    with ignoring(ImportError):
        assert_eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    assert_eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    assert_eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    assert_eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def add_rolling_stats(dataframe, fields, window=15, head=False):
    """Append rolling mean/std/var columns for each named field.

    New columns: '<field>-Roll_Mean', '<field>-Roll_std', '<field>-Roll_var'.
    Mutates and returns the same dataframe.
    """
    # TODO - add keyword args to determine which stats to apply
    for field in fields:
        # dataframe['Roll_Profit_Count'] = pd.rolling_apply(dataframe[field], window, rolling_profit_count, 1)
        dataframe[field + "-Roll_Mean"] = pd.rolling_mean(dataframe[field], window)
        dataframe[field + "-Roll_std"] = pd.rolling_std(dataframe[field], window)
        dataframe[field + "-Roll_var"] = pd.rolling_var(dataframe[field], window)
    if head:
        # print head if passed value
        # NOTE(review): the .head(head) result is computed but never printed or
        # returned, and the hard-coded names lack the '<field>-' prefix added
        # above -- confirm intended behavior.
        dataframe[["DateTime", "Ticket", "Profit", "Trend", "Roll_Profit_Count", "Roll_Mean", "Roll_std"]].head(head)
    return dataframe
def prepare(self):
    """create all features here"""
    # work on the most recent 3000 rows only
    df = self.source.iloc[-3000:].copy()
    # rolling mean / variance of the 'diff' column at several horizons
    windows = [40, 100, 200, 300, 500, 1000, 2000, 3000]
    for window in windows:
        lengthname = str(window)
        df['mean'+lengthname] = pd.rolling_mean(df['diff'], window)
        df['var'+lengthname] = pd.rolling_var(df['diff'], window)
    # the widest window leaves only fully-populated rows after dropna
    df = df.dropna()
    self.data = df
def yz(self):
    # yang zhang volatility estimator: combines overnight, open-to-close and
    # Rogers-Satchell variance components (Yang & Zhang, 2000)
    log_oc = (self.data['Open']/self.data['Close'].shift(1)).apply(np.log)  # overnight log return
    log_co = (self.data['Close']/self.data['Open']).apply(np.log)           # open-to-close log return
    s2rs = self.rs().values**2  # Rogers-Satchell variance component
    s2o = pd.rolling_var(log_oc, self.window) * self.total_days
    s2c = pd.rolling_var(log_co, self.window) * self.total_days
    # adjust other vol measures for the lag in open/close vol
    s2rs[self.window-1] = np.nan
    s2c[self.window-1] = np.nan
    # Yang-Zhang weighting constant
    k = 0.34/(1. + (self.window + 1.)/(self.window - 1.))
    out = (s2o + k * s2c + (1-k)*s2rs).apply(np.sqrt)
    return out
def calc_vol_ret(data): colname = data.columns[2] # data.dropna() # Create gap here if more than 1 day is required by shifting data data['return'] = data[colname] - data[colname].shift(LAG) # Natural log data['nlog'] = data[colname].apply(lambda f: np.log(f)) # Logged Difference # print(data.head()) data['lreturn'] = data['nlog'] - data['nlog'].shift(LAG) # Variance data['var'] = pd.rolling_var(data[colname], ROLLING_VAR_WINDOW, min_periods=None, freq=None, center=True) # Logged Variance data['logvar'] = pd.rolling_var(data['nlog'], ROLLING_VAR_WINDOW, min_periods=None, freq=None, center=True) if IFACF: acfRes, pacfRes = getacf(data) return acfRes, pacfRes, data else: return data
def plot_tke(self, data, xaxis):
    """Plot turbulent kinetic energy along the track with the underlying
    topography on a secondary y-axis."""
    met = data[['wdir', 'wspd', 'wvert', 'lats', 'lons']]
    topo = np.asarray(Terrain.get_topo(lats=met['lats'], lons=met['lons']))
    u_comp, v_comp = get_wind_components(met.wspd, met.wdir)
    w_comp = met.wvert
    # 60-sample centered rolling variance of each wind component
    u_var = pd.rolling_var(u_comp, 60, center=True)
    v_var = pd.rolling_var(v_comp, 60, center=True)
    w_var = pd.rolling_var(w_comp, 60, center=True)
    # TKE = 0.5 * (u'^2 + v'^2 + w'^2)
    tke = 0.5*(u_var+v_var+w_var)
    plt.figure()
    plt.plot(xaxis, tke)
    ax = plt.gca()
    ax.set_ylim([0, 10])
    plt.xlabel('distance')
    plt.ylabel('TKE [m2 s-2]')
    add_second_y_in(ax, topo, xaxis=xaxis, color='g', label='Topography [m]')
    plt.draw()
def variance(self, window=0, rebalanced=True, from_date=None, to_date=None):
    """Variance of the portfolio returns over [from_date, to_date].

    window == 0  -> scalar variance of the whole period
    window > 0   -> rolling variance with the given window
    window == -1 -> expanding variance
    Any other window value returns None.
    """
    series = self.returns(rebalanced, from_date, to_date)
    result = None
    if window == 0:
        result = np.asscalar(np.var(series))
    elif window > 0:
        result = pd.rolling_var(series, window)
    elif window == -1:
        result = pd.expanding_var(series)
    return result
def read_data(name, df=None):
    """Load one instrument's tab-separated history file, compute daily returns,
    and -- when an index frame `df` is supplied -- merge with it and add
    rolling beta and alpha columns.

    Returns the enriched DataFrame.
    """
    close_index = name+".Close"
    return_index = name+".Return"
    beta_index = name+".Beta"
    temp_df = pd.read_csv(FOLDER_PATH+name+FILE_EXTENSION,
                          delimiter="\t",
                          parse_dates=True,
                          index_col=False
                          ## date_parser=functools.partial(datetime.strptime, format = "%Y/%m/%d")
                          )
    temp_df.columns = [DATE_INDEX, name+".Open", name+".High", name+".Low", name+".Close",
                       name+".Volume", name+".Vol", name+".MA1", name+".MA2", name+".MA3",
                       name+".MA4", name+".MA5", name+".MA6"]
    # keep only Date, Close and Volume
    temp_df = temp_df.drop([name+".Open", name+".High", name+".Low", name+".MA1", name+".Vol",
                            name+".MA2", name+".MA3", name+".MA4", name+".MA5", name+".MA6"], 1)
    # Rule out invalid data
    temp_df = temp_df[(temp_df[name+".Volume"]>0)]
    # normalise the date column to datetimes, row by row
    for rn in range(0, len(temp_df)):
        temp_df.ix[rn, DATE_INDEX] = pd.to_datetime(temp_df.iloc[rn][DATE_INDEX])
    # Calculate daily return
    get_return = lambda x: x[1]/x[0]-1
    temp_df[return_index] = pd.rolling_apply(temp_df[close_index], 2, get_return, min_periods=2)
    # Calculate beta
    if not df is None:
        temp_df = pd.merge(df, temp_df, on=DATE_INDEX, how='outer')
        # beta = rolling cov(stock, index) / rolling var(index)
        temp_df[beta_index] = pd.rolling_cov(temp_df[return_index], temp_df[INDEX_HISTORY_FILE_NAME+".Return"], COV_ROLLING_WINDOW, min_periods=COV_ROLLING_WINDOW)/\
            pd.rolling_var(temp_df[INDEX_HISTORY_FILE_NAME+".Return"], COV_ROLLING_WINDOW, min_periods=COV_ROLLING_WINDOW)
        # Calculate alpha (requires the merged index-return column, so it
        # lives inside this guard)
        temp_df[name+".Alpha"] = temp_df[return_index] - temp_df[INDEX_HISTORY_FILE_NAME+".Return"]*temp_df[beta_index]
    return temp_df
def read_data(name):
    """Load one instrument's CSV, compute daily returns, rolling beta vs. the
    market index, and alpha.

    BUG FIXES vs. the original:
    - the Volume filter and the rolling_apply/rolling_cov calls referenced an
      undefined global `df` instead of the freshly-read `temp_df`;
    - beta's denominator used the stock's own return variance; CAPM beta is
      cov(stock, index) / var(index), as in the two-argument read_data variant.
    """
    close_index = name+".Close"
    return_index = name+".Return"
    beta_index = name+".Beta"
    temp_df = pd.read_csv(FOLDER_PATH+name+FILE_EXTENSION)
    # Rule out invalid data
    temp_df = temp_df[(temp_df[name+".Volume"]>0)]
    # Calculate daily return
    get_return = lambda x: x[1]/x[0]-1
    temp_df[return_index] = pd.rolling_apply(temp_df[close_index], 2, get_return, min_periods=2)
    # Calculate beta = rolling cov(stock, index) / rolling var(index)
    temp_df[beta_index] = pd.rolling_cov(temp_df[return_index], temp_df[INDEX_HISTORY_FILE_NAME+".Return"], 200, min_periods=200)/\
        pd.rolling_var(temp_df[INDEX_HISTORY_FILE_NAME+".Return"], 200, min_periods=200)
    # Calculate alpha
    temp_df[name+".Alpha"] = temp_df[return_index] - temp_df[INDEX_HISTORY_FILE_NAME+".Return"]*temp_df[beta_index]
    return temp_df
def spread_gap(df_prices, i_lookbak, switch): df_capm_gap = np.NAN * copy.deepcopy(df_prices) ts_index = df_prices[ls_symbols[-1]] tsu.returnize0(ts_index) for s_symbol in ls_symbols[:len(ls_symbols)-1]: ts_price = df_prices[s_symbol] tsu.returnize0(ts_price) print "returns", ts_price print "index", ts_index ts_x_ret = pd.rolling_sum(ts_index, i_lookback) ts_y_ret = pd.rolling_sum(ts_price, i_lookback) beta = (1/pd.rolling_var(ts_index, i_lookback)) * pd.rolling_cov(ts_index, ts_price, i_lookback) alpha = pd.rolling_mean(ts_price, i_lookback) - beta * pd.rolling_mean(ts_index, i_lookback) df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret) print "ind", ts_x_ret, "y", ts_y_ret, "a" , alpha, "b", beta, df_capm_gap[s_symbol] ldt_timestamps = df_capm_gap.index for i in range(1, len(ldt_timestamps)): df_capm_gap.ix[ldt_timestamps[i]]=scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]]) return df_capm_gap
def rolling_tests(p, d):
    """Check every old-style pd.rolling_* function against dask's equivalent
    on a pandas object p and its dask counterpart d."""
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    # mean absolute deviation as a custom rolling function
    mad = lambda x: np.fabs(x - x.mean()).mean()
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    eq(pd.rolling_window(p, 3, 'boxcar'), dd.rolling_window(d, 3, 'boxcar'))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def test_ts_var(self):
    """ts_var operator: window must come from arg1 {3, 5}; result dimension
    is the input dimension squared; evaluating without enough history raises
    IndexError; values match pd.rolling_var with window 3."""
    self.env.add_operator('ts_var', {
        'operator': OperatorTSVar,
        'arg1': {'value': [3, 5]},
    })
    # window 2 is not an allowed arg1 value -> validation fails
    string1 = 'ts_var(2, open1)'
    gene1 = self.env.parse_string(string1)
    self.assertFalse(gene1.validate())
    string2 = 'ts_var(3, open1)'
    gene2 = self.env.parse_string(string2)
    self.assertTrue(gene2.validate())
    # variance squares the unit
    self.assertEqual(gene2.dimension, 'CNY**2')
    # not enough history before date1 -> IndexError
    self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
    date1 = self.env.shift_date(self.date1, 2)
    # reference: first 2 rows are NaN for a 3-row window, so slice them off
    df = pd.rolling_var(self.env.get_data_value('open1'), 3).iloc[2:]
    self.assertTrue(
        frame_equal(
            gene2.eval(self.env, date1, self.date2),
            df)
    )
def _MPXDS(self, MPcon, reqlen, ssTD, ssFD, Nc, MPconFD):
    """ Function to preform subspace detection on multiplexed data
    MPcon is time domain rep of data block, MPconFD is freq. domain,
    ssTD is time domain rep of subspace, ssFD id freq domain rep,
    Nc is the number of channels in the multiplexed stream
    NOTE(review): reqlen is unused in this body -- confirm it can be dropped.
    """
    n = np.int32(np.shape(ssTD)[1])  # length of each basis vector
    a = pd.rolling_mean(MPcon, n)[n - 1:]  # rolling mean of data block
    b = pd.rolling_var(MPcon, n)[n - 1:]  # rolling var of data block
    b *= n  # rolling power in vector
    sum_ss = np.sum(ssTD, axis=1)  # the sum of all the subspace basis vects
    ares = a.reshape(1, len(a))  # reshaped a
    sumres = sum_ss.reshape(len(sum_ss), 1)  # reshaped sum
    av_norm = np.multiply(ares, sumres)  # to account for non 0 mean vects
    m1 = np.multiply(ssFD, MPconFD)  # fd correlation with each basis vect
    # preform inverse fft, trim edge effects, then remove the mean term
    if1 = scipy.real(scipy.fftpack.ifft(m1))[:, n - 1:len(MPcon)] - av_norm
    result = np.sum(np.square(if1), axis=0) / b  # get detection statistcs
    return result[::Nc]  # account for multiplexing
def rolling_functions_tests(p, d):
    """Check every old-style pd.rolling_* function against dask's equivalent
    on a pandas object p and its dask counterpart d (uses a module-level
    `mad` callable for rolling_apply)."""
    # Old-fashioned rolling API
    eq(pd.rolling_count(p, 3), dd.rolling_count(d, 3))
    eq(pd.rolling_sum(p, 3), dd.rolling_sum(d, 3))
    eq(pd.rolling_mean(p, 3), dd.rolling_mean(d, 3))
    eq(pd.rolling_median(p, 3), dd.rolling_median(d, 3))
    eq(pd.rolling_min(p, 3), dd.rolling_min(d, 3))
    eq(pd.rolling_max(p, 3), dd.rolling_max(d, 3))
    eq(pd.rolling_std(p, 3), dd.rolling_std(d, 3))
    eq(pd.rolling_var(p, 3), dd.rolling_var(d, 3))
    eq(pd.rolling_skew(p, 3), dd.rolling_skew(d, 3))
    eq(pd.rolling_kurt(p, 3), dd.rolling_kurt(d, 3))
    eq(pd.rolling_quantile(p, 3, 0.5), dd.rolling_quantile(d, 3, 0.5))
    eq(pd.rolling_apply(p, 3, mad), dd.rolling_apply(d, 3, mad))
    # rolling_window needs scipy; skip silently when unavailable
    with ignoring(ImportError):
        eq(pd.rolling_window(p, 3, "boxcar"), dd.rolling_window(d, 3, "boxcar"))
    # Test with edge-case window sizes
    eq(pd.rolling_sum(p, 0), dd.rolling_sum(d, 0))
    eq(pd.rolling_sum(p, 1), dd.rolling_sum(d, 1))
    # Test with kwargs
    eq(pd.rolling_sum(p, 3, min_periods=3), dd.rolling_sum(d, 3, min_periods=3))
def find_capm_gap(df_prices, sharpe_lookback, switch):
    """Cross-sectionally rank symbols by their gap vs. the index.

    switch=1 -> raw return spread vs. index; switch=0 -> CAPM residual gap.
    BUG FIX: the body referenced an undefined name 'i_lookback'; the parameter
    is 'sharpe_lookback' (already used by the ranking loop below), so it is
    now used consistently for the rolling windows too.
    NOTE(review): relies on module-level globals ls_symbols and tsu.
    """
    # frames = [df_prices, df_prices]
    # df_spread = pd.concat(frames, keys=ls_symbols)
    df_capm_gap = np.NAN * copy.deepcopy(df_prices)
    ts_index = df_prices[ls_symbols[-1]]  # last symbol acts as the market index
    tsu.returnize0(ts_index)
    for s_symbol in ls_symbols[:len(ls_symbols)-1]:
        ts_price = df_prices[s_symbol]
        tsu.returnize0(ts_price)
        ts_x_ret = pd.rolling_sum(ts_index, sharpe_lookback)
        ts_y_ret = pd.rolling_sum(ts_price, sharpe_lookback)
        # beta = cov(index, stock) / var(index); alpha from rolling means
        beta = (1/pd.rolling_var(ts_index, sharpe_lookback)) * pd.rolling_cov(ts_index, ts_price, sharpe_lookback)
        alpha = pd.rolling_mean(ts_price, sharpe_lookback) - beta * pd.rolling_mean(ts_index, sharpe_lookback)
        df_capm_gap[s_symbol] = switch*(ts_y_ret - ts_x_ret)+(1-switch)*(ts_y_ret - alpha - beta * ts_x_ret)
    ldt_timestamps = df_capm_gap.index
    for i in range(1 + sharpe_lookback, len(ldt_timestamps)):
        # replace each row with its cross-sectional ranks
        df_capm_gap.ix[ldt_timestamps[i]] = scipy.stats.stats.rankdata(df_capm_gap.ix[ldt_timestamps[i]])
    return df_capm_gap
def rolling_var(self, *args, **kwargs):
    """Apply pd.rolling_var to the wrapped series and rewrap as MySeries."""
    raw = pd.rolling_var(self.x, *args, **kwargs)
    return MySeries(raw)
# --- Script: load daily stock prices and build differenced / log-transformed series ---
stock_data = pd.read_csv(path)
stock_data['Date'] = pd.to_datetime(stock_data['Date'])
stock_data = stock_data.sort_values(by='Date', ascending=True)
stock_data = stock_data.set_index('Date')
#stock_data['Close'].plot(figsize=(16, 12))
#pylab.show ()
# first difference removes the level/trend
stock_data['First Difference'] = stock_data['Close'] - stock_data['Close'].shift()
#stock_data['First Difference'].plot (figsize= (10,10))
#pylab.show ()
#Applying log-Transform from numpy 'np'
stock_data['Natural Log'] = stock_data['Close'].apply(lambda x: np.log(x))
#stock_data['Natural Log'].plot (figsize= (16,12))
#pylab.show ()
# 30-sample centered rolling variance of raw and logged prices
stock_data['Original Variance'] = pd.rolling_var(stock_data['Close'], 30, min_periods=None, freq=None, center=True)
stock_data['Log Variance'] = pd.rolling_var(stock_data['Natural Log'], 30, min_periods=None, freq=None, center=True)
#fig, ax = plt.subplots(2, 1, figsize=(13, 12))
#stock_data['Original Variance'].plot(ax=ax[0], title='Original Variance')
#stock_data['Log Variance'].plot(ax=ax[1], title='Log Variance')
#fig.tight_layout()
#pylab.show ()
# log-return series: first difference of the log prices
stock_data['Logged First Difference'] = stock_data['Natural Log'] - stock_data['Natural Log'].shift(1)
#stock_data['Logged First Difference'].plot(figsize=(16, 12))
#pylab.show ()
#Trying out different Lagging possibilities.
#stock_data['Lag 1'] = stock_data ['Logged First Difference'].shift (1)
def get_vols(ts, window=30):
    """Rolling variance of the log-differenced series (default 30-sample window)."""
    return pd.rolling_var(get_diffs(ts), window)
# NOTE(review): this fragment begins mid-function -- the 'def' line of the
# profit-counting helper (apparently rolling_profit_count(dataframe), used
# below) is not visible in this chunk. Body kept exactly as found.
count = 0
for profit in dataframe:
    # net count: +1 per winning trade, -1 per losing trade
    if profit >= 0:
        count = count + 1
    else:
        count = count - 1
return count

# Add Rolling stats to Order DF
window = 15
df_ord['Roll_Profit_Count'] = pd.rolling_apply(df_ord['Profit'], window, rolling_profit_count, 1)
df_ord['Roll_Mean'] = pd.rolling_mean(df_ord['Profit'], window)
df_ord['Roll_std'] = pd.rolling_std(df_ord['Profit'], window)
df_ord['Roll_var'] = pd.rolling_var(df_ord['Profit'], window)

# Create Trend Ranges, based on visual inspection of previous graph, and add Trends to Order dataframe
trend_range = [0, 6, 33, 60, 77, 86, 150, 171, 207, 222, 271, 314, 331, 348]
trend_labels = ['UP1', 'FLAT1', 'DOWN1', 'UP2', 'DOWN2', 'FLAT2', 'DOWN3',
                'UP3', 'DOWN4', 'UP4', 'DOWN5', 'UP6', 'FLAT3']
df_ord['Trend'] = pd.cut(df_ord.Ticket, trend_range, labels=trend_labels).astype('category')

# Order Dataframe with Trends added
# NOTE(review): the expression below is cut off mid-list in this chunk.
df_ord[['DateTime', 'Ticket', 'Profit', 'Trend', 'Roll_Profit_Count', 'Roll_Mean', 'Roll_std'
def rolling_smoother(self, data, stype='rolling_mean', win_size=10, win_type='boxcar',
                     center=False, std=0.1, beta=0.1, power=1, width=1):
    """
    Perform a rolling smoothing on the data.
    For complete help refer to
    http://pandas.pydata.org/pandas-docs/dev/computation.html

    :param data: series to smooth
    :param stype: smoothing type -- one of 'count', 'sum', 'mean', 'median',
                  'min', 'max', 'std', 'var', 'skew', 'kurt' or 'window'
    :param win_size: rolling window length
    :param win_type: window shape when stype == 'window': boxcar, triang,
                     blackman, hamming, bartlett, parzen, bohman,
                     blackmanharris, nuttall, barthann, kaiser (needs beta),
                     gaussian (needs std), general_gaussian (needs power and
                     width), slepian (needs width)
    :param center: center the window on each sample
    :param std: shape parameter for the gaussian window
    :param beta: shape parameter for the kaiser window
    :param power: shape parameter for the general_gaussian window
    :param width: shape parameter for general_gaussian / slepian windows
    """
    # BUG FIX: the window dispatch used independent `if` statements with a
    # final dangling `else` that bound only to the general_gaussian test, so
    # for 'kaiser' and 'gaussian' the correct result was overwritten by a
    # plain rolling_window call missing the required shape parameter. The
    # elif chains restore the intended dispatch.
    # NOTE(review): stype values are bare names ('mean', not the default
    # 'rolling_mean'); an unrecognized stype leaves `newy` unbound and raises
    # NameError at the return, exactly as before.
    if stype == 'count':
        newy = pd.rolling_count(data, win_size)
    elif stype == 'sum':
        newy = pd.rolling_sum(data, win_size)
    elif stype == 'mean':
        newy = pd.rolling_mean(data, win_size)
    elif stype == 'median':
        newy = pd.rolling_median(data, win_size)
    elif stype == 'min':
        newy = pd.rolling_min(data, win_size)
    elif stype == 'max':
        newy = pd.rolling_max(data, win_size)
    elif stype == 'std':
        newy = pd.rolling_std(data, win_size)
    elif stype == 'var':
        newy = pd.rolling_var(data, win_size)
    elif stype == 'skew':
        newy = pd.rolling_skew(data, win_size)
    elif stype == 'kurt':
        newy = pd.rolling_kurt(data, win_size)
    elif stype == 'window':
        if win_type == 'kaiser':
            newy = pd.rolling_window(data, win_size, win_type, center=center, beta=beta)
        elif win_type == 'gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center, std=std)
        elif win_type == 'general_gaussian':
            newy = pd.rolling_window(data, win_size, win_type, center=center, power=power, width=width)
        else:
            newy = pd.rolling_window(data, win_size, win_type, center=center)
    return newy
def VAR(self, param):
    """Rolling sample variance (ddof=1).

    param: (series, window) pair -- param[0] is the pandas Series,
    param[1] the rolling window length.
    FIX: pd.rolling_var was deprecated in pandas 0.18 and removed in 0.20;
    Series.rolling(...).var() is the drop-in equivalent (same defaults).
    """
    return param[0].rolling(window=param[1]).var()
def VARP(self, param):
    """Rolling population variance (ddof=0).

    param: (series, window) pair -- param[0] is the pandas Series,
    param[1] the rolling window length.
    FIX: pd.rolling_var was deprecated in pandas 0.18 and removed in 0.20;
    Series.rolling(...).var(ddof=0) is the drop-in equivalent.
    """
    return param[0].rolling(window=param[1]).var(ddof=0)
def turbulence_int(panel1, panel2, sep1, sep2, binn, both=False, hr=False):
    """Plot turbulence-intensity time series for one depth bin.

    panel1/panel2: probe (FVCOM model) and ADCP (measured) panels -- TODO confirm
    sep1/sep2:     rolling-window lengths applied to each panel
    binn:          depth bin (minor-axis label) to extract
    both=True:     overlay probe and ADCP curves on one figure
    both=False:    draw two separate figures (probes, then ADCP)
    hr=True:       6-hour minor ticks instead of 1-hour
    """
    height1 = panel1.minor_xs(binn)
    height2 = panel2.minor_xs(binn)
    mean1 = pd.rolling_mean(height1, sep1)
    mean2 = pd.rolling_mean(height2, sep2)
    var1 = pd.rolling_var(height1, sep1)
    var2 = pd.rolling_var(height2, sep2)
    # collapse per-column stats to single series via project helpers
    var1t = var1.apply(variance, axis=1)
    var2t = var2.apply(variance, axis=1)
    mean1t = mean1.apply(mean_vel, axis=1)
    mean2t = mean2.apply(mean_vel, axis=1)
    # turbulence intensity = variance measure / mean speed
    t_int1 = var1t/mean1t
    t_int2 = var2t/mean2t
    if both == True:
        fig, ax = plt.subplots()
        ax1 = ax.twinx()  # second axis carries the speed traces
        ax.plot_date(t_int2.index.to_datetime(), t_int2, '-', label='ADCPs I', color='red')
        ax.plot_date(t_int1.index.to_datetime(), t_int1, '-', label='Probes I')
        ax1.plot_date(mean1t.index.to_datetime(), mean1t, '--', label='Probes Speed')
        ax1.plot_date(t_int2.index.to_datetime(), mean2t, '--', label='ADCPs Speed')
        ax.set_title('Turbulence Intensity')
        if hr == True:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=6))
        else:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=1))
        ax.xaxis.set_minor_formatter(dt.DateFormatter('%H:%M\n%a'))
        ax.xaxis.grid(True, which="minor")
        ax.yaxis.grid()
        ax.xaxis.set_major_locator(dt.MonthLocator())
        ax.xaxis.set_major_formatter(dt.DateFormatter('\n\n\n%b\n%Y'))
        plt.tight_layout()
        ax.set_ylabel('Turbulence Intensity')
        ax1.set_ylabel('Velocity (m/s)')
        plt.legend()
        plt.show()
    else:
        # figure 1: probes / model
        fig, ax = plt.subplots()
        ax1 = ax.twinx()
        ax.plot_date(t_int1.index.to_datetime(), t_int1, '-', color='red', label='Probes I')
        ax1.plot_date(mean1t.index.to_datetime(), mean1t, '--', label='Probes Speed')
        ax.set_title('Turbulence Intensity \n Calculated from FVCOM Data')
        if hr == True:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=6))
        else:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=1))
        ax.xaxis.set_minor_formatter(dt.DateFormatter('%H:%M\n%a'))
        ax.xaxis.grid(True, which="minor")
        ax.yaxis.grid()
        ax.xaxis.set_major_locator(dt.MonthLocator())
        ax.xaxis.set_major_formatter(dt.DateFormatter('\n\n\n%b\n%Y'))
        plt.tight_layout()
        ax.set_ylabel('Turbulence Intensity')
        ax1.set_ylabel('Velocity (m/s)')
        plt.legend()
        plt.show()
        # figure 2: ADCP / measured
        fig, ax = plt.subplots()
        ax1 = ax.twinx()
        ax.plot_date(t_int2.index.to_datetime(), t_int2, '-', color='red', label='ADCP I')
        ax1.plot_date(mean2t.index.to_datetime(), mean2t, '--', label='ADCP Speed')
        ax.set_title('Turbulence Intensity \n Calculated for Measured Data')
        if hr == True:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=6))
        else:
            ax.xaxis.set_minor_locator(dt.HourLocator(byhour=range(24), interval=1))
        ax.xaxis.set_minor_formatter(dt.DateFormatter('%H:%M\n%a'))
        ax.xaxis.grid(True, which="minor")
        ax.yaxis.grid()
        ax.xaxis.set_major_locator(dt.MonthLocator())
        ax.xaxis.set_major_formatter(dt.DateFormatter('\n\n\n%b\n%Y'))
        plt.tight_layout()
        ax.set_ylabel('Turbulence Intensity')
        ax1.set_ylabel('Velocity (m/s)')
        plt.legend()
        plt.show()
def volatility(OHLC, n=10, calc="close", N=260, mean0=False):
    """Annualised historical volatility estimators (port of R's TTR::volatility).

    OHLC:  DataFrame with 'open', 'high', 'low', 'close' columns
    n:     number of historical prices used for the estimate
    calc:  'close', 'garman.klass', 'parkinson', 'rogers.satchell',
           'gk.yz' or 'yang.zhang'
    N:     number of closing prices in a year
    mean0: for calc='close', use an effective mean of zero
    Returns the volatility series s.
    """
    #OHLC <- try.xts(OHLC, error=as.matrix)
    # Choose an arg name that doesn't clash with ROC's 'type' arg
    #calc = match.arg(calc,
    #        c("close","garman.klass","parkinson",
    #          "rogers.satchell","gk.yz","yang.zhang"))
    # s Volatility
    # N Number of closing prices in a year
    # n Number of historical prices used for the volatility estimate
    # ci The closing price on the ith day
    # ri Log return on the ith day
    # Historical Close-to-Close Volatility
    # http://www.sitmo.com/eq/172
    if calc=="close":
        # Add univariate case from Cedrick Johnson's R-SIG-Finance post
        r = OHLC['close'].pct_change()
        if(mean0):
            # This is an alternative SD calculation using an effective mean of 0
            s = np.sqrt(N) * np.sqrt(pd.rolling_sum(r**2, n-1) / (n-2))
        else:
            # This is the standard SD calculation using the sample mean
            s = np.sqrt(N) * pd.rolling_std(r, n-1)
    # Historical Open-High-Low-Close Volatility: Garman Klass
    # http://www.sitmo.com/eq/402
    if calc=="garman.klass":
        s = np.sqrt(N/n * pd.rolling_sum(0.5 * np.log(OHLC['high']/OHLC['low'])**2 -
                                         (2*np.log(2)-1) * np.log(OHLC['close']/OHLC['open'])**2, n))
    if calc=="parkinson":
        # Historical High-Low Volatility: Parkinson
        # http://www.sitmo.com/eq/173
        s = np.sqrt(N/(4*n*np.log(2)) * pd.rolling_sum(np.log(OHLC['high']/OHLC['low'])**2, n))
    if calc=="rogers.satchell":
        # Historical Open-High-Low-Close Volatility: Rogers Satchell
        # http://www.sitmo.com/eq/414
        s = np.sqrt(N/n * pd.rolling_sum(
            np.log(OHLC['high']/OHLC['close']) * np.log(OHLC['high']/OHLC['open']) +
            np.log(OHLC['low']/OHLC['close']) * np.log(OHLC['low']/OHLC['open']), n))
    if calc=="gk.yz":
        # Garman-Klass with Yang-Zhang overnight adjustment
        Cl1 = OHLC['close'].shift(1)
        # s=np.sqrt(N/n * pd.rolling_sum(np.log(OHLC['open']/Cl1)**2 + 0.5 * np.log(OHLC['high']/OHLC['low'])**2 - (2*np.log(2)-1) * np.log(OHLC['close']/OHLC['open'])**2 , n))
        s = np.sqrt(N / n * ((np.log(OHLC['open'] / Cl1) ** 2 +
                              0.5 * np.log(OHLC['high'] / OHLC['low']) ** 2 -
                              (2 * np.log(2) - 1) * np.log(OHLC['close'] /
                              OHLC['open']) ** 2).rolling(window=n, center=False).sum()))
    #if( calc=="garman.klass.yang.zhang" ) {
    # Historical Open-High-Low-Close Volatility: Garman and Klass (Yang Zhang)
    # http://www.sitmo.com/eq/409
    #  if(is.xts(OHLC)) {
    #    Cl1 <- lag.xts(OHLC[,4])
    #  } else {
    #    Cl1 <- c( NA, OHLC[-NROW(OHLC),4] )
    #  }
    #  s <- sqrt( N/n * runSum(
    #      log(OHLC[,1]/Cl1)^2 +
    #      .5 * log(OHLC[,2]/OHLC[,3])^2 -
    #      (2*log(2)-1) * log(OHLC[,4]/OHLC[,1])^2 , n) )
    #s <- sqrt( Z/n * runSum(
    #      log(op/cl[-1])^2 +
    #      .5*log(hi/lo)^2 -
    #      (2*log(2)-1)*log(cl/op)^2 ) )
    if calc=="yang.zhang":
        # Historical Open-High-Low-Close Volatility: Yang Zhang
        # http://www.sitmo.com/eq/417
        Cl1 = OHLC['close'].shift(1)
        alpha = 1.34
        k = (alpha-1)/(alpha+(n+1)/(n-1))
        s2o = N * pd.rolling_var(np.log(OHLC['open'] / Cl1), n)      # overnight variance
        s2c = N * pd.rolling_var(np.log(OHLC['close'] / OHLC['open']), n)  # open-to-close variance
        s2rs = volatility(OHLC=OHLC, n=n, calc="rogers.satchell", N=N)
        s = np.sqrt(s2o + k*s2c + (1-k)*(s2rs**2))
    return s
# import pandas.io.data as web
#
#
# HS300=web.DataReader(name='000300.SS',data_source='yahoo',start='2006-01-01',end='2015-11-10')
# HS300.columns=['open','high','low','close','volumne','adj close']
# import pandas as pd
#
# aa=signalTesting(HS300)
# --- Script fragment: butterfly/outright spread mean-reversion backtest setup ---
data[sheet_name].columns = ['DateTime', 'Close']
outrights = xls.sheet_names[0:12]
flies = xls.sheet_names[12:]
ButterflyData = data['BL-'+ButterflyName.upper()+' Comdty']
OutrightData = data['L '+OutrightName.upper()+' Comdty']
rolling_window = 20
trade_recorder = {}  # one trades() record per hedge ratio
m = pd.merge(OutrightData, ButterflyData, on="DateTime",
             suffixes=('_'+OutrightName, '_'+ButterflyName))
# sweep candidate hedge ratios
for i in range(HedgeRatioFrom, HedgeRatioTo, 1):
    index = 'hr_'+i.__str__()
    trade_recorder[index] = trades()
    # hedged spread = outright + i * butterfly
    m[index] = m['Close_'+OutrightName] + i*m['Close_'+ButterflyName]
    m[index+'_mean'] = pd.rolling_mean(m[index], rolling_window)
    # NOTE(review): this column is named '_std' but pd.rolling_var returns the
    # VARIANCE, so the entry bands below are mean +/- EntryStd*variance --
    # confirm whether pd.rolling_std was intended.
    m[index+'_std'] = pd.rolling_var(m[index], rolling_window)
    for rn in range(1, len(m)):
        # downward cross of the upper band while flat -> enter short at the band
        if (m.iloc[rn-1][index] > m.iloc[rn-1][index+'_mean'] + EntryStd*m.iloc[rn-1][index+'_std']) and\
           (m.iloc[rn][index] < m.iloc[rn-1][index+'_mean'] + EntryStd*m.iloc[rn-1][index+'_std']) and\
           (trade_recorder[index].position == 0):
            trade_recorder[index].add(m.iloc[rn]['DateTime'], m.iloc[rn-1][index+'_mean'] + EntryStd*m.iloc[rn-1][index+'_std'], -1, "EnterShort")
        # upward cross of the lower band while flat -> enter long at the band
        if m.iloc[rn-1][index] < m.iloc[rn-1][index+'_mean'] - EntryStd*m.iloc[rn-1][index+'_std'] and\
           m.iloc[rn][index] > m.iloc[rn-1][index+'_mean'] - EntryStd*m.iloc[rn-1][index+'_std'] and\
           trade_recorder[index].position == 0:
            trade_recorder[index].add(m.iloc[rn]['DateTime'], m.iloc[rn-1][index+'_mean'] - EntryStd*m.iloc[rn-1][index+'_std'], 1, "EnterLong")
# --- Script fragment: historical-average benchmark forecasts and asset allocation ---
# technical predictor variable names (moving-average, momentum, volume rules)
tech_var = ['ma_1_9', 'ma_1_12', 'ma_2_9', 'ma_2_12', 'ma_3_9', 'ma_3_12',
            'mom_9', 'mom_12', 'vol_1_9', 'vol_1_12', 'vol_2_9', 'vol_2_12',
            'vol_3_9', 'vol_3_12']
all_var = econ_var + tech_var
# get data for specified date range
df_sub = df[beg_date_init:end_date_oos]
# Expanding window historical average forecast for equity premium
df['ha_mean'] = Series(pd.expanding_mean(df_sub['equity_premium']/100, min_periods=window_size).shift(1), index=df_sub.index)
# Rolling window historical average forecast for equity premium variance
# note degree of freedom adjusted to match NRZ
df['ha_var'] = Series(pd.rolling_var(df_sub['equity_premium']/100, window_size, min_periods=window_size, ddof=0).shift(1), index=df_sub.index)
# Perform asset allocation using historical average forecasts using c_bp = 0
# all months
df_sub = df[beg_date_oos:end_date_oos]
ha_results = perform_asset_allocation(df_sub['equity_premium']/100, df_sub['Rfree'], df_sub['ha_mean'], df_sub['ha_var'], gamma_MV, 0)
# expansion months
df_exp = df_sub[df_sub['recession']==0]
ha_results_exp = perform_asset_allocation(df_exp['equity_premium']/100, df_exp['Rfree'], df_exp['ha_mean'], df_exp['ha_var'], gamma_MV, 0)
# recession months
df_rec = df_sub[df_sub['recession']==1]
ha_results_rec = perform_asset_allocation(df_rec['equity_premium']/100, df_rec['Rfree'], df_rec['ha_mean'], df_rec['ha_var'], gamma_MV, 0)
def ts_operation(df, n):
    """Rolling sample variance of df over a window of n observations.

    FIX: pd.rolling_var was deprecated in pandas 0.18 and removed in 0.20;
    .rolling(window=n).var() is the drop-in equivalent (same defaults).
    """
    return df.rolling(window=n).var()
def averageVariances(self, data, window_size=10):
    """Per-row mean, across sensors, of each sensor's rolling variance.

    data: DataFrame with a 'timestamp' column plus one column per sensor.
    Returns a Series aligned with data's index.
    FIXES: `data.columns - ['timestamp']` (set-difference via `-` on Index)
    and pd.rolling_var were both removed from pandas; Index.difference and
    Series.rolling(...).var() are the supported equivalents. The column-wise
    mean is unaffected by difference()'s sorted ordering.
    """
    variances = {}
    for sensor in data.columns.difference(['timestamp']):
        variances[sensor] = data[sensor].rolling(window=window_size).var()
    return pd.DataFrame(variances).mean(axis=1)
# --- Script fragment: per-sensor rolling variance vs. range, and process-noise variance ---
ir1_val, ir2_val, ir3_val, ir4_val = get_ir_linearised(ir_voltages, prev_V, ir_coeffs)
ir1.append(ir1_val)
ir2.append(ir2_val)
ir3.append(ir3_val)
ir4.append(ir4_val)
data_vec = [sonar1, sonar2, ir1, ir2, ir3, ir4]
datttt = 0  # index into data_vec, advanced once per sensor
# NOTE(review): iterates a name `Sensor` defined elsewhere -- presumably a
# collection of sensor objects matching data_vec's order; verify.
for sensor in Sensor:
    #Sort sensor Data based on range with window size set
    window = 100
    sensor.data = data_vec[datttt]
    rollvar_sensor = pd.DataFrame(data=sensor.data, index=range_)
    rollvar_sensor = rollvar_sensor.sort_index()
    rollvar_sensor = pd.rolling_var(rollvar_sensor, window)  #Calculate Var
    rollvar_sensor = rollvar_sensor[0].tolist()
    if ir3_min_max == [sensor.min_, sensor.max_]:
        # scale only the ir3 sensor's variance -- presumably a unit
        # correction; confirm the factor of 100
        rollvar_sensor = [x * 100 for x in rollvar_sensor]
    sensor.roll_var = rollvar_sensor
    datttt += 1

# Rolling variance of the process noise
range_command = [0]
for i in range(1, len(velocity_command)):
    # integrate commanded velocity into a commanded range
    range_command.append(range_command[i - 1] + velocity_command[i] * (time[i] - time[i - 1]))
rollvar_W = pd.DataFrame(data=range_command, index=range_)
rollvar_W = rollvar_W.sort_index()
rollvar_W = pd.rolling_std(rollvar_W, window)**2