def rolling_statistics(data):
    ''' Calculates and plots rolling statistics (mean, std, correlation). '''
    plt.figure(figsize=(11, 8))

    plt.subplot(311)
    mr = pd.rolling_mean(data['returns'], 252) * 252
    mr.plot()
    plt.grid(True)
    plt.ylabel('returns (252d)')
    plt.axhline(mr.mean(), color='r', ls='dashed', lw=1.5)

    plt.subplot(312)
    vo = pd.rolling_std(data['returns'], 252) * math.sqrt(252)
    vo.plot()
    plt.grid(True)
    plt.ylabel('volatility (252d)')
    plt.axhline(vo.mean(), color='r', ls='dashed', lw=1.5)
    vx = plt.axis()

    plt.subplot(313)
    co = pd.rolling_corr(mr, vo, 252)
    co.plot()
    plt.grid(True)
    plt.ylabel('correlation (252d)')
    cx = plt.axis()
    plt.axis([vx[0], vx[1], cx[2], cx[3]])
    plt.axhline(co.mean(), color='r', ls='dashed', lw=1.5)
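# Note: the snippet above uses the pre-0.18 module-level helpers
# (pd.rolling_mean / pd.rolling_std / pd.rolling_corr), which were removed in
# pandas 1.0. A minimal sketch of the method-based equivalents, on synthetic
# returns (the data here is illustrative, not from the original):
import numpy as np
import pandas as pd

rng = pd.date_range('2010-01-01', periods=1000, freq='B')
returns = pd.Series(np.random.randn(1000) * 0.01, index=rng)

mr = returns.rolling(252).mean() * 252           # was pd.rolling_mean(returns, 252) * 252
vo = returns.rolling(252).std() * np.sqrt(252)   # was pd.rolling_std(returns, 252) * sqrt(252)
co = mr.rolling(252).corr(vo)                    # was pd.rolling_corr(mr, vo, 252)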
def correlation(sym1='LRCX', sym2='AAPL', bench='SPY'):
    # Pull the pricing data for our two stocks and the S&P 500
    start = datetime.datetime(2013, 1, 1)  # '2013-01-01'
    # end = '2015-01-01'  # not used; default to today
    bench_data = pull_series(bench, start)  # keep the ticker string in `bench` for labels
    a1 = pull_series(sym1, start)
    a2 = pull_series(sym2, start)

    # Do a simple plot
    fig = plt.figure()
    ax1 = fig.add_subplot(211)
    ax1.scatter(a1, a2)
    ax1.set_xlabel(sym1)
    ax1.set_ylabel(sym2)
    ax1.set_title('Stock prices from %i-%i-%i to today'
                  % (start.day, start.month, start.year))

    print("Correlation coefficients")
    print("%s and %s: %.5f" % (sym1, sym2, np.corrcoef(a1, a2)[0, 1]))
    print("%s and %s: %.5f" % (sym1, bench, np.corrcoef(a1, bench_data)[0, 1]))
    print("%s and %s: %.5f" % (sym2, bench, np.corrcoef(bench_data, a2)[0, 1]))

    # Get rolling correlation from pandas
    rolling_correlation = pd.rolling_corr(a1, a2, 60)
    ax2 = fig.add_subplot(223)  # was 222, which overlaps the 211 axes
    ax2.plot(rolling_correlation)
    ax2.set_xlabel('Day')
    ax2.set_ylabel('60-day Rolling Correlation')

    # Get raw correlation on synthetic data
    X = np.random.rand(100)
    Y = X + np.random.poisson(size=100)
    ax3 = fig.add_subplot(224)  # was 223
    ax3.scatter(X, Y)
    print('X-Y correlation coefficient: %.5f' % np.corrcoef(X, Y)[0, 1])
    plt.show()
    return np.corrcoef(X, Y)[0, 1]
def computeCorrelations(self, data1, data2):
    correl = pandas.rolling_corr(data1, data2, window=20)
    data2['CORR'] = correl
    data2['stime'] = data2.index
    json_s = data2[['stime', 'svalue']].to_json(orient='values')
    return json_s
def get_rolling_corr(self, a, b, window=252, field='Adj Close', how='pct', plot=True):
    data = self.get_data([a, b], field, how)
    corr = pd.rolling_corr(data[a], data[b], window)
    if plot:
        corr.plot(grid=True, style='b')
        data.plot()
    return corr
def make_PCA_series(lfp, window, skip):
    # window is the length of the window to slide (in bins)
    # skip is the spacing between samples in the returned frame
    corrseries = pd.rolling_corr(lfp.dataframe, window=window)
    corrseries = corrseries.iloc[::skip, :, :]
    corrseries = corrseries.to_frame(filter_observations=False).transpose()
    lfp_pairs = physutils.LFPset(corrseries, meta=lfp.meta.copy())
    eigs = get_eigen_series(lfp_pairs)
    return eigs
def roll_corr(con_pan, var1, var2, window, plot=False):
    if plot:
        a = lag_plot(con_pan[var1])
        plt.show()
        autocorrelation_plot(con_pan[var1])
        plt.show()
    else:
        a = pd.rolling_corr(con_pan[var1], con_pan[var2], window=window)
    return a
def buyAll(self, d_bond, d_equity, r_bond, r_equity):
    '''
    Solve for the weight of SPX in a buy-all index that optimises a
    backward-looking Sharpe ratio (total carry / vol), where the
    correlation is taken from the last 100 days of observations.
      d_equity: % return of equity
      d_bond:   % return of bond
      r_equity: SPX carry (dividend yield)
      r_bond:   TY carry (yield)
      v_equity: SPX variance
      v_bond:   TY variance
      p:        SPX/TY correlation
    Results:
      x_IR: weight maximising the information ratio
      x_P:  weight minimising variance assuming a constant -50% correlation
      w:    average of the two
    '''
    t = 200
    t_s = 30
    p = pd.rolling_corr(d_equity, d_bond, t)
    p = pd.ewma(p, halflife=t_s)
    p2 = pd.Series(-0.5, index=p.index)
    v_equity = pd.rolling_var(d_equity, t)
    v_bond = pd.rolling_var(d_bond, t)
    m = len(p)
    x_IR = p.copy()
    x_P = x_IR.copy()
    for i in range(0, m):
        # maximise IR: minimise the negative carry/vol ratio with fminbound
        f = lambda x: -(x * r_equity[i] + (1 - x) * r_bond[i]) / np.sqrt(
            (x**2 * v_equity[i] + (1 - x)**2 * v_bond[i]
             + 2 * x * (1 - x) * np.sqrt(v_equity[i] * v_bond[i]) * p[i]) * 16)
        x_IR[i] = op.fminbound(f, 0.1, 0.8, maxfun=100)

        # portfolio optimisation assuming a constant correlation of -50%
        f = lambda x: -(x * r_equity[i] + (1 - x) * r_bond[i]) / np.sqrt(
            (x**2 * v_equity[i] + (1 - x)**2 * v_bond[i]
             + 2 * x * (1 - x) * np.sqrt(v_equity[i] * v_bond[i]) * p2[i]) * 16)
        x_P[i] = op.fminbound(f, 0.1, 0.8, maxfun=100)
    w = (x_P + x_IR) / 2
    return w
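# Note: the lambdas in buyAll encode a standard two-asset carry-over-volatility
# objective with portfolio variance x^2*v_e + (1-x)^2*v_b + 2*x*(1-x)*sqrt(v_e*v_b)*rho.
# A self-contained sketch of that objective; the function name, parameter names
# and the numeric values are illustrative, not from the original:
import numpy as np
from scipy import optimize as op

def neg_carry_over_vol(x, r_e, r_b, v_e, v_b, rho):
    """Negative carry/vol for equity weight x (bond weight 1 - x)."""
    carry = x * r_e + (1 - x) * r_b
    var = x**2 * v_e + (1 - x)**2 * v_b + 2 * x * (1 - x) * np.sqrt(v_e * v_b) * rho
    return -carry / np.sqrt(var * 16)  # the *16 mirrors the original's annualisation factor

# made-up carry, variance and correlation values, bounded as in the original
x_opt = op.fminbound(neg_carry_over_vol, 0.1, 0.8,
                     args=(0.02, 0.03, 0.01, 0.004, -0.3), maxfun=100)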
def rolling_corr(self, data_frame1, periods, data_frame2 = None, pairwise = False, flatten_labels = True): """ rolling_corr - Calculates rolling correlation wrapping around pandas functions Parameters ---------- data_frame1 : DataFrame contains time series to run correlations on periods : int period of rolling correlations data_frame2 : DataFrame (optional) contains times series to run correlation against pairwise : boolean should we do pairwise correlations only? Returns ------- DataFrame """ panel = pandas.rolling_corr(data_frame1, data_frame2, periods, pairwise = pairwise) try: df = panel.to_frame(filter_observations=False).transpose() except: df = panel if flatten_labels: if pairwise: series1 = df.columns.get_level_values(0) series2 = df.columns.get_level_values(1) new_labels = [] for i in range(len(series1)): new_labels.append(series1[i] + " v " + series2[i]) else: new_labels = [] try: series1 = data_frame1.columns except: series1 = [data_frame1.name] series2 = data_frame2.columns for i in range(len(series1)): for j in range(len(series2)): new_labels.append(series1[i] + " v " + series2[j]) df.columns = new_labels return df
def rolling_corr_with_N225(stock, window=5):
    d1 = pd.read_csv("".join(["stock_", stock, ".csv"]), index_col=0, parse_dates=True)
    d2 = pd.read_csv("stock_N225.csv", index_col=0, parse_dates=True)
    s1 = d1.asfreq('B')['Adj Close'].pct_change().dropna()
    s2 = d2.asfreq('B')['Adj Close'].pct_change().dropna()
    rolling_corr = pd.rolling_corr(s1, s2, window).dropna()
    plt.figure()
    rolling_corr.plot()
    plt.savefig("".join(["corr_", stock, ".png"]))
    plt.close()
    return rolling_corr
def transform_df_ASneeded(df, F2scores, shift, stock_name):
    # df.replace(df['F2(8,20)'][df['F2(8,20)'] < -5], -5)
    """Transform curr_df here if you want running means, etc."""
    yVar = "ooRelRet(3days rolling mean)"
    df[yVar] = pd.rolling_mean(df["ooRelRet"], 3)
    shift = -4
    df[yVar] = df[yVar].shift(shift)
    df["ooRelRet(nextDay)"] = df["ooRelRet"].shift(-1)
    if 'barraBeta' in df.columns:
        df['barraBeta'] = df['barraBeta'].fillna(method='ffill')
        df['barraBeta'] = df['barraBeta'].fillna(method='bfill')
    else:
        print 'barraBeta not available in this set'
    df['corr_Prices20'] = pd.rolling_corr(df["ClosePrice"], df["EWPoolClose"], 20)
    df['corr_Prices8'] = pd.rolling_corr(df["ClosePrice"], df["EWPoolClose"], 8)
    df['corr_Ret'] = pd.rolling_corr(df['ooRawRet'], df['ooPoolRet'], 40)
    newF2scores = []
    for f2score in F2scores:
        df[f2score][df[f2score] < -15] = -15
        df[f2score][df[f2score] > 15] = 15
        name = f2score + 'Exp'
        newF2scores.append(name)
        df[name] = df[f2score] * df['corr_Ret']
    xVar = 'F2'
    newF2scores = F2scores
    print newF2scores  # based on how you transformed df
    utils.dump_data_csv(df, stock_name,
                        t_fn="t_" + stock_name + "_transformedDF_" + xVar + "_vs_" + yVar + ".csv")
    return df, shift, xVar, yVar, newF2scores
def transform_df_ASneeded(df, F2scores, shift, stock_name):
    # df.replace(df['F2(8,20)'][df['F2(8,20)'] < -5], -5)
    """Transform curr_df here if you want running means, etc."""
    shift = -4
    days = 3
    df["ooRelRet(" + str(days) + "D rolling mean)"] = pd.rolling_mean(df["ooRelRet"], days).shift(-(days + 1))
    # technically incorrect, but necessary for graphs showing the relationship between F2 and RelRet
    df["ooRelRet(nextDay)"] = df["ooRelRet"].shift(-2)
    yVar = "ooRelRet(3D rolling mean)"
    if 'barraBeta' in df.columns:
        df['barraBeta'] = df['barraBeta'].fillna(method='ffill')
        df['barraBeta'] = df['barraBeta'].fillna(method='bfill')
    else:
        print 'barraBeta not available in this set'
    df['corr_Prices20'] = pd.rolling_corr(df["ClosePrice"], df["EWPoolClose"], 20)
    df['corr_Prices8'] = pd.rolling_corr(df["ClosePrice"], df["EWPoolClose"], 8)
    df['corr_Ret'] = pd.rolling_corr(df['ooRawRet'], df['ooPoolRet'], 40)
    newF2scores = []
    for f2score in F2scores:
        df[f2score][df[f2score] < -15] = -15
        df[f2score][df[f2score] > 15] = 15
        name = f2score + 'Exp'
        newF2scores.append(name)
        df[name] = df[f2score] * df['corr_Ret']
    xVar = 'F2'
    newF2scores = F2scores
    print newF2scores  # based on how you transformed df
    # utils.dump_data_csv(df, stock_name,
    #                     t_fn="t_" + stock_name + "_transformedDF_" + xVar + "_vs_" + yVar + ".csv")
    return df, shift, xVar, yVar, newF2scores
def benchmark_correlation(self, window=90, bench='^GSPC'):
    '''
    Parameters
    ----------
    window : int
        Rolling window for which to calculate the estimator
    bench : str
        Benchmark ticker
    '''
    y = self._get_estimator(window)
    x = self._get_estimator(window, ticker=bench)
    date = y.index
    corr = pandas.rolling_corr(x, y, window)

    if self._type == "Skew" or self._type == "Kurtosis":  # 'is' compares identity, not equality
        f = lambda x: "%i" % round(x, 0)
    else:
        f = lambda x: "%i%%" % round(x * 100, 0)

    # Figure args
    fig = plt.figure(figsize=(8, 6))
    cones = plt.axes()

    # Cones plot args
    # set the plots
    cones.plot(date, corr)
    # set the y-limits
    cones.set_ylim((corr.min() - 0.05, corr.max() + 0.05))
    # set and format the y-axis labels
    locs = cones.get_yticks()
    cones.set_yticklabels(map(f, locs))
    # turn on the grid
    cones.grid(True, axis='y', which='major', alpha=0.5)
    # set the title
    cones.set_title(self._type + ' (Correlation of ' + self._ticker + ' v. ' + bench.upper() +
                    ', daily ' + self._start.strftime("%Y-%m-%d") +
                    ' to ' + self._end.strftime("%Y-%m-%d") + ')')
    return fig, plt
def find_alphabeta():
    # Import stocks into DataFrame (currently has ALL dates, including non-trading)
    start_date = pd.to_datetime('12/31/07')  # start date per instructions
    end_date = pd.to_datetime('12/31/09')    # end date per instructions
    dates = pd.date_range(start_date, end_date)
    # stocklist = list_of_symbols('2008')
    # data = get_data(stocklist[0].values.tolist(), dates)
    stocklist = ['YUM', 'AAPL']
    data = get_data(stocklist, dates)
    spx_data = get_data(['$SPX'], dates)
    # macd = pd.ewma((pd.ewma(data, span=12) - pd.ewma(data, span=26)), span=9)
    # harmonic_mean = 1 / pd.rolling_mean(1 / data, window=20)
    # harmonic_indicator = 100 * (1 - harmonic_mean / pd.rolling_mean(data, window=20))
    correlation = pd.rolling_corr(data, window=20)  # pairwise correlations of all columns
    return
def analysis():
    """
    A simple API endpoint to compare data from two sensors
    Example: http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname
    """
    if 'wotkit_token' in session:
        a = request.args.get('a')
        b = request.args.get('b')
        hours = int(request.args.get('hours'))
        if (a and b and hours):
            msph = 3600000  # milliseconds per hour
            result = defaultdict(dict)
            sensoraDataSeries = WotKitDataToSeries(WoTKitgetSensorData(a, msph * hours))
            sensorbDataSeries = WotKitDataToSeries(WoTKitgetSensorData(b, msph * hours))

            # Labels object
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object
            sensoraDailyMeans = sensoraDataSeries.resample('H', how='mean')
            result['a']['mean'] = SeriesToList(sensoraDailyMeans)
            result['a']['rolling_mean'] = SeriesToList(pd.rolling_mean(sensoraDailyMeans, 5))
            result['a']['rolling_stdev'] = SeriesToList(pd.rolling_std(sensoraDailyMeans, 5))
            result['a']['rolling_skewness'] = SeriesToList(pd.rolling_skew(sensoraDailyMeans, 5))
            result['a']['rolling_kurtosis'] = SeriesToList(pd.rolling_kurt(sensoraDailyMeans, 5))

            # Sensor B object
            sensorbDailyMeans = sensorbDataSeries.resample('H', how='mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList(pd.rolling_mean(sensorbDailyMeans, 5))
            result['b']['rolling_stdev'] = SeriesToList(pd.rolling_std(sensorbDailyMeans, 5))
            result['b']['rolling_skewness'] = SeriesToList(pd.rolling_skew(sensorbDailyMeans, 5))
            result['b']['rolling_kurtosis'] = SeriesToList(pd.rolling_kurt(sensorbDailyMeans, 5))

            # Comparison object
            result['comparison']['correlation'] = SeriesToList(pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5))
            result['comparison']['covariance'] = SeriesToList(pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5))

            json_response = json.dumps(result)
            return Response(json_response, content_type='application/json')
def test_ts_corr(self):
    self.env.add_operator('ts_corr', {
        'operator': OperatorTSCorr,
        'arg1': {'value': [3, 5]},
    })
    string1 = 'ts_corr(2, open1, open2)'
    gene1 = self.env.parse_string(string1)
    self.assertFalse(gene1.validate())
    string2 = 'ts_corr(5, open1, open2)'
    gene2 = self.env.parse_string(string2)
    self.assertTrue(gene2.validate())
    self.assertEqual(gene2.dimension, '')
    self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
    date1 = self.env.shift_date(self.date1, 4)
    df = pd.rolling_corr(self.env.get_data_value('open1'),
                         self.env.get_data_value('open2'),
                         5).iloc[4:]
    self.assertTrue(
        frame_equal(
            gene2.eval(self.env, date1, self.date2),
            df)
        )
def Relative_Arbitrage():
    sp500 = web.DataReader('^GSPC', data_source='yahoo', start='1/1/2000', end='5/1/2015')
    F = web.DataReader('IBM', data_source='yahoo', start='1/1/2000', end='5/1/2015')
    Struct = {'sp500': sp500['Close'], 'F': F['Close']}  # define as dict
    DF = pd.DataFrame.from_dict(Struct)                  # convert dict into DataFrame
    DF['3d Correl'] = pd.rolling_corr(DF['sp500'], DF['F'], window=2)
    DF['sp500_Return'] = np.log(DF['sp500'] / DF['sp500'].shift(2))
    DF['F_Return'] = np.log(DF['F'] / DF['F'].shift(2))
    DF['sp500 op'] = 0  # was 'Sp500 op', which created a second, unused column
    DF['F op'] = 0
    DF['Port Op'] = 0
    DF['Port Op'] = np.where((DF['3d Correl'] < -0.4), 1, 0)
    DF['Port Op'] = np.where((DF['Port Op'].shift(1) == 1) & (DF['3d Correl'] < 0.95), 1, 0)
    DF['sp500 op'] = np.where((DF['Port Op'] == 1) & (DF['F_Return'] > 0.01), 1, 0)
    DF['F op'] = np.where(DF['sp500 op'] == 1, -1, 0)
    DF['sp500 1d return'] = np.log(DF['sp500'] / DF['sp500'].shift(1))
    DF['F 1d return'] = np.log(DF['F'] / DF['F'].shift(1))
    DF['Strategy'] = (DF['sp500 1d return'] * DF['sp500 op'].shift(1)
                      + DF['F 1d return'] * DF['F op'].shift(1))
    DF[['sp500 1d return', 'Strategy']].cumsum().apply(np.exp).plot(grid=True, figsize=(8, 5))
def density_pCorrCoef(dataSeriesOne, dataSeriesTwo):
    correlation = pd.rolling_corr(dataSeriesOne, dataSeriesTwo,
                                  window=16, min_periods=10, center=True)
    return correlation.dropna()
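# Note: min_periods and center carry over unchanged to the modern method form.
# A minimal sketch on placeholder Series (the data is illustrative):
import numpy as np
import pandas as pd

s1 = pd.Series(np.random.randn(100))
s2 = pd.Series(np.random.randn(100))
correlation = s1.rolling(window=16, min_periods=10, center=True).corr(s2).dropna()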
ttl = ('Volume (' + co1 + ')')
plt.ylabel(ttl)
plt.show()

# Now process the data
# Calculate daily returns
firstCo_rets = firstCo['Close'].pct_change()
secondCo_rets = secondCo['Close'].pct_change()
thirdCo_rets = thirdCo['Close'].pct_change()
fourthCo_rets = fourthCo['Close'].pct_change()

# Calculate and plot one-year moving correlations
oneYrEnergy = pd.rolling_corr(firstCo_rets, secondCo_rets, 250).plot()
ttl = ('One Year Rolling Correlation between 2 ' + firstInd + ' Companies')
plt.title(ttl)
plt.show()
oneYrRetail = pd.rolling_corr(thirdCo_rets, fourthCo_rets, 250).plot()
ttl = ('One Year Rolling Correlation between 2 ' + secondInd + ' Companies')
plt.title(ttl)
plt.show()

# Consider volatility
# Build a least-squares regression to model the dynamic relationship
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import numpy as np
ax = plt.axis()
x = np.linspace(ax[0], ax[1] + 0.01)
plt.plot(x, model.beta[1] + model.beta[0] * x, 'b', lw=2)
plt.grid(True)
plt.axis('tight')
plt.xlabel('Euro STOXX 50 returns')
plt.ylabel('VSTOXX returns')

# Output correlation between the two financial time series
rets.corr()
#            EUROSTOXX    VSTOXX
# EUROSTOXX   1.000000 -0.735117
# VSTOXX     -0.735117  1.000000

# Plot 252-day rolling correlations
pd.rolling_corr(xdat, ydat, window=252).plot(grid=True, style='b')

#############################
#####High Frequency Data#####
#############################
url1 = 'http://hopey.netfonds.no/posdump.php?'
url2 = 'date=%s%s%s&paper=AAPL.O&csv_format=csv'  # 'APPL.O' in the original is a typo
url = url1 + url2
year = '2014'
month = '09'
days = ['22', '23', '24', '25']
APPL = pd.DataFrame()
for day in days:
    print url % (year, month, day)
# print(100 * portfolio_returns.groupby(portfolio_returns.index.year).sum())
# print(100 * np.sqrt(12) * portfolio_returns.groupby(portfolio_returns.index.year).std())
# portfolio_returns.cumsum().plot()
# plt.legend()
# plt.grid()
# plt.show()

# correlation plot
# plt.matshow(portfolio_returns.corr())
# plt.xticks(range(len(portfolio_returns.columns)), portfolio_returns.columns)
# plt.yticks(range(len(portfolio_returns.columns)), portfolio_returns.columns)
# plt.colorbar()
# plt.show()

stats_df = stats_df[[
    'EW_GTAA', 'EW_GTAA_Universe', 'RiskWt_GTAA', 'RiskWt_GTAA_Universe',
    'MomoPortfoli_QO', 'MomoPortfolio_Q', '70/30_QO_MP/RW_GTAA',
    '70/30_QQQE/RW_GTAA_bm', '60/40_ACWI/AGG', 'S&P500'
]]
# print(stats_df)

tcor = pd.rolling_corr(portfolio_returns['RiskWt_GTAA'], portfolio_returns['MomoPortfoli_QO'], 6)
tcor.plot()
plt.show()

# ts1 = 100 * portfolio_returns.groupby(portfolio_returns.index.year).sum()
# ts2 = 100 * np.sqrt(12) * portfolio_returns.groupby(portfolio_returns.index.year).std()
# stats_df.to_csv("C:/Python27/Git/SMA_GTAA/Summary_Statistics.csv")
# ts1.to_csv("C:/Python27/Git/SMA_GTAA/Return_Summary.csv")
# ts2.to_csv("C:/Python27/Git/SMA_GTAA/Risk_Summary.csv")
pd.rolling_mean(close_px, 60).plot(logy=True)

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True, figsize=(12, 7))
aapl_px = close_px.AAPL['2005':'2009']
ma60 = pd.rolling_mean(aapl_px, 60, min_periods=50)  # window was 50, inconsistent with the name
ewma60 = pd.ewma(aapl_px, span=60)
aapl_px.plot(style='k-', ax=axes[0])
ma60.plot(style='k--', ax=axes[0])
aapl_px.plot(style='k-', ax=axes[1])
ewma60.plot(style='k--', ax=axes[1])
axes[0].set_title('Simple MA')
axes[1].set_title('Exponentially-weighted MA')  # was axes[0] again, overwriting the first title

spx_px = close_px_all['SPX']
spx_rets = spx_px / spx_px.shift(1) - 1
returns = close_px.pct_change()
corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)
corr.plot()
corr = pd.rolling_corr(returns, spx_rets, 125, min_periods=100)
corr.plot()

from scipy.stats import percentileofscore
score_at_2percent = lambda x: percentileofscore(x, 0.02)
result = pd.rolling_apply(returns.AAPL, 250, score_at_2percent)
result.plot()

rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts.resample('15min', how='ohlc')
%timeit ts.resample('15min', how='ohlc')
import quandl
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from matplotlib import style

style.use('fivethirtyeight')

fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)

HPI_data2 = pd.read_pickle(
    'C:/Users/yadag/Desktop/PythonProgrammingPractice/DataAnalysis_with_Python/fiddy_states3.pickle'
)

TX_AK_Corr = pd.rolling_corr(HPI_data2['TX'], HPI_data2['AK'], 12)

HPI_data2['TX'].plot(ax=ax1, label='TX HPI')
HPI_data2['AK'].plot(ax=ax1, label='AK HPI')
TX_AK_Corr.plot(ax=ax2, label='TX_AK_Corr')

plt.legend(loc=4)
plt.show()
""" import json import numpy as np import pandas as pd import pandas.io.data as web import datetime as dt from tornado.web import RequestHandler START_DATE=dt.datetime(2000,1,1) NAMES = ['AAPL','XOM','MSFT','JNJ','BRK.B','WFC','GE','PG','JPM','PFE'] symbols = pd.concat([web.get_data_yahoo(i, START_DATE)['Adj Close'] for i in NAMES],1) symbols.columns = NAMES symbols.index = [i.date() for i in list(symbols.index)] symbols.index.names = ["date"] panel_corr = pd.rolling_corr(symbols.pct_change(),21) dates = np.array(map(lambda d: d.toordinal(), symbols.index)) class StockHandler(RequestHandler): def get(self): self.write(symbols.to_csv()) self.finish() class CorrelationHandler(RequestHandler): encoder = json.JSONEncoder() def get_correlation(self,*date): f = lambda x: x[x<0][-1]; find_date = lambda d,dates: list(np.argwhere(f((dates-dt.datetime(*d).toordinal()))==(dates-dt.datetime(*d).toordinal())).flat)[0]
df = pd.DataFrame(data)
print df

df.apply(np.mean, axis=1).head(3)

# passing a lambda is a common pattern
df.apply(lambda x: (x['Open'] - x['Close']), axis=1).head(3)

# define a more complex function
def percent_change(x):
    return (x['Open'] - x['Close']) / x['Open']

print df.apply(percent_change, axis=1).head(3)

# change axis; axis=0 is the default
print df.apply(np.mean, axis=0)

def greater_than_x(element, x):
    return element > x

print df.Open.apply(greater_than_x, args=(100,)).head(3)

# This can be used in conjunction with subset capabilities
mask = df.Open.apply(greater_than_x, args=(100,))
print df.Open[mask].head()

print pd.rolling_apply(df.Close, 5, np.mean)

# There are actually several built-in rolling functions
print pd.rolling_corr(df.Close, df.Open, 5)[:5]
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.DataAccess as da  # needed for da.DataAccess below
import QSTK.qstkutil.tsutil as tsu
import datetime as dt
import pandas as pd
import numpy as np

dt_start = dt.datetime(2006, 1, 1)
dt_end = dt.datetime(2011, 12, 31)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

dataobj = da.DataAccess('Yahoo', cachestalltime=24)
ls_symbols = dataobj.get_symbols_from_list('sp5002012')
ls_symbols.append('SPY')
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))

for s_key in ls_keys:
    d_data[s_key] = d_data[s_key].fillna(method='ffill')
    d_data[s_key] = d_data[s_key].fillna(method='bfill')
    d_data[s_key] = d_data[s_key].fillna(1.0)

close_px_AAPL = d_data['close']['AAPL']
close_px_MSFT = d_data['close']['MSFT']
close_px_XOM = d_data['close']['XOM']
returns = d_data['close'] / d_data['close'].shift(1) - 1

aapl_std250 = pd.rolling_std(close_px_AAPL, 250, min_periods=10)
aapl_std250.plot()

corr = pd.rolling_corr(returns['AAPL'], returns['SPY'], 125, min_periods=100)
def correlation_study(file='challenge-data-v2.csv'):
    sns.set_style("whitegrid")

    # *****************************************************
    # Loading the data
    data = pd.read_csv(file, index_col='event_date', parse_dates=True)  # was hard-coded, ignoring the argument

    # *****************************************************
    # Changing the column names to a more suitable (shorter) form
    data.columns = ['sups', 'moff', 'mon', 'hd']

    # *****************************************************
    # Scaling the values to avoid working with very large numbers
    for feature in data.columns:
        if data[feature].values.dtype != 'object':
            scale_factor = np.round(np.log10(np.mean(data[feature])))
            data[feature] = data[feature] / 10**scale_factor
            print Fore.BLUE + 'The feature: ' + feature + ' was rescaled by a factor of ' + str(10**scale_factor)
    print 'IMPORTANT: Take this rescaling into account when reading the plots'
    print(Style.RESET_ALL)

    # *****************************************************
    # Computation of the distributions per year for the different features
    years = np.unique(data.index.year)

    # Definition of the figure
    figid = plt.figure('Temporal distributions by year', figsize=(20, 10))
    # Definition of the matrix of plots
    col = len(data.columns[data.columns != 'hd'])
    gs = grds.GridSpec(3, col)
    for i in range(col):
        ax1 = plt.subplot(gs[0, i])
        ax2 = plt.subplot(gs[1, i])
        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)].values
            ax1.plot(np.arange(len(dat)), dat, '-')
            legend.append(str(y))
        ax1.legend(legend)
        ax1.set_title('Daily ' + data.columns[i])

        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)].resample('M').values
            ax2.plot(np.arange(len(dat)) + 1, dat, '-o')
            legend.append(str(y))
        ax2.legend(legend)
        ax2.set_title('Monthly ' + data.columns[i])
        plt.xlim([1, 12])

        ax3 = plt.subplot(gs[2, i])
        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)]
            # dat = dat.groupby(data.index.dayofweek).mean()
            dat = dat.groupby(dat.index.dayofweek).mean()
            dat.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
            dat.plot(style='-o')
            legend.append(str(y))
        ax3.legend(legend)
        ax3.set_title('Day week ' + data.columns[i])

    # *****************************************************
    # Computation of the distribution of activity per month for the different
    # time series present in the data
    years = np.unique(data.index.year)

    # Definition of the figure
    figid = plt.figure('Monthly distribution of features', figsize=(20, 10))
    # Definition of the matrix of plots. The situation here is more complex,
    # which is why a [rows x cols] matrix of subplots is needed.
    col = len(data.columns[data.columns != 'hd'])
    rows = len(years)
    gs = grds.GridSpec(rows, col)
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    colors = sns.hls_palette(12, l=.5, s=.6)
    for c in range(col):
        for r in range(rows):
            ax1 = plt.subplot(gs[r, c])
            dat_year = data[data.columns[c]][str(years[r])]
            for m in range(1, 13):
                dat = dat_year[dat_year.index.month == m].values
                ax1.plot(np.arange(len(dat)), dat, '-', color=colors[m - 1])
            if r == 0 and c == col - 1:
                ax1.legend(months, bbox_to_anchor=(1, 1), loc='upper left',
                           borderaxespad=0., ncol=2, fancybox=True, frameon=True)
            if c == 0:
                ax1.set_ylabel('Year: ' + str(years[r]))
            if r == 0:
                ax1.set_title('Feature: ' + str(data.columns[c]))

    # *****************************************************
    # Computation of the distribution of cross-correlations per month for the
    # different time series present in the data
    years = np.unique(data.index.year)

    # Definition of the figure
    figid = plt.figure('Monthly features cross-correlations', figsize=(20, 10))
    # Definition of the matrix of plots
    feature1 = [0, 0, 2]
    feature2 = [2, 1, 1]
    col = len(data.columns[data.columns != 'hd'])
    rows = len(years)
    gs = grds.GridSpec(rows, col)
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # colors = sns.color_palette("Set2", 12)
    colors = sns.hls_palette(12, l=.5, s=.6)
    for c in range(col):
        for r in range(rows):
            ax1 = plt.subplot(gs[r, c])
            dat_year_feat1 = data[data.columns[feature1[c]]][str(years[r])]
            dat_year_feat2 = data[data.columns[feature2[c]]][str(years[r])]
            for m in range(1, 13):
                dat_feat1 = dat_year_feat1[dat_year_feat1.index.month == m].values
                dat_feat1 = np.subtract(dat_feat1, np.mean(dat_feat1))
                dat_feat2 = dat_year_feat2[dat_year_feat2.index.month == m].values
                dat_feat2 = np.subtract(dat_feat2, np.mean(dat_feat2))
                dat = sgn.correlate(dat_feat1, dat_feat2, mode='same')
                ax1.plot(np.linspace(-15, 15, len(dat)), dat, '-', color=colors[m - 1])
            if c == 0:
                ax1.set_ylabel('Year: ' + str(years[r]))
            if r == 0 and c == col - 1:
                ax1.legend(months, bbox_to_anchor=(1, 1), loc='upper left',
                           borderaxespad=0., ncol=2, fancybox=True, frameon=True)
            if r == 0:
                ax1.set_title('Xcorr: ' + str(data.columns[feature1[c]]) +
                              ' and ' + str(data.columns[feature2[c]]))

    # *****************************************************
    # Rolling correlation and weekly activity for the feature pairs
    years = np.unique(data.index.year)
    # Definition of the matrix of plots
    feature1 = [0, 0, 2]
    feature2 = [2, 1, 1]
    for f in range(len(feature1)):
        figid = plt.figure('Rolling correlation coefficient and weekly activity of ' +
                           data.columns[feature1[f]] + ' and ' + data.columns[feature2[f]],
                           figsize=(20, 10))
        rows = len(years)
        gs = grds.GridSpec(4, 4)
        for r in range(rows):
            ax1 = plt.subplot(gs[0, :])
            ax2 = plt.subplot(gs[1, :])
            ax3 = plt.subplot(gs[2, :])
            dat_year_feat1 = data[data.columns[feature1[f]]][str(years[r])]
            dat_year_feat2 = data[data.columns[feature2[f]]][str(years[r])]
            ref = ax1.plot(dat_year_feat1.resample('W'))
            ax2.plot(dat_year_feat2.resample('W'))
            xcorr = pd.rolling_corr(dat_year_feat1, dat_year_feat2, 14)
            ax3.plot(xcorr)

            ax4 = plt.subplot(gs[3, r])
            n, bins, patches = ax4.hist(xcorr.values[np.logical_not(np.isnan(xcorr.values))],
                                        bins=np.round(len(xcorr) / 6),
                                        facecolor=ref[0].get_color(),
                                        edgecolor=ref[0].get_color())
            mediana = ax4.axvline(np.median(xcorr.values[np.logical_not(np.isnan(xcorr.values))]),
                                  color='r', linestyle='--')
            ax4.set_xlim([-1, 1])
            ax4.set_xlabel('CorrCoef year ' + str(years[r]))
            ax4.set_label(mediana)

            print '-------------------------------------------------'
            print 'Correlation distribution for year ' + str(years[r])
            print 'Mean:', xcorr.mean()
            print 'Median:', xcorr.median()
            print 'Standard deviation:', xcorr.std()
            print 'Kurtosis:', xcorr.kurtosis()  # kurtosis is mainly driven by outliers, not the central peak
            print 'Skewness:', xcorr.skew()  # note: no normal-distribution baseline is subtracted here

            if (np.abs(xcorr.skew())) < 0.65:
                mu, sigma = stat.norm.fit(xcorr.values[np.logical_not(np.isnan(xcorr.values))])
                print 'Normal distribution fitted!'
                print 'mu=' + str(mu)
                print 'sigma=' + str(sigma)
                fitted_normal = mlab.normpdf(bins, mu, sigma) * np.max(
                    xcorr.values[np.logical_not(np.isnan(xcorr.values))])
                # print fitted_normal
                normfit = ax4.plot(bins, fitted_normal * np.max(n), 'r--',
                                   linewidth=2, color="#3498db")
                ax4.legend(['Median ' + str(round(xcorr.median(), 2)),
                            'N(' + str(round(mu, 1)) + ',' + str(round(sigma, 2)) + ')'],
                           loc='best')
            else:
                ax4.legend(['Median ' + str(round(xcorr.median(), 2))], loc='best')

        ax1.set_ylabel(data.columns[feature1[f]])
        ax2.set_ylabel(data.columns[feature2[f]])
        ax3.set_ylabel('Rolling correlation, 14 days period')
        ax1.legend(years, bbox_to_anchor=(1, 1), loc='upper left',
                   borderaxespad=0., ncol=1, fancybox=True, frameon=True)

    if file == 'challenge-data-v2.csv':
        conclusions = ""
    return True
def corr(self):
    temp = pd.concat([
        self.underlyingYieldRate_5,
        self.ADV_20(20), self.ADV_20(15), self.ADV_20(10), self.ADV_20(5),
        self.TurnOver_20(20), self.TurnOver_20(15), self.TurnOver_20(10),
        self.RSI_20(20), self.RSI_20(15), self.RSI_20(10), self.RSI_20(5),
        self.RE_20(20), self.RE_20(15), self.RE_20(10), self.RE_20(5),
        self.VMC(), self.LDECC_5()
    ], axis=1)
    temp = pd.DataFrame(
        np.matrix(temp), index=temp.index,
        columns=[
            'underlyingYieldRate_5',
            'ADV_20(20)', 'ADV_20(15)', 'ADV_20(10)', 'ADV_20(5)',
            'TurnOver_20(20)', 'TurnOver_20(15)', 'TurnOver_20(10)',
            'RSI_20(20)', 'RSI_20(15)', 'RSI_20(10)', 'RSI_20(5)',
            'RE_20(20)', 'RE_20(15)', 'RE_20(10)', 'RE_20(5)',
            'VMC()', 'LDECC_5()'
        ])
    # temp = pd.concat([self.yield_rate, self.ADV_20(20), self.TurnOver_20(20),
    #                   self.RSI_20(20), self.RE_20(20), self.VMC(), self.LDECC_5()], axis=1)
    ADV_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['ADV_20(20)'], 10)
    ADV_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['ADV_20(15)'], 10)
    ADV_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['ADV_20(10)'], 10)
    ADV_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['ADV_20(5)'], 10)
    TurnOver_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['TurnOver_20(20)'], 10)
    TurnOver_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['TurnOver_20(15)'], 10)
    TurnOver_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['TurnOver_20(10)'], 10)
    RSI_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RSI_20(20)'], 10)
    RSI_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RSI_20(15)'], 10)
    RSI_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RSI_20(10)'], 10)
    RSI_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RSI_20(5)'], 10)
    RE_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RE_20(20)'], 10)
    RE_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RE_20(15)'], 10)
    RE_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RE_20(10)'], 10)
    RE_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['RE_20(5)'], 10)
    VMC_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['VMC()'], 10)
    LDECC_corr = pd.rolling_corr(temp['underlyingYieldRate_5'], temp['LDECC_5()'], 10)
    corr = pd.concat([
        ADV_20_corr, ADV_15_corr, ADV_10_corr, ADV_5_corr,
        TurnOver_20_corr, TurnOver_15_corr, TurnOver_10_corr,
        RSI_20_corr, RSI_15_corr, RSI_10_corr, RSI_5_corr,
        RE_20_corr, RE_15_corr, RE_10_corr, RE_5_corr,
        VMC_corr, LDECC_corr
    ], axis=1).dropna()
    self.corr = pd.DataFrame(np.matrix(corr), index=corr.index, columns=self.func_name)
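# Note: the block of near-identical calls above collapses to a single loop
# against the modern API. A sketch with a synthetic stand-in for temp (the
# column set is shortened for illustration):
import numpy as np
import pandas as pd

temp = pd.DataFrame(np.random.randn(60, 4),
                    columns=['underlyingYieldRate_5', 'ADV_20(20)', 'RSI_20(20)', 'VMC()'])
target = temp['underlyingYieldRate_5']
feature_cols = [c for c in temp.columns if c != 'underlyingYieldRate_5']
corr = pd.concat({c: target.rolling(10).corr(temp[c]) for c in feature_cols},
                 axis=1).dropna()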
# ===============================================
# HPI_data['NY_12MSTD_old'] = pd.rolling_std(HPI_data['NY'], 12)  # old way
# HPI_data['NY_12MSTD'] = HPI_data['NY'].rolling(window=12, center=False).std()
# print(HPI_data[['NY', 'NY_12MSTD', 'NY_12MSTD_old']].tail(9))
# print(HPI_data[['NY', 'NY_12MSTD']].tail(9))
# Nice, but standard deviation is on a totally different scale; plotting the two together normally isn't helpful.
# Plot stuff:
# HPI_data[['NY', 'NY_12MSTD']].plot(ax=ax1)
# So we graph it on a different graph:
# HPI_data['NY_12MSTD'].plot(ax=ax2)

# Get a rolling correlation b/w NY and the US benchmark over 12 months:
# ===============================================
# HPI_data['NY_12MCOR_TO_MEAN'] = pd.rolling_corr(HPI_data['NY'], HPI_data['USA_AVE'], 12)  # old way
HPI_data['NY_12MCOR_TO_MEAN'] = HPI_data['NY'].rolling(window=12).corr(other=HPI_data['USA_AVE'])
print(HPI_data[['NY', 'USA_AVE', 'NY_12MCOR_TO_MEAN']])

HPI_data['NY'].plot(ax=ax1, label='NY_HPI')
HPI_data['USA_AVE'].plot(ax=ax1, label='US_AVE')
HPI_data['NY_12MCOR_TO_MEAN'].plot(ax=ax2, label='NY-US_AVE-Correlation')

ax1.legend(loc=2)
plt.legend(loc=4)
plt.show()
f = plt.figure(figsize=(12, 8))
ax = f.add_subplot(111)
stats.probplot(aapl, dist="norm", plot=ax)
plt.show()

# Volatility
min_periods = 75
vol = pd.rolling_std(daily_pct_change, min_periods) * np.sqrt(min_periods)
vol.plot(figsize=(10, 8))

# Correlation - fixed, over the whole period
daily_pct_change.corr()
# - fixed, on a subset
daily_pct_change["2012":"2013"].corr()
# - rolling, over a one-year window, on 2 stocks
rolling_corr = pd.rolling_corr(daily_pct_change["AAPL"], daily_pct_change["MSFT"], window=252).dropna()

# monthly calculations
aaplM = aapl.resample("M", how="last")
aaplMpct_chg = aaplM.pct_change()
aaplMpct_chg.hist(bins=50, figsize=(12, 8))
aaplMpct_chg.describe(percentiles=[0.025, 0.5, 0.975])

# ----- get some info on the most traded stocks -----
volumes = all_data[["Volume"]].reset_index()
daily_volume = volumes.pivot("Date", "Ticker", "Volume")
vol_mean = pd.DataFrame(daily_volume.mean(), columns=["vol_mean"])
more_vol = vol_mean.sort_index(by="vol_mean", ascending=False)
it_more_vol_tickers = more_vol[:12].index.tolist()
# todo: > mean value
ax2 = sm.graphics.tsa.plot_acf(df_last['log_returns_min'], lags=10, ax=ax2)
ax3 = fig.add_subplot(133)
ax3 = sm.graphics.tsa.plot_pacf(df_last['log_returns_min'], lags=30, ax=ax3)

# In[ ]:

ACF0 = sm.tsa.stattools.acf(df_last['log_returns_min'], nlags=14)
# ACF0.plot()
plt.bar(np.arange(14), ACF0[1:15])

# In[ ]:

window = 500
aa = pd.rolling_corr(df_last['log_returns_min'], df_last['log_returns_min'].shift(1), window)
fig = plt.figure(figsize=(18, 6))
fig.suptitle('Minute by Minute Returns Rolling Correlation (%s minutes)' % (window), y=1.05, fontsize=20)
ax1 = fig.add_subplot(121)
ax1 = plt.hist(aa.ix[window:])
ax1 = plt.axvline(x=0, color='black')
ax2 = fig.add_subplot(122)
ax2 = aa.plot()
ax2 = plt.axhline(y=0, color='black')

# In[ ]:

minute_lag = np.zeros((len(df_last), 7))
for i in range(0, 7):
def ts_corrFn(df, col1, col2, min_periods, max_periods):
    if not max_periods:
        max_periods = len(df[col1])
    return pd.rolling_corr(df[col1], df[col2], max_periods, min_periods=min_periods)
""" Another interesting visualization would be to compare the Texas HPI to the overall HPI. Then do a rolling correlation between the two of them. The assumption would be that when correlation was falling, there would soon be a reversion. Every time correlation drops, you should in theory sell property in the are that is rising, and then you should buy property in the area that is falling. The idea is that, these two areas are so highly correlated that we can be very confident that the correlation will eventually return back to about 0.98. As such, when correlation is -0.5, we can be very confident in our decision to make this move, as the outcome can be one of the following: HPI forever diverges like this and never returns (unlikely), the falling area rises up to meet the rising one, in which case we win, the rising area falls to meet the other falling one, in which case we made a great sale, or both move to re-converge, in which case we definitely won out. """ fig = plt.figure() ax1 = plt.subplot2grid((2,1), (0,0)) ax2 = plt.subplot2grid((2,1), (1,0), sharex=ax1) AZ_AK_12corr = pd.rolling_corr(HPI_data['AZ'], HPI_data['AK'], 12) HPI_data['AZ'].plot(ax=ax1, label="AZ HPI") HPI_data['AK'].plot(ax=ax1, label="AK HPI") ax1.legend(loc=4) AZ_AK_12corr.plot(ax=ax2) plt.show()
pd.options.display.max_columns = None
print(main_df.head())

pickle_out = open('fiddy_states3.pickle', 'wb')
pickle.dump(main_df, pickle_out)
pickle_out.close()


def HPI_Benchmark():
    df = quandl.get('FMAC/HPI_USA', authtoken=api_key)
    df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
    return df


fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)

HPI_data = pd.read_pickle('fiddy_states3.pickle')

TX_AK_12corr = pd.rolling_corr(HPI_data['TX'], HPI_data['AK'], 12)  # was HPI_date, a NameError

HPI_data['TX'].plot(ax=ax1, label='TX HPI')
HPI_data['AK'].plot(ax=ax1, label='AK HPI')
ax1.legend(loc=4)

TX_AK_12corr.plot(ax=ax2, label='TX_AK_12corr')

plt.legend(loc=2)
plt.show()
for mon in xrange(0, len(ts1), 12):
    ann1.append(np.sum(ts1[mon:mon + 12]))
    ann2.append(np.sum(ts2[mon:mon + 12]))

# calculate moving correlation
# Mapping for the year
x = np.arange(styr, edyr + 1, 1.)
y = np.arange(1., edyr - styr + 1, 1.)
X, Y = np.meshgrid(x, y)

stat = np.empty((tstep - 1, tstep))
stat.fill(np.nan)
data1 = Series(ann1, index=dates)
data2 = Series(ann2, index=dates)
for wind in xrange(1, tstep):
    print pd.rolling_corr(data1, data2, window=wind)
    stat[wind - 1, :] = pd.rolling_corr(data1, data2, window=wind)  # tstep-wind-1
    print wind

# create figure
fig = plt.figure(figsize=(12, 8), dpi=100, facecolor="white")
font = {'family': 'serif', 'color': 'darkred', 'weight': 'normal', 'size': 20}
clevs = np.arange(-1.0, 1.1, 0.1)
im = plt.contourf(X, Y, stat, clevs, cmap=plt.cm.jet)
del clevs
cb = plt.colorbar(im, ticks=np.arange(-1.0, 1.1, 0.2))
plt.xlabel("ENDING YEAR")
plt.ylabel("WINDOW LENGTH")
plt.title('CMIP5 Global Moving COR(%s,%s) %s-%s' % (forcing[i], forcing[j], str(styr), str(edyr)))
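# Note: the window-length sweep above can be written compactly against the
# modern API, one matrix row per window length. A sketch on synthetic annual
# series (the dimensions are illustrative):
import numpy as np
import pandas as pd

ann1 = pd.Series(np.random.randn(100))
ann2 = pd.Series(np.random.randn(100))

tstep = len(ann1)
stat = np.full((tstep - 1, tstep), np.nan)
for wind in range(1, tstep):
    stat[wind - 1, :] = ann1.rolling(window=wind).corr(ann2).values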
ret_data2 = ret_data2[~np.isnan(ret_data2)]
model = pd.ols(y=ret_data2, x=ret_data1)
plt.plot(ret_data1, ret_data2, 'r.')  # scatter the return pairs
ax = plt.axis()  # grab axis values
x = np.linspace(ax[0], ax[1] + 0.01)
plt.plot(x, model.beta[1] + model.beta[0] * x, 'b', lw=2)
plt.grid(True)
plt.axis('tight')
plt.xlabel('Nifty Returns')
plt.ylabel('ACC Returns')

np.correlate(ret_data1, ret_data2)  # note: a sliding inner product, not a correlation coefficient; np.corrcoef gives the latter
pd.rolling_corr(ret_data1, ret_data2, window=252).plot(grid=True, style='b')

# Augmented Dickey-Fuller Test
stock = web.DataReader('AMBUJACEM.NS', data_source='yahoo', start='4/4/2008', end='4/4/2016')
x = stock['Close']
ret_stock = np.log(x / x.shift(1))
ret_stock = ret_stock[~np.isnan(ret_stock)]
lag = 1
from convert_to_timeseries import convert_data_to_timeseries

# Input file containing data
input_file = 'data_timeseries.txt'

# Load data
data1 = convert_data_to_timeseries(input_file, 2)
data2 = convert_data_to_timeseries(input_file, 3)
dataframe = pd.DataFrame({'first': data1, 'second': data2})

# Print max and min
print('\nMaximum:\n', dataframe.max())
print('\nMinimum:\n', dataframe.min())

# Print mean
print('\nMean:\n', dataframe.mean())
print('\nMean row-wise:\n', dataframe.mean(1)[:10])

# Plot rolling mean
pd.rolling_mean(dataframe, window=24).plot()

# Print correlation coefficients
print('\nCorrelation coefficients:\n', dataframe.corr())

# Plot rolling correlation
plt.figure()
pd.rolling_corr(dataframe['first'], dataframe['second'], window=60).plot()

plt.show()
title('title')
show()

plot(gg, 'y')
legend()
show()

# y = pd.rolling_mean(z, 90)
# figure()
# plot(z, 'r')
# xlabel('x')
# ylabel('y')
# title('REPO Index')
# show()

repo = repo[['CLOSE']]
repo.columns = ['REPO']
# y = micex['CLOSE']
# mix = y.resample('Q', how='mean')
s = kv2['Ошибки и пропуски'] / 1000  # the "errors and omissions" column
figure()
dr = gg.values
# plot(mix, 'g', label='MICEX')
plot(gg.values, 'r', label='REPO')
plot(s, 'y', label='NEO')  # net errors and omissions
f = pd.DataFrame({'NEO': s, 'REPO': dr}).plot()
corr = pd.rolling_corr(s, dr, window=5)  # the original discarded this result
corr.plot(style='.')
f.legend(['REPO', 'NEO'])
show()
# repo_only.to_excel('D:\work\data\Репо.xlsx')
add_cls['Consecutive Up Days'] = ((add_cls['Daily Change'] - add_cls['Daily Change'].shift()) > 0).apply(
    lambda y: y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1))

# every object in the add_cls dictionary is a DataFrame,
# so turn it into a panel and join it with the original
Q = PNL.join(pd.Panel(add_cls))

# this section is for functions that return panels;
# they join with a simple panel.join call
Q = Q.join(PNL.pct_change(periods=1), how='inner', rsuffix=' Pct Change')

# this one handles cases when a panel hasn't been fitted to some built-in yet
# (this would probably be the function to wrap in a GenericWrapper for pipeline stuff)
Q = Q.join(Q.apply(lambda x: pd.rolling_window(x, 5, 'gaussian', std=0.1)), rsuffix=' Gaussian Mean')

# lag a few days
NUM_LAG_DAYS = 3
Q = Q.join([Q.shift(k).add_suffix(' Lag ' + str(k)) for k in range(1, NUM_LAG_DAYS + 1)])

# add some rolling correlation between series
Q = Q.join(pd.rolling_corr(Q['Daily Change'], pairwise=True, window=5).transpose(2, 0, 1))

# add some rolling std
Q = Q.join(pd.Panel({'rolling std': pd.rolling_std(Q['Daily Change'], 5)}))
Q = Q.join(Q.apply(lambda x: pd.rolling_std(x, 5)), rsuffix=' rolling std')

print Q.items.tolist()
print Q.to_frame().unstack().head()
def ts_operation(df1, df2, n):
    return pd.rolling_corr(df1, df2, n)
pickle_out = open('fiddy_states3.pickle', 'wb')
pickle.dump(main_df, pickle_out)
pickle_out.close()

print df.head()


def HPI_Benchmark():
    df = quandl.get("FMAC/HPI_USA", authtoken=api_key)
    # note the parentheses: subtract the base value before dividing
    df["Value"] = (df["Value"] - df["Value"][0]) / df["Value"][0] * 100.0
    return df


# grab_initial_state_data()

fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)

HPI_data = pd.read_pickle('fiddy_states3.pickle')

TX_AK_12corr = pd.rolling_corr(HPI_data['TX'], HPI_data['AK'], 12)

HPI_data['TX'].plot(ax=ax1, label='TX HPI')
HPI_data['AK'].plot(ax=ax1, label='AK HPI')
ax1.legend(loc=4)

TX_AK_12corr.plot(ax=ax2, label='TX_AK_12corr')

plt.legend(loc=4)
plt.show()
close()
close()

fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True, sharey=True, figsize=(12, 7))
aapl_px.plot(style='k-', ax=axes[0])
ma60.plot(style='k--', ax=axes[0])
aapl_px.plot(style='k-', ax=axes[1])
ewma60.plot(style='k--', ax=axes[1])
axes[0].set_title('Simple MA')
axes[1].set_title('Exponentially-weighted MA')

spx_px = close_px_all['SPX']
spx_rets = spx_px / spx_px.shift(1) - 1
returns = close_px.pct_change()

# first attempts at the rolling correlation fail: rolling() takes a window,
# not another series, and rolling_corr is not a Series method
# corr = returns.AAPL.rolling(spx_rets, 125, min_periods=100).corr()   # TypeError
# corr = returns.AAPL.rolling_corr(spx_rets, 125, min_periods=100)     # AttributeError
corr = pd.rolling_corr(returns.AAPL, spx_rets, 125, min_periods=100)   # deprecated form
corr = returns.AAPL.rolling(window=125, min_periods=100).corr(spx_rets)
close()
corr.plot()

corr = returns.rolling(window=125, min_periods=100).corr(spx_rets)
corr.plot()
close()

from scipy.stats import percentileofscore
score_at_2percent = lambda x: percentileofscore(x, 0.02)
result = returns.AAPL.rolling(250).apply(score_at_2percent)
result.plot()
close()

rng = pd.date_range('1/1/2000', periods=10000000, freq='10ms')
ts = Series(np.random.randn(len(rng)), index=rng)
def my_strat(symbol):
    spx_orders = define_bollingerband_SPX('$SPX', 20)
    spx_orders['Order'] = spx_orders['Order'].replace('SELL', 'exitlong')
    spx_orders['Order'] = spx_orders['Order'].replace('BUY', 'exitshort')
    # position_action_spx = pd.DataFrame(index=spx_orders.index, columns=['Order'])  # initialize the Orders DataFrame
    # position_action_spx = position_action_spx.fillna(spx_orders['Order'])
    # position_action_spx = pd.concat([position_action_spx, spx_orders['Order'].replace('SELL', 'exitlong')], axis=1)
    # position_action_spx = pd.concat([position_action_spx, spx_orders['Order'].replace('BUY', 'exitshort')], axis=1)

    # Import orders into DataFrame (currently has ALL dates, including non-trading)
    start_date = pd.to_datetime('12/31/07')  # start date per instructions
    end_date = pd.to_datetime('12/31/09')    # end date per instructions
    dates = pd.date_range(start_date, end_date)
    symbols = [symbol, '$SPX']

    # Read in adjusted closing prices for given symbols, date range
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[[symbol]]          # only portfolio symbols
    prices.columns = ['Price']
    spx_prices = prices_all[['$SPX']]
    spx_prices.columns = ['Price']

    # Compute SMA
    sma = pd.rolling_mean(prices, 20)
    sma.columns = ['SMA']
    spx_sma = pd.rolling_mean(spx_prices, 10)
    spx_sma.columns = ['SMA']

    # Compute std dev
    std_dev = pd.rolling_std(prices, 20)
    std_dev.columns = ['Standard Deviation']
    spx_std_dev = pd.rolling_std(spx_prices, 10)
    spx_std_dev.columns = ['Standard Deviation']

    # Calculate Bollinger Band limits
    lower_bband = sma.subtract(2 * std_dev.ix[:, 0], axis=0)
    lower_bband.columns = ['Lower Band']
    upper_bband = sma.add(2 * std_dev.ix[:, 0], axis=0)
    upper_bband.columns = ['Upper Band']

    # Combine all data into one DataFrame
    data = pd.concat([prices, sma, lower_bband, upper_bband], axis=1)

    # Compute 3 statuses
    below_lower = pd.DataFrame(data['Price'] < data['Lower Band'], columns=['Below Lower'])  # low points: stock < lower band
    above_sma = pd.DataFrame(data['Price'] > data['SMA'], columns=['Above SMA'])             # mid points: stock > SMA
    above_upper = pd.DataFrame(data['Price'] > data['Upper Band'], columns=['Above Upper'])  # high points: stock > upper band
    status = pd.concat([below_lower, above_sma, above_upper], axis=1)
    status_shift = status.shift(1)  # aka 'yesterday'

    # BB statuses
    BB = pd.DataFrame(index=data.index, columns=[symbol])
    BB[symbol] = (prices['Price'] - sma['SMA']) / (2 * std_dev['Standard Deviation'])
    BB['$SPX'] = (spx_prices['Price'] - spx_sma['SMA']) / (2 * spx_std_dev['Standard Deviation'])
    corr = pd.rolling_corr(BB[symbol], BB['$SPX'], window=20)

    # Compute 4 actions (get lazy and use an iterator)
    position_action = pd.DataFrame(index=prices.index, columns=['Order'])  # initialize the Orders DataFrame
    # data['IBM'] - data['IBM'].shift(1)  # n compared to n-1
    # Enter long: yesterday below lower -> today above lower
    position_action[(status_shift['Below Lower'] == True) & (status['Below Lower'] == False)] = 'enterlong'
    # Exit long: yesterday below SMA -> today above SMA
    position_action[(status_shift['Above SMA'] == False) & (status['Above SMA'] == True)] = 'exitlong'
    # Enter short: yesterday above upper -> today below upper
    position_action[(status_shift['Above Upper'] == True) & (status['Above Upper'] == False)] = 'entershort'
    # Exit short: yesterday above SMA -> today below SMA
    position_action[(status_shift['Above SMA'] == True) & (status['Above SMA'] == False)] = 'exitshort'

    position_action[((BB[symbol] - BB['$SPX']) > 0.5) & (corr > 0.7)] = 'entershort'
    position_action[((BB[symbol] - BB['$SPX']) < 0) & (corr > 0.7)] = 'exitshort'
    # position_action[(BB['$SPX'] < -0.25) & (corr > 0.7)] = 'exitlong'
    # position_action[(BB['$SPX'] > 0.25) & (corr > 0.7)] = 'enterlong'
    position_action = position_action.dropna()

    entered_posn = 0  # 0 = flat, 1 = long, -1 = short
    # position_action = (pd.concat([spx_orders['Order'], position_action['Order']])).to_frame()
    position_action = position_action.sort_index()
    position_action = position_action.groupby(position_action.index).first()
    drops = pd.DataFrame(index=position_action.index, columns=['change'])  # initialize the drops DataFrame
    for index, row in position_action.iterrows():
        print index
        print row
        if entered_posn == 0:  # calculate enters
            if row[0] == 'enterlong':
                entered_posn = 1
            elif row[0] == 'entershort':
                entered_posn = -1
            else:  # exitlong or exitshort
                # position_action.drop(index)
                drops.loc[index] = 1
        else:
            if (row[0] != 'exitshort') & (entered_posn == -1):
                drops.loc[index] = 1
            elif (row[0] != 'exitlong') & (entered_posn == 1):
                drops.loc[index] = 1
            else:  # enterlong or entershort
                # position_action.drop(index)
                entered_posn = 0
        print entered_posn
        print drops.loc[index]
    drops = drops.fillna(0)
    position_action = position_action[drops['change'] == 0]

    orders = pd.DataFrame(index=position_action.index, columns=[['Symbol', 'Order', 'Shares']])
    orders.index.name = 'Date'
    orders['Symbol'] = symbol
    orders['Shares'] = 100
    orders['Order'] = orders['Order'].fillna(position_action['Order'])
    orders['Order'] = orders['Order'].replace('entershort', 'SELL')
    orders['Order'] = orders['Order'].replace('enterlong', 'BUY')
    orders['Order'] = orders['Order'].replace('exitshort', 'BUY')
    orders['Order'] = orders['Order'].replace('exitlong', 'SELL')

    long_orders = position_action[(position_action['Order'].str.match('enterlong')) == True]
    short_orders = position_action[(position_action['Order'].str.match('entershort')) == True]
    exit_orders = pd.concat([position_action[(position_action['Order'].str.match('exitlong')) == True],
                             position_action[(position_action['Order'].str.match('exitshort')) == True]], axis=0)

    orders.to_csv("./orders/orders.csv")

    # Plot the data
    plot_data(data, long_orders, short_orders, exit_orders)
    return
alpha_Car = alpha_Car.set_index('Month')

alpha_CAPM = alpha_CAPM.rename(columns=lambda x: str(x)[1:])
# beta_CAPM = beta_CAPM.rename(columns=lambda x: str(x)[1:])
alpha_FF5 = alpha_FF5.rename(columns=lambda x: str(x)[1:])
alpha_Car = alpha_Car.rename(columns=lambda x: str(x)[1:])

########################################## Data Statistics ###############################################

# Market volatility using 1-year data
sigma_m = marketret.rolling(window=12).std().rename(columns={'Market_Ret': 'Market_Vol'})

# Stock volatility using 1-year data
sigma = stockret.rolling(window=12).std()

# Correlation between stockret and marketret using 5-year data
corr = pd.DataFrame(index=stockret.index, columns=stockret.columns)
for column in stockret:
    corr[column] = pd.rolling_corr(stockret[column], marketret, window=60)

# Export intermediate calculations
with open('calc.pkl', 'wb') as f:  # pickle files should be opened in binary mode
    pickle.dump([sigma_m, sigma], f)

# Getting back the objects:
# with open('calc.pkl', 'rb') as f:
#     sigma_m, sigma = pickle.load(f)

# Delete companies that have no data at all times and delete times when no company has data
corr = corr.dropna(axis=0, how='all').dropna(axis=1, how='all')
alpha_CAPM = alpha_CAPM.dropna(axis=0, how='all').dropna(axis=1, how='all')
# beta_CAPM = beta_CAPM.dropna(axis=0, how='all').dropna(axis=1, how='all')
alpha_FF5 = alpha_FF5.dropna(axis=0, how='all').dropna(axis=1, how='all')
alpha_Car = alpha_Car.dropna(axis=0, how='all').dropna(axis=1, how='all')
def calc_rolling_corr(self, reference, window=5):
    r = reference.pct_change()
    c = self.stock_raw['Adj Close'].pct_change()
    self.stock['rolling_corr'] = pd.rolling_corr(c, r, window)
    return self.stock
def corr(self, x, y, n):
    (x, y) = self._align_bivariate(x, y)
    return pd.rolling_corr(x, y, n)
def rolling_corr(self, data_frame1, periods, data_frame2=None, pairwise=False, flatten_labels=True):
    """Calculates rolling correlation wrapping around pandas functions

    Parameters
    ----------
    data_frame1 : DataFrame
        contains time series to run correlations on
    periods : int
        period of rolling correlations
    data_frame2 : DataFrame (optional)
        contains times series to run correlation against
    pairwise : boolean
        should we do pairwise correlations only?

    Returns
    -------
    DataFrame
    """

    # this is the new bit of code here
    if pandas.__version__ < '0.17':
        if pairwise:
            panel = pandas.rolling_corr_pairwise(data_frame1.join(data_frame2), periods)
        else:
            panel = pandas.rolling_corr(data_frame1, data_frame2, periods)
    else:
        # panel = pandas.rolling_corr(data_frame1, data_frame2, periods, pairwise=pairwise)
        panel = data_frame1.rolling(window=periods).corr(other=data_frame2, pairwise=True)

    try:
        df = panel.to_frame(filter_observations=False).transpose()
    except:
        df = panel

    if flatten_labels:
        if pairwise:
            series1 = df.columns.get_level_values(0)
            series2 = df.columns.get_level_values(1)
            new_labels = []
            for i in range(len(series1)):
                new_labels.append(series1[i] + " v " + series2[i])
        else:
            new_labels = []
            try:
                series1 = data_frame1.columns
            except:
                series1 = [data_frame1.name]
            series2 = data_frame2.columns
            for i in range(len(series1)):
                for j in range(len(series2)):
                    new_labels.append(series1[i] + " v " + series2[j])
        df.columns = new_labels

    return df
def HPI_benchmark():
    df = quandl.get('FMAC/HPI_USA', authtoken=api_key)
    df.columns = ['United_States']
    df['United_States'] = ((df['United_States'] - df['United_States'][0]) / df['United_States'][0]) * 100
    return df


# grab_initial_state_data()

fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)

HPI_data = pd.read_pickle('fiddy_states3.pickle')

TX_AK_12corr = pd.rolling_corr(HPI_data['TX'], HPI_data['AK'], 12)

HPI_data['TX'].plot(ax=ax1, label='TX HPI')
HPI_data['AK'].plot(ax=ax1, label='AK HPI')
ax1.legend(loc=4)

TX_AK_12corr.plot(ax=ax2, label='TX_AK_12corr')

plt.legend(loc=4)
plt.show()
df.head()
df['Kurt'] = round(df[['Open', 'High', 'Low', 'Adj Close']].kurt(axis=1), 4)
df.head()

# Standard error of the mean
df['Error'] = df[['Open', 'High', 'Low', 'Adj Close']].sem(axis=1)
df.head()

import talib as ta

# Creating indicators
n = 5
df['RSI'] = ta.RSI(np.array(df['Adj Close'].shift(1)), timeperiod=n)
df['SMA'] = pd.rolling_mean(df['Adj Close'].shift(1), window=n)
df['Corr'] = pd.rolling_corr(df['SMA'], df['Adj Close'].shift(1), window=n)
df['SAR'] = ta.SAR(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)), 0.2, 0.2)

# Momentum indicator functions
df['ADX'] = ta.ADX(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)),
                   np.array(df['Open'].shift(1)), timeperiod=n)
df['ADXR'] = ta.ADXR(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)),
                     np.array(df['Adj Close']), timeperiod=n)
# df['APO'] = ta.APO(np.array(df['Adj Close'].shift(1)), fastperiod=12, slowperiod=26, matype=0)
df['AROON_DOWN'], df['AROON_UP'] = ta.AROON(np.array(df['High'].shift(1)),
                                            np.array(df['Low'].shift(1)), timeperiod=n)