Example #1
def rolling_statistics(data):
    ''' Calculates and plots rolling statistics (mean, std, correlation). '''
    plt.figure(figsize=(11, 8))
    
    plt.subplot(311)
    mr = pd.rolling_mean(data['returns'], 252) * 252
    mr.plot()
    plt.grid(True)
    plt.ylabel('returns (252d)')
    plt.axhline(mr.mean(), color='r', ls='dashed', lw=1.5)

    plt.subplot(312)
    vo = pd.rolling_std(data['returns'], 252) * math.sqrt(252)
    vo.plot()
    plt.grid(True)
    plt.ylabel('volatility (252d)')
    plt.axhline(vo.mean(), color='r', ls='dashed', lw=1.5)
    vx = plt.axis()

    plt.subplot(313)
    co = pd.rolling_corr(mr, vo, 252)
    co.plot()
    plt.grid(True)
    plt.ylabel('correlation (252d)')
    cx = plt.axis()
    plt.axis([vx[0], vx[1], cx[2], cx[3]])
    plt.axhline(co.mean(), color='r', ls='dashed', lw=1.5)
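The `pd.rolling_mean` / `pd.rolling_std` / `pd.rolling_corr` helpers used above were deprecated in pandas 0.18 and later removed; a minimal sketch of the same three panels with the modern `.rolling()` accessor (assuming, as above, that `data` is a DataFrame with a `returns` column):

import math
import matplotlib.pyplot as plt

def rolling_statistics_modern(data):
    ''' Same rolling mean/std/corr plots, via the .rolling() accessor. '''
    mr = data['returns'].rolling(252).mean() * 252            # annualised rolling mean
    vo = data['returns'].rolling(252).std() * math.sqrt(252)  # annualised rolling vol
    co = mr.rolling(252).corr(vo)                             # rolling corr of the two
    fig, axes = plt.subplots(3, 1, figsize=(11, 8), sharex=True)
    labels = ('returns (252d)', 'volatility (252d)', 'correlation (252d)')
    for ax, series, label in zip(axes, (mr, vo, co), labels):
        series.plot(ax=ax, grid=True)
        ax.set_ylabel(label)
        ax.axhline(series.mean(), color='r', ls='dashed', lw=1.5)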
def correlation(sym1='LRCX', sym2='AAPL', bench='SPY'):
  # Pull the pricing data for our two stocks and S&P 500
  start = datetime.datetime(2013, 1, 1) # '2013-01-01'
  # end = '2015-01-01'  # no fixed end date; run through to today
  bench = pull_series(bench, start)
  a1 = pull_series(sym1, start)
  a2 = pull_series(sym2, start)
  # Do a simple plot
  fig = plt.figure()
  ax1 = fig.add_subplot(221)
  ax1.scatter(a1,a2)
  ax1.set_xlabel(sym1)
  ax1.set_ylabel(sym2)
  ax1.set_title('Stock prices from %i-%i-%i to today' %(start.day,
                                                        start.month,
                                                        start.year))
  print("Correlation coefficients")
  print("%s and %s: %.5f" %(sym1, sym2, np.corrcoef(a1,a2)[0,1]))
  print("%s and %s: %.5f" %(sym1, bench, np.corrcoef(a1,bench)[0,1]))
  print("%s and %s: %.5f" %(sym2, bench, np.corrcoef(bench,a2)[0,1]))
  # Get rolling correlation from pandas
  rolling_correlation = pd.rolling_corr(a1, a2, 60)
  ax2 = fig.add_subplot(222)
  ax2.plot(rolling_correlation)
  ax2.set_xlabel('Day')
  ax2.set_ylabel('60-day Rolling Correlation')
  # Get raw correlation
  X = np.random.rand(100)
  Y = X + np.random.poisson(size=100)
  ax3 = fig.add_subplot(223)
  ax3.scatter(X, Y)
  print('X-Y correlation coefficient: %.5f' %np.corrcoef(X, Y)[0, 1])
  plt.show()
  return np.corrcoef(X, Y)[0,1]
Example #3
    def computeCorrelations(self,data1,data2):

        correl = pandas.rolling_corr(data1,data2,window=20)
        data2['CORR'] = correl
        data2['stime'] = data2.index
        json_s = data2[['stime','svalue']].to_json(orient='values')
        return json_s
Example #4
 def get_rolling_corr(self, a, b, window=252, field='Adj Close', how='pct', 
                      plot=True):
     data = self.get_data([a, b], field, how)
     corr = pd.rolling_corr(data[a], data[b], window)
     if plot:
         corr.plot(grid=True, style='b')
         data.plot()
     return corr
def make_PCA_series(lfp, window, skip):
    # window is the length of the window to slide (in bins)
    # skip is the spacing between samples in the returned frame
    corrseries = pd.rolling_corr(lfp.dataframe, window=window)
    corrseries = corrseries.iloc[::skip, :, :]
    corrseries = corrseries.to_frame(filter_observations=False).transpose()
    lfp_pairs = physutils.LFPset(corrseries, meta=lfp.meta.copy())
    eigs = get_eigen_series(lfp_pairs)
    return eigs
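Pairwise `pd.rolling_corr` returned a `Panel`, which was removed along with the function itself; a rough modern equivalent of the correlation step above (assuming `lfp.dataframe` is a plain wide DataFrame) flattens the pairwise result into one column per series pair instead:

def pairwise_rolling_corr(df, window, skip):
    # modern pairwise rolling corr yields a (timestamp, column) MultiIndex frame
    corr = df.rolling(window).corr()
    # unstack to one row per timestamp, one column per (series1, series2) pair
    flat = corr.unstack()
    return flat.iloc[::skip]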
def roll_corr(con_pan, var1, var2,window,plot=False):
    if plot:
        a = lag_plot(con_pan[var1])
        plt.show()
        autocorrelation_plot(con_pan[var1])
        plt.show()
    else:
        a =  pd.rolling_corr(con_pan[var1],con_pan[var2],window=window)
    return a
Example #7
    def buyAll(self, d_bond,d_equity,r_bond,r_equity):
        '''
        Solve for the weight of SPX in a buy-all index that optimises a
        backward-looking Sharpe ratio (total carry / vol), with the
        correlation estimated over a trailing window of t observations.

        Inputs:
            d_equity : %ret of equity
            d_bond   : %ret of bond
            r_equity : SPX carry (dividend yield)
            r_bond   : TY carry (yield)

        Computed internally:
            v_equity : SPX variance
            v_bond   : TY variance
            p        : SPX/TY correlation

        Results:
            x_IR : weight maximising the IR
            x_P  : weight minimising variance assuming a constant -50% correlation
            w    : average of the two above
        '''
        t=200
        t_s=30

        p=pd.rolling_corr(d_equity,d_bond,t)
        p=pd.ewma(p,halflife=t_s)

        p2 = pd.Series(-0.5, index=p.index)

        v_equity=pd.rolling_var(d_equity,t)
        v_bond=pd.rolling_var(d_bond,t)

        m=len(p)

        x_IR=p.copy()
        x_P=x_IR.copy()

        for i in range(0,m):

            f = lambda x: -(x*r_equity[i]+(1-x)*r_bond[i])/np.sqrt((x**2*v_equity[i]+(1-x)**2*v_bond[i]+2*x*(1-x)*np.sqrt(v_equity[i]*v_bond[i])*p[i])*16)

            # optimise the weight with a bounded scalar search
            x1 = op.fminbound(f, 0.1,0.8,maxfun=100)
            x_IR[i]=x1
    
            #portfolio optimisation assuming a constant correlation of -50%
            f = lambda x: -(x*r_equity[i]+(1-x)*r_bond[i])/np.sqrt((x**2*v_equity[i]+(1-x)**2*v_bond[i]+2*x*(1-x)*np.sqrt(v_equity[i]*v_bond[i])*p2[i])*16)

            # optimise again under the constant -50% correlation assumption
            x2 = op.fminbound(f, 0.1,0.8,maxfun=100)
            x_P[i]=x2
    
        w = (x_P + x_IR) / 2
    
        return w
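Both lambdas above encode the same objective described in the docstring: total carry divided by portfolio volatility for a two-asset portfolio with equity weight x. A minimal refactoring sketch of that objective (names hypothetical; the constant 16 is carried over unchanged from the original):

import numpy as np

def portfolio_sharpe(x, r_e, r_b, v_e, v_b, rho):
    # carry of the blended position
    carry = x * r_e + (1 - x) * r_b
    # two-asset portfolio variance with correlation rho
    var = (x**2 * v_e + (1 - x)**2 * v_b
           + 2 * x * (1 - x) * np.sqrt(v_e * v_b) * rho)
    return carry / np.sqrt(var * 16)

fminbound then minimises `lambda x: -portfolio_sharpe(x, ...)` over [0.1, 0.8].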
    def rolling_corr(self, data_frame1, periods, data_frame2 = None, pairwise = False, flatten_labels = True):
        """
        rolling_corr - Calculates rolling correlation wrapping around pandas functions

        Parameters
        ----------
        data_frame1 : DataFrame
            contains time series to run correlations on
        periods : int
            period of rolling correlations
        data_frame2 : DataFrame (optional)
            contains times series to run correlation against
        pairwise : boolean
            should we do pairwise correlations only?

        Returns
        -------
        DataFrame
        """

        panel = pandas.rolling_corr(data_frame1, data_frame2, periods, pairwise = pairwise)

        try:
            df = panel.to_frame(filter_observations=False).transpose()

        except Exception:
            df = panel

        if flatten_labels:
            if pairwise:
                series1 = df.columns.get_level_values(0)
                series2 = df.columns.get_level_values(1)
                new_labels = []

                for i in range(len(series1)):
                    new_labels.append(series1[i] + " v " + series2[i])

            else:
                new_labels = []

                try:
                    series1 = data_frame1.columns
                except:
                    series1 = [data_frame1.name]

                series2 = data_frame2.columns

                for i in range(len(series1)):
                    for j in range(len(series2)):
                        new_labels.append(series1[i] + " v " + series2[j])

            df.columns = new_labels

        return df
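A hypothetical usage sketch for the wrapper above, assuming `calc` is an instance of the enclosing class and the frames share a DatetimeIndex:

import numpy as np
import pandas

idx = pandas.date_range('2015-01-01', periods=100)
df1 = pandas.DataFrame(np.random.randn(100, 2), index=idx, columns=['A', 'B'])
df2 = pandas.DataFrame(np.random.randn(100, 1), index=idx, columns=['C'])
corr_df = calc.rolling_corr(df1, 20, data_frame2=df2)
# with flatten_labels=True the columns come back as 'A v C', 'B v C'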
Example #9
def rolling_corr_with_N225(stock, window=5):
    d1 = pd.read_csv("".join(["stock_", stock, ".csv"]), index_col=0, parse_dates=True)
    d2 = pd.read_csv("stock_N225.csv", index_col=0, parse_dates=True)
    s1 = d1.asfreq('B')['Adj Close'].pct_change().dropna()
    s2 = d2.asfreq('B')['Adj Close'].pct_change().dropna()
    rolling_corr = pd.rolling_corr(s1, s2, window).dropna()

    plt.figure()
    rolling_corr.plot()
    plt.savefig("".join(["corr_", stock, ".png"]))
    plt.close()

    return rolling_corr
def transform_df_ASneeded(df,F2scores,shift,stock_name):
    #df.replace(df['F2(8,20)'][df['F2(8,20)']<-5],-5)
    """transform curr_df here if you want running means, etc"""

    yVar = "ooRelRet(3days rolling mean)" 
    df[yVar] =pd.rolling_mean(df["ooRelRet"],3)
    shift  = -4
    df[yVar] = df[yVar].shift(shift)    
    
    df["ooRelRet(nextDay)"] = df["ooRelRet"].shift(-1)
      
    if 'barraBeta' in df.columns:
        df['barraBeta'] = df['barraBeta'].fillna(method='ffill')
        df['barraBeta'] = df['barraBeta'].fillna(method='bfill')
    else:
        print 'barraBeta not available in this set'
        
    df['corr_Prices20'] = pd.rolling_corr(df["ClosePrice"],df["EWPoolClose"],20)
    df['corr_Prices8'] = pd.rolling_corr(df["ClosePrice"],df["EWPoolClose"],8)
    df['corr_Ret'] = pd.rolling_corr(df['ooRawRet'],df['ooPoolRet'],40)

    
    newF2scores = []
    for f2score in F2scores:
        df[f2score][df[f2score] <-15] = -15
        df[f2score][df[f2score] > 15] = 15    
        
        name = f2score+'Exp'
        newF2scores.append(name)
        df[name] = df[f2score]*df['corr_Ret']
    
    xVar ='F2'    
    newF2scores = F2scores 
    print newF2scores

    #based on how you transformed df
    utils.dump_data_csv(df,stock_name,t_fn="t_"+stock_name+"_transformedDF_"+xVar+"_vs_"+yVar+".csv")
    return df,shift,xVar,yVar,newF2scores
Example #11
def transform_df_ASneeded(df,F2scores,shift,stock_name):
    #df.replace(df['F2(8,20)'][df['F2(8,20)']<-5],-5)
    """transform curr_df here if you want running means, etc"""

    shift  = -4
    days = 3
    df["ooRelRet("+str(days)+"D rolling mean)"] = pd.rolling_mean(df["ooRelRet"],days).shift(-(days+1))   
    
    df["ooRelRet(nextDay)"] = df["ooRelRet"].shift(-2)#technically this is incorrect but for the purposes of graphs to show the relationship between F2 and RelRet it is necessary
    yVar = "ooRelRet(3D rolling mean)"
    
    if 'barraBeta' in df.columns:
        df['barraBeta'] = df['barraBeta'].fillna(method='ffill')
        df['barraBeta'] = df['barraBeta'].fillna(method='bfill')
    else:
        print 'barraBeta not available in this set'
        
    df['corr_Prices20'] = pd.rolling_corr(df["ClosePrice"],df["EWPoolClose"],20)
    df['corr_Prices8'] = pd.rolling_corr(df["ClosePrice"],df["EWPoolClose"],8)
    df['corr_Ret'] = pd.rolling_corr(df['ooRawRet'],df['ooPoolRet'],40)

    
    newF2scores = []
    for f2score in F2scores:
        df[f2score][df[f2score] <-15] = -15
        df[f2score][df[f2score] > 15] = 15    
        
        name = f2score+'Exp'
        newF2scores.append(name)
        df[name] = df[f2score]*df['corr_Ret']
    
    xVar ='F2'    
    newF2scores = F2scores 
    print newF2scores

    #based on how you transformed df
    #utils.dump_data_csv(df,stock_name,t_fn="t_"+stock_name+"_transformedDF_"+xVar+"_vs_"+yVar+".csv")
    return df,shift,xVar,yVar,newF2scores
Example #12
    def benchmark_correlation(self, window=90, bench='^GSPC'):
        '''
        Parameters
        ----------
        window : int
            Rolling window for which to calculate the estimator
        bench : str
            Ticker of the benchmark to correlate against
        '''
        
        y = self._get_estimator(window)
        x = self._get_estimator(window, ticker=bench)
        date = y.index

        corr = pandas.rolling_corr(x, y, window)

        if self._type in ("Skew", "Kurtosis"):
            f = lambda x: "%i" % round(x, 0)
        else:
            f = lambda x: "%i%%" % round(x*100, 0)
        
        '''
        Figure args
        '''
        
        fig = plt.figure(figsize=(8, 6))
        cones = plt.axes()
        
        '''
        Cones plot args
        '''
        
        # set the plots
        cones.plot(date, corr)
        
        # set the y-limits
        cones.set_ylim((corr.min() - 0.05, corr.max() + 0.05))
        
        # set and format the y-axis labels
        locs = cones.get_yticks()
        cones.set_yticklabels(map(f, locs))
        
        # turn on the grid
        cones.grid(True, axis='y', which='major', alpha=0.5)
        
        # set the title
        cones.set_title(self._type + ' (Correlation of ' + self._ticker + ' v. ' + bench.upper() + ', daily ' + self._start.strftime("%Y-%m-%d") + ' to ' + self._end.strftime("%Y-%m-%d") +  ')')
        
        return fig, plt
Example #13
def find_alphabeta():
    # Import stocks into DataFrame (CURRENTLY HAS ALL DATES including non-trading)
    start_date = pd.to_datetime('12/31/07') #StartDate per Instructions
    end_date = pd.to_datetime('12/31/09') #EndDate per Instructions
    dates = pd.date_range(start_date, end_date)
    #stocklist = list_of_symbols('2008')
    #data = get_data(stocklist[0].values.tolist(), dates)
    stocklist = ['YUM','AAPL']
    data = get_data(stocklist, dates)
    spx_data = get_data(['$SPX'],dates)
    # macd = pd.ewma((pd.ewma(data, span=12) - pd.ewma(data, span=26)), span=9)
    # harmonic_mean = 1/pd.rolling_mean(1/data,window=20)
    # harmonic_indicator = 100*(1-harmonic_mean/pd.rolling_mean(data, window=20))
    correlation = pd.rolling_corr(data, window=20)  # pairwise rolling correlations
    return correlation
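The commented-out indicators above translate to the modern API roughly as follows (a sketch, assuming `data` is the price DataFrame loaded above; `pd.ewma(x, span=n)` became `x.ewm(span=n).mean()` and the `pd.rolling_*` helpers became `.rolling()` methods):

macd = (data.ewm(span=12).mean() - data.ewm(span=26).mean()).ewm(span=9).mean()
harmonic_mean = 1 / (1 / data).rolling(20).mean()
harmonic_indicator = 100 * (1 - harmonic_mean / data.rolling(20).mean())
correlation = data.rolling(20).corr()  # pairwise, MultiIndex result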
def analysis():
    """ A simple API endpoint to compare data from two sensors
        Example http://127.0.0.1:5000/api/stats/compare?a=sensoraname&b=sensorbname
    """

    if 'wotkit_token' in session:

        a = request.args.get('a')
        b = request.args.get('b')
        hours = int(request.args.get('hours'))
        
        if (a and b and hours):
            
            msph = 3600000 #milliseconds per hour
            result = defaultdict(dict)
            
            sensoraDataSeries = WotKitDataToSeries(WoTKitgetSensorData(a, msph*hours))
            sensorbDataSeries = WotKitDataToSeries(WoTKitgetSensorData(b, msph*hours))
           
            # Labels object
            result['labels'] = [str(i) + "h" for i in range(1, hours)]

            # Sensor A object             
            sensoraDailyMeans = sensoraDataSeries.resample('H', how = 'mean')
            result['a']['mean'] = SeriesToList( sensoraDailyMeans )
            result['a']['rolling_mean'] = SeriesToList( pd.rolling_mean(sensoraDailyMeans, 5) )
            result['a']['rolling_stdev'] = SeriesToList( pd.rolling_std(sensoraDailyMeans, 5) )
            result['a']['rolling_skewness'] = SeriesToList( pd.rolling_skew(sensoraDailyMeans, 5) )
            result['a']['rolling_kurtosis'] = SeriesToList( pd.rolling_kurt(sensoraDailyMeans, 5) )

            #Sensor B object         
            sensorbDailyMeans = sensorbDataSeries.resample('H', how = 'mean')
            result['b']['mean'] = SeriesToList(sensorbDailyMeans)
            result['b']['rolling_mean'] = SeriesToList( pd.rolling_mean(sensorbDailyMeans, 5) )
            result['b']['rolling_stdev'] = SeriesToList( pd.rolling_std(sensorbDailyMeans, 5) )
            result['b']['rolling_skewness'] = SeriesToList( pd.rolling_skew(sensorbDailyMeans, 5) )
            result['b']['rolling_kurtosis'] = SeriesToList( pd.rolling_kurt(sensorbDailyMeans, 5) )
            
            #Comparison object
            result['comparison']['correlation'] = SeriesToList( pd.rolling_corr(sensoraDailyMeans, sensorbDailyMeans, 5) )
            result['comparison']['covariance'] = SeriesToList( pd.rolling_cov(sensoraDailyMeans, sensorbDailyMeans, 5) )         
          
            json_response = json.dumps(result)

            return Response(json_response, content_type='application/json')
Example #15
 def test_ts_corr(self):
     self.env.add_operator('ts_corr', {
         'operator': OperatorTSCorr,
         'arg1': {'value': [3, 5]},
         })
     string1 = 'ts_corr(2, open1, open2)'
     gene1 = self.env.parse_string(string1)
     self.assertFalse(gene1.validate())
     string2 = 'ts_corr(5, open1, open2)'
     gene2 = self.env.parse_string(string2)
     self.assertTrue(gene2.validate())
     self.assertEqual(gene2.dimension, '')
     self.assertRaises(IndexError, gene2.eval, self.env, self.date1, self.date2)
     date1 = self.env.shift_date(self.date1, 4)
     df = pd.rolling_corr(self.env.get_data_value('open1'), self.env.get_data_value('open2'), 5).iloc[4:]
     self.assertTrue(
             frame_equal(
                 gene2.eval(self.env, date1, self.date2),
                 df)
             )
def Relative_Arbitrage ():

    sp500=web.DataReader('^GSPC', data_source='yahoo', start='1/1/2000', end='5/1/2015')
    F=web.DataReader('IBM', data_source='yahoo', start='1/1/2000', end='5/1/2015')
    Struct={'sp500':sp500['Close'],'F':F['Close']}  #define as dict
    DF=pd.DataFrame.from_dict(Struct)   #Convert Dict into dataframe (Pandas function)
    DF['3d Correl']=pd.rolling_corr(DF['sp500'],DF['F'],window=2)
    DF['sp500_Return']=np.log(DF['sp500']/DF['sp500'].shift(2))
    DF['F_Return']=np.log(DF['F']/DF['F'].shift(2))
    DF['sp500 op']=0
    DF['F op']=0
    DF['Port Op']=0
    DF['Port Op']=np.where((DF['3d Correl']<-0.4),1,0)
    DF['Port Op']=np.where((DF['Port Op'].shift(1)==1)&(DF['3d Correl']<0.95),1,0)
    DF['sp500 op']=np.where((DF['Port Op']==1)&(DF['F_Return']>0.01),1,0)
    DF['F op']=np.where(DF['sp500 op']==1,-1,0)
    DF['sp500 1d return']=np.log(DF['sp500']/DF['sp500'].shift(1))
    DF['F 1d return']=np.log(DF['F']/DF['F'].shift(1))
    DF['Strategy']=DF['sp500 1d return']*DF['sp500 op'].shift(1)+DF['F 1d return']*DF['F op'].shift(1)
    DF[['sp500 1d return','Strategy']].cumsum().apply(np.exp).plot(grid=True,figsize=(8,5))
def density_pCorrCoef(dataSeriesOne, dataSeriesTwo):
    correlation = pd.rolling_corr(dataSeriesOne, dataSeriesTwo, window = 16, min_periods=10, center = True)
    return correlation.dropna()
ttl = ('Volume (' + co1 + ')')
plt.ylabel(ttl)
plt.show()

# Now process the data

# Calculate daily returns

firstCo_rets = firstCo['Close'].pct_change()
secondCo_rets = secondCo['Close'].pct_change()
thirdCo_rets = thirdCo['Close'].pct_change()
fourthCo_rets = fourthCo['Close'].pct_change()

#Calculate and plot one-year moving correlations

oneYrEnergy = pd.rolling_corr(firstCo_rets, secondCo_rets, 250).plot()
ttl = ('One Year Rolling Correlation between 2 ' + firstInd + ' Companies')
plt.title(ttl)
plt.show()
oneYrRetail = pd.rolling_corr(thirdCo_rets, fourthCo_rets, 250).plot()
ttl = ('One year Rolling Correlation between 2 ' + secondInd + ' Companies')
plt.title(ttl)
plt.show()

# Consider volatility
# Build a least-squares regression to model the dynamic relationship

import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.sandbox.regression.predstd import wls_prediction_std
import numpy as np
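# `model` is used below but never defined in this excerpt; a minimal sketch with
# the old pandas OLS API (same pattern as the Nifty/ACC example later on this
# page), assuming xdat/ydat hold the Euro STOXX 50 and VSTOXX return series:
model = pd.ols(y=ydat, x=xdat)  # model.beta[0] is the slope, model.beta[1] the intercept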
ax=plt.axis()
x=np.linspace(ax[0],ax[1]+0.01)
plt.plot(x,model.beta[1]+model.beta[0]*x,'b',lw=2)
plt.grid(True)
plt.axis('tight')
plt.xlabel('Euro STOXX 50 returns')
plt.ylabel('VSTOXX returns')

#Output correlation between 2 financial time series
rets.corr()
#           EUROSTOXX    VSTOXX
#EUROSTOXX   1.000000 -0.735117
#VSTOXX     -0.735117  1.000000

#Plot 252-day rolling correlations
pd.rolling_corr(xdat,ydat,window=252).plot(grid=True,style='b')

#############################
#####High Frequency Data#####
#############################

url1='http://hopey.netfonds.no/posdump.php?'
url2='date=%s%s%s&paper=AAPL.O&csv_format=csv'
url=url1+url2
year='2014'
month='09'
days =['22','23','24','25']

AAPL=pd.DataFrame()
for day in days:
    print url % (year, month,day)
Example #20
    # print(100 * portfolio_returns.groupby(portfolio_returns.index.year).sum())
    # print(100 * np.sqrt(12) * portfolio_returns.groupby(portfolio_returns.index.year).std())
    # portfolio_returns.cumsum().plot()
    # plt.legend()
    # plt.grid()
    # plt.show()

    #correlation plot
    # plt.matshow(portfolio_returns.corr())
    # plt.xticks(range(len(portfolio_returns.columns)), portfolio_returns.columns)
    # plt.yticks(range(len(portfolio_returns.columns)), portfolio_returns.columns)
    # plt.colorbar()
    # plt.show()

    stats_df = stats_df[[
        'EW_GTAA', 'EW_GTAA_Universe', 'RiskWt_GTAA', 'RiskWt_GTAA_Universe',
        'MomoPortfoli_QO', 'MomoPortfolio_Q', '70/30_QO_MP/RW_GTAA',
        '70/30_QQQE/RW_GTAA_bm', '60/40_ACWI/AGG', 'S&P500'
    ]]
    # print(stats_df)
    tcor = pd.rolling_corr(portfolio_returns['RiskWt_GTAA'],
                           portfolio_returns['MomoPortfoli_QO'], 6)
    tcor.plot()
    plt.show()
    # ts1 = 100 * portfolio_returns.groupby(portfolio_returns.index.year).sum()
    # ts2 = 100 * np.sqrt(12) * portfolio_returns.groupby(portfolio_returns.index.year).std()
    # stats_df.to_csv("C:/Python27/Git/SMA_GTAA/Summary_Statistics.csv")
    # ts1.to_csv("C:/Python27/Git/SMA_GTAA/Return_Summary.csv")
    # ts2.to_csv("C:/Python27/Git/SMA_GTAA/Risk_Summary.csv")
Example #21
pd.rolling_mean(close_px,60).plot(logy=True)

fig,axes=plt.subplots(nrows=2,ncols=1,sharex=True,sharey=True,figsize=(12,7))
aapl_px=close_px.AAPL['2005':'2009']
ma60=pd.rolling_mean(aapl_px,60,min_periods=50)
ewma60=pd.ewma(aapl_px,span=60)
aapl_px.plot(style='k-',ax=axes[0])
ma60.plot(style='k--',ax=axes[0])
aapl_px.plot(style='k-',ax=axes[1])
ewma60.plot(style='k--',ax=axes[1])
axes[0].set_title('Simple MA')
axes[1].set_title('Exponentially-weighted MA')

spx_px=close_px_all['SPX']
spx_rets=spx_px/spx_px.shift(1)-1
returns=close_px.pct_change()
corr=pd.rolling_corr(returns.AAPL,spx_rets,125,min_periods=100)
corr.plot()
corr=pd.rolling_corr(returns,spx_rets,125,min_periods=100)
corr.plot()

from scipy.stats import percentileofscore
score_at_2percent=lambda x:percentileofscore(x,0.02)
result=pd.rolling_apply(returns.AAPL,250,score_at_2percent)
result.plot()

rng=pd.date_range('1/1/2000',periods=10000000,freq='10ms')
ts=pd.Series(np.random.randn(len(rng)),index=rng)
ts.resample('15min',how='ohlc')
%timeit ts.resample('15min',how='ohlc')
import quandl
import pandas as pd
import pickle
import matplotlib.pyplot as plt
from matplotlib import style
style.use('fivethirtyeight')

fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)
HPI_data2 = pd.read_pickle(
    'C:/Users/yadag/Desktop/PythonProgrammingPractice/DataAnalysis_with_Python/fiddy_states3.pickle'
)

TX_AK_Corr = pd.rolling_corr(HPI_data2['TX'], HPI_data2['AK'], 12)

HPI_data2['TX'].plot(ax=ax1, label='TX HPI')
HPI_data2['AK'].plot(ax=ax1, label='AK HPI')
TX_AK_Corr.plot(ax=ax2, label='TX_AK_Corr')

plt.legend(loc=4)
plt.show()
Example #23
"""

import json
import numpy as np
import pandas as pd
import pandas.io.data as web
import datetime as dt
from tornado.web import RequestHandler

START_DATE=dt.datetime(2000,1,1)
NAMES = ['AAPL','XOM','MSFT','JNJ','BRK.B','WFC','GE','PG','JPM','PFE']
symbols = pd.concat([web.get_data_yahoo(i, START_DATE)['Adj Close'] for i in NAMES],1)
symbols.columns = NAMES
symbols.index = [i.date() for i in list(symbols.index)]
symbols.index.names = ["date"]
panel_corr = pd.rolling_corr(symbols.pct_change(),21)
dates = np.array(map(lambda d: d.toordinal(), symbols.index))  

class StockHandler(RequestHandler):

    def get(self):
        self.write(symbols.to_csv())
        self.finish()

class CorrelationHandler(RequestHandler):

    encoder = json.JSONEncoder()

    def get_correlation(self,*date):
        f = lambda x: x[x<0][-1];
        find_date = lambda d,dates: list(np.argwhere(f((dates-dt.datetime(*d).toordinal()))==(dates-dt.datetime(*d).toordinal())).flat)[0]
df = pd.DataFrame(data)

print df
df.apply(np.mean, axis=1).head(3)

#passing a lambda is a common pattern
df.apply(lambda x: (x['Open'] - x['Close']), axis=1).head(3)
#define a more complex function
def percent_change(x):
    return (x['Open'] - x['Close']) / x['Open']

print df.apply(percent_change, axis=1).head(3)

#change axis, axis = 0 is default
print df.apply(np.mean, axis=0)

def greater_than_x(element, x):
    return element > x

print df.Open.apply(greater_than_x, args=(100,)).head(3)

#This can be used as in conjunction with subset capabilities
mask = df.Open.apply(greater_than_x, args=(100,))

print df.Open[mask].head()

print pd.rolling_apply(df.Close, 5, np.mean)

#There are actually a several built-in rolling functions
print pd.rolling_corr(df.Close, df.Open, 5)[:5]
    def rolling_corr(self,
                     data_frame1,
                     periods,
                     data_frame2=None,
                     pairwise=False,
                     flatten_labels=True):
        """
        rolling_corr - Calculates rolling correlation wrapping around pandas functions

        Parameters
        ----------
        data_frame1 : DataFrame
            contains time series to run correlations on
        periods : int
            period of rolling correlations
        data_frame2 : DataFrame (optional)
            contains times series to run correlation against
        pairwise : boolean
            should we do pairwise correlations only?

        Returns
        -------
        DataFrame
        """

        panel = pandas.rolling_corr(data_frame1,
                                    data_frame2,
                                    periods,
                                    pairwise=pairwise)

        try:
            df = panel.to_frame(filter_observations=False).transpose()

        except Exception:
            df = panel

        if flatten_labels:
            if pairwise:
                series1 = df.columns.get_level_values(0)
                series2 = df.columns.get_level_values(1)
                new_labels = []

                for i in range(len(series1)):
                    new_labels.append(series1[i] + " v " + series2[i])

            else:
                new_labels = []

                try:
                    series1 = data_frame1.columns
                except:
                    series1 = [data_frame1.name]

                series2 = data_frame2.columns

                for i in range(len(series1)):
                    for j in range(len(series2)):
                        new_labels.append(series1[i] + " v " + series2[j])

            df.columns = new_labels

        return df
import QSTK.qstkutil.qsdateutil as du
import QSTK.qstkutil.tsutil as tsu
import QSTK.qstkutil.DataAccess as da
import datetime as dt
import pandas as pd
import numpy as np

dt_start = dt.datetime(2006, 1, 1)
dt_end = dt.datetime(2011, 12, 31)
ldt_timestamps = du.getNYSEdays(dt_start, dt_end, dt.timedelta(hours=16))

dataobj = da.DataAccess('Yahoo', cachestalltime=24)
ls_symbols = dataobj.get_symbols_from_list('sp5002012')
ls_symbols.append('SPY')
ls_keys = ['open', 'high', 'low', 'close', 'volume', 'actual_close']
ldf_data = dataobj.get_data(ldt_timestamps, ls_symbols, ls_keys)
d_data = dict(zip(ls_keys, ldf_data))

for s_key in ls_keys:
    d_data[s_key] = d_data[s_key].fillna(method='ffill')
    d_data[s_key] = d_data[s_key].fillna(method='bfill')
    d_data[s_key] = d_data[s_key].fillna(1.0)

close_px_AAPL = d_data['close']['AAPL']
close_px_MSFT = d_data['close']['MSFT']
close_px_XOM = d_data['close']['XOM']
returns = d_data['close'] / d_data['close'].shift(1) - 1

aapl_std250 = pd.rolling_std(close_px_AAPL, 250, min_periods=10)
aapl_std250.plot()
corr = pd.rolling_corr(returns['AAPL'], returns['SPY'], 125, min_periods=100)
Example #27
def correlation_study(file='challenge-data-v2.csv'):
    sns.set_style("whitegrid")

    # *****************************************************
    # Loading the data
    data = pd.read_csv(file,
                       index_col='event_date',
                       parse_dates=True)

    # *****************************************************
    # Changing the name of the colums to a more suitable (shorter) form
    data.columns = ['sups', 'moff', 'mon', 'hd']

    # *****************************************************
    # Scaling the values to work in a more suitable way (avoiding super-high values)
    for feature in data.columns:
        if data[feature].values.dtype != 'object':
            scale_factor = np.round(np.log10(np.mean(data[feature])))
            data[feature] = data[feature] / 10**scale_factor
            print Fore.BLUE + 'The feature: ' + feature + ' was rescaled by a factor of ' + str(
                10**scale_factor)

    print 'IMPORTANT: Take these rescaling in account when reading the plots'
    print(Style.RESET_ALL)

    # *****************************************************
    # Computation of the distributions per year for the different features
    years = np.unique(data.index.year)

    # Definition of the figure.
    figid = plt.figure('Temporal distributions by year', figsize=(20, 10))

    # Definition of the matrix of plots.
    col = len(data.columns[data.columns != 'hd'])
    gs = grds.GridSpec(3, col)

    for i in range(col):

        ax1 = plt.subplot(gs[0, i])
        ax2 = plt.subplot(gs[1, i])

        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)].values
            ax1.plot(np.arange(len(dat)), dat, '-')
            legend.append(str(y))
        ax1.legend(legend)
        ax1.set_title('Daily ' + data.columns[i])

        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)].resample('M').values
            ax2.plot(np.arange(len(dat)) + 1, dat, '-o')
            legend.append(str(y))
        ax2.legend(legend)
        ax2.set_title('Monthly ' + data.columns[i])
        plt.xlim([1, 12])

        ax3 = plt.subplot(gs[2, i])
        legend = []
        for y in years:
            dat = data[data.columns[i]][str(y)]
            #         dat = dat.groupby(data.index.dayofweek).mean()
            dat = dat.groupby(dat.index.dayofweek).mean()
            dat.index = ['Mon', 'Tues', 'Wed', 'Thurs', 'Fri', 'Sat', 'Sun']
            dat.plot(style='-o')
            legend.append(str(y))
        ax3.legend(legend)
        ax3.set_title('Day week ' + data.columns[i])

    # *****************************************************
    # Computation of the distribution of activity per month for the different time series that are present in the data
    years = np.unique(data.index.year)
    # Definition of the figure.
    figid = plt.figure('Monthly distribution of features', figsize=(20, 10))

    # Definition of the matrix of plots. In this case the situation is more complex that is why I need to define a
    # matrix. It will be a dim[2x3] matrix.
    col = len(data.columns[data.columns != 'hd'])
    rows = len(years)
    gs = grds.GridSpec(rows, col)
    months = [
        'Jan', 'Feb', 'Mar', 'Aprl', 'May', 'Jun', 'Jul', 'Agst', 'Sep', 'Oct',
        'Nov', 'Dec'
    ]
    colors = sns.hls_palette(12, l=.5, s=.6)

    for c in range(col):
        for r in range(rows):
            ax1 = plt.subplot(gs[r, c])

            dat_year = data[data.columns[c]][str(years[r])]

            for m in range(1, 13):
                dat = dat_year[dat_year.index.month == m].values
                ax1.plot(np.arange(len(dat)), dat, '-', color=colors[m - 1])
            if r == 0 and c == col - 1:
                ax1.legend(months,
                           bbox_to_anchor=(1, 1),
                           loc='upper left',
                           borderaxespad=0.,
                           ncol=2,
                           fancybox=True,
                           frameon=True)

            if c == 0:
                ax1.set_ylabel('Year: ' + str(years[r]))

            if r == 0:
                ax1.set_title('Feature: ' + str(data.columns[c]))

    # *****************************************************
    # Computation of the distribution of cross correlations per month for the different time series that are present in the data
    years = np.unique(data.index.year)

    # Definition of the figure.
    figid = plt.figure('Monthly features cross-correlations', figsize=(20, 10))

    # Definition of the matrix of plots.
    feature1 = [0, 0, 2]
    feature2 = [2, 1, 1]

    col = len(data.columns[data.columns != 'hd'])
    rows = len(years)

    gs = grds.GridSpec(rows, col)
    months = [
        'Jan', 'Feb', 'Mar', 'Aprl', 'May', 'Jun', 'Jul', 'Agst', 'Sep', 'Oct',
        'Nov', 'Dec'
    ]
    # colors = sns.color_palette("Set2", 12)
    colors = sns.hls_palette(12, l=.5, s=.6)

    for c in range(col):
        for r in range(rows):
            ax1 = plt.subplot(gs[r, c])

            dat_year_feat1 = data[data.columns[feature1[c]]][str(years[r])]
            dat_year_feat2 = data[data.columns[feature2[c]]][str(years[r])]

            for m in range(1, 13):
                dat_feat1 = dat_year_feat1[dat_year_feat1.index.month ==
                                           m].values
                dat_feat1 = np.subtract(dat_feat1, np.mean(dat_feat1))
                dat_feat2 = dat_year_feat2[dat_year_feat2.index.month ==
                                           m].values
                dat_feat2 = np.subtract(dat_feat2, np.mean(dat_feat2))
                dat = sgn.correlate(dat_feat1, dat_feat2, mode='same')
                ax1.plot(np.linspace(-15, 15, len(dat)),
                         dat,
                         '-',
                         color=colors[m - 1])
            if c == 0:
                ax1.set_ylabel('Year: ' + str(years[r]))
            if r == 0 and c == col - 1:
                ax1.legend(months,
                           bbox_to_anchor=(1, 1),
                           loc='upper left',
                           borderaxespad=0.,
                           ncol=2,
                           fancybox=True,
                           frameon=True)
            if r == 0:
                ax1.set_title('Xcorr: ' + str(data.columns[feature1[c]]) +
                              ' and ' + str(data.columns[feature2[c]]))

    # *****************************************************
    # Computation of the distribution of activity per month for the different time series that are present in the data
    years = np.unique(data.index.year)

    # Definition of the matrix of plots.
    feature1 = [0, 0, 2]
    feature2 = [2, 1, 1]

    for f in range(len(feature1)):

        figid = plt.figure(
            'Rolling correlation coefficient and weekly activity of ' +
            data.columns[feature1[f]] + ' and ' + data.columns[feature2[f]],
            figsize=(20, 10))
        rows = len(years)

        gs = grds.GridSpec(4, 4)

        for r in range(rows):
            ax1 = plt.subplot(gs[0, :])
            ax2 = plt.subplot(gs[1, :])
            ax3 = plt.subplot(gs[2, :])

            dat_year_feat1 = data[data.columns[feature1[f]]][str(years[r])]
            dat_year_feat2 = data[data.columns[feature2[f]]][str(years[r])]

            ref = ax1.plot(dat_year_feat1.resample('W'))
            ax2.plot(dat_year_feat2.resample('W'))

            xcorr = pd.rolling_corr(dat_year_feat1, dat_year_feat2, 14)
            ax3.plot(xcorr)

            ax4 = plt.subplot(gs[3, r])
            n, bins, patches = ax4.hist(xcorr.values[np.logical_not(
                np.isnan(xcorr.values))],
                                        bins=np.round(len(xcorr) / 6),
                                        facecolor=ref[0].get_color(),
                                        edgecolor=ref[0].get_color())
            mediana = ax4.axvline(np.median(xcorr.values[np.logical_not(
                np.isnan(xcorr.values))]),
                                  color='r',
                                  linestyle='--')
            ax4.set_xlim([-1, 1])
            ax4.set_xlabel('CorrCoef year ' + str(years[r]))
            ax4.set_label(mediana)

            print '-------------------------------------------------'
            print 'Correlation distribution for year ' + str(years[r])
            print 'Mean:', xcorr.mean()
            print 'Median:', xcorr.median()
            print 'Standard deviation:', xcorr.std()
            print 'Kurtosis:', xcorr.kurtosis()  # kurtosis mainly reflects outliers, not the central peak
            print 'Skewness:', xcorr.skew()  # note: no normal-distribution baseline is subtracted here

            if (np.abs(xcorr.skew())) < 0.65:
                mu, sigma = stat.norm.fit(xcorr.values[np.logical_not(
                    np.isnan(xcorr.values))])
                print 'Normal distribution fitted!'
                print 'mu=' + str(mu)
                print 'sigma=' + str(sigma)

                fitted_normal = mlab.normpdf(bins, mu, sigma) * np.max(
                    xcorr.values[np.logical_not(np.isnan(xcorr.values))])
                # print fitted_normal
                normfit = ax4.plot(bins,
                                   fitted_normal * np.max(n),
                                   'r--',
                                   linewidth=2,
                                   color="#3498db")
                ax4.legend([
                    'Median ' + str(round(xcorr.median(), 2)),
                    'N(' + str(round(mu, 1)) + ',' + str(round(sigma, 2)) + ')'
                ],
                           loc='best')
            else:
                ax4.legend(['Median ' + str(round(xcorr.median(), 2))],
                           loc='best')

        ax1.set_ylabel(data.columns[feature1[f]])
        ax2.set_ylabel(data.columns[feature2[f]])
        ax3.set_ylabel('Rolling correlation, 14 days period')

        ax1.legend(years,
                   bbox_to_anchor=(1, 1),
                   loc='upper left',
                   borderaxespad=0.,
                   ncol=1,
                   fancybox=True,
                   frameon=True)

    if file == 'challenge-data-v2.csv':
        conclusions = ""
    return True
Example #28
    def corr(self):
        temp = pd.concat([
            self.underlyingYieldRate_5,
            self.ADV_20(20),
            self.ADV_20(15),
            self.ADV_20(10),
            self.ADV_20(5),
            self.TurnOver_20(20),
            self.TurnOver_20(15),
            self.TurnOver_20(10),
            self.RSI_20(20),
            self.RSI_20(15),
            self.RSI_20(10),
            self.RSI_20(5),
            self.RE_20(20),
            self.RE_20(15),
            self.RE_20(10),
            self.RE_20(5),
            self.VMC(),
            self.LDECC_5()
        ],
                         axis=1)
        temp = pd.DataFrame(
            np.matrix(temp),
            index=temp.index,
            columns=[
                'underlyingYieldRate_5', 'ADV_20(20)', 'ADV_20(15)',
                'ADV_20(10)', 'ADV_20(5)', 'TurnOver_20(20)',
                'TurnOver_20(15)', 'TurnOver_20(10)', 'RSI_20(20)',
                'RSI_20(15)', 'RSI_20(10)', 'RSI_20(5)', 'RE_20(20)',
                'RE_20(15)', 'RE_20(10)', 'RE_20(5)', 'VMC()', 'LDECC_5()'
            ])
        #temp=pd.concat([self.yield_rate,self.ADV_20(20),self.TurnOver_20(20),self.RSI_20(20),self.RE_20(20),self.VMC(),self.LDECC_5()],axis=1)
        ADV_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['ADV_20(20)'], 10)
        ADV_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['ADV_20(15)'], 10)
        ADV_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['ADV_20(10)'], 10)
        ADV_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['ADV_20(5)'], 10)

        TurnOver_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                           temp['TurnOver_20(20)'], 10)
        TurnOver_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                           temp['TurnOver_20(15)'], 10)
        TurnOver_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                           temp['TurnOver_20(10)'], 10)

        RSI_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['RSI_20(20)'], 10)
        RSI_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['RSI_20(15)'], 10)
        RSI_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                      temp['RSI_20(10)'], 10)
        RSI_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['RSI_20(5)'], 10)

        RE_20_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['RE_20(20)'], 10)
        RE_15_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['RE_20(15)'], 10)
        RE_10_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['RE_20(10)'], 10)
        RE_5_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                    temp['RE_20(5)'], 10)

        VMC_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                   temp['VMC()'], 10)
        LDECC_corr = pd.rolling_corr(temp['underlyingYieldRate_5'],
                                     temp['LDECC_5()'], 10)
        corr = pd.concat([
            ADV_20_corr, ADV_15_corr, ADV_10_corr, ADV_5_corr,
            TurnOver_20_corr, TurnOver_15_corr, TurnOver_10_corr, RSI_20_corr,
            RSI_15_corr, RSI_10_corr, RSI_5_corr, RE_20_corr, RE_15_corr,
            RE_10_corr, RE_5_corr, VMC_corr, LDECC_corr
        ],
                         axis=1).dropna()
        self.corr = pd.DataFrame(np.matrix(corr),
                                 index=corr.index,
                                 columns=self.func_name)
# ===============================================
# HPI_data['NY_12MSTD_old'] = pd.rolling_std(HPI_data['NY'], 12) # old way
# HPI_data['NY_12MSTD'] = HPI_data['NY'].rolling(window=12, center=False).std()

# print(HPI_data[['NY', 'NY_12MSTD', 'NY_12MSTD_old']].tail(9))
# print(HPI_data[['NY', 'NY_12MSTD']].tail(9))


# Nice, but standard deviation is a totally different scale. plotting the two normally isn't helpful

# Plot stuff:
# HPI_data[['NY', 'NY_12MSTD']].plot(ax=ax1)

# So we graph it on a different graph.
# HPI_data['NY_12MSTD'].plot(ax=ax2)


# Get a rolling correlation b/w NY and the US Benchmark for 12 months:
# ===============================================
HPI_data['NY_12MCOR_TO_MEAN'] = pd.rolling_corr(HPI_data['NY'], HPI_data['USA_AVE'], 12) #old way
HPI_data['NY_12MCOR_TO_MEAN'] = HPI_data['NY'].rolling(window=12).corr(other=HPI_data['USA_AVE'])

print(HPI_data[['NY', 'USA_AVE', 'NY_12MCOR_TO_MEAN']])
HPI_data['NY'].plot(ax=ax1, label='NY_HPI')
HPI_data['USA_AVE'].plot(ax=ax1, label='US_AVE')
HPI_data['NY_12MCOR_TO_MEAN'].plot(ax=ax2, label='NY-US_AVE-Correlation')
ax1.legend(loc=2)

plt.legend(loc=4)
plt.show()
Example #30
    f = plt.figure(figsize=(12, 8))
    ax = f.add_subplot(111)
    stats.probplot(aapl, dist="norm", plot=ax)
    plt.show()

    # Volatility
    min_periods = 75
    vol = pd.rolling_std(daily_pct_change, min_periods) * np.sqrt(min_periods)
    vol.plot(figsize=(10, 8))

    # Correlation - fixed, over the whole period
    daily_pct_change.corr()
    # - fixed, on a subset
    daily_pct_change["2012":"2013"].corr()
    # - rolling, on an year period, on 2 stocks
    rolling_corr = pd.rolling_corr(daily_pct_change["AAPL"], daily_pct_change["MSFT"], window=252).dropna()

    # monthly calculations
    aaplM = aapl.resample("M", how="last")
    aaplMpct_chg = aaplM.pct_change()
    aaplMpct_chg.hist(bins=50, figsize=(12, 8))
    aaplMpct_chg.describe(percentiles=[0.025, 0.5, 0.975])

    # ----- get some info on more exchanged stocks -----
    volumes = all_data[["Volume"]].reset_index()
    daily_volume = volumes.pivot("Date", "Ticker", "Volume")
    vol_mean = pd.DataFrame(daily_volume.mean(), columns=["vol_mean"])
    more_vol = vol_mean.sort_index(by="vol_mean", ascending=False)

    it_more_vol_tickers = more_vol[:12].index.tolist()  # todo: > mean volume
Example #31
ax2=sm.graphics.tsa.plot_acf(df_last['log_returns_min'],lags=10,ax=ax2)
ax3=fig.add_subplot(133)
ax3=sm.graphics.tsa.plot_pacf(df_last['log_returns_min'],lags=30,ax=ax3)


# In[ ]:

ACF0=sm.tsa.stattools.acf(df_last['log_returns_min'], nlags=14)
#ACF0.plot()
plt.bar(np.arange(14),ACF0[1:15])


# In[ ]:

window=500
aa=pd.rolling_corr(df_last['log_returns_min'],df_last['log_returns_min'].shift(1),window)

fig=plt.figure(figsize=(18,6))
fig.suptitle('Minute by Minute Returns Rolling Correlation ( %s minutes)' %(window),y=1.05,fontsize=20)
ax1 = fig.add_subplot(121)
ax1=plt.hist(aa.ix[window:])
ax1=plt.axvline(x=0,color='black')
ax2=fig.add_subplot(122)
ax2=aa.plot()
ax2=plt.axhline(y=0,color='black')


# In[ ]:

minute_lag=np.zeros((len(df_last),7))
for i in range(0,7):
def ts_corrFn(df, col1, col2, min_periods, max_periods):
    if not (max_periods): max_periods = len(df[col1])
    return pd.rolling_corr(df[col1],
                           df[col2],
                           max_periods,
                           min_periods=min_periods)
Example #33
"""
Another interesting visualization would be to compare the Texas HPI to the overall HPI. 
Then do a rolling correlation between the two of them. The assumption would be that when 
correlation was falling, there would soon be a reversion.

Every time correlation drops, you should in theory sell property in the area that is rising, 
and then you should buy property in the area that is falling. The idea is that these two 
areas are so highly correlated that we can be very confident that the correlation will 
eventually return back to about 0.98. As such, when correlation is -0.5, we can be very 
confident in our decision to make this move, as the outcome can be one of the following:

HPI forever diverges like this and never returns (unlikely), the falling area rises up 
to meet the rising one, in which case we win, the rising area falls to meet the other 
falling one, in which case we made a great sale, or both move to re-converge, 
in which case we definitely won out.
"""

fig = plt.figure()
ax1 = plt.subplot2grid((2,1), (0,0))
ax2 = plt.subplot2grid((2,1), (1,0), sharex=ax1)

AZ_AK_12corr = pd.rolling_corr(HPI_data['AZ'], HPI_data['AK'], 12)

HPI_data['AZ'].plot(ax=ax1, label="AZ HPI")
HPI_data['AK'].plot(ax=ax1, label="AK HPI")
ax1.legend(loc=4)

AZ_AK_12corr.plot(ax=ax2)

plt.show()
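A minimal sketch of the signal the note above describes (hypothetical helper; `HPI_data` as loaded earlier): flag dates where the rolling correlation has broken down as candidate reversion trades.

def correlation_reversion_signal(s1, s2, window=12, threshold=-0.5):
    # rolling correlation between the two HPI series
    corr = s1.rolling(window).corr(s2)
    # True where correlation has collapsed and a reversion back toward
    # its long-run level (~0.98 in the note above) is expected
    return corr < threshold

signal = correlation_reversion_signal(HPI_data['AZ'], HPI_data['AK'])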
Example #34
    pd.options.display.max_columns = None
    print(main_df.head())

    pickle_out = open('fiddy_states3.pickle', 'wb')
    pickle.dump(main_df, pickle_out)
    pickle_out.close()


def HPI_Benchmark():
    df = quandl.get('FMAC/HPI_USA', authtoken=api_key)
    df['Value'] = (df['Value'] - df['Value'][0]) / df['Value'][0] * 100.0
    return df


fig = plt.figure()
ax1 = plt.subplot2grid((2, 1), (0, 0))
ax2 = plt.subplot2grid((2, 1), (1, 0), sharex=ax1)

HPI_data = pd.read_pickle('fiddy_states3.pickle')

TX_AK_12corr = pd.rolling_corr(HPI_data['TX'], HPI_data['AK'], 12)

HPI_data['TX'].plot(ax=ax1, label='TX HPI')
HPI_data['AK'].plot(ax=ax1, label='AK HPI')
ax1.legend(loc=4)

TX_AK_12corr.plot(ax=ax2, label='TX_AK_12corr')

plt.legend(loc=2)
plt.show()
		for mon in xrange(0, len(ts1), 12):
			ann1.append(np.sum(ts1[mon:mon+12]))
			ann2.append(np.sum(ts2[mon:mon+12]))
		# calculate moving correlation
		# Mapping for the year
		x = np.arange(styr, edyr+1, 1.)
		y = np.arange(1., edyr-styr+1, 1.)
		X, Y = np.meshgrid(x, y)

		stat = np.empty((tstep-1, tstep))
		stat.fill(np.nan)

		for wind in xrange(1, tstep):
			data1 = Series(ann1, index=dates)
			data2 = Series(ann2, index=dates)
			print pd.rolling_corr(data1, data2, window=wind)
			stat[wind-1, :] = pd.rolling_corr(data1, data2, window=wind)  # tstep-wind-1
			print wind


		# create figure
		fig = plt.figure(figsize=(12, 8), dpi=100, facecolor="white")
		font = {'family': 'serif', 'color': 'darkred', 'weight': 'normal', 'size': 20}

		clevs = np.arange(-1.0, 1.1, 0.1)
		im = plt.contourf(X, Y, stat, clevs, cmap=plt.cm.jet)
		del clevs
		cb = plt.colorbar(im, ticks=np.arange(-1.0, 1.1, 0.2)) #, "right", size="5%", pad='2%',
		plt.xlabel("ENDING YEAR")
		plt.ylabel("WINDOW LENGTH")
		plt.title('CMIP5 Global Moving COR(%s,%s) %s-%s' % (forcing[i], forcing[j], str(styr), str(edyr)))
ret_data2 = ret_data2[~np.isnan(ret_data2)]

model = pd.ols(y=ret_data2, x=ret_data1)

plt.plot(ret_data1, ret_data2)
ax = plt.axis()  # grab axis values
x = np.linspace(ax[0], ax[1] + 0.01)
plt.plot(x, model.beta[1] + model.beta[0] * x, 'b', lw=2)
plt.grid(True)
plt.axis('tight')
plt.xlabel('Nifty Returns')
plt.ylabel('ACC Returns')

np.correlate(ret_data1, ret_data2)

pd.rolling_corr(ret_data1, ret_data2, window=252).plot(grid=True, style='b')

# Augmented Dicky Fuller Test

stock = web.DataReader('AMBUJACEM.NS',
                       data_source='yahoo',
                       start='4/4/2008',
                       end='4/4/2016')

x = stock['Close']

ret_stock = np.log(x / x.shift(1))

ret_stock = ret_stock[~np.isnan(ret_stock)]

lag = 1
from convert_to_timeseries import convert_data_to_timeseries

# Input file containing data
input_file = 'data_timeseries.txt'

# Load data
data1 = convert_data_to_timeseries(input_file, 2)
data2 = convert_data_to_timeseries(input_file, 3)
dataframe = pd.DataFrame({'first': data1, 'second': data2})

# Print max and min
print('\nMaximum:\n', dataframe.max())
print('\nMinimum:\n', dataframe.min())

# Print mean
print('\nMean:\n', dataframe.mean())
print('\nMean row-wise:\n', dataframe.mean(1)[:10])

# Plot rolling mean
pd.rolling_mean(dataframe, window=24).plot()

# Print correlation coefficients
print('\nCorrelation coefficients:\n', dataframe.corr())

# Plot rolling correlation
plt.figure()
pd.rolling_corr(dataframe['first'], dataframe['second'], window=60).plot()

plt.show()
title('title')
show()

plot(gg, 'y')
legend()
show()
#y = pd.rolling_mean(z,90)
#figure()
#plot(z, 'r')
#xlabel('x')
#ylabel('y')

#title('REPO Index')
#show()
repo = repo[['CLOSE']]
repo.columns = ['REPO']

#y = micex['CLOSE']
#mix = y.resample('Q', how='mean')
s = kv2['Ошибки и пропуски'] / 1000
figure()
dr = gg.values
#plot(mix, 'g', label='MICEX')
plot(gg.values, 'r', label='REPO')
plot(s, 'y', label='NEO')
f = pd.DataFrame({'NEO': s, 'REPO': dr}).plot()
pd.rolling_corr(s, dr, window=5)
f.legend(['REPO', 'NEO'], 'corr').plot(style='.')
show()

#repo_only.to_excel('D:\work\data\Репо.xlsx')
add_cls['Consecutive Up Days'] = ((add_cls['Daily Change'] - add_cls['Daily Change'].shift())>0).apply(lambda y : y * (y.groupby((y != y.shift()).cumsum()).cumcount() + 1))                                                                                                        

# every object in the add_cls dictionary is a data frame
# so turn it into a panel and join it with the original
Q=PNL.join( pd.Panel( add_cls ) )

# this section is for functions that return panels
# they join with a simple panel.join call
Q = Q.join( PNL.pct_change(periods = 1), how='inner', rsuffix=' Pct Change')

# this one handles cases when a panel hasn't been fitted to some built-in yet
# (this would probably be the function to wrap in a GenericWrapper for pipeline stuff)

Q = Q.join(Q.apply( lambda x : pd.rolling_window( x, 5, 'gaussian', std=0.1) ), rsuffix=' Gaussian Mean')

# lag a few days
NUM_LAG_DAYS=3

Q = Q.join( [Q.shift(k).add_suffix(' Lag ' + str(k)) for k in range(1, NUM_LAG_DAYS+1)]   )

# add some rolling correlation between series'
Q = Q.join(pd.rolling_corr( Q['Daily Change'] , pairwise=True, window=5).transpose(2,0,1))

# add some rolling std
Q = Q.join ( pd.Panel( { 'rolling std' : pd.rolling_std( Q['Daily Change'], 5 )} ) )

Q = Q.join ( Q.apply( lambda x : pd.rolling_std( x, 5 )  ), rsuffix=' rolling std' )

print Q.items.tolist()

print Q.to_frame().unstack().head()
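`pd.Panel`, `pd.rolling_window` and pairwise `pd.rolling_corr` are all gone from modern pandas; a rough sketch of the pairwise-correlation join above using a flat DataFrame instead of a Panel (names hypothetical):

import numpy as np
import pandas as pd

idx = pd.date_range('2016-01-01', periods=50)
daily_change = pd.DataFrame(np.random.randn(50, 3),
                            index=idx, columns=['A', 'B', 'C'])
# pairwise rolling corr: (date, column) MultiIndex rows, one column per series
pair_corr = daily_change.rolling(5).corr()
# flatten to one column per ordered pair, analogous to the Panel join above
flat = pair_corr.unstack()
flat.columns = ['%s v %s Corr' % (a, b) for a, b in flat.columns]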
Example #40
 def ts_operation(df1, df2, n):
     return pd.rolling_corr(df1, df2, n)
    pickle_out = open('fiddy_states3.pickle','wb')
    pickle.dump(main_df, pickle_out)
    pickle_out.close()
    print df.head()

def HPI_Benchmark():
    df = quandl.get("FMAC/HPI_USA", authtoken = api_key)
    df["Value"] = (df["Value"] -df["Value"][0] / df["Value"][0] * 100.0)
    return df

#grab_initial_state_data()

fig = plt.figure()
ax1 = plt.subplot2grid((2,1),(0,0))
ax2 = plt.subplot2grid((2,1),(1,0), sharex = ax1)


HPI_data = pd.read_pickle('fiddy_states3.pickle')
TX_AK_12corr = pd.rolling_corr(HPI_data['TX'], HPI_data['AK'], 12)

HPI_data['TX'].plot(ax = ax1, label= 'TX HPI')
HPI_data['AK'].plot(ax = ax1, label= 'AK HPI')

ax1.legend(loc = 4)

TX_AK_12corr.plot(ax = ax2, label = 'TX_AK_12corr')

plt.legend(loc = 4)
plt.show()
Example #42
close()
close()
fig,axes = plt.subplots(nrows=2,ncols=1,sharex=True,sharey=True,figsize=(12,7))
aapl_px.plot(style='k-',ax=axes[0])
ma60.plot(style='k--',ax=axes[0])
aapl_px.plot(style='k-',ax=axes[1])
ewma60.plot(style='k--',ax=axes[1])
axes[0].set_title('Simple MA')
axes[1].set_title('Exponentially-weighted MA')
spx_rets=spx_px/spx_px.shift(1)-1
spx_px = close_px_all['SPX']
spx_rets=spx_px/spx_px.shift(1)-1
returns=close_px.pct_change()
corr=returns.AAPL.rolling(spx_rets,125,min_periods=100).corr()
corr=returns.AAPL.rolling_corr(spx_rets,125,min_periods=100)
corr=pd.rolling_corr(returns.AAPL,spx_rets,125,min_periods=100)
corr=returns.AAPL.rolling(window=125,min_periods=100).corr(spx_rets)
close()
corr.plot()
corr=returns.rolling(window=125,min_periods=100).corr(spx_rets)
corr.plot()
close()
from scipy.stats import percentileofscore
score_at_2percent = lambda x:percentileofscore(x,0.02)
result = returns.AAPL.rolling(250).apply(score_at_2percent)
result.plot()
close()
rng = pd.date_range('1/1/2000',periods=100000000,freq='10ms')
ts=Series(np.random.randn(len(rng)),index=rng)
rng = pd.date_range('1/1/2000',periods=10000000,freq='10ms')
ts=Series(np.random.randn(len(rng)),index=rng)
Example #43
def my_strat(symbol):
    spx_orders = define_bollingerband_SPX('$SPX', 20)
    spx_orders['Order']=spx_orders['Order'].replace('SELL','exitlong')
    spx_orders['Order']=spx_orders['Order'].replace('BUY','exitshort')
    #position_action_spx = pd.DataFrame(index=spx_orders.index, columns = ['Order']) #initialize the Orders Dataframe
    #position_action_spx=position_action_spx.fillna(spx_orders['Order'])
    #position_action_spx = pd.concat([position_action_spx, spx_orders['Order'].replace('SELL','exitlong')], axis=1)
    #position_action_spx = pd.concat([position_action_spx, spx_orders['Order'].replace('BUY','exitshort')], axis=1)

    # Import Orders into DataFrame (CURRENTLY HAS ALL DATES including non-trading)
    start_date = pd.to_datetime('12/31/07') #StartDate per Instructions
    end_date = pd.to_datetime('12/31/09') #EndDate per Instructions
    dates = pd.date_range(start_date, end_date)

    symbols = [symbol, '$SPX']

    # Read in adjusted closing prices for given symbols, date range
    prices_all = get_data(symbols, dates)  # automatically adds SPY
    prices = prices_all[[symbol]]  # only portfolio symbols
    prices.columns = ['Price']

    spx_prices = prices_all[['$SPX']]
    spx_prices.columns = ['Price']

    # Compute SMA
    sma = pd.rolling_mean(prices, 20)
    sma.columns = ['SMA']

    spx_sma = pd.rolling_mean(spx_prices, 10)
    spx_sma.columns = ['SMA']

    # Compute Std Dev
    std_dev = pd.rolling_std(prices, 20)
    std_dev.columns = ['Standard Deviation']

    spx_std_dev = pd.rolling_std(spx_prices, 10)
    spx_std_dev.columns = ['Standard Deviation']

    # Calculate Bollinger Band Limits
    lower_bband = sma.subtract(2*std_dev.ix[:,0], axis=0)
    lower_bband.columns = ['Lower Band']
    upper_bband = sma.add(2*std_dev.ix[:,0], axis=0)
    upper_bband.columns = ['Upper Band']

    # Combine All Data into 1 dataframe
    data = pd.concat([prices, sma, lower_bband, upper_bband], axis = 1)

    # Compute 4 Statuses
    below_lower = pd.DataFrame(data['Price']<data['Lower Band'], columns = ['Below Lower']) #Low Points: Identify where Stock < Lower Band
    above_sma = pd.DataFrame(data['Price']>data['SMA'], columns = ['Above SMA']) #Mid Points: Identify where Stock > SMA
    above_upper = pd.DataFrame(data['Price']>data['Upper Band'], columns = ['Above Upper']) #High Points: Identify where Stock > Upper Band
    status = pd.concat([below_lower, above_sma, above_upper], axis = 1)
    status_shift = status.shift(1) #aka 'Yesterday'

    #BBStatuses
    BB = pd.DataFrame(index=data.index, columns=[symbol])
    BB[symbol] = (prices['Price']-sma['SMA'])/(2*std_dev['Standard Deviation'])
    BB['$SPX'] = (spx_prices['Price']-spx_sma['SMA'])/(2*spx_std_dev['Standard Deviation'])
    corr = pd.rolling_corr(BB[symbol],BB['$SPX'], window=20)

    # Compute 4 Actions (get lazy and do iterator)
    position_action = pd.DataFrame(index=prices.index, columns = ['Order']) #initialize the Orders Dataframe
            #data['IBM']-data['IBM'].shift(1) #n compared to n-1
    position_action[(status_shift['Below Lower']==True)&(status['Below Lower']==False)]='enterlong'  #Enter Long: Yesterday Below Lower -> Today Above Lower
    position_action[(status_shift['Above SMA']==False)&(status['Above SMA']==True)]='exitlong'       #Exit Long: Yesterday Below SMA -> Today Above SMA
    position_action[(status_shift['Above Upper']==True)&(status['Above Upper']==False)]='entershort' #Enter Short: Yesterday Above Upper -> Today Below Upper
    position_action[(status_shift['Above SMA']==True)&(status['Above SMA']==False)]='exitshort'      #Exit Short: Yesterday Above SMA -> Today Below SMA
    position_action[((BB[symbol]-BB['$SPX'])>0.5) & (corr > 0.7)] = 'entershort'
    position_action[((BB[symbol]-BB['$SPX'])<0) & (corr > 0.7)] = 'exitshort'
    # position_action[(BB['$SPX']<-0.25) & (corr > 0.7)] = 'exitlong'
    # position_action[(BB['$SPX']>0.25) & (corr > 0.7)] = 'enterlong'

    position_action = position_action.dropna()


    entered_posn = 0 #0 = false, 1= long, -1=short
    position_action = position_action.sort_index()
    position_action = position_action.groupby(position_action.index).first()
    drops = pd.DataFrame(index=position_action.index, columns = ['change']) #initialize the Orders Dataframe
    for index, row in position_action.iterrows():
        print(index)
        print(row)
        if entered_posn == 0:
            #calculate enters
            if (row[0] == 'enterlong'):
                entered_posn = 1
            elif (row[0] == 'entershort'):
                entered_posn = -1
            else: #exitlong or exitshort
                #position_action.drop(index)
                drops.loc[index] = 1
        else:
            if (row[0] != 'exitshort') and (entered_posn == -1):
                drops.loc[index] = 1
            elif (row[0] != 'exitlong') and (entered_posn == 1):
                drops.loc[index] = 1
            else: #enterlong or entershort
                #position_action.drop(index)
                entered_posn = 0
        print(entered_posn)
        print(drops.loc[index])

    drops = drops.fillna(0)
    position_action = position_action[drops['change']==0]

    orders = pd.DataFrame(index=position_action.index, columns=['Symbol', 'Order', 'Shares'])
    orders.index.name = 'Date'
    orders['Symbol'] = symbol
    orders['Shares'] = 100
    orders['Order'] = orders['Order'].fillna(position_action['Order'])
    orders['Order'] = orders['Order'].replace('entershort','SELL')
    orders['Order'] = orders['Order'].replace('enterlong','BUY')
    orders['Order'] = orders['Order'].replace('exitshort','BUY')
    orders['Order'] = orders['Order'].replace('exitlong','SELL')

    long_orders = position_action[position_action['Order'] == 'enterlong']
    short_orders = position_action[position_action['Order'] == 'entershort']
    exit_orders = position_action[position_action['Order'].isin(['exitlong', 'exitshort'])]

    orders.to_csv("./orders/orders.csv")

    # Plot the Data
    plot_data(data, long_orders, short_orders, exit_orders)
    return
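The pd.rolling_mean, pd.rolling_std and pd.rolling_corr module functions used throughout my_strat were deprecated in pandas 0.18 and removed in 0.23; the same statistics now come from the .rolling() accessor. A minimal sketch of the equivalents on synthetic data (names mirror the function above; the price series here is made up):

import numpy as np
import pandas as pd

# Sketch of the modern equivalents of the pd.rolling_* calls above,
# on a synthetic price series (all data here is illustrative)
dates = pd.date_range('2008-01-01', periods=500, freq='B')
prices = pd.DataFrame({'Price': np.random.randn(500).cumsum() + 100}, index=dates)
spx = pd.Series(np.random.randn(500).cumsum() + 1500, index=dates)

sma = prices.rolling(window=20).mean()               # was pd.rolling_mean(prices, 20)
std_dev = prices.rolling(window=20).std()            # was pd.rolling_std(prices, 20)
corr = prices['Price'].rolling(window=20).corr(spx)  # was pd.rolling_corr(..., window=20)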
Exemple #44
0
alpha_Car = alpha_Car.set_index('Month')
alpha_CAPM = alpha_CAPM.rename(columns=lambda x: str(x)[1:])
# beta_CAPM =beta_CAPM.rename(columns = lambda x : str(x)[1:])
alpha_FF5 = alpha_FF5.rename(columns=lambda x: str(x)[1:])
alpha_Car = alpha_Car.rename(columns=lambda x: str(x)[1:])

########################################## Data Statistics ###############################################
# Market Volatility using 1-year data
sigma_m = marketret.rolling(window=12).std().rename(
    columns={'Market_Ret': 'Market_Vol'})
# Stock Volatility using 1-year data
sigma = stockret.rolling(window=12).std()
# Correlation between stockret and marketret using 5-year data
corr = pd.DataFrame(index=stockret.index, columns=stockret.columns)
for column in stockret:
    corr[column] = pd.rolling_corr(stockret[column], marketret, window=60)

# Export intermediate calculations
with open('calc.pkl', 'wb') as f:  # pickle needs a binary-mode file handle
    pickle.dump([sigma_m, sigma], f)
# Getting back the objects:
# with open('calc.pkl', 'rb') as f:
#     sigma_m, sigma = pickle.load(f)

# Drop companies that never have data and dates on which no company has data
corr = corr.dropna(axis=0, how='all').dropna(axis=1, how='all')

alpha_CAPM = alpha_CAPM.dropna(axis=0, how='all').dropna(axis=1, how='all')
# beta_CAPM = beta_CAPM.dropna(axis=0, how='all').dropna(axis=1, how='all')
alpha_FF5 = alpha_FF5.dropna(axis=0, how='all').dropna(axis=1, how='all')
alpha_Car = alpha_Car.dropna(axis=0, how='all').dropna(axis=1, how='all')
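On a current pandas the column loop above is unnecessary: DataFrame.rolling().corr() accepts a Series and correlates it against every column at once. A one-line contextual sketch, assuming marketret holds the single column 'Market_Ret' as above:

# Vectorized alternative to the per-column pd.rolling_corr loop above
# (assumes marketret has the single column 'Market_Ret')
corr = stockret.rolling(window=60).corr(marketret['Market_Ret'])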
Exemple #45
0
 def calc_rolling_corr(self, reference, window=5):
     r = reference.pct_change()
     c = self.stock_raw['Adj Close'].pct_change()
     self.stock['rolling_corr'] = pd.rolling_corr(c, r, window)
     return self.stock
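Note that this method correlates percentage returns rather than raw prices; correlating trending price levels, as some of the earlier snippets do, tends to overstate co-movement. A sketch of the same computation on the modern rolling accessor, with hypothetical stand-alone Series in place of the class attributes:

import numpy as np
import pandas as pd

# Same computation on the modern accessor; `stock_close` and
# `benchmark_close` are hypothetical stand-ins for the class attributes
idx = pd.date_range('2020-01-01', periods=120, freq='B')
stock_close = pd.Series(np.random.randn(120).cumsum() + 50, index=idx)
benchmark_close = pd.Series(np.random.randn(120).cumsum() + 300, index=idx)

rolling_corr = stock_close.pct_change().rolling(window=5).corr(
    benchmark_close.pct_change())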
Exemple #46
0
 def calc_rolling_corr(self, reference, window=5):
     r = reference.pct_change()
     c = self.stock_raw['Adj Close'].pct_change()
     self.stock['rolling_corr'] = pd.rolling_corr(c, r, window)
     return self.stock
Exemple #47
0
 def corr(self, x, y, n):
     (x, y) = self._align_bivariate(x, y)
     return pd.rolling_corr(x, y, n)
Exemple #48
0
    def rolling_corr(self,
                     data_frame1,
                     periods,
                     data_frame2=None,
                     pairwise=False,
                     flatten_labels=True):
        """Calculates rolling correlation wrapping around pandas functions

        Parameters
        ----------
        data_frame1 : DataFrame
            contains time series to run correlations on
        periods : int
            window length for the rolling correlation
        data_frame2 : DataFrame (optional)
            contains time series to run correlations against
        pairwise : boolean
            compute pairwise correlations between all columns, rather than
            column-by-column against data_frame2
        flatten_labels : boolean
            flatten the resulting column labels into "A v B" strings

        Returns
        -------
        DataFrame
        """

        # pandas 0.17 moved the module-level rolling_* functions onto the
        # DataFrame.rolling() accessor, so branch on the installed version
        if pandas.__version__ < '0.17':
            if pairwise:
                panel = pandas.rolling_corr_pairwise(
                    data_frame1.join(data_frame2), periods)
            else:
                panel = pandas.rolling_corr(data_frame1, data_frame2, periods)
        else:
            panel = data_frame1.rolling(window=periods).corr(other=data_frame2,
                                                             pairwise=pairwise)

        try:
            # older pandas returns a Panel here; flatten it to a DataFrame
            df = panel.to_frame(filter_observations=False).transpose()
        except Exception:
            df = panel

        if flatten_labels:
            if pairwise:
                series1 = df.columns.get_level_values(0)
                series2 = df.columns.get_level_values(1)
                new_labels = []

                for i in range(len(series1)):
                    new_labels.append(series1[i] + " v " + series2[i])

            else:
                new_labels = []

                try:
                    series1 = data_frame1.columns
                except:
                    series1 = [data_frame1.name]

                series2 = data_frame2.columns

                for i in range(len(series1)):
                    for j in range(len(series2)):
                        new_labels.append(series1[i] + " v " + series2[j])

            df.columns = new_labels

        return df
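On a recent pandas the pairwise branch of this wrapper reduces to the rolling accessor, which returns a long DataFrame indexed by (date, column). A short sketch on made-up data, flattening the labels the same way the wrapper does:

import numpy as np
import pandas as pd

# Sketch of pairwise rolling correlation on a recent pandas
# (data and column names here are made up)
idx = pd.date_range('2020-01-01', periods=250, freq='B')
rets = pd.DataFrame(np.random.randn(250, 2), index=idx,
                    columns=['EURUSD', 'GBPUSD'])

pair = rets.rolling(window=20).corr(pairwise=True)  # rows: (date, column)
flat = pair.unstack()                               # one column per pair
flat.columns = [a + " v " + b for a, b in flat.columns]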
Exemple #49
0
import matplotlib.pyplot as plt
import pandas as pd

from convert_to_timeseries import convert_data_to_timeseries

# Input file containing data
input_file = 'data_timeseries.txt'

# Load data
data1 = convert_data_to_timeseries(input_file, 2)
data2 = convert_data_to_timeseries(input_file, 3)
dataframe = pd.DataFrame({'first': data1, 'second': data2})

# Print max and min
print('\nMaximum:\n', dataframe.max())
print('\nMinimum:\n', dataframe.min())

# Print mean
print('\nMean:\n', dataframe.mean())
print('\nMean row-wise:\n', dataframe.mean(1)[:10])

# Plot rolling mean
pd.rolling_mean(dataframe, window=24).plot()

# Print correlation coefficients
print('\nCorrelation coefficients:\n', dataframe.corr())

# Plot rolling correlation
plt.figure()
pd.rolling_corr(dataframe['first'], dataframe['second'], window=60).plot()

plt.show()
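One detail worth knowing about these rolling calls: they emit NaN until a full window of observations has accumulated, so the first 59 points of the 60-period correlation above are empty. min_periods shortens that warm-up; a contextual sketch on the same dataframe (the threshold of 12 is an arbitrary choice):

# Rolling stats are NaN until `window` observations accumulate;
# min_periods trims the warm-up (12 here is arbitrary)
rc = dataframe['first'].rolling(window=60, min_periods=12).corr(dataframe['second'])
rc.plot()
plt.show()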

Exemple #50
0
import matplotlib.pyplot as plt
import pandas as pd
import quandl

def HPI_benchmark():
    df = quandl.get('FMAC/HPI_USA', authtoken=api_key)  # api_key is defined elsewhere
    df.columns = ['United_States']
    df['United_States'] = ((df['United_States']-df['United_States'][0])/df['United_States'][0])*100
    return df



#grab_initial_state_data()

fig = plt.figure()
ax1 = plt.subplot2grid((2,1),(0,0))
ax2 = plt.subplot2grid((2,1),(1,0),sharex=ax1)
HPI_data = pd.read_pickle('fiddy_states3.pickle')
TX_AK_12corr = pd.rolling_corr(HPI_data['TX'],HPI_data['AK'],12)


HPI_data['TX'].plot(ax=ax1,label='TX HPI')
HPI_data['AK'].plot(ax=ax1,label='AK HPI')
ax1.legend(loc=4)

TX_AK_12corr.plot(ax=ax2,label='TX_AK_12corr')



plt.legend(loc=4)

plt.show()

Exemple #51
0
 def ts_operation(df1, df2, n):
     return pd.rolling_corr(df1, df2, n)
Exemple #52
0
df.head()

df['Kurt'] = round(df[['Open', 'High', 'Low', 'Adj Close']].kurt(axis=1), 4)
df.head()

# Standard error of the mean
df['Error'] = df[['Open', 'High', 'Low', 'Adj Close']].sem(axis=1)
df.head()

import talib as ta

# Creating Indicators
n = 5
df['RSI'] = ta.RSI(np.array(df['Adj Close'].shift(1)), timeperiod=n)
df['SMA'] = pd.rolling_mean(df['Adj Close'].shift(1), window=n)
df['Corr'] = pd.rolling_corr(df['SMA'], df['Adj Close'].shift(1), window=n)
df['SAR'] = ta.SAR(np.array(df['High'].shift(1)), np.array(df['Low'].shift(1)),
                   0.2, 0.2)

# Momentum Indicator Functions
df['ADX'] = ta.ADX(np.array(df['High'].shift(1)),
                   np.array(df['Low'].shift(1)),
                   np.array(df['Adj Close'].shift(1)),  # close series; 'Open' here was likely a slip
                   timeperiod=n)
df['ADXR'] = ta.ADXR(np.array(df['High'].shift(1)),
                     np.array(df['Low'].shift(1)),
                     np.array(df['Adj Close'].shift(1)),  # shifted for consistency with the other inputs
                     timeperiod=n)
# df['APO']=ta.APO(np.array(df['Adj Close'].shift(1), fastperiod=12, slowperiod=26, matype=0))
df['AROON_DOWN'], df['AROON_UP'] = ta.AROON(np.array(df['High'].shift(1)),
                                            np.array(df['Low'].shift(1)),