Esempio n. 1
0
 def getdata(self):
     """Compute correlation, distance and direction statistics and store them on ``self``.

     Reads ``self.data`` and ``self.index`` and writes these attributes:

     * ``ecor`` -- expanding correlation of ``self.data`` with every column
       of ``self.index``.
     * ``expcor5`` / ``expcor10`` / ``expcor15`` / ``expcor20`` /
       ``expcor30`` -- exponentially weighted correlations for centre of
       mass 5, 10, 15, 20 and 30 (``min_periods=20``); ``expcor`` is their
       column-wise concatenation.
     * ``cormax`` / ``cormin`` -- per-column maximum / minimum of each
       exponentially weighted frame, one column per centre of mass.
     * ``dist`` / ``cumdist`` -- absolute element-wise difference between
       ``self.index`` and ``self.data``, and its cumulative sum.
     * ``dir`` -- boolean frame, True where ``self.data >= 0`` agrees with
       ``self.index[col] >= 0``; ``pos`` / ``neg`` / ``pr`` are the number
       of agreements, disagreements and the agreement ratio per column.

     NOTE(review): every result is relabelled with ``self.index.columns``,
     which only works when each per-column result is one column wide, i.e.
     ``self.data`` presumably holds a single column -- confirm.
     """
     index_columns = self.index.columns
     decays = (5, 10, 15, 20, 30)

     # Sign agreement between the data and each index column.  Kept under
     # its own name: it used to be clobbered by the correlation loop below,
     # so ``self.dir`` ended up holding a correlation instead.
     data_non_negative = (self.data >= 0).values
     direction = pd.concat(
         [pd.DataFrame(data_non_negative
                       == (self.index[col] >= 0).values.reshape(-1, 1))
          for col in index_columns],
         axis=1)
     direction.columns = index_columns

     # Expanding and exponentially weighted correlations, one result per
     # index column and (for the weighted ones) per centre of mass.
     expanding = []
     weighted = dict((com, []) for com in decays)
     for col in index_columns:
         series = self.index[col]
         expanding.append(pd.expanding_corr(self.data, series, pairwise=True))
         for com in decays:
             weighted[com].append(
                 pd.ewmcorr(series, self.data, com=com, min_periods=20))

     ecor = pd.concat(expanding, axis=1)
     ecor.columns = index_columns
     self.ecor = ecor

     by_com = {}
     for com in decays:
         frame = pd.concat(weighted[com], axis=1)
         frame.columns = index_columns
         by_com[com] = frame
     self.expcor5 = by_com[5]
     self.expcor10 = by_com[10]
     self.expcor15 = by_com[15]
     self.expcor20 = by_com[20]
     self.expcor30 = by_com[30]

     # Absolute distance between index and data values.
     dist = pd.DataFrame(np.subtract(self.index.values, self.data.values)).abs()
     dist.columns = index_columns  # was ``s.index.columns``: ``s`` is not defined here
     self.dist = dist
     self.cumdist = dist.cumsum()

     # Direction hit statistics.
     self.dir = direction
     matches = direction.sum()
     observations = direction.count()
     self.pos = matches
     self.neg = observations - matches
     self.pr = matches / observations

     # Summary of the exponentially weighted correlations.
     ew_frames = [by_com[com] for com in decays]
     self.expcor = pd.concat(ew_frames, axis=1)
     cormax = pd.concat([frame.max() for frame in ew_frames], axis=1)
     cormin = pd.concat([frame.min() for frame in ew_frames], axis=1)
     cormax.columns = list(decays)
     cormin.columns = list(decays)
     self.cormax = cormax
     self.cormin = cormin
Esempio n. 2
0
def correlation_single_period(data_for_estimate, 
                              using_exponent=True, min_periods=20, ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate a single correlation matrix from one block of data.

    :param data_for_estimate: Data to get correlations from (its ``.index``
        and ``.corr`` are used)
    :param using_exponent: Use exponential weighting (``pd.ewmcorr``) rather
        than a plain ``.corr()``; passed through ``str2Bool``
    :param min_periods: Minimum periods handed to the estimator
    :param ew_lookback: Span for exponential weighting, before the
        duplicate-date adjustment
    :param floor_at_zero: Replace negative correlations with 0.0; bool or str

    :returns: the ``.values`` of the estimate (for the exponential case, the
        last entry along the first axis)
    """
    ## These may come from config as str
    using_exponent=str2Bool(using_exponent)
    ## A non-empty string such as "False" is truthy, so a string flag has to
    ## be converted too; other types are left untouched
    if isinstance(floor_at_zero, str):
        floor_at_zero=str2Bool(floor_at_zero)
            
    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex=data_for_estimate.index
        dlenadj=float(len(dindex))/len(set(list(dindex)))
        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat=pd.ewmcorr(data_for_estimate, span=int(ew_lookback*dlenadj), min_periods=min_periods)
        
        ## only want the final one
        corrmat=corrmat.values[-1]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat=data_for_estimate.corr(min_periods=min_periods)
        corrmat=corrmat.values

    if floor_at_zero:
        corrmat[corrmat<0]=0.0
    
    return corrmat
Esempio n. 3
0
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate a single correlation matrix from one block of data.

    :param data_for_estimate: Data to get correlations from (its ``.index``
        and ``.corr`` are used)
    :param using_exponent: Use exponential weighting (``pd.ewmcorr``) rather
        than a plain ``.corr()``; passed through ``str2Bool``
    :param min_periods: Minimum periods handed to the estimator
    :param ew_lookback: Span for exponential weighting, before the
        duplicate-date adjustment
    :param floor_at_zero: Replace negative correlations with 0.0; bool or str

    :returns: the ``.values`` of the estimate (for the exponential case, the
        last entry along the first axis)
    """
    ## These may come from config as str
    using_exponent = str2Bool(using_exponent)
    ## A non-empty string such as "False" is truthy, so a string flag has to
    ## be converted too; other types are left untouched
    if isinstance(floor_at_zero, str):
        floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))
        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        ## only want the final one
        corrmat = corrmat.values[-1]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
Esempio n. 4
0
def correlation_single_period(data_for_estimate,
                              using_exponent=True, min_periods=20, ew_lookback=250,
                              floor_at_zero=True):
    """
    Generate a single correlation matrix from a pd.DataFrame.

    Pooled data is expected to arrive already stacked into one frame; the
    duplicate dates this creates are compensated for by scaling the span.

    It's important that forward filling, or index / ffill / diff has been done before we begin

    also that we're on the right time frame, eg weekly if that's what we're doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool or str

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array


    """
    # These may come from config as str
    using_exponent = str2Bool(using_exponent)
    # floor_at_zero is documented as "bool or str": a non-empty string such
    # as "False" is truthy, so convert string flags; other types are left
    # untouched
    if isinstance(floor_at_zero, str):
        floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        # If we stack there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))
        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(
            data_for_estimate,
            span=int(ew_lookback * dlenadj),
            min_periods=min_periods)

        # only want the final one
        corrmat = corrmat.values[-1]
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def calc_ts_pairwise_correlation(data_pct,days=250):
    """
    Build a time series of ``calc_pairwise_correlation`` values.

    For every label in ``data_pct.index`` the exponentially weighted
    correlation matrix for that label is taken, rows and columns without any
    observation are dropped, and the remainder is reduced with
    ``calc_pairwise_correlation``.

    :param data_pct: frame the correlations are estimated from
    :param days: passed positionally as the second argument of
        ``pd.ewmcorr`` and as ``min_periods``
        (NOTE(review): legacy pandas appears to treat a numeric second
        argument as ``com`` rather than ``span`` -- confirm)

    :returns: pd.Series indexed by the labels of ``data_pct.index``
    """
    corrts = pd.ewmcorr(data_pct, days, min_periods=days)
    s = pd.Series()
    for i in data_pct.index:
        # ``i`` is always an index label, so use label-based ``.loc``
        # instead of the ambiguous, deprecated ``.ix``
        x = corrts.loc[i]
        # entries whose column has no valid correlation yet
        has_data = x.count() != 0
        # filter rows, transpose, filter again: drops those entries on both
        # axes of the matrix
        x = x[has_data].T[has_data]
        s[i] = calc_pairwise_correlation(x)
    return s
Esempio n. 6
0
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    Generate a single correlation matrix from a pd.DataFrame.

    Pooled data is expected to arrive already stacked into one frame; the
    duplicate dates this creates are compensated for by scaling the span.

    It's important that forward filling, or index / ffill / diff has been done before we begin

    also that we're on the right time frame, eg weekly if that's what we're doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool or str

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array


    """
    ## These may come from config as str
    using_exponent = str2Bool(using_exponent)
    ## floor_at_zero is documented as "bool or str": a non-empty string such
    ## as "False" is truthy, so convert string flags; other types are left
    ## untouched
    if isinstance(floor_at_zero, str):
        floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(list(dindex)))
        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        ## only want the final one
        corrmat = corrmat.values[-1]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods)
        corrmat = corrmat.values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
Esempio n. 7
0
def time_series_to_ewmf_matrix(subject_time_series,parcel_path,window_size,out_file):
    """
    Save exponentially weighted correlations between parcel-mean time series.

    The parcel image at ``parcel_path`` is loaded with nibabel; for every
    label ``1 .. max(parcel)`` the entries of ``subject_time_series`` selected
    by that label are averaged over axis 0.  The resulting series are
    correlated with ``pd.ewmcorr(span=window_size)`` and the result is
    written with ``np.save``.

    :param subject_time_series: array indexed by the boolean mask
        ``parcel == label`` (presumably voxels x time -- confirm)
    :param parcel_path: path of the parcellation image
    :param window_size: span of the exponential weighting
    :param out_file: target handed to ``np.save``

    :returns: None; the matrix is only written to ``out_file``
    """
    parcel = nib.load(parcel_path).get_data()
    # np.max returns a float for float-typed images, which range() rejects
    n_parcels = int(np.max(parcel))
    ts = dict()
    for i in range(n_parcels):
        # labels start at 1 (0 is skipped), frame columns start at 0
        ts[i] = np.mean(subject_time_series[parcel == i + 1], axis=0)
    ts = pd.DataFrame(ts)
    matrix = pd.ewmcorr(ts, span=window_size)
    np.save(out_file, np.array(matrix))
Esempio n. 8
0
def time_series_to_ewmf_matrix(subject_time_series, parcel_path, window_size,
                               out_file):
    """
    Save exponentially weighted correlations between parcel-mean time series.

    The parcel image at ``parcel_path`` is loaded with nibabel; for every
    label ``1 .. max(parcel)`` the entries of ``subject_time_series`` selected
    by that label are averaged over axis 0.  The resulting series are
    correlated with ``pd.ewmcorr(span=window_size)`` and the result is
    written with ``np.save``.

    :param subject_time_series: array indexed by the boolean mask
        ``parcel == label`` (presumably voxels x time -- confirm)
    :param parcel_path: path of the parcellation image
    :param window_size: span of the exponential weighting
    :param out_file: target handed to ``np.save``

    :returns: None; the matrix is only written to ``out_file``
    """
    parcel = nib.load(parcel_path).get_data()
    # np.max returns a float for float-typed images, which range() rejects
    n_parcels = int(np.max(parcel))
    ts = dict()
    for i in range(n_parcels):
        # labels start at 1 (0 is skipped), frame columns start at 0
        ts[i] = np.mean(subject_time_series[parcel == i + 1], axis=0)
    ts = pd.DataFrame(ts)
    matrix = pd.ewmcorr(ts, span=window_size)
    np.save(out_file, np.array(matrix))
Esempio n. 9
0
def correlation_single_period(data_for_estimate, 
                              using_exponent=True, min_periods=20, ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate one correlation matrix from ``data_for_estimate``.

    When ``str2Bool(using_exponent)`` is true the estimate comes from
    ``pd.ewmcorr`` and only the last entry of its ``.values`` is kept; the
    span is ``ew_lookback`` scaled by the ratio of index length to the number
    of distinct index labels.  Otherwise a plain ``.corr()`` is used.
    With a truthy ``floor_at_zero`` negative entries are set to 0.0.
    """
    if str2Bool(using_exponent):
        timestamps = data_for_estimate.index
        # > 1 when index labels repeat; stretches the span accordingly
        repeat_factor = float(len(timestamps)) / len(set(timestamps))
        weighted = pd.ewmcorr(data_for_estimate,
                              span=int(ew_lookback * repeat_factor),
                              min_periods=min_periods)
        matrix = weighted.values[-1]
    else:
        matrix = data_for_estimate.corr(min_periods=min_periods).values

    if not floor_at_zero:
        return matrix

    negative = matrix < 0
    matrix[negative] = 0.0
    return matrix
Esempio n. 10
0
def clusterCorrelation(ret, g):
    """
    Average the latest exponentially weighted correlations by group.

    The last slice of ``pd.ewmcorr(ret, span=32.33, adjust=False)`` is taken
    and reduced to a ``len(g)`` x ``len(g)`` frame:

    * off-diagonal ``(i, j)``: mean of all correlations between the members
      of ``g[i]`` and the members of ``g[j]`` (written symmetrically);
    * diagonal ``(i, i)``: mean of the correlations inside ``g[i]`` with the
      members' self-correlations excluded.

    Cells left NaN (for instance a group with a single member) become 1.

    :param ret: data handed to ``pd.ewmcorr``
    :param g: sequence of groups, each used as a ``.loc`` indexer on both
        axes of the correlation matrix (presumably lists of column labels of
        ``ret`` -- confirm)

    :returns: pd.DataFrame indexed 0 .. len(g) - 1 on both axes
    """
    c = pd.ewmcorr(ret, span=32.33, adjust=False).iloc[-1, :, :]
    n_groups = len(g)
    o = pd.DataFrame(0, index=range(0, n_groups), columns=range(0, n_groups))
    for i in range(0, n_groups):
        # Cross-group averages.  The diagonal is handled separately below;
        # the former i == j branch here was always overwritten.
        for j in range(i + 1, n_groups):
            c_corr = (c.loc[g[i], g[j]]).stack().mean()
            o.loc[i, j] = c_corr
            o.loc[j, i] = c_corr
        inGroup = c.loc[g[i], g[i]]
        # Blank out the self-correlations.  np.fill_diagonal replaces
        # ``values[[arange] * 2] = None``, whose list-as-tuple indexing newer
        # NumPy interprets as an index array (blanking whole rows).
        np.fill_diagonal(inGroup.values, np.nan)
        o.loc[i, i] = inGroup.stack().dropna().mean()
    o.fillna(1., inplace=True)
    return o
Esempio n. 11
0
def _calc_ewma_correlation(ret_df,
                           corr_halflife = 252,
                           corr_seed_period = 252,
                           lag = 1):
    '''
    Return the pairwise EWMA (exponentially weighted moving average)
    correlation of ``ret_df``.

    ``ret_df`` is shifted by ``lag`` rows first when ``lag`` is positive.
    ``corr_halflife`` is the half-life of the weighting and
    ``corr_seed_period`` is passed as ``min_periods``.
    '''
    lagged = ret_df.shift(lag) if lag > 0 else ret_df

    result = pd.ewmcorr(lagged,
                        halflife=corr_halflife,
                        min_periods=corr_seed_period,
                        pairwise=True)

    # pandas implicitly sorts these axes alphabetically; put both back into
    # the column order of the input
    for axis_name in ('major_axis', 'minor_axis'):
        result = result.reindex_axis(lagged.columns, axis=axis_name)

    return result
Esempio n. 12
0
}
# Download every index feed (tab separated) and keep its first data column,
# indexed by the parsed dates of the first file column.
data_index = pd.DataFrame()
for name, feed in indices.items():
    url = 'https://wholesale.banking.societegenerale.com/fileadmin/indices_feeds/' + feed
    # .iloc is the explicit positional selector; the deprecated .ix only
    # reached the first column through its positional fallback
    data_index[name] = pd.read_csv(url,
                                   sep='\t',
                                   index_col=0,
                                   parse_dates=[0],
                                   usecols=[0, 1]).iloc[:, 0]

data_pct = data_index.pct_change()

# Chart 1: cumulative sum of the percentage changes from 2019 onwards.
ax1 = data_pct['2019':].cumsum().ffill().plot(colormap='jet')
ax1.set_xlabel("")
ax1.get_figure().savefig('socgen.png')
plt.show()
plt.gcf().clear()

# Chart 2: exponentially weighted correlation of the CTA index with the
# S&P 500 series fetched from Quandl.
df = pd.DataFrame()
df['CTA'] = data_index.CTA
df['SP500'] = quandl.get('CHRIS/CME_SP1', authtoken=token).Last
df = df.dropna().pct_change()
# NOTE(review): the third positional argument of pd.ewmcorr is ``com``, so 20
# is a centre of mass, not a 20 day rolling window as the title says --
# confirm which one is intended.
ax2 = pd.ewmcorr(df.CTA, df['SP500'], 20)['2019':].plot(
    colormap='jet', title='20 Day Rolling Correlation: CTA index to S&P 500')
ax2.set_xlabel("")
ax2.get_figure().savefig('socgen_corr.png')

# Mail both saved charts.
e = Email(subject='Morning Update: Soc Gen Indices')
e.add_attachments(['socgen.png', 'socgen_corr.png'])
e.send()