def getdata(self): a = [] for i in self.index.columns: a.append(pd.DataFrame((self.data>=0).values==(self.index[i]>=0).values.reshape(240,1))) a = pd.concat(a,axis=1) a.columns = self.index.columns storage = [] s5 = [] s10 = [] s15 = [] s20 = [] s30 = [] for i in self.index.columns: a = pd.expanding_corr(self.data,self.index[i],pairwise=True) a5 = pd.ewmcorr(self.index[i],self.data,com=5,min_periods=20) a10 = pd.ewmcorr(self.index[i],self.data,com=10,min_periods=20) a15 = pd.ewmcorr(self.index[i],self.data,com=15,min_periods=20) a20 = pd.ewmcorr(self.index[i],self.data,com=20,min_periods=20) a30 = pd.ewmcorr(self.index[i],self.data,com=30,min_periods=20) storage.append(a) s5.append(a5) s10.append(a10) s15.append(a15) s20.append(a20) s30.append(a30) test = pd.concat(storage,axis=1) t5 = pd.concat(s5,axis=1) t10 = pd.concat(s10,axis=1) t15 = pd.concat(s15,axis=1) t20 = pd.concat(s20,axis=1) t30 = pd.concat(s30,axis=1) test.columns = self.index.columns t5.columns = self.index.columns t10.columns = self.index.columns t15.columns = self.index.columns t20.columns = self.index.columns t30.columns = self.index.columns self.ecor = test self.expcor5 = t5 self.expcor10 = t10 self.expcor15 = t15 self.expcor20 = t20 self.expcor30 = t30 dist = pd.DataFrame(np.subtract(self.index.values,self.data.values)).abs() dist.columns = s.index.columns self.dist = dist self.cumdist = dist.cumsum() self.dir = a self.pos = self.dir.sum() self.neg = self.dir.count()-self.dir.sum() self.pr = self.dir.sum()/self.dir.count() self.expcor = pd.concat([self.expcor5,self.expcor10,self.expcor15,self.expcor20,self.expcor30],axis=1) b = pd.concat([self.expcor5.max(),self.expcor10.max(),self.expcor15.max(),self.expcor20.max(),self.expcor30.max()],axis=1) c = pd.concat([self.expcor5.min(),self.expcor10.min(),self.expcor15.min(),self.expcor20.min(),self.expcor30.min()],axis=1) b.columns = [5,10,15,20,30] c.columns = [5,10,15,20,30] self.cormax = b self.cormin = c
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate one correlation matrix from a frame of data

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Use exponential weighting (True) or a plain
        correlation over the data (False)
    :type using_exponent: bool or str

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param ew_lookback: Span, in periods, for the exponential weighting
    :type ew_lookback: int

    :param floor_at_zero: Replace negative correlations with zero
    :type floor_at_zero: bool or str

    :returns: correlation matrix as an np.array
    """

    ## These may come from config as str
    ## A raw truth test would treat the string "False" as True, so both
    ##   flags go through str2Bool (assumed to pass real bools through
    ##   unchanged, as it already receives the default True)
    using_exponent = str2Bool(using_exponent)
    floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(dindex))

        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        ## only want the final one
        corrmat = corrmat.values[-1]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods).values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate one correlation matrix from a frame of data

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Use exponential weighting (True) or a plain
        correlation over the data (False)
    :type using_exponent: bool or str

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param ew_lookback: Span, in periods, for the exponential weighting
    :type ew_lookback: int

    :param floor_at_zero: Replace negative correlations with zero
    :type floor_at_zero: bool or str

    :returns: correlation matrix as an np.array
    """

    ## These may come from config as str
    ## Without the conversion a non-empty string such as "False" would
    ##   switch flooring on; str2Bool is assumed to leave real bools alone
    ##   (it already receives the default True)
    using_exponent = str2Bool(using_exponent)
    floor_at_zero = str2Bool(floor_at_zero)

    if not using_exponent:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods).values
    else:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(dindex))

        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        ## only want the final one
        corrmat = corrmat.values[-1]

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    We generate a correlation from a pd.DataFrame, which could have been
    stacked up (pooled) so that it contains duplicate dates

    It's important that forward filling, or index / ffill / diff has been done
    before we begin

    also that we're on the right time frame, eg weekly if that's what we're
    doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool or str

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array
    """
    # These may come from config as str.
    # floor_at_zero is documented as "bool or str", so it needs the same
    # conversion as using_exponent: a plain truth test treats the string
    # "False" as True. str2Bool is assumed to pass real bools through
    # unchanged (it already receives the default True).
    using_exponent = str2Bool(using_exponent)
    floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        # If we stack there will be duplicate dates
        # So we massage the span so it's correct
        # This assumes the index is at least daily and on same timestamp
        # This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(dindex))

        # Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(
            data_for_estimate,
            span=int(ew_lookback * dlenadj),
            min_periods=min_periods)

        # only want the final one
        corrmat = corrmat.values[-1]
    else:
        # Use normal correlation
        # Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods).values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def calc_ts_pairwise_correlation(data_pct, days=250):
    """Build a series with one pairwise-correlation figure per row label.

    An exponentially weighted correlation is computed over ``data_pct`` with
    ``days`` passed positionally and as ``min_periods``.  For every label in
    ``data_pct.index`` the correlation matrix for that label is trimmed to
    the columns that have at least one non-null entry, then summarised by
    ``calc_pairwise_correlation``.

    NOTE(review): ``days`` is the second positional argument of
    ``pd.ewmcorr``; legacy pandas presumably treats a bare number there as
    ``com`` -- confirm against the installed version.

    :param data_pct: frame of returns, one column per asset
    :param days: decay parameter and minimum number of periods
    :returns: pd.Series indexed by the labels of ``data_pct.index``
    """
    corr_by_label = pd.ewmcorr(data_pct, days, min_periods=days)

    summary = pd.Series()
    for label in data_pct.index:
        matrix = corr_by_label.ix[label]
        # Columns with no data at all are dropped from both axes.
        populated = matrix.count() != 0
        trimmed = matrix[populated].T[populated]
        summary[label] = calc_pairwise_correlation(trimmed)

    return summary
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    We generate a correlation from a pd.DataFrame, which could have been
    stacked up (pooled) so that it contains duplicate dates

    It's important that forward filling, or index / ffill / diff has been done
    before we begin

    also that we're on the right time frame, eg weekly if that's what we're
    doing

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Should we use exponential weighting?
    :type using_exponent: bool or str

    :param ew_lookback: Lookback, in periods, for exp. weighting
    :type ew_lookback: int

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param floor_at_zero: remove negative correlations before proceeding
    :type floor_at_zero: bool or str

    :returns: 2-dim square np.array
    """
    ## These may come from config as str
    ## floor_at_zero is documented as "bool or str"; testing the raw value
    ##   would read the string "False" as True, so convert it the same way
    ##   as using_exponent (str2Bool is assumed to leave real bools alone,
    ##   as it already receives the default True)
    using_exponent = str2Bool(using_exponent)
    floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        ## If we stack there will be duplicate dates
        ## So we massage the span so it's correct
        ## This assumes the index is at least daily and on same timestamp
        ## This is an artifact of how we prepare the data
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(dindex))

        ## Usual use for IDM, FDM calculation when whole data set is used
        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        ## only want the final one
        corrmat = corrmat.values[-1]
    else:
        ## Use normal correlation
        ## Usual use for bootstrapping when only have sub sample
        corrmat = data_for_estimate.corr(min_periods=min_periods).values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def time_series_to_ewmf_matrix(subject_time_series,parcel_path,window_size,out_file):
    """
    Runs the pandas exponentially weighted correlation on parcel-averaged
    time series and saves the result.

    The parcel image at ``parcel_path`` is loaded with nibabel.  For each
    label from 1 up to the largest value in that image, the entries of
    ``subject_time_series`` selected by the label mask are averaged along
    axis 0.  The averages become the columns (keyed 0, 1, ...) of a
    DataFrame, ``pd.ewmcorr`` is applied with ``span=window_size`` and the
    result is written to ``out_file`` with ``np.save``.  Nothing is returned.
    """
    labels = nib.load(parcel_path).get_data()
    parcel_means = {
        idx: np.mean(subject_time_series[labels==idx+1],axis = 0)
        for idx in range(np.max(labels))
    }
    frame = pd.DataFrame(parcel_means)
    ewm_corr = pd.ewmcorr(frame,span=window_size)
    np.save(out_file,np.array(ewm_corr))
def time_series_to_ewmf_matrix(subject_time_series, parcel_path, window_size,
                               out_file):
    """
    Save the exponentially weighted correlation of parcel-mean time series.

    Steps, in order:
      1. load the parcel image found at ``parcel_path`` (nibabel);
      2. for every label 1 .. max(parcel image), average the entries of
         ``subject_time_series`` picked out by that label's mask (axis 0);
      3. put the averages in a DataFrame whose columns are keyed from 0;
      4. run ``pd.ewmcorr`` on it with ``span=window_size``;
      5. ``np.save`` the result, converted to an array, to ``out_file``.

    Returns None.
    """
    parcel_image = nib.load(parcel_path).get_data()
    label_count = np.max(parcel_image)

    averaged = dict()
    for column_key in range(label_count):
        # Labels in the image start at 1, column keys start at 0.
        mask = parcel_image == column_key + 1
        averaged[column_key] = np.mean(subject_time_series[mask], axis=0)

    correlations = pd.ewmcorr(pd.DataFrame(averaged), span=window_size)
    np.save(out_file, np.array(correlations))
def correlation_single_period(data_for_estimate,
                              using_exponent=True,
                              min_periods=20,
                              ew_lookback=250,
                              floor_at_zero=True):
    """
    Estimate one correlation matrix from a frame of data

    :param data_for_estimate: Data to get correlations from
    :type data_for_estimate: pd.DataFrame

    :param using_exponent: Use exponential weighting (True) or a plain
        correlation over the data (False)
    :type using_exponent: bool or str

    :param min_periods: Minimum periods before we get a correlation
    :type min_periods: int

    :param ew_lookback: Span, in periods, for the exponential weighting
    :type ew_lookback: int

    :param floor_at_zero: Replace negative correlations with zero
    :type floor_at_zero: bool or str

    :returns: correlation matrix as an np.array
    """
    # Normalise both flags the same way: a raw truth test on a string such
    # as "False" would be True. str2Bool is assumed to pass real bools
    # through unchanged (it already receives the default True).
    using_exponent = str2Bool(using_exponent)
    floor_at_zero = str2Bool(floor_at_zero)

    if using_exponent:
        # Scale the span by (rows / distinct index labels) so that repeated
        # index labels do not shorten the effective lookback.
        dindex = data_for_estimate.index
        dlenadj = float(len(dindex)) / len(set(dindex))

        corrmat = pd.ewmcorr(data_for_estimate,
                             span=int(ew_lookback * dlenadj),
                             min_periods=min_periods)

        # Keep only the last entry of the result.
        corrmat = corrmat.values[-1]
    else:
        corrmat = data_for_estimate.corr(min_periods=min_periods).values

    if floor_at_zero:
        corrmat[corrmat < 0] = 0.0

    return corrmat
def clusterCorrelation(ret, g):
    """Average an exponentially weighted correlation matrix by group.

    ``g`` is a sequence of groups; each ``g[i]`` is used as a label selector
    on both axes of the correlation matrix (presumably a list of column
    names of ``ret`` -- verify against the caller).

    Returns a ``len(g)`` x ``len(g)`` DataFrame labelled 0..len(g)-1:
    off-diagonal cell (i, j) is the mean of every correlation between group
    i and group j, diagonal cell (i, i) is the mean of the correlations
    inside group i excluding each member's correlation with itself.  Cells
    left as NaN are filled with 1.
    """
    # Last item along the first axis of the three-dimensional ewmcorr
    # result (assumes a legacy pandas Panel -- TODO confirm).
    c = pd.ewmcorr(ret, span=32.33, adjust=False).iloc[-1, :, :]
    o = pd.DataFrame(0, index=range(0, len(g)), columns=range(0, len(g)))
    for i in range(0, len(g)):
        # Only j >= i is visited; both (i, j) and (j, i) are written below.
        for j in range(i, len(g)):
            if (i == j):
                # Square block of a group with itself: keep the strictly
                # upper triangle and average its non-zero entries.
                corr_m = np.triu(c.loc[g[i], g[j]], k=1).flatten()
                c_corr = corr_m[corr_m != 0].mean()
            else:
                # Cross-group block: average every entry.
                c_corr = (c.loc[g[i], g[j]]).stack().mean()
            o.loc[i, j] = c_corr
            o.loc[j, i] = c_corr
        # Recompute the diagonal cell: blank out the block's own diagonal
        # and average what is left.  This overwrites the value stored for
        # (i, i) in the loop above.
        inGroup = c.loc[g[i], g[i]]
        inGroup.values[[np.arange(len(inGroup))] * 2] = None
        o.loc[i, i] = inGroup.stack().dropna().mean()
    # A mean over no values is NaN; such cells become 1.
    o.fillna(1., inplace=True)
    return o
def _calc_ewma_correlation(ret_df, corr_halflife = 252, corr_seed_period = 252, lag = 1): ''' Calculate EWMA (exponentially weighted moving average) correlation matrix ''' if lag > 0: ret_df = ret_df.shift(lag) # compute pairwise ewma correlation corr_panel = pd.ewmcorr(ret_df, halflife=corr_halflife, min_periods=corr_seed_period, pairwise=True) # reindex axis to maintain order of columns as pandas implicity sorts axes alphabetically corr_panel = corr_panel.reindex_axis(ret_df.columns, axis='major_axis') corr_panel = corr_panel.reindex_axis(ret_df.columns, axis='minor_axis') return corr_panel
}

# Download one series per entry of `indices`: each value is appended to the
# feed URL, the file is read as tab-separated text using its first two
# columns, the first column is parsed as dates and becomes the index, and
# the remaining column is stored under the entry's key.
data_index = pd.DataFrame()
for i in indices.keys():
    file = 'https://wholesale.banking.societegenerale.com/fileadmin/indices_feeds/' + indices[i]
    data_index[i] = pd.read_csv(file, sep='\t', index_col=0, parse_dates=[0], usecols=[0, 1]).ix[:, 0]

# Chart 1: cumulative sum of the percentage changes from '2019' onwards,
# saved to socgen.png and also shown on screen.
data_pct = data_index.pct_change()
ax1 = data_pct['2019':].cumsum().ffill().plot(colormap='jet')
ax1.set_xlabel("")
ax1.get_figure().savefig('socgen.png')
plt.show()
plt.gcf().clear()

# Chart 2: correlation between the CTA column and the `Last` field of the
# Quandl series CHRIS/CME_SP1, computed on percentage changes of the rows
# where both are present. `token` is defined outside this excerpt.
df = pd.DataFrame()
df['CTA'] = data_index.CTA
df['SP500'] = quandl.get('CHRIS/CME_SP1', authtoken=token).Last
df = df.dropna().pct_change()
# NOTE(review): the title says "20 Day Rolling", but ewmcorr is
# exponentially weighted and the positional 20 is presumably its `com`
# argument -- confirm the intended window.
ax2 = pd.ewmcorr(df.CTA, df['SP500'], 20)['2019':].plot(
    colormap='jet',
    title='20 Day Rolling Correlation: CTA index to S&P 500')
ax2.set_xlabel("")
ax2.get_figure().savefig('socgen_corr.png')

# Mail both saved charts.
e = Email(subject='Morning Update: Soc Gen Indices')
e.add_attachments(['socgen.png', 'socgen_corr.png'])
e.send()