def optPort_nco(cov, mu=None, maxNumClusters=None):
    """Compute portfolio weights with Nested Clustered Optimization (NCO).

    The routine works in four steps:
      1. cluster the correlation matrix derived from ``cov``;
      2. solve one allocation problem per cluster with ``allocate_cvo``;
      3. solve one allocation problem across clusters on the reduced
         covariance matrix, again with ``allocate_cvo``;
      4. combine the intra- and inter-cluster weights.

    The original author labels ``allocate_cvo`` as the long-only solver;
    ``mc.optPort`` was the long/short alternative used previously.

    :param cov: covariance matrix, anything ``pd.DataFrame`` accepts.
    :param mu: optional expected returns. It is indexed as ``mu[:, 0]``, so a
        2-D array with the values in its first column is expected. When
        ``None``, ``None`` is forwarded to ``allocate_cvo``.
    :param maxNumClusters: forwarded unchanged to ``oc.clusterKMeansBase``.
    :return: ``numpy`` array of shape ``(n_assets, 1)`` with the final weights.
    """
    cov = pd.DataFrame(cov)
    if mu is not None:
        mu = pd.Series(mu[:, 0])

    # Step 1: optimal partition of the correlation matrix into clusters.
    corr1 = mp.cov2corr(cov)
    corr1, clstrs, _ = oc.clusterKMeansBase(corr1, maxNumClusters, n_init=10)

    # Allocate as float (0.0, not 0): the frame is filled with float weights
    # below, and writing floats into integer columns relies on an implicit
    # upcast that recent pandas versions deprecate.
    w_intra_clusters = pd.DataFrame(0.0, index=cov.index, columns=clstrs.keys())

    # Step 2: allocation inside every cluster.
    for i in clstrs:
        members = clstrs[i]
        cov_cluster = cov.loc[members, members].values
        if mu is None:
            mu_cluster = None
        else:
            mu_cluster = mu.loc[members].values.reshape(-1, 1)
        w_intra_clusters.loc[members, i] = allocate_cvo(cov_cluster, mu_cluster).flatten()

    # Reduce the problem to one synthetic asset per cluster.
    cov_inter_cluster = w_intra_clusters.T.dot(np.dot(cov, w_intra_clusters))
    mu_inter_cluster = None if mu is None else w_intra_clusters.T.dot(mu)

    # Step 3: allocation across clusters on the reduced covariance matrix.
    w_inter_clusters = pd.Series(
        allocate_cvo(cov_inter_cluster, mu_inter_cluster).flatten(),
        index=cov_inter_cluster.index,
    )

    # Step 4: final allocation = intra-cluster weights scaled by the weight of
    # their cluster, summed over clusters, returned as a column vector.
    nco = w_intra_clusters.mul(w_inter_clusters, axis=1).sum(axis=1).values.reshape(-1, 1)
    return nco
def deNoiseCov(cov0, q, bWidth):
    """Return a de-noised covariance matrix built from ``cov0``.

    The covariance is converted to a correlation matrix, decomposed with
    ``mp.getPCA``, and the eigenvalue threshold from ``mp.findMaxEval`` decides
    how many factors are handed to ``mp.denoisedCorr`` (the original author
    describes this as the constant residual eigenvalue method). The cleaned
    correlation is then rescaled with the standard deviations of ``cov0``.

    :param cov0: covariance matrix to clean.
    :param q: forwarded to ``mp.findMaxEval``.
    :param bWidth: forwarded to ``mp.findMaxEval``.
    :return: whatever ``corr2cov`` returns for the cleaned correlation matrix.
    """
    raw_corr = mp.cov2corr(cov0)
    eig_vals, eig_vecs = mp.getPCA(raw_corr)

    # Only the threshold eigenvalue is needed; the second return value is unused.
    max_eval, _ = mp.findMaxEval(np.diag(eig_vals), q, bWidth)

    # Count the eigenvalues that sit above the threshold.
    ascending_evals = np.diag(eig_vals)[::-1]
    n_factors = eig_vals.shape[0] - ascending_evals.searchsorted(max_eval)

    clean_corr = mp.denoisedCorr(eig_vals, eig_vecs, n_factors)
    std_dev = np.diag(cov0) ** .5
    return corr2cov(clean_corr, std_dev)
def testNCO():
    """Smoke-test the NCO implementations on a tiny hand-made price matrix.

    Chapter 7 - apply the Nested Clustered Optimization (NCO) algorithm.
    The function builds a 6x5 price matrix, derives returns and a covariance
    matrix, runs the clustering helpers and the Markowitz / NCO allocators,
    and computes weighted returns and volatilities. It returns nothing.

    NOTE(review): another ``testNCO`` definition is visible later in this
    source; if both live in the same module the later one wins.
    """
    S_value = np.array([[1., 2, 3, 4, 5],
                        [1.1, 3, 2, 3, 5],
                        [1.2, 4, 1.3, 4, 5],
                        [1.3, 5, 1, 3, 5],
                        [1.4, 6, 1, 4, 5.5],
                        [1.5, 7, 1, 3, 5.5]])
    # Flatten the second instrument to a constant price with one final tick.
    S_value[:, 1] = 1
    S_value[5, 1] = 1.1

    S, _ = calculate_returns(S_value)
    _, instrument_returns = calculate_returns(S_value, percentageAsProduct=True)

    mu1 = None
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    n_assets = cov1_d.shape[0]

    # test baseClustering
    # Fixed: the correlation matrix is derived from cov1_d; the previous code
    # referenced an undefined name ``cov``.
    corr1 = mp.cov2corr(cov1_d)
    a, b, c = nco.NCO()._cluster_kmeans_base(pd.DataFrame(corr1))
    d, e, f = clusterKMeansBase(pd.DataFrame(corr1))
    # Sample outputs noted by the original author:
    # b={0: [2, 0], 1: [1], 2: [3, 4]}
    # e={0: [1, 2], 1: [3, 4], 2: [0]}

    min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()
    min_var_NCO = pc.optPort_nco(cov1_d, mu1, max(int(n_assets / 2), 2)).flatten()
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, max(int(n_assets / 2), 2)).flatten()

    # Recomputed before the second mlfinlab call, as in the original
    # (presumably to guard against in-place modification - TODO confirm).
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, int(cov1_d.shape[0] / 2)).flatten()

    expected_return_markowitz = [
        min_var_markowitz[i] * instrument_returns[i] for i in range(n_assets)
    ]
    e_m = sum(expected_return_markowitz)

    expected_return_NCO = [
        min_var_NCO[i] * instrument_returns[i] for i in range(n_assets)
    ]
    # Fixed: sum the NCO list, not the Markowitz list.
    e_NCO = sum(expected_return_NCO)

    vol = getVolatility(S_value)
    m_minVol = [min_var_markowitz[i] * vol[i] for i in range(n_assets)]
    NCO_minVol = [mlfinlab_NCO[i] * vol[i] for i in range(n_assets)]
def testNCO():
    """Smoke-test the NCO implementations on a tiny hand-made price matrix.

    The function builds a 6x5 price matrix, derives returns (checked against
    ``pandas`` ``pct_change``) and a covariance matrix, runs the clustering
    helpers and the Markowitz / NCO allocators, and computes weighted returns
    and volatilities. It returns nothing.
    """
    S_value = np.array([[1., 2, 3, 4, 5],
                        [1.1, 3, 2, 3, 5],
                        [1.2, 4, 1.3, 4, 5],
                        [1.3, 5, 1, 3, 5],
                        [1.4, 6, 1, 4, 5.5],
                        [1.5, 7, 1, 3, 5.5]])
    # Flatten the second instrument to a constant price with one final tick.
    S_value[:, 1] = 1
    S_value[5, 1] = 1.1

    S, _ = calculate_returns(S_value)
    _, instrument_returns = calculate_returns(S_value, percentageAsProduct=True)

    # The hand-rolled returns must agree with pandas' percentage change.
    np.testing.assert_almost_equal(
        S, pd.DataFrame(S_value).pct_change().dropna(how="all")
    )

    mu1 = None
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    n_assets = cov1_d.shape[0]

    # test baseClustering
    corr1 = mp.cov2corr(cov1_d)
    a, b, c = nco.NCO()._cluster_kmeans_base(pd.DataFrame(corr1))
    d, e, f = clusterKMeansBase(pd.DataFrame(corr1))
    # Sample outputs noted by the original author:
    # b={0: [2, 0], 1: [1], 2: [3, 4]}
    # e={0: [1, 2], 1: [3, 4], 2: [0]}

    min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()
    # TODO: compare min_var_markowitz with the mlfinlab implementation.

    min_var_NCO = pc.optPort_nco(cov1_d, mu1, max(int(n_assets / 2), 2)).flatten()
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, max(int(n_assets / 2), 2)).flatten()

    # Recomputed before the second mlfinlab call, as in the original
    # (presumably to guard against in-place modification - TODO confirm).
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, int(cov1_d.shape[0] / 2)).flatten()

    expected_return_markowitz = [
        min_var_markowitz[i] * instrument_returns[i] for i in range(n_assets)
    ]
    e_m = sum(expected_return_markowitz)

    expected_return_NCO = [
        min_var_NCO[i] * instrument_returns[i] for i in range(n_assets)
    ]
    # Fixed: sum the NCO list, not the Markowitz list.
    e_NCO = sum(expected_return_NCO)

    vol = getVolatility(S_value)
    m_minVol = [min_var_markowitz[i] * vol[i] for i in range(n_assets)]
    NCO_minVol = [mlfinlab_NCO[i] * vol[i] for i in range(n_assets)]
def randomBlockCorr(nCols, nBlocks, random_state=None, minBlockSize=1):
    """Form a random block correlation matrix with a noise component added.

    Two covariance matrices are drawn from ``getRndBlockCov`` with the same
    random generator: one with ``nBlocks`` blocks (``sigma=.5``) and one with a
    single block (``sigma=1.``) that acts as noise. Their sum is converted to a
    correlation matrix.

    :param nCols: number of columns, forwarded to ``getRndBlockCov``.
    :param nBlocks: number of blocks for the signal part.
    :param random_state: seed or generator handed to ``check_random_state``.
    :param minBlockSize: minimum block size, forwarded to ``getRndBlockCov``.
    :return: ``pd.DataFrame`` holding the correlation matrix.
    """
    generator = check_random_state(random_state)
    print("randomBlockCorr:" + str(minBlockSize))

    # Signal: the requested block structure.
    block_cov = getRndBlockCov(
        nCols, nBlocks, minBlockSize=minBlockSize, sigma=.5, random_state=generator
    )
    # Noise: one block spanning every column.
    noise_cov = getRndBlockCov(
        nCols, 1, minBlockSize=minBlockSize, sigma=1., random_state=generator
    )
    block_cov += noise_cov

    return pd.DataFrame(mp.cov2corr(block_cov))
# NCO weights for the covariance matrix prepared earlier.
min_var_NCO = pc.optPort_nco(
    cov1_d, mu1, int(cov1_d.shape[0] / 2)
).flatten()

# ---------------------------------------------------------------------------
# Real price data. Note: pnames = pnames[1:] - first element is obx.
# ---------------------------------------------------------------------------
T = 237
N = 235
# Synthetic alternative: x = np.random.normal(0, 1, size=(T, N))
S, pnames = get_OL_tickers_close(T, N)

# Locate missing values (result is not stored), then patch the one known gap
# with the previous row's value.
np.argwhere(np.isnan(S))
S[204, 109] = S[203, 109]

cov0 = np.cov(S, rowvar=False, ddof=1)
q = S.shape[0] / float(S.shape[1])  # T/N

# Eigen-decomposition of the correlation matrix and bandwidth search.
# Fixed-bandwidth alternative:
#   eMax0, var0 = mp.findMaxEval(np.diag(eVal0), q, bWidth=.01)
corr0 = mp.cov2corr(cov0)
eVal0, eVec0 = mp.getPCA(corr0)
bWidth = best_bandwidth.findOptimalBWidth(np.diag(eVal0))

min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()
min_var_NCO = pc.optPort_nco(
    cov1_d, mu1, int(cov1_d.shape[0] / 2)
).flatten()

# ---------------------------------------------------------------------------
# Test if bWidth found makes sense
# ---------------------------------------------------------------------------
pdf0 = mp.mpPDF(1., q=T / float(N), pts=N)
# Empirical pdf; fixed-bandwidth alternative:
#   pdf1 = mp.fitKDE(np.diag(eVal0), bWidth=0.1)
pdf1 = mp.fitKDE(np.diag(eVal0), bWidth=bWidth['bandwidth'])
fig = plt.figure()
# Condition number of the matrix: ratio of largest to smallest eigenvalue.
matrix_condition_number = max(eVal) / min(eVal)
print(matrix_condition_number)

fig, ax = plt.subplots(figsize=(13, 10))
sns.heatmap(corr1, cmap='viridis')
plt.show()

# code snippet 7.3 - NCO method. Step 1. Correlation matrix clustering
nBlocks, bSize, bCorr = 2, 2, .5
q = 10.0
np.random.seed(0)
mu0, cov0 = mc.formTrueMatrix(nBlocks, bSize, bCorr)
cols = cov0.columns
cov1 = mc.deNoiseCov(cov0, q, bWidth=.01)  # denoise cov
cov1 = pd.DataFrame(cov1, index=cols, columns=cols)
corr1 = mp.cov2corr(cov1)
# Fixed: cluster the correlation of the denoised covariance just computed.
# The previous code passed ``corr0``, which this section never defines, and
# discarded ``corr1``. The frame is labelled with ``cols`` so that the cluster
# members can be used to index ``cov1`` below.
corr1, clstrs, silh = oc.clusterKMeansBase(
    pd.DataFrame(corr1, index=cols, columns=cols)
)

# code snippet 7.4 - intracluster optimal allocations
# step 2. compute intracluster allocations using the denoised cov matrix
# Allocated as float (0.0) because float weights are written into it; filling
# an integer frame relies on an implicit upcast that recent pandas deprecates.
wIntra = pd.DataFrame(0.0, index=cov0.index, columns=clstrs.keys())
for i in clstrs:
    wIntra.loc[clstrs[i], i] = minVarPort(cov1.loc[clstrs[i], clstrs[i]]).flatten()
cov2 = wIntra.T.dot(np.dot(cov1, wIntra))  # reduced covariance matrix

# code snippet 7.5 - intercluster optimal allocations
# step 3. compute optimal intercluster allocations, using the reduced
# covariance matrix, which is close to a diagonal matrix, so the optimization
# problem is close to the ideal case rho = 0
wInter = pd.Series(minVarPort(cov2).flatten(), index=cov2.index)
wAll0 = wIntra.mul(wInter, axis=1).sum(axis=1).sort_index()