def optPort_nco(cov, mu=None, maxNumClusters=None):
    """Compute portfolio weights with Nested Clustered Optimization (NCO).

    Steps:
      1. Cluster the correlation matrix derived from ``cov``
         (``oc.clusterKMeansBase``).
      2. Solve the allocation problem inside each cluster (``allocate_cvo``).
      3. Solve the allocation problem across clusters on the reduced
         covariance matrix.
      4. Combine intra- and inter-cluster weights.

    Parameters
    ----------
    cov : array-like or pandas.DataFrame
        Square covariance matrix; wrapped in a ``DataFrame``.
    mu : array-like, optional
        Expected returns. A 2-D input contributes its first column; a 1-D
        input is used as is. ``None`` is forwarded to ``allocate_cvo``
        unchanged (presumably selecting the minimum-variance problem --
        confirm against ``allocate_cvo``).
    maxNumClusters : int, optional
        Forwarded to ``oc.clusterKMeansBase`` as its second positional
        argument.

    Returns
    -------
    numpy.ndarray
        Column vector of shape ``(n_assets, 1)`` holding the final weights,
        ordered like ``cov``'s index.
    """
    cov = pd.DataFrame(cov)
    if mu is not None:
        mu_values = np.asarray(mu)
        if mu_values.ndim > 1:
            mu_values = mu_values[:, 0]
        # Share cov's labels so the .loc lookups below also work when cov
        # arrives as a labelled DataFrame.
        mu = pd.Series(mu_values, index=cov.index)

    corr1 = mp.cov2corr(cov)

    # Optimal partition of clusters (step 1)
    _, clstrs, _ = oc.clusterKMeansBase(corr1, maxNumClusters, n_init=10)

    # Float fill value: the weights written below are floats, and silently
    # upcasting an integer column on assignment is deprecated in pandas
    # (PDEP-6).
    w_intra_clusters = pd.DataFrame(0.0, index=cov.index, columns=list(clstrs))
    for label, members in clstrs.items():
        cov_cluster = cov.loc[members, members].values
        if mu is None:
            mu_cluster = None
        else:
            mu_cluster = mu.loc[members].values.reshape(-1, 1)

        # Long only: convex optimization solution within the cluster (step 2).
        # The long/short alternative is mc.optPort(cov_cluster, mu_cluster).
        w_intra_clusters.loc[members, label] = allocate_cvo(cov_cluster, mu_cluster).flatten()

    # Reduced covariance matrix: one row/column per cluster.
    cov_inter_cluster = w_intra_clusters.T.dot(np.dot(cov, w_intra_clusters))
    mu_inter_cluster = None if mu is None else w_intra_clusters.T.dot(mu)

    # Long only: optimal allocations across the reduced covariance matrix (step 3).
    # The long/short alternative is mc.optPort(cov_inter_cluster, mu_inter_cluster).
    w_inter_clusters = pd.Series(
        allocate_cvo(cov_inter_cluster, mu_inter_cluster).flatten(),
        index=cov_inter_cluster.index,
    )

    # Final allocations: intra-cluster weights scaled by their cluster's
    # inter-cluster weight, summed per asset (step 4).
    final_weights = w_intra_clusters.mul(w_inter_clusters, axis=1).sum(axis=1)
    return final_weights.values.reshape(-1, 1)
def deNoiseCov(cov0, q, bWidth):
    """Return a denoised version of the covariance matrix ``cov0``.

    The covariance is converted to a correlation matrix, its eigenvalues at
    or below the cutoff returned by ``mp.findMaxEval`` are treated as noise,
    and the correlation is rebuilt with ``mp.denoisedCorr`` (constant
    residual eigenvalue method) before being rescaled to a covariance.

    Parameters
    ----------
    cov0 : 2-D array-like
        Covariance matrix to denoise.
    q : float
        Forwarded to ``mp.findMaxEval`` (presumably the T/N ratio of the
        sample -- confirm against ``mp.findMaxEval``).
    bWidth : float
        Forwarded to ``mp.findMaxEval`` (presumably the KDE bandwidth).

    Returns
    -------
    The value of ``corr2cov`` applied to the denoised correlation and the
    square roots of ``cov0``'s diagonal.
    """
    raw_corr = mp.cov2corr(cov0)
    eig_vals, eig_vecs = mp.getPCA(raw_corr)
    eig_diag = np.diag(eig_vals)

    # Only the eigenvalue cutoff is needed; the fitted variance is discarded.
    cutoff, _ = mp.findMaxEval(eig_diag, q, bWidth)

    # Count eigenvalues above the cutoff. searchsorted needs ascending input,
    # hence the reversal -- assumes mp.getPCA yields descending eigenvalues.
    n_noise = eig_diag[::-1].searchsorted(cutoff)
    n_facts = eig_vals.shape[0] - n_noise

    denoised_corr = mp.denoisedCorr(eig_vals, eig_vecs, n_facts)
    std = np.diag(cov0) ** .5
    return corr2cov(denoised_corr, std)
Example #3
0
def testNCO():
    """Exercise the NCO implementations on a small hand-made price matrix.

    Chapter 7 - apply the Nested Clustered Optimization (NCO) algorithm.

    Builds a 6x5 price array, derives returns and their covariance, runs the
    base clustering of both implementations, and computes minimum-variance
    weights with plain Markowitz (``mc.optPort``), the local NCO
    (``pc.optPort_nco``) and mlfinlab's NCO (``nco.NCO().allocate_nco``).
    Nothing is asserted or returned; the function only checks that the
    pipeline runs end to end.
    """
    S_value = np.array([[1., 2, 3, 4, 5], [1.1, 3, 2, 3, 5],
                        [1.2, 4, 1.3, 4, 5], [1.3, 5, 1, 3, 5],
                        [1.4, 6, 1, 4, 5.5], [1.5, 7, 1, 3, 5.5]])
    # Flatten the second instrument to a near-constant price series.
    S_value[:, 1] = 1
    S_value[5, 1] = 1.1

    S, _ = calculate_returns(S_value)
    _, instrument_returns = calculate_returns(S_value,
                                              percentageAsProduct=True)

    mu1 = None
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    n_assets = cov1_d.shape[0]

    # test baseClustering (the correlation must come from cov1_d; the
    # previously referenced name `cov` is not defined in this function)
    corr1 = mp.cov2corr(cov1_d)
    a, b, c = nco.NCO()._cluster_kmeans_base(pd.DataFrame(corr1))
    d, e, f = clusterKMeansBase(pd.DataFrame(corr1))
    #b={0: [2, 0], 1: [1], 2: [3, 4]}
    #e={0: [1, 2], 1: [3, 4], 2: [0]}

    max_clusters = max(int(n_assets / 2), 2)
    min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()
    min_var_NCO = pc.optPort_nco(cov1_d, mu1, max_clusters).flatten()
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, max_clusters).flatten()

    # Recompute the covariance and rerun mlfinlab without the lower bound of
    # two clusters.
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1,
                                          int(cov1_d.shape[0] / 2)).flatten()

    expected_return_markowitz = [
        min_var_markowitz[i] * instrument_returns[i]
        for i in range(cov1_d.shape[0])
    ]
    e_m = sum(expected_return_markowitz)
    expected_return_NCO = [
        min_var_NCO[i] * instrument_returns[i]
        for i in range(cov1_d.shape[0])
    ]
    # Sum the NCO contributions (previously summed the Markowitz list again).
    e_NCO = sum(expected_return_NCO)

    vol = getVolatility(S_value)
    m_minVol = [
        min_var_markowitz[i] * vol[i] for i in range(cov1_d.shape[0])
    ]
    NCO_minVol = [mlfinlab_NCO[i] * vol[i] for i in range(cov1_d.shape[0])]
def testNCO():
    """Compare Markowitz and NCO minimum-variance weights on toy prices.

    Builds a 6x5 price array, checks that ``calculate_returns`` agrees with
    pandas' ``pct_change``, then runs the base clustering and the
    minimum-variance allocation with ``mc.optPort``, ``pc.optPort_nco`` and
    mlfinlab's ``nco.NCO().allocate_nco``. Apart from the returns check,
    nothing is asserted or returned.
    """
    S_value = np.array([[1., 2, 3, 4, 5],
                        [1.1, 3, 2, 3, 5],
                        [1.2, 4, 1.3, 4, 5],
                        [1.3, 5, 1, 3, 5],
                        [1.4, 6, 1, 4, 5.5],
                        [1.5, 7, 1, 3, 5.5]])
    # Flatten the second instrument to a near-constant price series.
    S_value[:, 1] = 1
    S_value[5, 1] = 1.1

    S, _ = calculate_returns(S_value)
    _, instrument_returns = calculate_returns(S_value, percentageAsProduct=True)

    # The returns helper must agree with pandas' percentage change.
    np.testing.assert_almost_equal(S, pd.DataFrame(S_value).pct_change().dropna(how="all"))

    mu1 = None
    cov1_d = np.cov(S, rowvar=0, ddof=1)

    #test baseClustering
    corr1 = mp.cov2corr(cov1_d)
    a, b, c = nco.NCO()._cluster_kmeans_base(pd.DataFrame(corr1))
    d, e, f = clusterKMeansBase(pd.DataFrame(corr1))
    #b={0: [2, 0], 1: [1], 2: [3, 4]}
    #e={0: [1, 2], 1: [3, 4], 2: [0]}

    min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()

    # TODO: compare min_var_markowitz with the mlfinlab implementation.

    max_clusters = max(int(cov1_d.shape[0] / 2), 2)
    min_var_NCO = pc.optPort_nco(cov1_d, mu1, max_clusters).flatten()
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, max_clusters).flatten()

    # Recompute the covariance and rerun mlfinlab without the lower bound of
    # two clusters.
    cov1_d = np.cov(S, rowvar=0, ddof=1)
    mlfinlab_NCO = nco.NCO().allocate_nco(cov1_d, mu1, int(cov1_d.shape[0] / 2)).flatten()

    n_assets = cov1_d.shape[0]
    expected_return_markowitz = [min_var_markowitz[i] * instrument_returns[i] for i in range(n_assets)]
    e_m = sum(expected_return_markowitz)
    expected_return_NCO = [min_var_NCO[i] * instrument_returns[i] for i in range(n_assets)]
    # Sum the NCO contributions (previously summed the Markowitz list again).
    e_NCO = sum(expected_return_NCO)

    vol = getVolatility(S_value)
    m_minVol = [min_var_markowitz[i] * vol[i] for i in range(n_assets)]
    NCO_minVol = [mlfinlab_NCO[i] * vol[i] for i in range(n_assets)]
def randomBlockCorr(nCols, nBlocks, random_state=None, minBlockSize=1):
    """Build a random block correlation matrix with added noise.

    Two covariance matrices are drawn from ``getRndBlockCov`` using the same
    random generator: one with ``nBlocks`` blocks (``sigma=.5``) and one with
    a single block (``sigma=1.``) that acts as noise. Their sum is converted
    to a correlation matrix.

    Parameters
    ----------
    nCols : int
        Forwarded to ``getRndBlockCov`` as its first argument.
    nBlocks : int
        Number of blocks requested for the first draw.
    random_state : optional
        Seed or generator accepted by ``check_random_state``.
    minBlockSize : int, default 1
        Forwarded to both ``getRndBlockCov`` calls.

    Returns
    -------
    pandas.DataFrame
        The correlation matrix from ``mp.cov2corr`` wrapped in a DataFrame.
    """
    generator = check_random_state(random_state)

    print("randomBlockCorr:" + str(minBlockSize))

    # Draw order matters: both calls consume the same generator.
    block_cov = getRndBlockCov(
        nCols, nBlocks, minBlockSize=minBlockSize, sigma=.5, random_state=generator
    )
    noise_cov = getRndBlockCov(
        nCols, 1, minBlockSize=minBlockSize, sigma=1., random_state=generator
    )

    block_cov += noise_cov  # add noise
    return pd.DataFrame(mp.cov2corr(block_cov))
Example #6
0
    min_var_NCO = pc.optPort_nco(cov1_d, mu1,
                                 int(cov1_d.shape[0] / 2)).flatten()
    #note pnames = pnames[1:] - first element is obx

    ########
    T, N = 237, 235
    #x = np.random.normal(0, 1, size = (T, N))
    S, pnames = get_OL_tickers_close(T, N)
    np.argwhere(np.isnan(S))
    S[204, 109] = S[203, 109]

    cov0 = np.cov(S, rowvar=0, ddof=1)
    q = float(S.shape[0]) / float(S.shape[1])  #T/N
    #eMax0, var0 = mp.findMaxEval(np.diag(eVal0), q, bWidth=.01)

    corr0 = mp.cov2corr(cov0)
    eVal0, eVec0 = mp.getPCA(corr0)
    bWidth = best_bandwidth.findOptimalBWidth(np.diag(eVal0))

    min_var_markowitz = mc.optPort(cov1_d, mu1).flatten()
    min_var_NCO = pc.optPort_nco(cov1_d, mu1,
                                 int(cov1_d.shape[0] / 2)).flatten()

    ##################
    # Test if bWidth found makes sense
    pdf0 = mp.mpPDF(1., q=T / float(N), pts=N)
    pdf1 = mp.fitKDE(np.diag(eVal0),
                     bWidth=bWidth['bandwidth'])  #empirical pdf
    #pdf1 = mp.fitKDE(np.diag(eVal0), bWidth=0.1)

    fig = plt.figure()
    matrix_condition_number = max(eVal)/min(eVal)
    print(matrix_condition_number) 
    
    fig, ax = plt.subplots(figsize=(13,10))  
    sns.heatmap(corr1, cmap='viridis')
    plt.show()

    # code snippet 7.3 - NCO method. Step 1. Correlation matrix clustering
    nBlocks, bSize, bCorr = 2, 2, .5
    q = 10.0
    np.random.seed(0)
    mu0, cov0 = mc.formTrueMatrix(nBlocks, bSize, bCorr)
    cols = cov0.columns
    cov1 = mc.deNoiseCov(cov0, q, bWidth=.01) #denoise cov
    cov1 = pd.DataFrame(cov1, index=cols, columns=cols)
    corr1 = mp.cov2corr(cov1)
    corr1, clstrs, silh = oc.clusterKMeansBase(pd.DataFrame(corr0))
    
    # code snippet 7.4 - intracluster optimal allocations
    # step 2. compute intracluster allocations using the denoised cov matrix
    wIntra = pd.DataFrame(0, index=cov0.index, columns=clstrs.keys())
    for i in clstrs:
        wIntra.loc[clstrs[i], i] = minVarPort(cov1.loc[clstrs[i], clstrs[i]]).flatten()
        
    cov2 = wIntra.T.dot(np.dot(cov1, wIntra)) #reduced covariance matrix
    
    # code snippet 7.5 - intercluster optimal allocations
    # step 3. compute optimal intercluster allocations, using the reduced covariance matrix,
    # which is close to a diagonal matrix, so the optimization problem is close to the ideal case \rho = 0
    wInter = pd.Series(minVarPort(cov2).flatten(), index=cov2.index)
    wAll0 = wIntra.mul(wInter, axis=1).sum(axis=1).sort_index()