    def test_restart(self):

        A = np.array([[1, 1, 0],
                      [1, 1, 1],
                      [0, 1, 1]])

        # Do 3 iterations on A and gather the result
        try:
            Y = nearcorr(A, max_iterations=3)
        except nearest_correlation.ExceededMaxIterationsError as e:
            result3 = np.copy(e.matrix)

        # Do 1 iteration on A
        try:
            X = nearcorr(A, max_iterations=1)
        except nearest_correlation.ExceededMaxIterationsError as e:
            restart = e

        # restart from previous result and do another iteration
        try:
            X = nearcorr(restart, max_iterations=1)
        except nearest_correlation.ExceededMaxIterationsError as e:
            restart = e

        # restart from previous result and do another iteration
        try:
            X = nearcorr(restart, max_iterations=1)
        except nearest_correlation.ExceededMaxIterationsError as e:
            result1 = e.matrix

        self.assertTrue(np.all(result1 == result3))
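
The test above exercises the restart facility of the nearest_correlation package: with except_on_too_many_iterations=True (the default), nearcorr raises ExceededMaxIterationsError, and the caught exception object can be passed back to nearcorr to resume from where the iteration stopped. A minimal sketch of that pattern, assuming the same nearcorr API the tests use:

import numpy as np
from nearest_correlation import nearcorr, ExceededMaxIterationsError

A = np.array([[1, 1, 0],
              [1, 1, 1],
              [0, 1, 1]])
try:
    X = nearcorr(A, max_iterations=1)
except ExceededMaxIterationsError as e:
    # Hand the exception object back to nearcorr to continue iterating.
    X = nearcorr(e, max_iterations=100)
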
Example #3
import random

import numpy as np
from nearest_correlation import nearcorr


def generate_cov_mat(size):
    """Generates a random size x size correlation matrix.

    Parameters
    ----------
    size : int
        Dimension of the square correlation matrix

    Returns
    -------
    A : numpy.ndarray
        Correlation matrix
    """

    # Build a random symmetric matrix with unit diagonal; it is generally not
    # positive semidefinite, so it is projected onto the nearest correlation
    # matrix below.
    P = np.zeros((size, size))
    for k in range(size):
        for i in range(size):
            if k == i:
                P[k][i] = 1
            elif k < i:
                P[k][i] = random.uniform(-1, 1)
            else:
                P[k][i] = P[i][k]
    A = nearcorr(P,
                 tol=[],
                 flag=0, 
                 max_iterations=50000,
                 n_pos_eig=0,
                 weights=None,
                 verbose=False,
                 except_on_too_many_iterations=True)
    return A
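
A quick sanity check of the helper above (a minimal sketch; it assumes generate_cov_mat and numpy are in scope): the result should be symmetric with a unit diagonal and no eigenvalues below round-off.

import numpy as np

A = generate_cov_mat(5)
assert np.allclose(A, A.T)                  # symmetric
assert np.allclose(np.diag(A), 1.0)         # unit diagonal
assert np.linalg.eigvalsh(A).min() > -1e-8  # positive semidefinite
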
    def test_HighamExample2002(self):

        A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])

        X = nearcorr(A)

        expected_result = np.array([[1., 0.76068985, 0.15729811],
                                    [0.76068985, 1., 0.76068985],
                                    [0.15729811, 0.76068985, 1.]])

        self.assertTrue((np.abs((X - expected_result)) < 1e-8).all())
    def test_Weights(self):
        A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])

        weights = np.array([1, 2, 3])

        X = nearcorr(A, weights=weights)

        expected_result = np.array([[1., 0.66774961, 0.16723692],
                                    [0.66774961, 1., 0.84557496],
                                    [0.16723692, 0.84557496, 1.]])

        self.assertTrue((np.abs((X - expected_result)) < 1e-8).all())
    def test_NAGExample(self):
        A = np.array([[2, -1, 0, 0], [-1, 2, -1, 0], [0, -1, 2, -1],
                      [0, 0, -1, 2]])

        X = nearcorr(A)

        expected_result = np.array([[1., -0.8084125, 0.1915875, 0.10677505],
                                    [-0.8084125, 1., -0.65623269, 0.1915875],
                                    [0.1915875, -0.65623269, 1., -0.8084125],
                                    [0.10677505, 0.1915875, -0.8084125, 1.]])

        self.assertTrue((np.abs((X - expected_result)) < 1e-8).all())
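
The expected values in these tests come from Example 2 of Higham (2002), "Computing the nearest correlation matrix - a problem from finance", and from the NAG library documentation. Independent of the exact values, the output should always satisfy the correlation-matrix properties; a minimal check using the same API:

import numpy as np
from nearest_correlation import nearcorr

A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])
X = nearcorr(A)
assert np.allclose(X, X.T)                    # symmetric
assert np.allclose(np.diag(X), 1.0)           # unit diagonal
assert np.linalg.eigvalsh(X).min() > -1e-12   # positive semidefinite
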
    def test_ExceededMaxIterationsFalse(self):
        A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])

        X = nearcorr(A, max_iterations=10, except_on_too_many_iterations=False)
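
With except_on_too_many_iterations=False, nearcorr returns its current iterate instead of raising, so the call above yields an approximate answer even if 10 iterations have not reached the default tolerance. A minimal comparison against a fully converged run (same API as above):

import numpy as np
from nearest_correlation import nearcorr

A = np.array([[1, 1, 0], [1, 1, 1], [0, 1, 1]])
X10 = nearcorr(A, max_iterations=10, except_on_too_many_iterations=False)
X = nearcorr(A)  # run to convergence
print(np.linalg.norm(X10 - X, 'fro'))  # small, but not necessarily zero
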
Example #11
def re():
    # Assumes module-level imports: numpy as np, a linalg module (numpy or
    # scipy), and the project-local eigen model and delete_fold helper used
    # further down.

    #tm = np.load("TM_score_matrix.npy")
    #tm = np.loadtxt("../Datasets/Fold_representation/TM_score/Gram_Matrix/TM_matrix/TM_matrix.fa")
    tm = np.load("TM_matrix_2_pdbs-noTER_symmetric.npy")
    print(tm.shape)

    #tm = tm + np.identity(len(tm))*2.50

    model = eigen()

    tm_flat = np.reshape(tm, (-1, ))
    print("original:", np.mean(tm_flat), np.std(tm_flat))
    #plot_hist(tm_flat, "original")

    from nearest_correlation import nearcorr

    # Project tm onto the nearest correlation matrix once, then report how far
    # the original matrix was from it.
    tm_nearest = nearcorr(tm, max_iterations=2000, tol=[1E-5])
    print("Frobenius norm:", np.linalg.norm(tm - tm_nearest, 'fro'))

    tm = tm_nearest

    #tm = tm + np.identity(len(tm))*0.001
    #print ("w", np.mean(w), np.std(w), np.min(w))

    w, v = model.fit(tm)
    print("w", np.mean(w), np.std(w), np.min(w))
    tm_flat1 = np.reshape(tm, (-1, ))
    print("before_centralized", np.mean(tm_flat1), np.std(tm_flat1))
    #plot_hist(tm_flat1, "before_centralized")
    print(sum(w[0:20]) / sum(w))

    tm = model.center(tm, tm)
    #tm = tm + np.identity(len(tm))*0.000001
    tm = tm + np.identity(len(tm)) * 0.001
    tm_flat2 = np.reshape(tm, (-1, ))
    print("after centerlized", np.mean(tm_flat2), np.std(tm_flat2))
    #plot_hist(tm_flat, tm_flat1, tm_flat2, "original", "before_centralized", "after_centralized")
    #plot_relation(tm_flat, tm_flat1, tm_flat2, "original", "before_centralized", "after_centralized")
    w, v = model.fit(tm)
    print("w", np.mean(w), np.std(w), np.min(w))
    print(sum(w[0:20]) / sum(w))
    '''	
	sum_ne=0
	for i in range(len(tm)):
		if(w[i]<0):
			w[i]=0.001
			sum_ne+=1
	#print (sum_ne, np.linalg.norm(v[0]), np.linalg.norm(v[33]))
	
	tm_flat = np.reshape(tm, (-1,))
	print ("previous", np.mean(tm_flat), np.std(tm_flat))

	tm = np.zeros((len(tm), len(tm)))

	for i in range(len(tm)):
		ttt=np.reshape(v[i], (1, len(tm)))
		y = np.multiply(ttt.T, ttt*w[i])
		tm+=y
	# geting the new tm matrix after truncation
	tm = model.center(tm,tm)
		
	w,v = model.fit(tm)


	tm_flat = np.reshape(tm, (-1,))
	print ("now", np.mean(tm_flat), np.std(tm_flat))
	'''
    np.save("../Results/eigenvalues", np.array(w))
    np.save("../Results/eigenvectors", np.array(v))
    #w=np.load("Results/eigenvalues.npy")
    #v=np.load("Results/eigenvectors.npy")
    print(sum(w[0:20]) / sum(w))
    print("min:", min(w))

    import matplotlib.pyplot as plt
    plt.figure()

    z1 = np.linspace(-500, 20)
    z = np.linspace(0, 1.1)
    x = np.arange(0, 1232, 1)
    for i in range(len(x)):
        plt.scatter(x[i], np.sum(w[0:(i + 1)]) / np.sum(w), color='blue', s=4)
    plt.plot(np.zeros(z.shape) + 20, z, linestyle='--', color='black')
    plt.plot(z1,
             np.zeros(z1.shape) + np.sum(w[0:20]) / np.sum(w),
             linestyle='--',
             color='black')

    plt.xlabel("Index", fontsize=12)
    plt.ylabel("Cumulative explained Variance", fontsize=12)
    plt.ylim([0, 1.1])
    plt.xlim([-100, 1400])
    plt.xticks([0, 200, 400, 600, 800, 1000, 1200],
               (0, 200, 400, 600, 800, 1000, 1200))
    plt.yticks([0.0, 0.20, 0.4, 0.6, 0.8, 1], (0.0, 0.20, 0.4, 0.6, 0.8, 1))
    plt.savefig("../Results/explained_variance.eps", format='eps')
    plt.savefig("../Results/explained_variance.png", format='png')
    plt.show()
    plt.close()

    coor = []
    dis = np.zeros((len(tm), len(tm)))

    for i in range(len(tm)):

        temp = []
        temp1 = []
        #temp.append(fold[i])

        for j in range(len(tm)):
            temp.append(str("%7.4f" % (np.dot(tm[i], v[j]) / np.sqrt(w[j]))))
            temp1.append(np.dot(tm[i], v[j]) / np.sqrt(w[j]))

        #top20.append(temp)
        coor.append(temp1)

    folds_name = np.loadtxt("folds_name", dtype='str')
    with open("../Results/folds_coordinate", "w") as f:
        for i in range(len(tm)):
            f.write(folds_name[i])
            f.write(" ")
            for element in coor[i][0:20]:
                f.write("%6.3f " % (element))
            f.write("\n")

    # save coordinates ---------------start

    basis = []

    for i in range(20):
        basis.append(v[i] / np.sqrt(w[i]))

    np.savetxt("../Results/folds_basis", basis, fmt='%s')

    # save coordinates------------------end

    for i in range(20):
        print(np.var(coor, axis=0)[i], w[i] / 1232.)

    #coor=np.loadtxt("Results/folds_coordinate")
    from sklearn.cluster import KMeans

    def fold_center_var(ncluster):

        kmeans = KMeans(n_clusters=ncluster, random_state=0).fit(coor)
        coor100 = kmeans.cluster_centers_  # coor100 shape (ncluster, 1232)

        from sklearn.decomposition import PCA  # assumed source of PCA
        pca = PCA()
        pca.fit(coor100)

        # coor100 holds the cluster-head coordinates

        # we only need to consider two spaces: the 1232-d space and the
        # n_cluster-dimensional subspace; project the original first 20 basis
        # vectors ([1,0,...], [0,1,...] in 1232-d space) into that subspace
        pbasis = pca.components_  # pbasis (n_cluster, 1232)

        coor20pbasis = []  # shape(20, n_cluster)
        for i in range(20):
            obasis = np.zeros((1232, ))
            obasis[i] = 1.

            cw = []
            for j in range(len(pbasis)):
                cw.append(
                    np.dot(obasis, pbasis[j]) / np.linalg.norm(pbasis[j]))

            coor20pbasis.append(cw)

        # calculate the variance of the n_cluster cluster heads along these
        # 20 basis vectors in the n_cluster-d space

        # 1. first compute the cluster-head coordinates in n_cluster-d

        coorncluster = []  # shape(n_cluster, n_cluster)
        for i in range(ncluster):
            cw = []
            for j in range(ncluster):
                cw.append(
                    np.dot(coor100[i], pbasis[j]) / np.linalg.norm(pbasis[j]))
            coorncluster.append(cw)

        # then project those points along these 20 dimensional vectors.
        numerator = 0.
        for i in range(20):
            cw = []
            for j in range(ncluster):
                cw.append(
                    np.dot(coor20pbasis[i], coorncluster[j]) /
                    np.linalg.norm(coor20pbasis[i]))

            numerator += np.var(cw)
        denominator = sum(pca.explained_variance_)
        print(numerator / denominator)
        return numerator / denominator

    x = [20, 30, 40, 60, 80, 100, 120, 140, 160, 180, 200]
    y = []
    for xx in x:
        y.append(fold_center_var(xx))
    import matplotlib.pyplot as plt
    plt.plot(x, y)
    plt.yticks(fontsize=14)
    plt.ylim([0, 1.1])
    plt.xlim([0, 200])
    plt.xticks(x, x)
    plt.xlabel("#clusters", fontsize=14)
    plt.ylabel("Cumulative explained Variance", fontsize=12)
    plt.savefig("../Results/explained_variance_after_clustering.eps",
                format='eps')
    plt.savefig("../Results/explained_variance_after_clustering.png",
                format='png')
    plt.show()

    exit(0)
    '''
	exit(0)


	z1=np.linspace(-100,20)

	z=np.linspace(0,1)
	plt.plot(np.zeros(z.shape)+20, z , linestyle='--', color='black')

	plt.plot(z1, np.zeros(z1.shape)+np.sum(w[0:20])/np.sum(w), linestyle='--', color='grey')
#	plt.text(22, np.sum(w[0:20])/np.sum(w), "[%.1f, 20]" %())
	for i in range(len(x)):
		plt.scatter(x[i], np.sum(w[0:(i+1)])/np.sum(w), color='blue' , s=4)

	print np.sum(w[0:20])/np.sum(w)
	#plt.xticks(x, (1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20))
	plt.xlabel("Index", fontsize=12)
	plt.ylabel("Cumulative explained Variance", fontsize=12)
	plt.ylim([0,1])
	plt.xlim([-100,1400])
	plt.xticks([0,  200, 400, 600, 800, 1000, 1200],(0, 200, 400, 600, 800, 1000, 1200))
	plt.yticks([0.0, 0.20, 0.4, 0.6, 0.8, 1], (0.0, 0.20,0.4,0.6,0.8,1))
	plt.savefig("curve.eps", format='eps')
	plt.show()
	plt.close()
	exit(0)
	'''
    classs = []
    fold = []
    top20 = []
    for lines in open("../represent_file"):
        classs.append(lines[5])

        for b in range(8, 12):
            if (lines[b] == '.'):
                fold.append(lines[5:b])
                break

#	print fold, len(fold)

    coor = []
    dis = np.zeros((len(tm), len(tm)))

    for i in range(len(tm)):

        temp = []
        temp1 = []
        temp.append(fold[i])

        for j in range(500):
            temp.append(str("%7.4f" % (np.dot(tm[i], v[j]) / np.sqrt(w[j]))))
            temp1.append(np.dot(tm[i], v[j]) / np.sqrt(w[j]))

        top20.append(temp)
        coor.append(temp1)

    from sklearn.cluster import KMeans
    kmeans = KMeans(n_clusters=100, random_state=0).fit(coor)
    var = np.var(np.array(kmeans.cluster_centers_), axis=0)
    print(np.sum(var[0:20]) / np.sum(var[0:100]))
    #	for i in range(1232):
    #		print ("%.2f %.3f %.3f" % (w[i], w[i]/len(w), np.var(np.array(coor).T[i]) ))

    exit(0)

    np.savetxt("folds_coordinate", top20, fmt='%s')

    #basis
    basis = []

    for i in range(20):
        basis.append(v[i] / np.sqrt(w[i]))

    np.savetxt("folds_basis", basis, fmt='%s')

    for i in range(len(w)):
        print(w[i])

    minm = 1.0
    '''
	for i in range(len(tm)):
		for j in range(len(tm)):
			dis[i][j]=np.linalg.norm(np.subtract(coor[i],coor[j]))
			if(i!=j and dis[i][j]<minm):
				minm=dis[i][j]
			print ("%.4f " %(dis[i][j])),

		print ('\n')
'''

    for i in range(20):
        print("%.2f %.3f %.3f" %
              (w[i], w[i] / len(w), np.var(np.array(coor).T[i])))

    exit(0)
    #----------------1/11/2019----------------------------

    for threshold in np.linspace(0.3, 1, 8):

        tm_new = delete_fold(tm)

        #print len(tm_new), len(tm_new[0])

        w, v = linalg.eig(tm_new)
        #print np.min(w)

        if (np.min(w) < 0):
            tm_new = np.add(tm_new, -np.min(w) * np.identity(len(tm_new)))

        w, v = linalg.eig(tm_new)
        v = np.transpose(v)
        #print np.min(w)

        for i in range(len(w)):
            for j in range(i + 1, len(w)):
                if (w[i] < w[j]):
                    temp = w[i]
                    w[i] = w[j]
                    w[j] = temp
                    temp = v[i]
                    v[i] = v[j]
                    v[j] = temp

        wnorm = w / np.sum(w)

        for i in range(len(tm_new)):
            if (np.sum(wnorm[0:i + 1]) > 0.5):
                print("%.3f	%.0f	%.0f\n" % (threhold, i, len(tm_new))),
                break

    import matplotlib.pyplot as plt
    from mpl_toolkits.mplot3d import axes3d

    plt.figure()

    x = np.arange(0, 20, 1)

    for i in range(50):
        print("%d	 %.4f	%.4f" % (i + 1, wnorm[i], np.sum(wnorm[0:i + 1])))

    exit(0)

    for i in range(len(x)):
        plt.bar(x[i], w[i], color='blue', width=0.7)

    #plt.savefig("top20eigv.png", format='png')
    plt.close()

    x1 = []
    x2 = []
    x3 = []

    top20 = []
    for i in range(len(tm)):
        x1.append(np.dot(tm[i], v[0]) / np.sqrt(w[0]))
        x2.append(np.dot(tm[i], v[1]) / np.sqrt(w[1]))
        x3.append(np.dot(tm[i], v[2]) / np.sqrt(w[2]))

        #print x1[-1],x2[-1],x3[-1], v[0][i]*np.sqrt(w[0]), v[1][i]*np.sqrt(w[1]), v[2][i]*np.sqrt(w[2])

    classs = []
    fold = []
    for lines in open("../represent_file"):
        classs.append(lines[5])

        for b in range(8, 12):
            if (lines[b] == '.'):
                fold.append(lines[5:b])
                break


#	print fold, len(fold)

    for i in range(len(tm)):

        temp = []

        #temp.append(fold[i])

        for j in range(20):
            temp.append(str("%7.4f" % (np.dot(tm[i], v[j]) / np.sqrt(w[j]))))

        top20.append(temp)

    #np.savetxt("folds_coordinate", top20, fmt='%s')

    #basis
    basis = []

    for i in range(20):
        basis.append(v[i] / np.sqrt(w[i]))
    #np.savetxt("fold_basis", basis)

    c = []

    colormap = 'rainbow'

    class1 = {
        'a': 'red',
        'b': 'pink',
        'c': 'orange',
        'd': 'yellow',
        'e': 'green',
        'f': 'blue',
        'g': 'purple'
    }

    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')  # fig.gca(projection=...) is gone in newer matplotlib

    label = {
        'a': r'$\alpha$',
        'b': r'$\beta$',
        'c': r'$\alpha/\beta$',
        'd': r'$\alpha+\beta$',
        'e': r'$\alpha and \beta$',
        'f': 'Membrane and cell surface',
        'g': 'Small proteins'
    }
    judge = {}
    for i in range(len(x1)):

        if classs[i] in judge:
            ax.scatter(x1[i], x2[i], x3[i], alpha=0.8, c=class1[classs[i]])
        else:
            ax.scatter(x1[i],
                       x2[i],
                       x3[i],
                       alpha=0.8,
                       c=class1[classs[i]],
                       label=label[classs[i]])
            judge[classs[i]] = 1

    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    ax.set_zlabel("PC3")
    plt.legend(loc="upper left")

    #plt.close()
    fig = plt.figure()
    ax = fig.add_subplot(projection='3d')

    withheld = np.loadtxt("withhold_folds", dtype='str')

    j = 0
    for i in range(len(x1)):

        if fold[i] in withheld:
            j += 1
            #			print j
            ax.scatter(x1[i], x2[i], x3[i], alpha=0.8, c='blue')
        else:
            ax.scatter(x1[i], x2[i], x3[i], alpha=0.8, c='green')

    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    ax.set_zlabel("PC3")
    plt.legend(loc="upper left")

    #plt.show()

    plt.close()

    # t-SNE; top20 holds formatted strings here, so convert to floats first
    from sklearn.manifold import TSNE

    t_embed = TSNE(n_components=3).fit_transform(np.asarray(top20, dtype=float))

    #	print t_embed.shape

    judge = {}
    for i in range(len(x1)):

        if classs[i] in judge:
            ax.scatter(t_embed[i][0],
                       t_embed[i][1],
                       t_embed[i][2],
                       alpha=0.8,
                       c=class1[classs[i]])
        else:
            ax.scatter(t_embed[i][0],
                       t_embed[i][1],
                       t_embed[i][2],
                       alpha=0.8,
                       c=class1[classs[i]],
                       label=label[classs[i]])
            judge[classs[i]] = 1

    ax.set_xlabel("PC1")
    ax.set_ylabel("PC2")
    ax.set_zlabel("PC3")
    plt.legend(loc="upper left")

    plt.show()
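
The coordinate computation used in re(), np.dot(tm[i], v[j]) / np.sqrt(w[j]), is the usual kernel-PCA projection of a Gram matrix onto its leading eigenvectors. A self-contained sketch with numpy only; the double-centering step is an assumption about what model.center does:

import numpy as np

rng = np.random.default_rng(0)
K = rng.random((6, 6))
K = (K + K.T) / 2                    # a symmetric similarity (Gram-like) matrix

n = len(K)
J = np.eye(n) - np.ones((n, n)) / n  # centering matrix
Kc = J @ K @ J                       # double-centered Gram matrix

w, v = np.linalg.eigh(Kc)            # eigenvalues in ascending order
w, v = w[::-1], v[:, ::-1].T         # sort descending; rows of v are eigenvectors

k = int(np.sum(w > 1e-10))           # keep strictly positive components
coords = np.array([[row @ v[j] / np.sqrt(w[j]) for j in range(k)]
                   for row in Kc])   # sample i, component j
print(coords.shape)                  # (6, k)
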
Example #12
import multiprocessing as mp

import numpy as np
from scipy.stats import random_correlation

from nearest_correlation import nearcorr
from knowledge_gradient import KG_Alg
from knowledge_gradient import KG_multi
from knowledge_gradient import update_mu_S

if __name__ == "__main__":
    processes = mp.cpu_count()
    pool = mp.Pool(processes)

    np.random.seed(126)

    # random_correlation.rvs requires eigenvalues summing to the dimension
    # (7 here), so the chosen values are rescaled by g first.
    g = 7 / sum([.5, .8, 1.2, 2.5, 1.7, 2.1, 2.2])
    G = np.round(random_correlation.rvs((g*.5, g*.8, g*1.2, g*2.5, g*1.7, g*2.1, g*2.2)), 3)

    # Rounding to 3 decimals can push G slightly off positive semidefinite;
    # project it back onto the nearest correlation matrix.
    S = nearcorr(G, tol=[], flag=0, max_iterations=1000, n_pos_eig=0, weights=None,
                 verbose=False, except_on_too_many_iterations=True)

    M = S.shape[0]

    lambda_ = np.array([0.2, 1.1, 1.3, 0.12, 0.4, 0.3, 0.12])

    mu = np.array([0.2, 0.21, 0.92, 0.11, 0.7, 0.2, -0.1])

    print(KG_Alg(mu, S, lambda_))

    print(KG_multi(mu, S, lambda_, pool))

    y = 0.22
    x = 3
    mu_1, S_1 = update_mu_S(mu, S, lambda_, x, y)
    print(mu_1.shape)
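
scipy.stats.random_correlation.rvs requires its eigenvalue argument to sum to the matrix dimension, which is what the g rescaling above enforces. A quick standalone check of that constraint:

import numpy as np
from scipy.stats import random_correlation

eigs = np.array([.5, .8, 1.2, 2.5, 1.7, 2.1, 2.2])
eigs *= len(eigs) / eigs.sum()       # rescale so the eigenvalues sum to 7
G = random_correlation.rvs(eigs, random_state=126)
assert np.allclose(np.diag(G), 1.0)  # unit diagonal by construction
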
Example #13
# Assumes a pandas DataFrame df of prices, a good_tickers list, dates
# dte1/dte2, an (initially empty) rtndf DataFrame, numpy.linalg (or
# scipy.linalg) as LA, and a lamplus(t, m) noise-threshold helper.
for tkr in good_tickers:
    tmpdf = df[df.Ticker == tkr]["Adj Close"][dte1:dte2]
    tmprtndf = ((tmpdf - tmpdf.shift(1)) / tmpdf).dropna()
    rsdf = tmprtndf / tmprtndf.std()
    rtndf = pd.concat([rtndf, rsdf], axis=1)

rtndf = rtndf.dropna()
rtndf.columns = good_tickers

t, m = rtndf.shape
cmat = rtndf.corr()

evls, evcs = LA.eig(cmat)
rcmat = abs(np.dot(np.dot(evcs, np.diag(evls)), LA.inv(evcs)))
evallst = list(map(abs, evls))  # list() so the slicing below works in Python 3

# Replace eigenvalues below the noise threshold by their mean, then rebuild
# the correlation matrix.
filtvals = [val for val in evallst if val < lamplus(t, m)]
sevlist = [np.mean(filtvals)] * len(filtvals)
feval = evallst[: (len(evallst) - len(sevlist))] + sevlist

rcmat = abs(np.dot(np.dot(evcs, np.diag(feval)), LA.inv(evcs)))
rcmat = (rcmat + rcmat.T) / 2
ncorr = nearcorr(rcmat, max_iterations=1000)

ncorrdf = pd.DataFrame(ncorr, columns=good_tickers, index=good_tickers)

# Start the clustering

sns.clustermap(1 - abs(ncorrdf))
plt.show()
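
lamplus is not defined in this snippet; in random-matrix denoising of correlation matrices it is typically the Marchenko-Pastur upper eigenvalue edge for a t x m matrix of standardized returns. A hypothetical definition consistent with that reading (an assumption, not the snippet's actual code):

import numpy as np

def lamplus(t, m):
    # Marchenko-Pastur upper bound for eigenvalues of the correlation matrix
    # of m assets observed over t periods (hypothetical reconstruction).
    return (1.0 + np.sqrt(m / t)) ** 2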