def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(headers, d)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)``.
    """
    if normalize:
        raw = normalize_columns_separately(headers, d)
    else:
        raw = d.get_data(headers)
    # Whichever source is used, it is transposed before further processing.
    A = raw.T

    # Centre the data by subtracting the mean taken along axis 0 of A.
    means = A.mean(axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues of cov(A): squared singular values divided by the degrees
    # of freedom (N - 1). They come out in the order of the singular values.
    dof = np.shape(A)[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return data.PCAData(headers, projected, eigenvalues, V, means)
Esempio n. 2
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.subset(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(d, headers)``, otherwise from
            ``d.subset(headers)``.

    Returns:
        ``data.PCAData(projected, V, eigenvalues, means, headers)``.
    """
    # Pick the input: normalized or taken from the data object as-is.
    A = normalize_columns_separately(d, headers) if normalize else d.subset(headers)

    # Centre the data on the mean taken along axis 0.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues of cov(A): squared singular values over the degrees of
    # freedom (row count - 1), in the same order as the singular values.
    dof = centred.shape[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = (V * centred.T).T

    return data.PCAData(projected, V, eigenvalues, means, headers)
Esempio n. 3
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(d, headers)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)``.
    """
    # Pick the input: normalized or taken from the data object as-is.
    A = normalize_columns_separately(d, headers) if normalize else d.get_data(headers)

    # Centre the data on the mean taken along axis 0.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues of cov(A): squared singular values over the degrees of
    # freedom (row count - 1), in the same order as the singular values.
    dof = A.shape[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return data.PCAData(headers, projected, eigenvalues, V, means)
Esempio n. 4
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(d, headers)`` and its column
            means are computed here; otherwise the input comes from
            ``d.get_data(headers)`` and the means from ``mean(d, headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)`` where
        ``eigenvalues`` and ``means`` are ``np.matrix`` objects in both
        branches.
    """
    if normalize:
        A = normalize_columns_separately(d, headers)
        # Column means in one vectorised call, wrapped as np.matrix so the
        # mean vector has the same type as in the non-normalized branch
        # (it used to be a plain Python list here).
        m = np.matrix(np.mean(A, axis=0))
    else:
        A = d.get_data(headers)
        # Mean values of the columns, as computed by the module's helper.
        m = np.matrix(mean(d, headers))

    # The difference matrix (data centred on the column means).
    D = A - m

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom
    # (row count - 1), kept as a single-row np.matrix as before.
    evals = np.matrix(np.square(S) / (A.shape[0] - 1))

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    pdata = (V * D.T).T

    return data.PCAData(headers, pdata, evals, V, m)
Esempio n. 5
0
def pca(d, headers, prenormalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.limit_columns(headers)`` is called on it when
            ``prenormalize`` is false.
        headers: headers selecting the data to analyse.
        prenormalize: when true the input comes from
            ``normalize_columns_separately(headers, d)``, otherwise from
            ``d.limit_columns(headers)``.

    Returns:
        ``data.PCAData(projected, V, eigenvalues, means, headers)``.
    """
    if prenormalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.limit_columns(headers)

    # The mean is taken from A itself, so it reflects the normalization
    # whenever normalization was requested.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Centred data projected onto the rows of V.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = centred * V.T

    # Eigenvalues: squared singular values over the degrees of freedom.
    dof = A.shape[0] - 1
    eigenvalues = np.square(S) / dof

    return data.PCAData(projected, V, eigenvalues, means, headers)
Esempio n. 6
0
def pca(d, headers, normalize=True):
    """Run a principal component analysis on selected columns.

    Takes a Data object ``d`` and a list of column ``headers`` and returns a
    ``data.PCAData`` object. Unless ``normalize`` is false, the data is
    obtained from ``normalize_columns_separately(headers, d)``; otherwise it
    comes from ``d.columns_data(headers)``.
    """
    A = normalize_columns_separately(headers, d) if normalize else d.columns_data(headers)

    # Centre the data on the mean taken along axis 0.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom.
    dof = A.shape[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return data.PCAData(projected, V, eigenvalues, means, headers)
Esempio n. 7
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(headers, d)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(projected, V, eigenvalues, means, headers)``.
    """
    A = normalize_columns_separately(headers, d) if normalize else d.get_data(headers)

    # Centre the data on the mean taken along axis 0.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom.
    dof = centred.shape[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return data.PCAData(projected, V, eigenvalues, means, headers)
Esempio n. 8
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Prints ``headers`` to standard output as a side effect.

    Args:
        d: data source; ``d.all_rows_specified_columns(headers)`` is called
            on it when ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(headers, d)``, otherwise from
            ``d.all_rows_specified_columns(headers)``.

    Returns:
        ``dt.PCAData(projected, V, eigenvalues, means, headers)``.
    """
    # NOTE(review): looks like leftover debug output -- confirm whether any
    # caller relies on it before removing.
    print(headers)

    A = (
        normalize_columns_separately(headers, d)
        if normalize
        else d.all_rows_specified_columns(headers)
    )

    # Centre the data on the mean taken along axis 0.
    means = numpy.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = numpy.linalg.svd(centred, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom.
    dof = centred.shape[0] - 1
    eigenvalues = numpy.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (numpy.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return dt.PCAData(projected, V, eigenvalues, means, headers)
Esempio n. 9
0
def pca(d,headers,normalized=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalized`` is false.
        headers: headers selecting the data to analyse.
        normalized: when true the input comes from
            ``normalize_columns_separately(d, headers)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)``.
    """
    A = normalize_columns_separately(d, headers) if normalized else d.get_data(headers)

    # Centre the data on the mean taken along axis 0.
    means = np.mean(A, axis=0)
    centred = A - means

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(centred, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom.
    dof = A.shape[0] - 1
    eigenvalues = np.square(S) / dof

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    projected = V * centred.T
    projected = projected.T

    return data.PCAData(headers, projected, eigenvalues, V, means)
Esempio n. 10
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(d, headers)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)``. The
        eigenvalues are derived from the same SVD as the eigenvectors, so
        there is exactly one eigenvalue per row of ``V``, in matching order.
    """
    if normalize:
        A = normalize_columns_separately(d, headers)
    else:
        A = d.get_data(headers)

    # Centre the data on the mean taken along axis 0.
    m = A.mean(axis=0)
    D = A - m

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues of the covariance matrix: squared singular values over the
    # degrees of freedom (row count - 1). Singular values are returned in
    # descending order, so no sorting is needed. This replaces a separate
    # np.linalg.eig(np.cov(...)) call, which could return a complex-typed
    # result and a different number of values than V has rows when there
    # are fewer rows than columns.
    evals = np.square(S) / (A.shape[0] - 1)

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operands) -- confirm against what the data helpers return.
    pdata = np.transpose(V * np.transpose(D))

    return data.PCAData(headers, pdata, evals, V, m)
Esempio n. 11
0
def pca(d, headers, normalize=True):
    """Run a principal component analysis via the covariance matrix.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(d, headers)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, eigenvectors, means)``
        with eigenvalues in descending order and eigenvectors stored as rows
        in the matching order.
    """
    if normalize:
        raw = normalize_columns_separately(d, headers)
    else:
        raw = d.get_data(headers)
    # Whichever source is used, it is transposed before further processing.
    A = raw.T

    # Eigen-decomposition of the covariance matrix (columns as variables);
    # only the real parts of the results are kept.
    covariance = np.cov(A, rowvar=False)
    raw_vals, raw_vecs = np.linalg.eig(covariance)
    raw_vals = np.real(raw_vals)
    raw_vecs = np.real(raw_vecs)

    # Reorder from largest to smallest eigenvalue; eigenvectors are the
    # columns of the eig result, so select columns and transpose to rows.
    order = np.argsort(raw_vals)[::-1]
    eigenvalues = raw_vals[order]
    eigenvectors = raw_vecs[:, order].T

    # Centre the data on the mean taken along axis 0.
    means = A.mean(axis=0)
    centred = A - means

    # Project the centred data onto the eigenvectors.
    # NOTE(review): `*` is presumably a matrix product here (np.matrix
    # operand on the right) -- confirm against what the data helpers return.
    projected = (eigenvectors * centred.T).T

    return data.PCAData(headers, projected, eigenvalues, eigenvectors, means)
Esempio n. 12
0
def pca(d, headers, normalize=True):
    """Run an SVD-based principal component analysis.

    Args:
        d: data source; ``d.get_data(headers)`` is called on it when
            ``normalize`` is false.
        headers: headers selecting the data to analyse.
        normalize: when true the input comes from
            ``normalize_columns_separately(headers, d)``, otherwise from
            ``d.get_data(headers)``.

    Returns:
        ``data.PCAData(headers, projected, eigenvalues, V, means)``.
    """
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_data(headers)

    # Take the mean from A itself rather than from the data object, so the
    # data is centred correctly when A holds normalized values.
    m = np.mean(A, axis=0)
    D = A - m

    # Thin SVD of the centred data; the rows of V serve as the eigenvectors.
    _, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom
    # (row count - 1). Computed into a new array instead of overwriting S
    # element by element in a loop that rebound the parameter name `d`.
    eigenvalues = np.square(S) / (D.shape[0] - 1)

    # Project the centred data onto the eigenvectors.
    pdata = np.dot(V, D.T).T

    return data.PCAData(headers, pdata, eigenvalues, V, m)
Esempio n. 13
0
# Hand-written fixture: builds a PCAData object from literal values and
# prints what its accessors return.
# NOTE(review): `headers` is not defined in this fragment -- presumably set
# earlier in the script; confirm.

# means of the original data (a single-row matrix with two entries)
means = np.matrix([3., 6.])

# eigenvalues of the original data (a single-row matrix with two entries)
evals = np.matrix([16.13395443, 0.03271224])

# eigenvectors of the original data as rows (2 x 2)
evecs = np.matrix([[0.4527601, 0.89163238], [-0.89163238, 0.4527601]])

# the original data projected onto the eigenvectors (4 x 2).
# pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix([[-4.4720497, -0.02777563], [-2.23602485, -0.01388782],
                   [4.02623351, -0.19860441], [2.68184104, 0.24026787]])

# create a PCAData object; the arguments are passed in the order
# projected data, eigenvectors, eigenvalues, means, headers
pcad = data.PCAData(pdata, evecs, evals, means, headers)

# Test all of the various new functions by printing each accessor's result
print("Eigenvalues:")
print(pcad.get_eigenvalues())

print("\nEigenvectors:")
print(pcad.get_eigenvectors())

print("\nMeans:")
print(pcad.get_original_means())

print("\nOriginal Headers:")
print(pcad.get_original_headers())

# Test old functions
Esempio n. 14
0
# Hand-written fixture: builds a PCAData object from literal values and
# prints what its accessors return.
# NOTE(review): `means` and `headers` are not defined in this fragment --
# presumably set earlier in the script; confirm.

# eigenvalues of the original data (a single-row matrix with two entries)
evals = np.matrix([16.13395443, 0.03271224])

# eigenvectors of the original data as rows (2 x 2)
evecs = np.matrix([[ 0.4527601,   0.89163238],
                   [-0.89163238,  0.4527601 ]])

# the original data projected onto the eigenvectors (4 x 2).
# pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix([[-4.4720497,  -0.02777563],
                   [-2.23602485, -0.01388782],
                   [ 4.02623351, -0.19860441],
                   [ 2.68184104,  0.24026787]])


# create a PCAData object; the arguments are passed in the order
# headers, projected data, eigenvalues, eigenvectors, means
pcad = data.PCAData( headers, pdata, evals, evecs, means )

# Test all of the various new functions by printing each accessor's result.
# Each print takes a single parenthesised argument, so the output is the
# same under Python 2 while the script also parses under Python 3.
print("Eigenvalues:")
print(pcad.get_eigenvalues())

print("\nEigenvectors:")
print(pcad.get_eigenvectors())

print("\nMeans:")
print(pcad.get_data_means())

print("\nOriginal Headers:")
print(pcad.get_data_headers())

# Test old functions