def pca(d, headers, normalize=True):
    """Run a principal component analysis over the selected columns of ``d``.

    Args:
        d: Data object; handed to ``normalize_columns_separately`` or queried
            through ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the analysis runs on the output of
            ``normalize_columns_separately`` instead of the raw data.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues, the eigenvectors (rows of ``V`` from
        the SVD) and the mean vector.
    """
    # NOTE(review): the fetched data is transposed before use; presumably the
    # helpers return it with observations along the second axis -- confirm.
    if normalize:
        samples = normalize_columns_separately(headers, d).T
    else:
        samples = d.get_data(headers).T

    # Centre the data on its per-column means.
    column_means = samples.mean(axis=0)
    centered = samples - column_means

    # Only the singular values and the right singular vectors are needed.
    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Eigenvalues of the covariance matrix: squared singular values divided
    # by the degrees of freedom (row count minus one).
    row_count = np.shape(samples)[0]
    eigenvalues = singular_values * singular_values / (row_count - 1)

    # Project the centred data onto the eigenvectors.
    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably the data helpers return np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        headers, projected, eigenvalues, eigenvectors, column_means
    )
def pca(d, headers, normalize=True):
    """Compute a PCA of the columns of ``d`` named by ``headers``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.subset(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, analyse the output of
            ``normalize_columns_separately`` instead of ``d.subset(headers)``.

    Returns:
        A ``data.PCAData`` built from, in positional order, the projected
        data, the eigenvectors (rows of ``V`` from the SVD), the eigenvalues,
        the mean vector and the headers.
    """
    samples = (
        normalize_columns_separately(d, headers)
        if normalize
        else d.subset(headers)
    )

    # Subtract the per-column means so the SVD sees centred data.
    column_means = np.mean(samples, 0)
    centered = samples - column_means

    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Squared singular values over the degrees of freedom (rows - 1) are the
    # eigenvalues of the covariance matrix, already sorted by the SVD.
    degrees_of_freedom = np.size(centered, 0) - 1
    eigenvalues = np.square(singular_values) / degrees_of_freedom

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably ``subset`` / the normaliser return np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        projected, eigenvectors, eigenvalues, column_means, headers
    )
def pca(d, headers, normalize=True):
    """Perform principal component analysis on selected columns of ``d``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues, the eigenvectors and the mean vector.
    """
    if normalize:
        samples = normalize_columns_separately(d, headers)
    else:
        samples = d.get_data(headers)

    # Difference matrix: every column shifted to zero mean.
    column_means = np.mean(samples, axis=0)
    centered = samples - column_means

    # Rows of the third SVD factor are the eigenvectors; the first factor is
    # not used.
    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Eigenvalues of the covariance matrix, in the same order as the
    # eigenvectors: squared singular values over (rows - 1).
    degrees_of_freedom = samples.shape[0] - 1
    eigenvalues = singular_values * singular_values / degrees_of_freedom

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably ``get_data`` returns np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        headers, projected, eigenvalues, eigenvectors, column_means
    )
def pca(d, headers, normalize=True):
    """Run a PCA on the columns of ``d`` selected by ``headers``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` and
            ``mean``, or queried with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed and their
            means are computed here; otherwise the raw data is used together
            with the result of ``mean(d, headers)``.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues (wrapped in an ``np.matrix``), the
        eigenvectors and the means.
    """
    if normalize:
        samples = normalize_columns_separately(d, headers)
        # Per-column means of the normalised data, kept as a plain list.
        means = [np.mean(samples[:, col]) for col in range(samples.shape[1])]
    else:
        samples = d.get_data(headers)
        # Means of the raw columns, as reported by the ``mean`` helper.
        means = np.matrix(mean(d, headers))

    # Difference matrix.
    centered = samples - means

    # Singular value decomposition; only the last two factors are used.
    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Eigenvalues: squared singular values over the degrees of freedom.
    eigenvalues = np.matrix(
        [math.pow(sv, 2) / (samples.shape[0] - 1) for sv in singular_values]
    )

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably the data helpers return np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(headers, projected, eigenvalues, eigenvectors, means)
def pca(d, headers, prenormalize=True):
    """Compute a PCA of the columns of ``d`` named by ``headers``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.limit_columns(headers)``.
        headers: Column headers selecting the data to analyse.
        prenormalize: When truthy, the normalised columns are analysed
            instead of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the projected
        data, the eigenvectors (rows of ``V`` from the SVD), the eigenvalues,
        the mean vector and the headers.
    """
    if prenormalize:
        samples = normalize_columns_separately(headers, d)
    else:
        samples = d.limit_columns(headers)

    # The means are taken from the matrix actually analysed, so they refer to
    # the normalised values whenever normalisation was requested.
    column_means = np.mean(samples, axis=0)
    centered = samples - column_means

    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Projection of the centred data onto the eigenvectors.
    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably the data helpers return np.matrix -- confirm.
    projected = centered * eigenvectors.T

    # Squared singular values over the degrees of freedom (rows - 1).
    eigenvalues = (singular_values ** 2) / (samples.shape[0] - 1)

    return data.PCAData(
        projected, eigenvectors, eigenvalues, column_means, headers
    )
def pca(d, headers, normalize=True):
    """Run a PCA on a Data object restricted to the given column headers.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.columns_data(headers)``.
        headers: List of column headers selecting the data to analyse.
        normalize: When truthy (the default), the columns are normalised
            before the analysis.

    Returns:
        A ``data.PCAData`` built from, in positional order, the projected
        data, the eigenvectors (rows of ``V`` from the SVD), the eigenvalues,
        the mean vector and the headers.
    """
    samples = (
        normalize_columns_separately(headers, d)
        if normalize
        else d.columns_data(headers)
    )

    # Centre every column on its mean.
    column_means = np.mean(samples, axis=0)
    centered = samples - column_means

    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Squared singular values over the degrees of freedom (rows - 1).
    row_count = samples.shape[0]
    eigenvalues = (singular_values * singular_values) / (row_count - 1)

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably the data helpers return np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        projected, eigenvectors, eigenvalues, column_means, headers
    )
def pca(d, headers, normalize=True):
    """Perform principal component analysis on selected columns of ``d``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the projected
        data, the eigenvectors (rows of ``V`` from the SVD), the eigenvalues,
        the mean vector and the headers.
    """
    if normalize:
        samples = normalize_columns_separately(headers, d)
    else:
        samples = d.get_data(headers)

    # Difference matrix: the data minus its per-column means.
    column_means = np.mean(samples, axis=0)
    centered = samples - column_means

    # SVD of the centred data; the first factor is not needed.
    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Squared singular values over the degrees of freedom (rows - 1).
    degrees_of_freedom = centered.shape[0] - 1
    eigenvalues = np.square(singular_values) / degrees_of_freedom

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably ``get_data`` returns np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        projected, eigenvectors, eigenvalues, column_means, headers
    )
def pca(d, headers, normalize=True):
    """Run a principal component analysis over the selected columns of ``d``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.all_rows_specified_columns(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``dt.PCAData`` built from, in positional order, the projected data,
        the eigenvectors (rows of ``V`` from the SVD), the eigenvalues, the
        mean vector and the headers.
    """
    # The leftover ``print(headers)`` debug statement was removed: an
    # analysis routine should not write to stdout on every call.
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.all_rows_specified_columns(headers)

    # Centre every column on its mean.
    m = numpy.mean(A, axis=0)
    D = A - m

    # Only the singular values and the right singular vectors are needed.
    _, S, V = numpy.linalg.svd(D, full_matrices=False)

    # Eigenvalues of the covariance matrix: squared singular values divided
    # by the degrees of freedom (row count minus one).
    N = D.shape[0]
    eigenvalues = S ** 2 / (N - 1)

    # Project the centred data onto the eigenvectors.
    # NOTE(review): ``*`` is a matrix product only for numpy.matrix operands;
    # presumably the data helpers return numpy.matrix -- confirm.
    pmat = (V * D.T).T

    return dt.PCAData(pmat, V, eigenvalues, m, headers)
def pca(d, headers, normalized=True):
    """Compute a PCA of the columns of ``d`` named by ``headers``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalized: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues, the eigenvectors (rows of ``V`` from
        the SVD) and the mean vector.
    """
    samples = (
        normalize_columns_separately(d, headers)
        if normalized
        else d.get_data(headers)
    )

    # Shift every column to zero mean.
    column_means = np.mean(samples, axis=0)
    centered = samples - column_means

    _, singular_values, eigenvectors = np.linalg.svd(
        centered, full_matrices=False
    )

    # Squared singular values over the degrees of freedom (rows - 1).
    row_count = samples.shape[0]
    eigenvalues = (singular_values * singular_values) / (row_count - 1)

    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably ``get_data`` returns np.matrix -- confirm.
    projected = (eigenvectors * centered.T).T

    return data.PCAData(
        headers, projected, eigenvalues, eigenvectors, column_means
    )
def pca(d, headers, normalize=True):
    """Run a principal component analysis over the selected columns of ``d``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues, a copy of the eigenvectors (rows of
        ``V`` from the SVD) and the mean vector.
    """
    if normalize:
        A = normalize_columns_separately(d, headers)
    else:
        A = d.get_data(headers)

    # Centre every column on its mean.
    m = A.mean(axis=0)
    D = A - m

    # Only the singular values and the right singular vectors are needed.
    _, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues of the covariance matrix are the squared singular values
    # divided by the degrees of freedom (rows - 1). Deriving them from S,
    # rather than from a separate eigendecomposition of the covariance
    # matrix, keeps them real, already sorted in descending order, and equal
    # in number to the rows of V.
    evals = (S * S) / (A.shape[0] - 1)
    evecs = V.copy()

    # Project the centred data onto the eigenvectors.
    # NOTE(review): ``*`` is a matrix product only for np.matrix operands;
    # presumably ``get_data`` returns np.matrix -- confirm.
    pdata = np.transpose(V * np.transpose(D))

    return data.PCAData(headers, pdata, evals, evecs, m)
def pca(d, headers, normalize=True):
    """Compute a PCA through an eigendecomposition of the covariance matrix.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues sorted in descending order, the
        matching eigenvectors stored as rows, and the mean vector.
    """
    # NOTE(review): the fetched data is transposed before use; presumably the
    # helpers return it with observations along the second axis -- confirm.
    source = (
        normalize_columns_separately(d, headers)
        if normalize
        else d.get_data(headers)
    )
    samples = source.T

    # Covariance with columns as variables, then its eigendecomposition.
    covariance = np.cov(samples, rowvar=False)
    raw_values, raw_vectors = np.linalg.eig(covariance)

    # Keep only the real parts of whatever ``eig`` returns.
    real_values = np.real(raw_values)
    real_vectors = np.real(raw_vectors)

    # Largest eigenvalue first; reorder the eigenvector columns the same way
    # and transpose so that each eigenvector becomes a row.
    descending = real_values.argsort()[::-1]
    eigenvalues = real_values[descending]
    eigenvectors = real_vectors[:, descending].T

    # Centre the data and project it onto the eigenvectors.
    column_means = samples.mean(0)
    centered = samples - column_means
    # NOTE(review): ``*`` is a matrix product only when an operand is an
    # np.matrix; presumably the data helpers return np.matrix -- confirm.
    projected = eigenvectors * centered.T

    return data.PCAData(
        headers, projected.T, eigenvalues, eigenvectors, column_means
    )
def pca(d, headers, normalize=True):
    """Run a principal component analysis over the selected columns of ``d``.

    Args:
        d: Data object; passed to ``normalize_columns_separately`` or queried
            with ``d.get_data(headers)``.
        headers: Column headers selecting the data to analyse.
        normalize: When truthy, the normalised columns are analysed instead
            of the raw ones.

    Returns:
        A ``data.PCAData`` built from, in positional order, the headers, the
        projected data, the eigenvalues, the eigenvectors (rows of ``V`` from
        the SVD) and the mean vector.
    """
    if normalize:
        A = normalize_columns_separately(headers, d)
    else:
        A = d.get_data(headers)

    # The means must come from the matrix that is actually analysed. Taking
    # them from the data object leaves D uncentred whenever A holds the
    # normalised values.
    # NOTE(review): assumes a column-mean of A has the same layout callers
    # expect from the mean helper used previously -- confirm.
    m = np.mean(A, axis=0)
    D = A - m

    # Singular value decomposition of the centred data.
    U, S, V = np.linalg.svd(D, full_matrices=False)

    # Eigenvalues: squared singular values over the degrees of freedom
    # (rows of U minus one). Computed into a new array so that S is not
    # overwritten and the parameter ``d`` is not shadowed by a loop variable.
    eigenvalues = (S * S) / (U.shape[0] - 1)

    # Project the centred data onto the eigenvectors.
    pdata = np.dot(V, D.T).T

    return data.PCAData(headers, pdata, eigenvalues, V, m)
# Hand-written PCA results used to exercise the PCAData class.
# ``headers`` is expected to be defined earlier in the script.

# Column means of the original data.
means = np.matrix([3.0, 6.0])

# Eigenvalues of the original data.
evals = np.matrix([16.13395443, 0.03271224])

# Eigenvectors of the original data, one per row.
evecs = np.matrix(
    [
        [0.4527601, 0.89163238],
        [-0.89163238, 0.4527601],
    ]
)

# The original data projected onto the eigenvectors:
#   pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix(
    [
        [-4.4720497, -0.02777563],
        [-2.23602485, -0.01388782],
        [4.02623351, -0.19860441],
        [2.68184104, 0.24026787],
    ]
)

# Build the PCAData object under test.
pcad = data.PCAData(pdata, evecs, evals, means, headers)

# Exercise each of the new accessors in turn.
print("Eigenvalues:")
print(pcad.get_eigenvalues())

print("\nEigenvectors:")
print(pcad.get_eigenvectors())

print("\nMeans:")
print(pcad.get_original_means())

print("\nOriginal Headers:")
print(pcad.get_original_headers())

# Test old functions
# Hand-written PCA results used to exercise the PCAData class.
# ``means`` and ``headers`` are expected to be defined earlier in the script.

# Eigenvalues of the original data.
evals = np.matrix([16.13395443, 0.03271224])

# Eigenvectors of the original data, one per row.
evecs = np.matrix([[0.4527601, 0.89163238],
                   [-0.89163238, 0.4527601]])

# The original data projected onto the eigenvectors:
#   pdata = (evecs * (orgdata - means).T).T
pdata = np.matrix([[-4.4720497, -0.02777563],
                   [-2.23602485, -0.01388782],
                   [4.02623351, -0.19860441],
                   [2.68184104, 0.24026787]])

# Create a PCAData object.
pcad = data.PCAData(headers, pdata, evals, evecs, means)

# Test all of the various new functions. The single-argument call form of
# print is used because it behaves the same under Python 2 and Python 3,
# whereas the bare print statement is a syntax error on Python 3.
print("Eigenvalues:")
print(pcad.get_eigenvalues())

print("\nEigenvectors:")
print(pcad.get_eigenvectors())

print("\nMeans:")
print(pcad.get_data_means())

print("\nOriginal Headers:")
print(pcad.get_data_headers())

# Test old functions