def pca_algorithm(self, original_data):
    global stop_thread
    # Gram matrix of the data; its eigenvectors span the principal directions.
    data = np.matmul(original_data.transpose(), original_data)
    data_size = original_data.shape[0] * original_data.shape[1]
    global step_size
    step_size = 100 / (1 + data_size / 490)
    progress_thread = threading.Thread(name='progress_bar', target=progress_bar)
    progress_thread.start()
    # Compute the eigenvalues lambda_i and eigenvectors v_i of the Gram matrix
    w, v = linalg.eig(data)
    stop_thread = True
    print("finished eigendecomposition")
    progress_thread.join()
    # Indices of the k largest eigenvalues, in descending order
    argwsort = np.argsort(w)[-self.k_reduction:][::-1]
    # Eigenvectors are the COLUMNS of v (the original enumerated rows of v,
    # which are not eigenvectors)
    u = [list(v[:, i]) for i in argwsort]
    return u
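# Standalone sanity-check sketch (numpy only, toy data): np.linalg.eig returns
# eigenvectors as COLUMNS of v, so the i-th eigenpair must satisfy
# S @ v[:, i] == w[i] * v[:, i]. This is the convention the fix above relies on.
import numpy as np

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 5))
S = X.T @ X
w, v = np.linalg.eig(S)
i = int(np.argmax(w))
assert np.allclose(S @ v[:, i], w[i] * v[:, i])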
def ksc_center(X, C, k, center):
    # Collect the members of cluster k, each optimally shift-aligned
    # to the current center.
    [m, d] = X.shape
    clusters = empty((0, d))
    for j in range(0, m):
        if int(C[j]) == k:
            if sum(center) == 0:
                opt_xi = X[j]
            else:
                [tmp, opt_xi, tmps] = dist_ts_shift(center, X[j])
            opt_xi = opt_xi.reshape((1, d))
            clusters = append(clusters, opt_xi, axis=0)
    cluster_member_num = clusters.shape[0]
    if cluster_member_num == 0:
        return zeros((1, d))
    else:
        # Normalize each member to unit length.
        cluster_sample_norms = sum(clusters ** 2, axis=-1) ** 0.5  # was `** 1./2`, which computed (x ** 1.) / 2
        cluster_sample_norms = cluster_sample_norms.reshape([cluster_member_num, 1])  # transpose doesn't work (one dimension)
        cluster_sample_norms = tile(cluster_sample_norms, d)
        clusters = divide(clusters, cluster_sample_norms)
        # The new center is the eigenvector of M with the smallest eigenvalue.
        M = cluster_member_num * identity(d) - dot(transpose(clusters), clusters)
        eig_val, eig_vec = linalg.eig(M)
        idx = eig_val.argsort()
        eig_vec = eig_vec[:, idx]
        eig_vec = eig_vec[:, 0].reshape([1, d])
        # Resolve the sign ambiguity of the eigenvector.
        if sum(eig_vec) < 0:
            eig_vec = -eig_vec
        return eig_vec
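# Minimal sketch of the KSC centroid step in isolation (numpy only, toy data;
# dist_ts_shift and the cluster bookkeeping above are assumed already done):
# the centroid is the smallest-eigenvalue eigenvector of M = |C| I - C^T C
# for unit-norm cluster members.
import numpy as np

members = np.array([[1.0, 2.0, 3.0],
                    [1.1, 2.1, 2.9]])
members = members / np.linalg.norm(members, axis=1, keepdims=True)
M = members.shape[0] * np.eye(3) - members.T @ members
vals, vecs = np.linalg.eig(M)
center = vecs[:, np.argsort(vals)[0]]
if center.sum() < 0:  # same sign convention as ksc_center
    center = -center
print(center)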
def problem4(data, outDir, outName, targetValue):
    resultOutLoc = os.path.join(outDir, '4', outName + '.txt')
    if os.path.exists(resultOutLoc):
        return
    if not os.path.exists(os.path.dirname(resultOutLoc)):
        os.makedirs(os.path.dirname(resultOutLoc))
    p = phi(data[TRAIN])
    tVect = tListToTVector(data[TRAIN_LABELS])
    eigenValues = eig(dot(p.transpose(), p))[0]  # was misspelled `eignValues`
    # Iterate the evidence-approximation updates for alpha and beta
    # until both have converged.
    convergenceDelta = .1
    lastAlpha = 0.0
    lastBeta = 0.0
    alpha = 1.0
    beta = 1.0
    while abs(lastAlpha - alpha) > convergenceDelta or abs(lastBeta - beta) > convergenceDelta:
        lastAlpha = alpha
        lastBeta = beta
        m = mN(lastAlpha, lastBeta, p, tVect)
        g = gamma(lastAlpha, lastBeta, eigenValues)
        alpha = Float64(alphaF(g, m))
        beta = Float64(betaF(g, m, data))
    testMSE = MSE(data[TEST], mN(alpha, beta, p, tVect), data[TEST_LABELS])
    with open(resultOutLoc, 'w') as outFile:
        outFile.write('\t'.join([str(alpha), str(beta), str(testMSE)]))
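# Hedged sketch of the update rules problem4 appears to assume: gamma, alphaF
# and betaF are not shown in the snippet, but the standard evidence-approximation
# forms (Bishop, ch. 3.5) would look like this. Names and signatures here are
# illustrative assumptions, not the original helpers.
import numpy as np

def gamma_sketch(alpha, beta, eigenvalues):
    lam = beta * eigenvalues  # eigenvalues of beta * Phi^T Phi
    return float(np.sum(lam / (alpha + lam)))

def alpha_sketch(g, m):
    return g / float(m.T @ m)  # alpha = gamma / ||m_N||^2

def beta_sketch(g, m, phi, t):
    resid = t - phi @ m
    return (len(t) - g) / float(resid.T @ resid)  # 1/beta = ||t - Phi m||^2 / (N - gamma)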
def princomp_B(A, numpc=0):
    """
    Compute the first principal component(s).

    Function modified from:
    http://glowingpython.blogspot.ch/2011/07/principal-component-analysis-with-numpy.html

    Parameters
    ----------
    A : array
        2-D data array; the decomposition is performed on A.T
    numpc : int
        number of principal components to keep

    Returns
    -------
    coeff : array
        first principal component(s)
    """
    M = A.T
    # Scatter matrix via multiplication of the data with its transpose
    # (covariance up to a scale factor).
    a = numpy.dot(M, M.T)
    [latent, coeff] = eig(a)
    p = size(coeff, axis=1)
    idx = argsort(latent)  # sort the eigenvalues
    idx = idx[::-1]        # in descending order
    # Sort the eigenvectors according to the sorted eigenvalues.
    coeff = coeff[:, idx]
    # latent = latent[idx]  # sort the eigenvalues too, if needed
    if 0 <= numpc < p:  # was `numpc < p or numpc >= 0`, which is always true
        coeff = coeff[:, range(numpc)]  # keep only the first numpc components
    # score = dot(coeff.T, M)  # projection of the data into the new space
    return coeff
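# Usage sketch, under the assumption that A is a plain 2-D data array
# (observations x variables); numpc=1 keeps only the first component:
# import numpy
# data = numpy.random.randn(30, 500)
# pc1 = princomp_B(data, numpc=1)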
def get_tip_eigenbasis_expansion(z=.1, freq=1000, a=30,
                                 smoothing=0, reload_signal=True,
                                 *args, **kwargs):
    """Appears to render noise past the first 15 eigenvalues.
    Smoothing by ~4 may be justified; removing zeros probably not."""

    # Rely on the Lightning Rod Model only to load tip data from file
    # in its process of computing a signal.
    if reload_signal:
        tip.verbose = False
        signal = tip.LRM(freq, rp=mat.Au.reflection_p, zmin=z, amplitude=0,
                         normalize_to=None, normalize_at=1000, Nzs=1,
                         demodulate=False, *args, **kwargs)
        tip.verbose = True

    global L, g, M, alphas, P, Ls, Es
    # Get the diagonal basis for the matrix.
    L = tip.LRM.LambdaMatrix(tip.LRM.qxs)
    g = numpy.matrix(numpy.diag(-tip.LRM.qxs
                                * numpy.exp(-2 * tip.LRM.qxs * z / float(a))  # was numpy.float, removed in modern numpy
                                * tip.LRM.wqxs))
    M = AWA(L * g, axes=[tip.LRM.qxs] * 2, axis_names=['q'] * 2)

    # Smooth along the s-axis (first axis); this is where the integral
    # transform was truncated.
    if smoothing:
        M = numrec.smooth(M, axis=0, window_len=smoothing)

    alphas, P = linalg.eig(numpy.matrix(M))
    P = numpy.matrix(P)
    Ls = numpy.array(P.getI() * tip.LRM.Lambda0Vector(tip.LRM.qxs)).squeeze()
    Es = numpy.array(numpy.matrix(tip.LRM.get_dipole_moments(tip.LRM.qxs)) * g * P).squeeze()
    Rs = -Es * Ls / alphas ** 2
    Ps = 1 / alphas

    return {'Rs': Rs, 'Ps': Ps, 'Es': Es, 'alphas': alphas, 'Ls': Ls}
def create_matrix(m, k, p, N, iteration_number, starting_conditions, w):
    x = [random.choice([-1, 1]) for _ in range(0, N)]
    A = zeros(shape=(N, N))
    for i in range(0, N):
        for j in range(0, N):
            if i == j:
                A[i][j] = k
            elif j > i:
                A[i][j] = (-1) ** (j + 1) * (m / (j + 1))
            elif j == (i - 1):
                A[i][j] = m / (i + 1)
    x_copy = x
    b = dot(A, x)
    D = diag(A)
    R = A - diagflat(D)
    x = copy.copy(starting_conditions)
    x_norm = p + 1
    i = 0
    # Spectral radius check: R / D is similar to the Jacobi iteration matrix.
    B = R / D
    e_vals, e_vect = linalg.eig(B)
    # print(";".join((str(N), str(max(abs(e_vals))))))
    # Results for ||x(i+1) - x(i)||:
    while (x_norm >= p) and (i < iteration_number):
        prev_x = x.copy()
        for j in range(0, N):
            d = 0
            for col in range(0, N):  # `col` instead of `k`, which shadowed the parameter
                if j != col:
                    d = d + A[j][col] * x[col]
            x[j] = (1 - w) * x[j] + (w / A[j][j]) * (b[j] - d)
        x_norm = linalg.norm(x - prev_x, inf)  # max (infinity) norm
        i += 1
    print(";".join((str(w), str("%.1e" % p), str(N), str(i),
                    str("%.3e" % linalg.norm(x_copy - x)),
                    str("%.3e" % linalg.norm(x_copy - x, inf)))) + ";")
    x = x_copy
    b = dot(A, x)
    x = copy.copy(starting_conditions)  # copy so the caller's array is not mutated
    b_norm = p + 1
    i = 0
    # Results for ||Ax(i) - b||:
    while (b_norm >= p) and (i < iteration_number):
        for j in range(0, N):
            d = 0
            for col in range(0, N):
                if j != col:
                    d = d + A[j][col] * x[col]
            x[j] = (1 - w) * x[j] + (w / A[j][j]) * (b[j] - d)
        b_norm = linalg.norm(dot(A, x) - b, inf)
        i += 1
    print(";".join((str(i),
                    str("%.3e" % linalg.norm(x_copy - x)),
                    str("%.3e" % linalg.norm(x_copy - x, inf)))))
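# Sketch of the weighted (SOR-style) update used above, on a small
# diagonally dominant system (standalone, numpy only; toy values):
# x_j <- (1 - w) x_j + (w / a_jj) (b_j - sum_{k != j} a_jk x_k).
import numpy as np

A = np.array([[4.0, 1.0], [2.0, 5.0]])
b = np.array([1.0, 2.0])
w = 0.9  # under-relaxation; plain Gauss-Seidel for w = 1
x = np.zeros(2)
for _ in range(50):
    for j in range(2):
        d = sum(A[j][col] * x[col] for col in range(2) if col != j)
        x[j] = (1 - w) * x[j] + (w / A[j][j]) * (b[j] - d)
print(np.allclose(A @ x, b))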
def get_laplacian_pe(self, sign_flip: bool = False) -> torch.Tensor:
    # Get the eigenvectors of the Laplacian matrix.
    laplacian_matrix = nx.laplacian_matrix(nx.Graph(self.dag)).toarray()
    eigenvalues, eigenvectors = linalg.eig(laplacian_matrix)
    # Use the eigenvectors with the smallest eigenvalues as the PE
    # (the original `argsort()[::-1]` selected the largest instead).
    idx = eigenvalues.argsort()[:self.pos_enc_dim]
    eigenvectors = np.real(eigenvectors[:, idx])  # Laplacian is symmetric; drop spurious imaginary parts
    if sign_flip:
        # Randomly flip signs of eigenvectors: map {0, 1} to {-1, +1};
        # multiplying by raw 0/1 flags would zero vectors out instead.
        signs = np.random.uniform(size=self.pos_enc_dim) > 0.5
        signs = 2 * signs.astype(int) - 1
        eigenvectors = eigenvectors * signs
    pos_enc = torch.from_numpy(eigenvectors).float()
    return pos_enc
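# Standalone toy version of the same encoding (networkx + numpy assumed):
# for a symmetric Laplacian, eigh avoids the complex dtype that eig can return.
import networkx as nx
import numpy as np

g = nx.path_graph(5)
lap = nx.laplacian_matrix(g).toarray().astype(float)
vals, vecs = np.linalg.eigh(lap)
pe = vecs[:, np.argsort(vals)[:2]]  # two smallest-eigenvalue eigenvectors
print(pe.shape)                     # (5, 2)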
def hsqrt(A):
    '''
    Computes Hermitian square root of input Hermitian matrix A

    Parameters
    ----------
    A : (N, N) array_like
        A square array of real or complex elements

    Returns
    -------
    B : (N, N) ndarray (matrix)
        Hermitian square root of A
    '''
    w, V = eig(A)
    D = np.diag(np.sqrt(w))
    # Use the conjugate transpose so the result is Hermitian for complex A
    # (a plain V.T is only correct for real symmetric input).
    B = dot(V, dot(D, V.conj().T))
    return B
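# Quick verification sketch (standalone, numpy only): for a Hermitian positive
# semi-definite A, the square root B must satisfy B @ B ~= A and B == B^H.
import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(4, 4)) + 1j * rng.normal(size=(4, 4))
A = X @ X.conj().T  # Hermitian PSD test matrix
w, V = np.linalg.eigh(A)  # eigh: Hermitian-aware decomposition
B = V @ np.diag(np.sqrt(np.clip(w, 0, None))) @ V.conj().T  # clip guards tiny negative round-off
assert np.allclose(B @ B, A)
assert np.allclose(B, B.conj().T)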
def jointprior(self, m1, m2, m3, d):
    """
    Return expected relative entropy, surprise and standard deviation
    for dist1 and dist2 having joint prior dist3. Note that the
    generalized chi-squared distribution is approximated to be central
    in this case. This is only a good approximation if the joint prior
    is weak compared to m1 and m2.

    :param m1: moments instance for dist1
    :param m2: moments instance for dist2
    :param m3: moments instance for dist3
    :param d: dimensionality
    :returns D, ere, S, sigmaD, lambdas, deltamu: relative entropy,
        expected relative entropy, surprise, sigma(D), approximate
        lambdas and dmu of the generalized chi-squared
    """
    if m3 is None:
        mes = 'Samples or moments of the joint prior have to be specified'
        raise ValueError(mes)  # was `raise Warning(mes)`
    Dpart, deltamu = self.getRelEnt(m1, m2, d)
    Q = m2.icov - m3.icov
    T = (m3.mean - m1.mean) * m3.icov
    W = m2.cov * m1.icov * m2.cov
    ASigma = (Q * W) * (identity(d) + Q * m1.cov)
    lambdas = eig(ASigma)[0]
    twt = (T * W * T.T).A1[0]
    ere = .5 * (Dpart + trace(ASigma) + twt)
    D = .5 * (Dpart + deltamu)
    S = D - ere
    temp = W * (Q + Q * m1.cov * Q) * W
    sigmaD = trace(ASigma * ASigma) + 2 * (T * temp * T.T).A1[0]
    sigmaD = sqrt(.5 * sigmaD)
    # This step is a crude correction for the ignored
    # non-centrality of the chi-squared distribution.
    deltamu -= twt
    return D, ere, S, sigmaD, lambdas, deltamu
def decomTest():
    '''
    Test on the semiconductor manufacturing data set, with visualization.
    '''
    dataMat = pca.replaceNanWithMean()
    meanVals = mean(dataMat, axis=0)     # compute the mean
    meanRemoved = dataMat - meanVals     # remove the mean
    covMat = cov(meanRemoved, rowvar=0)  # compute the covariance
    # Eigenvalues and eigenvectors of the covariance matrix.
    eigVals, eigVects = linalg.eig(mat(covMat))
    print("eigVals >>> ", eigVals)
    eigValInd = argsort(eigVals)         # indices sorted by eigenvalue, ascending
    eigValInd = eigValInd[::-1]          # reverse to descending order
    sortedEigVals = eigVals[eigValInd]
    total = sum(sortedEigVals)
    varPercentage = sortedEigVals / total * 100
    plotter.plotter2(varPercentage)      # figure_2.png
def __ordinary_fit(self, x: np.ndarray):
    self.X = x
    mu = x.mean(axis=0)
    x_centered = x - mu
    # Eigendecomposition of the (unnormalized) covariance matrix.
    eigen_values, eigen_vectors = eig(x_centered.T.dot(x_centered))
    # eigen_vectors = x_centered.T.dot(eigen_vectors)
    # Normalize each eigenvector, guarding against zero-length columns.
    norms = []
    for n in np.linalg.norm(eigen_vectors, axis=0):
        norms.append(1.0 if n == 0 else n)
    norms = np.array(norms)
    eigen_vectors = eigen_vectors / norms
    eigen_vectors = eigen_vectors.T
    # Sort eigenpairs by eigenvalue, largest first.
    eigen_values, eigen_vectors = zip(*sorted(
        zip(eigen_values, eigen_vectors),
        key=lambda t: t[0],
        reverse=True))
    eigen_vectors = np.array(eigen_vectors)
    # Keep the top-k eigenvectors as columns.
    self.eigen_vectors = np.array(eigen_vectors[:self.k]).T
def create_matrix(m, k, p, N, iteration_number, starting_conditions):
    x = [random.choice([-1, 1]) for _ in range(0, N)]
    A = zeros(shape=(N, N))
    for i in range(0, N):
        for j in range(0, N):
            if i == j:
                A[i][j] = k
            elif j > i:
                A[i][j] = (-1) ** (j + 1) * (m / (j + 1))
            elif j == (i - 1):
                A[i][j] = m / (i + 1)
    x_copy = x
    b = dot(A, x)
    D = diag(A)
    R = A - diagflat(D)
    x = starting_conditions
    x_norm = p + 1
    i = 0
    # Spectral radius check: R / D is similar to the Jacobi iteration matrix.
    B = R / D
    e_vals, e_vect = linalg.eig(B)
    print(";".join((str(N), str(max(abs(e_vals))))))
    # Results for ||x(i+1) - x(i)||:
    while (x_norm >= p) and (i < iteration_number):  # was `or (i > iteration_number)`, which never stops on divergence
        prev_x = x
        x = (b - dot(R, x)) / D
        x_norm = linalg.norm(x - prev_x, inf)  # max (infinity) norm
        i += 1
    # print(";".join((str(N), str("%.8f" % p), str(i), str("%.15f" % linalg.norm(x_copy - x)), str("%.15f" % linalg.norm(x_copy - x, inf)))) + ";")
    x = x_copy
    b = dot(A, x)
    D = diag(A)
    R = A - diagflat(D)
    x = starting_conditions
    b_norm = p + 1
    i = 0
    # Results for ||Ax(i) - b||:
    while (b_norm >= p) and (i < iteration_number):
        x = (b - dot(R, x)) / D
        b_norm = linalg.norm(dot(A, x) - b, inf)
        i += 1
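# Sketch of the convergence criterion used above (standalone, numpy only):
# the Jacobi iteration x <- D^-1 (b - R x) converges iff the spectral radius
# of the iteration matrix D^-1 R is below 1.
import numpy as np

A = np.array([[4.0, 1.0], [2.0, 5.0]])  # strictly diagonally dominant
b = np.array([1.0, 2.0])
D = np.diag(A)
R = A - np.diagflat(D)
print(max(abs(np.linalg.eigvals(R / D[:, None]))))  # ~0.32 < 1, so it converges
x = np.zeros(2)
for _ in range(60):
    x = (b - R @ x) / D
print(np.allclose(A @ x, b))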
def complementary(self, m1, m2, d):
    """
    Return expected relative entropy, surprise and standard deviation
    for dist1 being the prior of dist2.

    :param m1: moments instance for dist1
    :param m2: moments instance for dist2
    :param d: dimensionality
    :returns D, ere, S, sigmaD, lambdas, deltamu: relative entropy,
        expected relative entropy, surprise, sigma(D), lambdas and dmu
        of the generalized chi-squared
    """
    Dpart, deltamu = self.getRelEnt(m1, m2, d)
    ASigma = matrix(identity(d) - m1.icov * m2.cov)
    lambdas = eig(ASigma)[0]
    ere = -.5 * log(det(m2.cov) / det(m1.cov))
    D = .5 * (Dpart + deltamu)
    S = D - ere
    sigmaD = trace(ASigma * ASigma)
    sigmaD = sqrt(.5 * sigmaD)
    return D, ere, S, sigmaD, lambdas, deltamu
def replacement(self, m1, m2, d):
    """
    Return expected relative entropy, surprise and standard deviation
    for dist1 and dist2 being separately analysed posteriors.

    :param m1: moments instance for dist1
    :param m2: moments instance for dist2
    :param d: dimensionality
    :returns D, ere, S, sigmaD, lambdas, deltamu: relative entropy,
        expected relative entropy, surprise, sigma(D), lambdas and dmu
        of the generalized chi-squared
    """
    Dpart, deltamu = self.getRelEnt(m1, m2, d)
    ASigma = matrix(m1.icov * m2.cov + identity(d))
    lambdas = eig(ASigma)[0]
    ere = .5 * (Dpart + trace(ASigma))
    D = .5 * (Dpart + deltamu)
    S = D - ere
    sigmaD = trace(ASigma * ASigma)
    sigmaD = sqrt(.5 * sigmaD)
    return D, ere, S, sigmaD, lambdas, deltamu
def pca(dataMat, topNfeat):
    meanValues = mean(dataMat, axis=0)           # compute the mean
    meanRemoved = dataMat - meanValues           # subtract the mean
    covMat = cov(meanRemoved, rowvar=0)          # covariance of every feature
    eigVals, eigVects = linalg.eig(mat(covMat))  # eigenvalues and eigenvectors
    eigValInd = argsort(-eigVals)                # sort from big to small, 1×n
    eigValInd = eigValInd[:topNfeat]             # select the top topNfeat features, 1×r
    redEigVects = eigVects[:, eigValInd]         # the corresponding eigenvectors
    lowDDataMat = meanRemoved * redEigVects      # m×r projection: Y = X * P
    # reconMat = (lowDDataMat * redEigVects.T) + meanValues
    return lowDDataMat
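# Cross-check sketch (standalone, numpy only): the eigendecomposition-based
# projection above matches an SVD-based PCA up to per-component sign.
import numpy as np

rng = np.random.default_rng(2)
X = rng.normal(size=(100, 4))
Xc = X - X.mean(axis=0)
U, s, Vt = np.linalg.svd(Xc, full_matrices=False)
proj_svd = Xc @ Vt[:2].T
vals, vecs = np.linalg.eig(np.cov(Xc, rowvar=False))
proj_eig = Xc @ vecs[:, np.argsort(-vals)[:2]]
print(np.allclose(np.abs(proj_eig), np.abs(proj_svd)))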
def test_simple_array_operations(self):
    a = array([[1., 2.], [3., 4.]])
    numpy.testing.assert_array_equal(a.transpose(), array([[1., 3.], [2., 4.]]))
    numpy.testing.assert_array_almost_equal(trace(a), 5)
    inv_a = inv(a)
    b = array([[-2., 1.], [1.5, -.5]])
    self.assertTrue(numpy.allclose(inv_a, b))
    i = dot(a, inv_a)
    numpy.testing.assert_array_almost_equal(i, eye(2), 1)
    numpy.testing.assert_array_almost_equal(inv_a, b)

    # system of linear equations
    a = array([[3, 2, -1], [2, -2, 4], [-1, 0.5, -1]])
    b = array([1, -2, 0])
    c = solve(a, b)
    d = dot(a, c)
    numpy.testing.assert_array_almost_equal(b, d, 1)

    a = array([[.8, .3], [.2, .7]])
    eigen_values, eigen_vectors = eig(a)
    lambda_1 = eigen_values[0]
    x_1 = eigen_vectors[:, 0]
    lambda_2 = eigen_values[1]
    x_2 = eigen_vectors[:, 1]
    # Verify the eigenpairs actually satisfy A x = lambda x
    # (the pairs were previously computed but never checked).
    numpy.testing.assert_array_almost_equal(dot(a, x_1), lambda_1 * x_1)
    numpy.testing.assert_array_almost_equal(dot(a, x_2), lambda_2 * x_2)
std_hat_j.append(temp)
temp = 0

# Standardize: subtract the column mean and divide by the column std estimate.
X_prime = np.zeros((41, 6), dtype=float)
for j in range(6):
    for i in range(41):
        X_prime[i, j] = (X[i, j] - m_j[j]) / std_hat_j[j]

# Sample covariance matrix of the standardized data.
S = np.zeros((6, 6), dtype=float)
for i in range(41):
    S += 1 / 41 * np.outer(X_prime[i], np.transpose(X_prime[i]))

# eig_values = linalg.eigvals(S)
eig_values, eig_vectors = linalg.eig(S)
for i in range(6):
    eig_values[i] = math.fabs(eig_values[i])
sorted_eig_values = sorted(eig_values, reverse=True)
print("Q2) a)")
for i in range(6):
    print(sorted_eig_values[i])

fig = plt.figure(figsize=(8, 5))
sum_per_column_x_prime = []
for i in range(6):
             for i in range(N)]))
for k in range(N):
    for l in range(N):
        if abs(tmp[k, l]) > 1e-8:
            print("%.2g " % tmp[k, l], end="")
        else:
            print("0 ", end="")
    print()
print(list(map(lambda x: "%.2g" % x, [
    dot(inv(matrix([[M(k, l) for k in range(N)] for l in range(N)])),
        [v(k) for k in range(N)])[0, i]
    for i in range(N)
])))
for k in range(N):
    for l in range(N):
        print("%.2g " % M(k, l), end="")
    print()
Minv = inv(matrix([[M(k, l) for k in range(N)] for l in range(N)]))
# Diagonalize Minv in its eigenbasis: D = P^-1 Minv P.
basis = eig(Minv)[1]
invbasis = inv(basis)
diagonalized = dot(invbasis, dot(Minv, basis))
for i in range(N):
    expression = []
    for j in range(N):
        if abs(invbasis[i, j]) > 1e-9:
            expression.append("%.2g A_{%d}" % (invbasis[i, j], j))
    print(" + ".join(expression) + ": %g" % sqrt(diagonal(diagonalized)[i]))
target = data[:, options.target_column]
if options.target_column != shape(data)[1] - 1:
    # Drop the target column: concatenate the column blocks along axis=1
    # (axis=0 stacked them vertically) and keep the final column
    # (the `:-1` end wrongly discarded it).
    data = concatenate((data[:, 0:options.target_column],
                        data[:, options.target_column + 1:]), axis=1)
else:
    data = data[:, 0:options.target_column]

# Subselect the data dimensions of interest.
means = mean(data, axis=0)
interest_data = data - means

# Transform the data using PCA components.
logger.info("Transforming the data...")
interest_data = matrix(interest_data).T  # points are written column-wise
C = cov(interest_data)                   # covariance matrix
(u, V) = eig(C)                          # eigenvectors of C, column-wise in V
invV = inv(V)
interest_data_transformed = dot(invV, interest_data).T

# Uncomment if you want to use only the points within the specified range.
logger.info("Normalizing the data...")
if options.koef:
    (interest_data_transformed, target) = remove_outliers(
        interest_data_transformed, options.koef, target)
interest_data_transformed = norm(interest_data_transformed)
if options.target_column is not None:
    interest_data_transformed = concatenate(
        (interest_data_transformed, matrix(target).T), axis=1)
# Write data
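# Sanity sketch (standalone, numpy only): projecting column-wise points onto
# the eigenbasis of their covariance decorrelates them, i.e. the covariance
# of the transformed points is ~diagonal.
import numpy as np

rng = np.random.default_rng(3)
pts = rng.multivariate_normal([0.0, 0.0], [[2.0, 1.2], [1.2, 1.0]], size=500)
pts = (pts - pts.mean(axis=0)).T  # points column-wise, as in the snippet
C = np.cov(pts)
u, V = np.linalg.eig(C)
Y = np.linalg.inv(V) @ pts
print(abs(np.cov(Y)[0, 1]) < 1e-10)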
Mrows = len(square_matrix)
count = 0
while count < Mrows:
    Mcolumns = len(square_matrix[count])
    if not Mrows == Mcolumns:
        print("must be a square matrix")
        return
    count += 1  # was missing, so the check looped forever

number_of_eigenvectors = 3
# Tridiagonal matrix: 2 on the diagonal, 1 on the first off-diagonals.
A = zeros([10, 10])
n = 0
while n <= 9:
    A[n, n] = 2
    n = n + 1
n = 0  # reset: n is 10 here, so the next loop otherwise never ran
while n <= 8:
    A[n, n + 1] = 1
    A[n + 1, n] = 1
    n = n + 1
H = (1 / (2 * (1 / 9 ** 2))) * A
(V, D) = linalg.eig(H)
# argsort gives the INDICES of the smallest eigenvalues; sort gave their
# values, which were then wrongly used as indices.
order = argsort(V)
print(order)
print(V[order[0:number_of_eigenvectors]])
print(D[:, order[0:number_of_eigenvectors]])