def qubit2_conv_comp_base_rep_to_svd_diag_rep(rho: np.ndarray):
    """Rotate the Hilbert-Schmidt representation of ``rho`` with the SVD of ``t``.

    ``rho`` is first split into ``a``, ``b`` and ``t`` by
    ``qubit2_conv_comp_base_rep_to_hilbert_schmidt_rep``; ``t`` is then
    factorised as ``u, s, vh = np.linalg.svd(t)``.

    Returns:
        The tuple ``(real(u.T . a), real(vh . b), s, u, vh)``.

    Side effects:
        Prints ``concurrency(rho)`` when ``MF.check_matrix_symmetric(t)`` is
        truthy, and the literal ``"nant"`` when
        ``MF.check_matrixy_antisymmetric(t)`` is truthy.
    """
    vec_a, vec_b, corr_t = qubit2_conv_comp_base_rep_to_hilbert_schmidt_rep(rho)
    left, singular_values, right_h = np.linalg.svd(corr_t)

    # Diagnostic output only; neither check influences the returned values.
    if MF.check_matrix_symmetric(corr_t):
        print(concurrency(rho))
    if MF.check_matrixy_antisymmetric(corr_t):
        print("nant")

    rotated_a = np.dot(left.T, vec_a).real
    rotated_b = np.dot(right_h, vec_b).real
    return rotated_a, rotated_b, singular_values, left, right_h
def test_analyse_matrix_sparse(self):
    """Check the (non_zero, total) counts reported by MF.analyse_matrix_sparse.

    Uses a 100x100 zero matrix with one entry of 0.0004 (counted by default,
    not counted with ``rtol_places=2``) and a 20x20 all-ones matrix (every
    entry counted).
    """
    m1 = np.zeros((100, 100))
    m1[0][0] = 0.0004

    non_zero, N_total = MF.analyse_matrix_sparse(m1)
    # assertEqual reports both operands on failure, unlike assertTrue(a == b).
    self.assertEqual(non_zero, 1)
    self.assertEqual(N_total, 100 * 100)

    non_zero, N_total = MF.analyse_matrix_sparse(m1, rtol_places=2)
    self.assertEqual(non_zero, 0)

    m2 = np.ones((20, 20))
    non_zero, N_total = MF.analyse_matrix_sparse(m2, rtol_places=2)
    self.assertEqual(non_zero, N_total)
def make_pure_random_2qbit_density_matrix_by_unitary_partial_trace2(p, n):
    """Create a density matrix from 8 uncoupled qubits, a random unitary and
    six partial traces.

    A product state of 8 single-qubit matrices (each from
    ``make_random_1qubit_density_matrix(1)``) is conjugated by a random
    unitary from ``MF.make_matrix_random_unitary`` and then passed through
    ``partial_trace`` six times. According to the original author the result
    should in principle be closer to the totally mixed state than the
    4-qubit variant of this procedure.

    ``p`` and ``n`` are accepted but not used.
    """
    qubit_count = 8
    N = 2**qubit_count

    # Draw the factors left to right, then fold from the right so the
    # products are formed as kron(f0, kron(f1, ... kron(f6, f7))).
    factors = [make_random_1qubit_density_matrix(1) for _ in range(qubit_count)]
    product = factors[-1]
    for factor in reversed(factors[:-1]):
        product = np.kron(factor, product)

    rho = np.zeros((N, N)) + 0.j
    rho += product

    U = MF.make_matrix_random_unitary(N, N)
    U_dagger = np.conjugate(np.transpose(U))
    rho = np.dot(U, np.dot(rho, U_dagger))

    for _ in range(6):
        rho = partial_trace(rho)
    return rho
def fitnessFunction(individual, algorithm, n_labels, ground_truth, m1, m2, m3):
    """Score one candidate weighting by clustering the combined matrix.

    Args:
        individual: Sequence whose first two entries are the weights w1 and
            w2; the third weight is fixed to 0.
        algorithm: ``'complete'`` or ``'average'`` (agglomerative linkage) or
            ``'kmedoids'``.
        n_labels: Number of clusters to request.
        ground_truth: Accepted for interface compatibility; not used by the
            currently active score.
        m1, m2, m3: Matrices passed to ``mf.calculateCorrelationMatrix``.

    Returns:
        The Calinski-Harabasz score of the resulting labelling on ``corr``.

    Raises:
        ValueError: If ``algorithm`` is not one of the supported names.
    """
    # Validate before doing any work; previously an unknown name surfaced
    # only later as an unbound ``labels`` variable.
    if algorithm not in ('complete', 'average', 'kmedoids'):
        raise ValueError(
            "unknown clustering algorithm %r; expected 'complete', "
            "'average' or 'kmedoids'" % (algorithm,))

    w1 = individual[0]
    w2 = individual[1]
    w3 = 0  # third measure is currently switched off

    corr = mf.calculateCorrelationMatrix(m1, m2, m3, w1, w2, w3)

    if algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    else:
        # 'complete' and 'average' differ only in the linkage argument.
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage=algorithm).fit(corr)
        labels = agglomerative.labels_

    # NOTE(review): assumes ``metrics`` is sklearn.metrics, where the score
    # was renamed from calinski_harabaz_score to calinski_harabasz_score;
    # prefer the new name and fall back to the old one.
    score = getattr(metrics, 'calinski_harabasz_score', None)
    if score is None:
        score = metrics.calinski_harabaz_score
    return score(corr, labels)
def mutual_information(p_x: np.ndarray, p_yx: np.ndarray) -> float:
    """Return ``classical_entropy(p_y) - conditional_entropy(p_x, p_yx)``.

    Args:
        p_x: Input distribution; must pass ``check_l1_norm``.
        p_yx: Conditional matrix whose i-th row is paired with ``p_x[i]``;
            must pass ``MF.check_matrix_simple_stochastic``.

    Returns:
        The difference between the entropy of the output distribution
        ``p_y = p_yx^T p_x`` and the conditional entropy.

    Raises:
        AssertionError: If an input, or the derived ``p_y``, fails its check.
    """
    # Implicit string concatenation keeps source indentation out of the
    # message (a backslash continuation inside the literal does not).
    assert MF.check_matrix_simple_stochastic(p_yx), (
        "conditional matrix p_yx is not simple stochastic.")
    assert check_l1_norm(p_x), "p_x is not l1-normed"

    p_y = np.dot(np.transpose(p_yx), p_x)
    assert check_l1_norm(p_y), "p_y is not l1-normed"

    return classical_entropy(p_y) - conditional_entropy(p_x, p_yx)
def create_random_ensemble_arcsin(N: int, K: int, size: int) -> List[np.ndarray]:
    """Generate ``size`` random density matrices (arcsine-type construction).

    TODO: Need more verification.
    @see: ZyPeNeCo2011

    For every sample a maximally entangled state ``phi1`` is superposed with
    ``U phi1``, where ``U`` is the sum of ``K - 1`` operators
    ``kron(eye(N), random_unitary)``. The normalised superposition is turned
    into a density matrix, passed ``int(N / 2)`` times through
    ``partial_trace`` and rescaled to unit trace.

    Args:
        N: Subsystem dimension; the state vector has ``N**2`` entries.
        K: One more than the number of random unitaries summed into ``U``.
        size: Number of density matrices to generate.

    Returns:
        List of ``size`` matrices, each of which passed
        ``check_density_operator``.

    Raises:
        AssertionError: If a generated matrix fails ``check_density_operator``.
    """
    rho_list = []
    n = int(N / 2)
    phi1 = create_maximally_entangled_state(N, 0, 0)
    identity = np.eye(N)  # loop invariant

    for _ in range(size):
        U = np.zeros((N**2, N**2)) + 0.j
        for _ in range(1, K):
            U += np.kron(identity, MF.make_matrix_random_unitary(N, N))
            #U += np.kron(MF.make_matrix_random_unitary(N, N), np.eye(N))

        phi2 = np.dot(U, phi1)
        psi = (phi1 + phi2)
        # psi is complex: np.dot(psi, psi) would sum psi_i**2 without
        # conjugation, which is not the squared norm. Use the 2-norm.
        psi /= np.linalg.norm(psi)

        rho = density_matrix(psi)
        for _ in range(n):
            rho = partial_trace(rho) / np.trace(rho)
        rho /= np.trace(rho)

        assert check_density_operator(rho), "rho is not a density matrix"
        rho_list.append(rho)

    return rho_list
def conditional_entropy(p_x: np.ndarray, p_yx: np.ndarray) -> float:
    """Return ``sum_i p_x[i] * classical_entropy(p_yx[i])``.

    Args:
        p_x: Weights; must pass ``check_l1_norm``.
        p_yx: Conditional matrix indexed row-wise alongside ``p_x``; must
            pass ``MF.check_matrix_simple_stochastic``.

    Raises:
        AssertionError: If either input fails its check.
    """
    # Implicit string concatenation keeps source indentation out of the
    # message (a backslash continuation inside the literal does not).
    assert MF.check_matrix_simple_stochastic(p_yx), (
        "conditional matrix p_yx is not simple stochastic.")
    assert check_l1_norm(p_x), "p_x is not l1-normed"

    return sum(p * classical_entropy(p_yx[i]) for i, p in enumerate(p_x))
def create_random_ensemble_pure(N: int, n=4) -> List[np.ndarray]:
    """Generate ``N`` random 4x4 matrices of the form ``U (rho1 (x) rho2) U^dagger``.

    Each sample is the Kronecker product of two matrices from
    ``make_random_1qubit_density_matrix(1)``, conjugated by a random 4x4
    unitary from ``MF.make_matrix_random_unitary``.

    Args:
        N: Number of matrices to generate.
        n: Accepted for interface compatibility; not used.

    Returns:
        List with ``N`` matrices.
    """
    # Named ``ensemble`` rather than ``list`` to avoid shadowing the builtin.
    ensemble = []
    for _ in range(N):
        rho = np.kron(make_random_1qubit_density_matrix(1),
                      make_random_1qubit_density_matrix(1))
        U = MF.make_matrix_random_unitary(4, 4)
        rho = np.dot(U, np.dot(rho, np.conjugate(np.transpose(U))))
        ensemble.append(rho)
    return ensemble
def make_pure_random_2qbit_density_matrix_by_unitary(p, n):
    """Return a random 4x4 matrix built from ``n`` weighted product terms.

    ``n`` random weights are normalised to sum to one. Each weight multiplies
    the Kronecker product of two matrices from
    ``make_random_1qubit_density_matrix(p)``. The weighted sum is finally
    conjugated by a random 4x4 unitary from ``MF.make_matrix_random_unitary``.
    """
    weights = np.random.rand(n)
    weights = weights / np.sum(weights)

    mixture = np.zeros((4, 4)) + 0.j
    for weight in weights:
        first = make_random_1qubit_density_matrix(p)
        second = make_random_1qubit_density_matrix(p)
        mixture += np.kron(first, second) * weight

    U = MF.make_matrix_random_unitary(4, 4)
    U_dagger = np.conjugate(np.transpose(U))
    return np.dot(U, np.dot(mixture, U_dagger))
def make_cue_matrix(N: int) -> np.ndarray:
    """Return a random unitary obtained from the QR decomposition of a
    Ginibre matrix.

    The unitary factor of a plain QR decomposition is not uniformly (Haar)
    distributed, because the factorisation is only unique up to a diagonal
    matrix of phases. Multiplying column ``j`` of ``Q`` by the phase of
    ``R[j, j]`` removes that ambiguity.

    Args:
        N: Size argument forwarded to ``MF.make_matrix_ginibre``.

    Returns:
        The phase-corrected unitary factor.
    """
    # NOTE(review): assumes MF.make_matrix_ginibre(N) returns an N x N matrix
    # with i.i.d. complex Gaussian entries - confirm.
    m = MF.make_matrix_ginibre(N)
    Q, R = np.linalg.qr(m)

    diag_r = np.diagonal(R)
    phases = diag_r / np.abs(diag_r)
    # Elementwise multiply broadcasts the phases over the columns of Q,
    # i.e. Q @ diag(phases).
    return np.multiply(Q, phases)
def get_conditional_prob_from_joint_prob(p_xy: np.ndarray) -> np.ndarray:
    """Convert a joint probability matrix into a row-conditional matrix.

    Every row ``i`` of ``p_xy`` is divided by its row sum ``p_x[i]``.

    Args:
        p_xy: Two-dimensional matrix that passes
            ``check_joint_probability_matrix``. It is left unmodified.

    Returns:
        A new array of the same shape holding the row-normalised values.

    Raises:
        AssertionError: If the input is not a joint probability matrix, the
            marginal is not l1-normed, or the result fails
            ``MF.check_matrix_simple_stochastic`` (e.g. because a row of
            ``p_xy`` sums to zero).
    """
    assert check_joint_probability_matrix(p_xy), (
        "p_xy is not a joint probability matrix.")

    size_a, size_b = p_xy.shape
    p_x = np.dot(p_xy, np.ones(size_b))
    assert check_l1_norm(p_x), "p_x is not l1-normed."

    # Build a fresh array instead of scaling the caller's matrix in place.
    p_yx = p_xy / p_x[:, np.newaxis]

    assert MF.check_matrix_simple_stochastic(p_yx), (
        "after calculation the matrix is not a conditional probability matrix")
    return p_yx
def test_check_matrix_triagonal(self):
    """MF.check_matrix_triagonal accepts matrices from
    make_matrix_random_triagonal (also transposed) and rejects a random
    5x5 unitary."""
    generated = MF.make_matrix_random_triagonal(3, 5)
    generated_transposed = MF.make_matrix_random_triagonal(3, 5).T
    for candidate in (generated, generated_transposed):
        self.assertTrue(MF.check_matrix_triagonal(candidate))

    unitary = MF.make_matrix_random_unitary(5, 5)
    self.assertFalse(MF.check_matrix_triagonal(unitary))
def make_pure_random_2qbit_density_matrix_by_unitary_partial_trace(p, n):
    """Create a density matrix from 4 uncoupled qubits, a random unitary and
    two partial traces.

    A separable product of four single-qubit matrices (each from
    ``make_random_1qubit_density_matrix(1)``) is conjugated by a random
    16x16 unitary and then passed through ``partial_trace`` twice.

    ``p`` and ``n`` are accepted but not used.
    """
    # Draw the factors left to right, then fold from the right so the
    # products are formed as kron(f0, kron(f1, kron(f2, f3))).
    factors = [make_random_1qubit_density_matrix(1) for _ in range(4)]
    product = factors[-1]
    for factor in reversed(factors[:-1]):
        product = np.kron(factor, product)

    rho = np.zeros((16, 16)) + 0.j
    rho += product

    U = MF.make_matrix_random_unitary(16, 16)
    U_dagger = np.conjugate(np.transpose(U))
    rho = np.dot(U, np.dot(rho, U_dagger))

    for _ in range(2):
        rho = partial_trace(rho)
    return rho
def fitness(indv):
    """Fitness of one individual: first clustering metric scaled by 100.

    The three weights in ``indv.solution`` are combined with the
    module-level matrices via ``mf.calculateCorrelationMatrix``; the result
    is clustered with complete-linkage agglomerative clustering and
    evaluated with ``cl.clusterEvaluation``.

    Returns:
        ``1`` when the first metric is not positive, otherwise that metric
        times 100 as a float. The scaled value is also printed.
    """
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

    model = AgglomerativeClustering(affinity='precomputed',
                                    n_clusters=n_labels,
                                    linkage='complete')
    fitted = model.fit(corr)
    scores = cl.clusterEvaluation(corr, fitted.labels_, ground_truth)

    # Non-positive scores are mapped to a constant fallback fitness.
    if scores[0] <= 0:
        return 1

    print(scores[0] * 100)
    return float(scores[0]) * 100
def fitness(indv):
    """Cluster with the weights of ``indv`` and log the evaluation metrics.

    Reads the module-level ``algorithm``, ``matrix1..3``, ``n_labels``,
    ``ground_truth``, ``current_iteration`` and ``writer``. One line of the
    form ``"<iteration>: <w1> <w2> <w3> <metric> <metric> ..."`` is written
    to ``writer``.

    Raises:
        ValueError: If ``algorithm`` is not ``'complete'``, ``'average'`` or
            ``'kmedoids'``.
    """
    # Previously an unknown name surfaced only as an unbound ``labels``.
    if algorithm not in ('complete', 'average', 'kmedoids'):
        raise ValueError(
            "unknown clustering algorithm %r; expected 'complete', "
            "'average' or 'kmedoids'" % (algorithm,))

    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

    if algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    else:
        # 'complete' and 'average' differ only in the linkage argument.
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage=algorithm).fit(corr)
        labels = agglomerative.labels_

    metrics = ce.clusterEvaluation(corr, labels, ground_truth)

    # Join weights and metrics with one separator; the old concatenation
    # glued w3 directly onto the first metric.
    fields = [w1, w2, w3] + list(metrics)
    writer.write(str(current_iteration) + ': '
                 + ' '.join(str(x) for x in fields) + '\n')
    # NOTE(review): nothing is returned here; confirm whether the caller
    # expects a numeric fitness value.
def fitness(indv):
    """Fitness of one individual: first clustering metric scaled by 100.

    Reads the module-level ``algorithm``, ``matrix1..3``, ``n_labels`` and
    ``ground_truth``. The weights in ``indv.solution`` are combined via
    ``mf.calculateCorrelationMatrix``, the result is clustered with the
    selected algorithm and evaluated with ``ce.clusterEvaluation``.

    Returns:
        ``float(metrics[0]) * 100``.

    Raises:
        ValueError: If ``algorithm`` is not ``'complete'``, ``'average'`` or
            ``'kmedoids'``.
    """
    # Previously an unknown name surfaced only as an unbound ``labels``.
    if algorithm not in ('complete', 'average', 'kmedoids'):
        raise ValueError(
            "unknown clustering algorithm %r; expected 'complete', "
            "'average' or 'kmedoids'" % (algorithm,))

    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

    if algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    else:
        # 'complete' and 'average' differ only in the linkage argument.
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage=algorithm).fit(corr)
        labels = agglomerative.labels_

    metrics = ce.clusterEvaluation(corr, labels, ground_truth)
    return float(metrics[0]) * 100
# Two spellings of the sample id are used: the bare id and the id with a
# trailing dot.
sample_for_domains = spl
sample = str(spl) + '.'

# Load one matrix per measure, in measure order.
matrix1, matrix2, matrix3 = (
    rs.loadMatrixFromFile(sample, measure)
    for measure in (measure1, measure2, measure3)
)

domains = rs.loadDomainListFromFile(sample)
n_labels = scop.getUniqueClassifications(sample_for_domains)
# Labels are converted to ints.
ground_truth = [int(label) for label in scop.getDomainLabels(domains)]

# Min-max scale all matrices, then run calculateDistances on each of them.
matrix1, matrix2, matrix3 = (
    mf.minMaxScale(matrix) for matrix in (matrix1, matrix2, matrix3)
)
matrix1, matrix2, matrix3 = (
    mf.calculateDistances(matrix) for matrix in (matrix1, matrix2, matrix3)
)

# Sweep w1 in steps of 0.05; the second measure is switched off and the
# third takes the remaining weight.
for w1 in np.arange(0.05, 1.05, 0.05):
    w2, w3 = 0, 1 - w1
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
# ---------------------------------------------------
# Protein data
# ---------------------------------------------------
path_to_results = 'C:/ShareSSD/scop/tests/'
measure1, measure2, measure3 = 'rmsd', 'gdt_2', 'seq'
algorithm = 'complete'
sample = 'a.1.'
sample_for_domains = 'a.1'

# Load one matrix per measure, then min-max scale all of them and finally
# run calculateDistances on each.
matrix1, matrix2, matrix3 = (
    rs.loadMatrixFromFile(sample, measure)
    for measure in (measure1, measure2, measure3)
)
matrix1, matrix2, matrix3 = (
    mf.minMaxScale(matrix) for matrix in (matrix1, matrix2, matrix3)
)
matrix1, matrix2, matrix3 = (
    mf.calculateDistances(matrix) for matrix in (matrix1, matrix2, matrix3)
)

domains = rs.loadDomainListFromFile(sample_for_domains)
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

# ---------------------------------------------------
# Genetic algorithm configuration
# ---------------------------------------------------
# Template individual: three decimal genes, each in [0.1, 0.9] with
# eps 0.001.
indv_template = DecimalIndividual(
    ranges=[(0.1, 0.9), (0.1, 0.9), (0.1, 0.9)],
    eps=[0.001, 0.001, 0.001])
def test_check_matrix_diagonal(self):
    """The 5x5 identity is diagonal; setting one off-diagonal entry breaks that."""
    matrix = np.eye(5)
    self.assertTrue(MF.check_matrix_diagonal(matrix))

    # A single non-zero element above the diagonal must be detected.
    matrix[0, 1] = 1
    self.assertFalse(MF.check_matrix_diagonal(matrix))
def readDistances(sample, measure):
    """Parse a ``sim_<sample>_<measure>`` file into a domain list and matrix.

    The file is read in two passes over the same handle:

    1. Lines containing ``'PDB :'`` (and no ``'#'``) are counted and the last
       path component of their last whitespace-separated token is stored as
       a domain name, until a line containing ``'Distance records'``.
    2. Lines containing ``'DIST :'`` (and no ``'#'``) supply the row id
       (token 2) and the value (token 4). Values are appended to a flat
       list, with runs of zeros inserted whenever the row id changes.

    The flat list is reshaped to ``(size, size - 1)``, a zero column is
    prepended and the result is passed through ``mf.symmetrizeMatrix``.

    Returns:
        ``(domains, matrix)``.

    NOTE(review): block nesting was reconstructed from a collapsed source
    line - confirm against the original file.
    """
    path_to_matrix = 'C:/ShareSSD/scop/data_old/sim_' + sample + '_' + measure
    counter = 0  # number of completed rows == zeros to pad the next row with
    matrix = []  # flat list of values, reshaped after parsing
    with open(path_to_matrix, 'r') as fp:
        size = 0
        domains = []
        #get number of structures
        line = fp.readline()
        while line:
            if 'PDB :' in str(line):
                if '#' not in str(line):
                    size += 1
                    domain = str(line).strip().split()[-1].split('/')[-1]
                    domains.append(str(domain))
            if 'Distance records' in str(line):
                break
            line = fp.readline()
        #get distance matrix
        while line:
            if 'DIST :' in str(line):
                if '#' not in str(line):
                    print(line)
                    parsed = str(line).strip().split()
                    current_row = parsed[2]
                    value = float(parsed[4])
                    # Consume every record of the current row; the loop ends
                    # holding the first record of the next row in ``value``.
                    # NOTE(review): float(parsed[4]) runs before the row check,
                    # so a following line with fewer than five tokens raises
                    # IndexError.
                    while current_row == parsed[2]:
                        matrix.append(value)
                        line = fp.readline()
                        parsed = str(line).strip().split()
                        value = float(parsed[4])
                    # Pad the new row with one zero per completed row, then
                    # store the record that ended the inner loop.
                    counter += 1
                    i = 0
                    while i < counter:
                        matrix.append(0)
                        i += 1
                    i = 0
                    matrix.append(value)
            line = fp.readline()
        # Final row: zeros only.
        counter += 1
        i = 0
        while i < counter:
            matrix.append(0)
            i += 1
        i = 0
    matrix = np.asmatrix(matrix)
    matrix = matrix.reshape(size, size - 1)
    n, _ = matrix.shape
    # Prepend a zero column so the matrix becomes size x size.
    X0 = np.zeros((n, 1))
    matrix = np.hstack((X0, matrix))
    #np.savetxt("C:/ShareSSD/scop/data_old/matrix2.txt", matrix,delimiter=' ', newline='\n')
    #if 'rmsd' in path_to_matrix:
        #matrix = matrix/(matrix.max()/1)
    matrix = mf.symmetrizeMatrix(matrix)
    #if 'gdt' in path_to_matrix:
        #matrix = mf.processGDTMatrix(matrix)
    #np.savetxt("C:/ShareSSD/scop/data_old/matrix3.txt", matrix,delimiter=' ', newline='\n')
    return domains, matrix
from sklearn.cluster import AgglomerativeClustering

# Protein data is loaded once, ahead of any loop that uses it.
path_to_results = 'C:/ShareSSD/scop/clustering_results/'
measure1, measure2, measure3 = 'gdt_2', 'gdt_2', 'seq'
sample = 'a.3.'
sample_for_domains = 'a.3'

# One matrix per measure, each min-max scaled after all are loaded.
matrix1, matrix2, matrix3 = (
    rs.loadMatrixFromFile(sample, measure)
    for measure in (measure1, measure2, measure3)
)
matrix1, matrix2, matrix3 = (
    mf.minMaxScale(matrix) for matrix in (matrix1, matrix2, matrix3)
)

# Idea to try: divisions using means and standard deviation.
# The calculateDistances step is currently disabled:
#matrix1 = mf.calculateDistances(matrix1)
#matrix2 = mf.calculateDistances(matrix2)
#matrix3 = mf.calculateDistances(matrix3)

domains = rs.loadDomainListFromFile(sample_for_domains)

# Existing labels for the loaded domains.
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)
def readDistances(sample, measure):
    """Parse a ``sim_<sample>_<measure>`` file, then save and return the matrix.

    Parsing has two passes over the same handle:

    1. Lines containing ``'PDB :'`` (and no ``'#'``) are counted and the last
       path component of their last whitespace-separated token is stored as
       a domain name, until a line containing ``'Distance records'``.
    2. Lines containing ``'DIST :'`` (and no ``'#'``) supply the row id
       (token 2) and the value (token 4). Values are appended to a flat
       list, with runs of zeros inserted whenever the row id changes.

    The flat list is reshaped to ``(size, size - 1)``, a zero column is
    prepended and the result is passed through ``mf.symmetrizeMatrix``.
    Entries are then written one per line to the ``values_`` file and the
    matrix is dumped to the ``matrix_`` file.

    Returns:
        ``(domains, matrix)``.

    NOTE(review): block nesting was reconstructed from a collapsed source
    line - confirm against the original file.
    """
    path_to_matrix = 'C:/ShareSSD/scop/data/sim_' + sample + '_' + measure
    path_to_values = 'C:/ShareSSD/scop/values_' + sample + '_' + measure
    counter = 0  # number of completed rows == zeros to pad the next row with
    matrix = []  # flat list of values, reshaped after parsing
    with open(path_to_matrix, 'r') as fp:
        size = 0
        domains = []
        #get structures
        line = fp.readline()
        while line:
            if 'PDB :' in str(line):
                if '#' not in str(line):
                    size += 1
                    domain = str(line).strip().split()[-1].split('/')[-1]
                    domains.append(str(domain))
            if 'Distance records' in str(line):
                break
            line = fp.readline()
        #get distance matrix
        while line:
            if 'DIST :' in str(line):
                if '#' not in str(line):
                    print(line)
                    parsed = str(line).strip().split()
                    current_row = parsed[2]
                    value = float(parsed[4])
                    # Consume every record of the current row; the loop ends
                    # holding the first record of the next row in ``value``.
                    # NOTE(review): float(parsed[4]) runs before the row check,
                    # so a following line with fewer than five tokens raises
                    # IndexError.
                    while current_row == parsed[2]:
                        matrix.append(value)
                        line = fp.readline()
                        parsed = str(line).strip().split()
                        value = float(parsed[4])
                    # Pad the new row with one zero per completed row, then
                    # store the record that ended the inner loop.
                    counter += 1
                    i = 0
                    while i < counter:
                        matrix.append(0)
                        i += 1
                    i = 0
                    matrix.append(value)
            line = fp.readline()
        # Final row: zeros only.
        counter += 1
        i = 0
        while i < counter:
            matrix.append(0)
            i += 1
        i = 0
    matrix = np.asmatrix(matrix)
    matrix = matrix.reshape(size, size - 1)
    # n and m are taken BEFORE the zero column is added, so m == size - 1.
    n, m = matrix.shape
    X0 = np.zeros((n, 1))
    matrix = np.hstack((X0, matrix))
    # np.savetxt("C:/ShareSSD/scop/data_old2/kernel_"+measure, matrix, delimiter=' ', newline='\n')
    # if 'rmsd' in path_to_matrix:
    #     matrix = matrix/(matrix.max()/1)
    # if 'gdt' in path_to_matrix:
    #     matrix = mf.processgdtmatrix(matrix)
    matrix = mf.symmetrizeMatrix(matrix)
    # save kernel values
    # NOTE(review): j runs over m == size - 1 columns, so the last column of
    # the size x size matrix is not written - confirm this is intended.
    with open(path_to_values, 'w') as nf:
        for i in range(0, n):
            for j in range(0, m):
                nf.write(str(matrix[i, j]) + '\n')
    # np.savetxt("C:/ShareSSD/scop/data_old2/matrix_"+measure, matrix, delimiter=' ', newline='\n')
    matrix.dump('C:/ShareSSD/scop/matrix_' + sample + '_' + measure)
    return domains, matrix
# Labels are converted to plain ints.
ground_truth = list(map(int, ground_truth))

# Preprocessing steps currently disabled:
#matrix1 = mf.minMaxScale(matrix1)
#matrix2 = mf.minMaxScale(matrix2)
#matrix3 = mf.minMaxScale(matrix3)
# matrix1 = mf.calculatedistances(matrix1)
# matrix2 = mf.calculatedistances(matrix2)
# matrix3 = mf.calculatedistances(matrix3)

# Fixed weights; a sweep over w1 was used previously:
#for w1 in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
w1 = 0.5
w2 = 0.5
w3 = 0
corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

# Hierarchical
for link in ['complete', 'average']:
    # The linkage must follow the loop variable; it used to be hard-coded to
    # 'complete', so the results saved as 'hierarchical_average' were really
    # complete-linkage results.
    agglomerative = AgglomerativeClustering(affinity='precomputed',
                                            n_clusters=n_labels,
                                            linkage=link).fit(corr)
    labels = agglomerative.labels_
    metrics = cl.clusterEvaluation(corr, labels, ground_truth)
    cl.saveResults(measure1, measure2, 'hierarchical_' + link, sample, metrics)

# K-Medoids
medoids, clusters = km.kMedoids(corr, n_labels, 100)
labels = km.sortLabels(clusters)
metrics = cl.clusterEvaluation(corr, labels, ground_truth)
cl.saveResults(measure1, measure2, 'kmedoids', sample, metrics)
def test_operation_matrix_commutation(self):
    """The commutator of [[0, 1], [1, 0]] and [[1, 0], [0, -1]] must equal
    -2j times [[0, -1j], [1j, 0]]."""
    first = np.array([[0, 1], [1, 0]]) + 0.j
    second = np.array([[1, 0], [0, -1]]) + 0.j
    third = np.array([[0, -1.j], [1.j, 0]])

    commutator = MF.operation_matrix_commutation(first, second)
    expected = -2.j * third
    self.assertTrue(np.allclose(commutator, expected))
# Experiment script: combine two matrices for sample 'a.1' with varying
# weight w1 and cluster the result with two linkage types.
# NOTE(review): block nesting was reconstructed from a collapsed source line
# - confirm against the original file.
n = scop.getUniqueClassifications('a.1')
measure1 = 'seq'
measure2 = 'seq'
#measure2 = 'maxsub'
#read matrices
# readDistances is called for the domain list; the matrix it returns is
# overwritten by the next statement.
domains, matrix1 = ff.readDistances('a.1.', 'rmsd')
matrix1 = ff.loadMatrixFromFile('a.1.', measure1)
#matrix2 = matrix1
#domains, matrix2 = ff.readDistances('a.1.', measure2)
matrix2 = ff.loadMatrixFromFile('a.1.', measure2)
ground_truth = scop.getDomainLabels(domains)
matrix1 = mf.calculateDistances(matrix1, matrix1)
matrix2 = mf.calculateDistances(matrix2, matrix2)
for w1 in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:
    corr = mf.calculateCorrelation(w1, matrix1, matrix2)
    for link in ['complete','average']:
        # NOTE(review): the file is opened for writing (created/truncated)
        # but nothing is written to it in the visible code; path_to_results
        # is defined elsewhere.
        with open(path_to_results+'euclhiearchical_'+link+'_'+str(w1)+'_'+measure1+'_'+measure2,'w') as file:
            agglo = AgglomerativeClustering(affinity='precomputed', n_clusters=n, linkage=link).fit(corr)
            labels = agglo.labels_
            metrics = ce.clusterEvaluation(corr, labels, ground_truth)
    # Progress output.
    print(w1)
from sklearn.cluster import AgglomerativeClustering

# Protein data is loaded once, ahead of any loop that uses it.
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'
sample, sample_for_domains = 'a.1.', 'a.1'

# Constants for a standardisation step that is currently disabled:
#X = (X - (mean/std_dev))
mean = 1.79197547637771
std_dev = 0.669382812243833

# Load, min-max scale, then run calculateDistances.
X = rs.loadMatrixFromFile(sample, measure1)
X = mf.calculateDistances(mf.minMaxScale(X))

domains = rs.loadDomainListFromFile(sample_for_domains)

# Existing labels, converted to ints.
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = [int(label) for label in scop.getDomainLabels(domains)]

X = np.asmatrix(X)
M, C = kmedoids.kMedoids(X, n_labels, 100)
def test_check_matrix_unitary(self):
    """MF.check_matrix_unitary rejects a fixed 2x3 matrix and accepts the
    output of MF.make_matrix_random_unitary(5, 6)."""
    not_unitary = np.array([[1 + 1.j, 2, 3], [4, 5, 6]])
    random_unitary = MF.make_matrix_random_unitary(5, 6)

    rejected = MF.check_matrix_unitary(not_unitary)
    self.assertFalse(rejected)
    accepted = MF.check_matrix_unitary(random_unitary)
    self.assertTrue(accepted)
from sklearn.cluster import AgglomerativeClustering

# Protein data is loaded once, ahead of any loop that uses it.
path_to_results = 'C:/ShareSSD/scop/clustering_results/'
measure1 = 'rmsd'
sample, sample_for_domains = 'a.1.', 'a.1'

# Constants for a standardisation step that is currently disabled:
#X = (X - (mean/std_dev))
mean = 1.79197547637771
std_dev = 0.669382812243833

X = mf.calculateDistances(rs.loadMatrixFromFile(sample, measure1))

# The domain list is looked up with the dotted sample id.
domains = rs.loadDomainListFromFile(sample)

# Existing labels, converted to ints.
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = [int(label) for label in scop.getDomainLabels(domains)]

X = np.asmatrix(X)
agglomerative = AgglomerativeClustering(
    affinity='precomputed', n_clusters=n_labels, linkage='complete').fit(X)
def make_random_density_matrix_from_ginibre(N: int) -> np.ndarray:
    """Build a random density matrix of size N from a Ginibre matrix.

    The matrix ``g`` returned by ``MF.make_matrix_ginibre(N)`` is turned
    into the product ``g^dagger g`` and divided by its trace, so the result
    has unit trace.

    @see MatrixFunctions.make_matrix_ginibre
    """
    ginibre = MF.make_matrix_ginibre(N)
    # g^dagger g: Hermitian product of the conjugate transpose with g.
    gram = np.dot(np.conjugate(ginibre.T), ginibre)
    # Normalise to trace one.
    trace = np.trace(gram)
    return gram / trace
def readSimilaritiesToMatrix(sample, measure):
    """Read pairwise values from a ``values_`` file and dump them as a matrix.

    Each input line is split on single spaces into
    ``structure1 structure2 value``; reading stops at a line containing
    ``'END'``. Consecutive lines sharing the same first token form one row,
    which is left-padded with zeros (one more zero per row). A final
    zero-only row is appended. The matrix is passed through
    ``mf.symmetrizeMatrix`` and dumped to the ``matrix_`` file. If the
    domain file does not exist yet, the collected structure names are
    written to it, one per line, followed by ``END``.

    Returns nothing; the results are the files written.

    NOTE(review): block nesting was reconstructed from a collapsed source
    line - confirm against the original file.
    """
    path_to_matrix = 'C:/ShareSSD/scop/data/values_' + sample + '_' + measure
    path_to_domains = 'C:/ShareSSD/scop/data/domains_' + sample
    counter = 0  # number of rows started so far == zeros to pad with
    matrix = []
    row = []
    with open(path_to_matrix, 'r') as fp:
        domains = set()
        line = fp.readline()
        while line:
            if 'END' in line:
                break
            parsed = str(line).strip().split(' ')
            structure1 = parsed[0]
            structure2 = parsed[1]
            value = float(parsed[2])
            domains.add(structure1)
            domains.add(structure2)
            # add the respective amount of zeroes to the current row
            counter += 1
            i = 0
            while i < counter:
                row.append(0)
                i += 1
            i = 0
            # track the current structure and read its alignments
            current_row = parsed[0]
            # The loop leaves ``line`` on the first record of the next row
            # (or on the END marker), so the outer loop does not read again.
            # NOTE(review): a file that ends without an END line makes
            # float(parsed[2]) raise IndexError on the empty final read.
            while current_row == parsed[0] and line:
                row.append(value)
                line = fp.readline()
                if 'END' in line:
                    break
                parsed = str(line).strip().split(' ')
                print(parsed)
                value = float(parsed[2])
            matrix.append(row)
            row = []
        line = fp.readline()
        # Final row: zeros only.
        counter += 1
        i = 0
        while i < counter:
            row.append(0)
            i += 1
        i = 0
        matrix.append(row)
    matrix = np.asmatrix(matrix)
    # symmetrize and write results to file
    matrix = mf.symmetrizeMatrix(matrix)
    matrix = np.matrix(matrix)
    matrix.dump("C:/ShareSSD/scop/data/matrix_" + sample + '_' + measure)
    # write domain list to file
    if not os.path.isfile(path_to_domains):
        with open(path_to_domains, 'w') as nf:
            # ``domains`` is a set, so the order of the written names is not
            # defined.
            domains = list(domains)
            for domain in domains:
                nf.write(domain + '\n')
            nf.write('END')