# Load protein data before the loop.
path_to_results = 'C:/ShareSSD/scop/clustering_results_single/'

# All three measure slots are set to the same measure in this configuration.
measure1 = 'maxsub'
measure2 = 'maxsub'
measure3 = 'maxsub'

sample_for_domains = 'a.1'
sample = 'a.1.'

# Only one matrix is read from disk; matrix2 and matrix3 start out as
# aliases of the very same object as matrix1.
path_to_matrix = 'C:/ShareSSD/scop/tests/matrix_' + sample + '_' + measure1
matrix1 = np.load(path_to_matrix)
matrix2 = matrix1
matrix3 = matrix1

domains = rs.loadDomainListFromFile(sample)

# Read the existing labels.
n_labels = scop.getUniqueClassifications(sample_for_domains)
# Convert the labels to ints exactly once and materialise them as a list
# (the previous code wrapped the labels in map(int, ...) twice).
ground_truth = list(map(int, scop.getDomainLabels(domains)))

# Scale every matrix first, then convert every matrix to distances.
# NOTE(review): the statement order is kept as-is because the three names
# alias one array before scaling; confirm whether the mf helpers mutate
# their argument before merging or reordering these calls.
matrix1 = mf.minMaxScale(matrix1)
matrix2 = mf.minMaxScale(matrix2)
matrix3 = mf.minMaxScale(matrix3)

matrix1 = mf.calculateDistances(matrix1)
matrix2 = mf.calculateDistances(matrix2)
matrix3 = mf.calculateDistances(matrix3)

# presumably the weight applied to matrix1 - confirm against the code that follows
w1 = 1
from sklearn.cluster import AgglomerativeClustering

# Iterate over every (measure, sample) combination.
for measure in ['rmsd', 'gdt_2', 'gdt_4']:
    for spl in ['a.1', 'a.3', 'b.2', 'b.3']:
        # Load protein data before the loop.
        path_to_results = 'C:/ShareSSD/scop/clustering_results_seq/'

        # The first two measure slots follow the loop variable; the third
        # is fixed to 'seq'.
        measure1 = measure
        measure2 = measure
        measure3 = 'seq'

        sample_for_domains = spl
        # The sample name is the loop value with a trailing dot appended.
        sample = str(spl) + '.'

        matrix1 = rs.loadMatrixFromFile(sample, measure1)
        matrix2 = rs.loadMatrixFromFile(sample, measure2)
        matrix3 = rs.loadMatrixFromFile(sample, measure3)

        domains = rs.loadDomainListFromFile(sample)

        # Read the existing labels.
        n_labels = scop.getUniqueClassifications(sample_for_domains)
        # Convert the labels to ints exactly once and materialise them as a
        # list (the previous code wrapped the labels in map(int, ...) twice).
        ground_truth = list(map(int, scop.getDomainLabels(domains)))

        matrix1 = mf.minMaxScale(matrix1)
        matrix2 = mf.minMaxScale(matrix2)
        matrix3 = mf.minMaxScale(matrix3)
import Clustering as cl
import MatrixFunctions as mf
import ReadSimilarities as rs
import UtilitiesSCOP as scop
import numpy as np
from sklearn.cluster import AgglomerativeClustering

# Load protein data before the loop.
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'
sample = 'a.1.'
sample_for_domains = 'a.1'

X = rs.loadMatrixFromFile(sample, measure1)

# These constants are only referenced by the disabled line below.
mean = 1.79197547637771
std_dev = 0.669382812243833
# NOTE(review): as written, the disabled expression subtracts mean/std_dev
# from X rather than computing (X - mean) / std_dev - confirm the intent
# before re-enabling it.
#X = (X - (mean/std_dev))
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample_for_domains)

# Read the existing labels.
n_labels = scop.getUniqueClassifications(sample_for_domains)
# Materialise the int labels as a list: on Python 3 a bare map() is a
# single-use iterator, which has no len(), cannot be indexed, and is empty
# after the first pass over it.
ground_truth = list(map(int, scop.getDomainLabels(domains)))