# Hierarchical-clustering sweep over a weighted combination of two matrices
# for the 'a.1' sample.
#
# NOTE(review): the original text of this script had lost its line breaks and
# indentation. The nesting below is reconstructed from the syntax; the
# placement of print(w1) (once per weight) in particular should be confirmed.

# Number of clusters requested from the clustering step below.
n = scop.getUniqueClassifications('a.1')

measure1 = 'seq'
measure2 = 'seq'
# Alternative previously tried for measure2: 'maxsub'

# Read matrices.
# readDistances supplies `domains`; the matrix it returns is rebound on the
# next statement by the matrix loaded for `measure1`.
domains, matrix1 = ff.readDistances('a.1.', 'rmsd')
matrix1 = ff.loadMatrixFromFile('a.1.', measure1)
# Earlier variants for matrix2 (reuse matrix1, or ff.readDistances with
# measure2) were disabled in favour of loading it from file.
matrix2 = ff.loadMatrixFromFile('a.1.', measure2)

ground_truth = scop.getDomainLabels(domains)

matrix1 = mf.calculateDistances(matrix1, matrix1)
matrix2 = mf.calculateDistances(matrix2, matrix2)

# 0.1, 0.2, ..., 0.9 -- k / 10 yields the same floats as the decimal literals.
weights = [k / 10 for k in range(1, 10)]

for w1 in weights:
    corr = mf.calculateCorrelation(w1, matrix1, matrix2)

    for link in ('complete', 'average'):
        run_tag = '_'.join((link, str(w1), measure1, measure2))
        results_file = path_to_results + 'euclhiearchical_' + run_tag

        # NOTE(review): the file is opened for writing but nothing in the
        # visible code writes to it -- confirm whether `metrics` should be
        # persisted here.
        with open(results_file, 'w') as file:
            # NOTE(review): newer scikit-learn releases rename `affinity` to
            # `metric`; verify against the installed version.
            clusterer = AgglomerativeClustering(
                affinity='precomputed',
                n_clusters=n,
                linkage=link,
            )
            agglo = clusterer.fit(corr)
            labels = agglo.labels_
            metrics = ce.clusterEvaluation(corr, labels, ground_truth)

    print(w1)
# Fragment of a clustering script whose line breaks and indentation were lost.
# It starts mid-script (matrix1 is used below but its load is not visible) and
# stops inside an unfinished AgglomerativeClustering( call, so the code on the
# following line is left untouched.
#
# Visible steps, in order:
#   1. Load matrix2 / matrix3 via rs.loadMatrixFromFile(sample, measureN) and
#      the domain list via rs.loadDomainListFromFile(sample).
#   2. Get n_labels from scop.getUniqueClassifications(sample_for_domains) and
#      ground_truth from scop.getDomainLabels(domains), converted with int().
#   3. Pass matrix1..matrix3 through mf.minMaxScale, then mf.calculateDistances.
#   4. For each w1 in np.arange(0.05, 1.05, 0.05), with w2 = 0 and w3 = 1 - w1,
#      build corr via mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3,
#      w1, w2, w3).
#   5. For each linkage in ['complete', 'average'], start constructing an
#      AgglomerativeClustering with affinity='precomputed' (call is cut off).
#
# NOTE(review): `ground_truth = map(int, ground_truth)` is immediately wrapped
# again by `list(map(int, ground_truth))`; the first conversion looks redundant.
# NOTE(review): np.arange with a fractional step may include or exclude the
# value nearest 1.0 depending on rounding -- confirm the intended weight grid.
matrix2 = rs.loadMatrixFromFile(sample, measure2) matrix3 = rs.loadMatrixFromFile(sample, measure3) domains = rs.loadDomainListFromFile(sample) n_labels = scop.getUniqueClassifications(sample_for_domains) ground_truth = scop.getDomainLabels(domains) ground_truth = map(int, ground_truth) ground_truth = list(map(int, ground_truth)) matrix1 = mf.minMaxScale(matrix1) matrix2 = mf.minMaxScale(matrix2) matrix3 = mf.minMaxScale(matrix3) matrix1 = mf.calculateDistances(matrix1) matrix2 = mf.calculateDistances(matrix2) matrix3 = mf.calculateDistances(matrix3) for w1 in np.arange(0.05, 1.05, 0.05): w2 = 0 w3 = 1 - w1 corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3) # Hierarchical for link in ['complete', 'average']: agglomerative = AgglomerativeClustering( affinity='precomputed',
from sklearn.cluster import AgglomerativeClustering

# K-medoids run on the scaled 'rmsd' matrix of the 'a.1' sample.
#
# NOTE(review): the original text of this script had lost its line breaks;
# statement boundaries below are reconstructed from the syntax.

# load protein data before loop
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'
sample = 'a.1.'
sample_for_domains = 'a.1'

X = rs.loadMatrixFromFile(sample, measure1)

# Only referenced by the disabled standardisation step
# `X = (X - (mean/std_dev))`; kept in case later code reads them.
mean = 1.79197547637771
std_dev = 0.669382812243833

X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

# NOTE(review): the domain list is loaded with `sample_for_domains` ('a.1')
# while the matrix uses `sample` ('a.1.') -- confirm this is intentional.
domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

# Convert the labels to int exactly once. The original wrapped a lazy
# map(int, ...) in a second list(map(int, ...)), converting every label twice
# for the same result.
ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)
M, C = kmedoids.kMedoids(X, n_labels, 100)

print('medoids:')