# Single-measure setup: load one precomputed matrix for a SCOP sample and
# derive the ground-truth labels before any clustering loop runs.
path_to_results = 'C:/ShareSSD/scop/clustering_results_single/'

# All three measure slots are set to the same measure in this configuration.
measure1 = 'maxsub'
measure2 = 'maxsub'
measure3 = 'maxsub'

# The same sample is spelled two ways: without the trailing dot for
# scop.getUniqueClassifications, with it for the matrix path and domain list.
sample_for_domains = 'a.1'
sample = 'a.1.'

path_to_matrix = 'C:/ShareSSD/scop/tests/matrix_' + sample + '_' + measure1

# The matrix is read from disk once; matrix2 and matrix3 start out as
# references to the very same object as matrix1 (no copy is made).
matrix1 = np.load(path_to_matrix)
matrix2 = matrix1
matrix3 = matrix1

domains = rs.loadDomainListFromFile(sample)

n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)
# Convert the labels to ints in a single pass and materialize them as a list
# (a bare map() is a one-shot iterator on Python 3).
ground_truth = list(map(int, ground_truth))

# Pass every matrix through mf.minMaxScale first ...
matrix1 = mf.minMaxScale(matrix1)
matrix2 = mf.minMaxScale(matrix2)
matrix3 = mf.minMaxScale(matrix3)

# ... and only then through mf.calculateDistances. The two stages are kept
# separate and in this order because all three names began as one object.
matrix1 = mf.calculateDistances(matrix1)
matrix2 = mf.calculateDistances(matrix2)
matrix3 = mf.calculateDistances(matrix3)

# NOTE(review): presumably the weight applied to matrix1 when the matrices
# are combined; the code that uses it is not visible here -- confirm.
w1 = 1
# Example #2
0
from sklearn.cluster import AgglomerativeClustering

# Settings that do not depend on the loop variables are assigned once.
path_to_results = 'C:/ShareSSD/scop/clustering_results_seq/'
measure3 = 'seq'

# Sweep every (measure, sample) combination.
for measure in ['rmsd', 'gdt_2', 'gdt_4']:
    for spl in ['a.1', 'a.3', 'b.2', 'b.3']:

        # load protein data before loop
        # The first two measure slots both use the current measure; the third
        # slot is always 'seq'.
        measure1 = measure
        measure2 = measure

        # The same sample is spelled two ways: without the trailing dot for
        # scop.getUniqueClassifications, with it for the file loaders.
        sample_for_domains = spl
        sample = spl + '.'

        # NOTE(review): matrix1 and matrix2 are loaded with identical
        # arguments; they are kept as two separate loads so each name gets
        # its own object -- confirm whether one load could be shared.
        matrix1 = rs.loadMatrixFromFile(sample, measure1)
        matrix2 = rs.loadMatrixFromFile(sample, measure2)
        matrix3 = rs.loadMatrixFromFile(sample, measure3)

        domains = rs.loadDomainListFromFile(sample)

        n_labels = scop.getUniqueClassifications(sample_for_domains)

        ground_truth = scop.getDomainLabels(domains)
        # Convert the labels to ints in a single pass and materialize them as
        # a list (a bare map() is a one-shot iterator on Python 3).
        ground_truth = list(map(int, ground_truth))

        # Pass each matrix through mf.minMaxScale.
        matrix1 = mf.minMaxScale(matrix1)
        matrix2 = mf.minMaxScale(matrix2)
        matrix3 = mf.minMaxScale(matrix3)
import Clustering as cl
import MatrixFunctions as mf
import ReadSimilarities as rs
import UtilitiesSCOP as scop
import numpy as np

from sklearn.cluster import AgglomerativeClustering

# load protein data before loop
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'

# The same sample is spelled two ways: with the trailing dot for the matrix
# loader, without it for the domain list and classification lookup below.
sample = 'a.1.'
sample_for_domains = 'a.1'
X = rs.loadMatrixFromFile(sample, measure1)

# Hard-coded statistics; in the visible code they are referenced only by the
# disabled alternative scaling on the commented-out line below.
mean = 1.79197547637771
std_dev = 0.669382812243833

#X = (X - (mean/std_dev))
# Pass the matrix through mf.minMaxScale, then mf.calculateDistances.
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

# NOTE(review): the domain list is loaded with the dot-less sample name here,
# while the other scripts in this file pass the dotted form -- confirm which
# one rs.loadDomainListFromFile expects.
domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

# Materialize the int labels as a list: on Python 3 a bare map() is a one-shot
# iterator with no len() or indexing, and it would be empty after its first
# use.
ground_truth = list(map(int, ground_truth))