from sklearn.cluster import AgglomerativeClustering

n = scop.getUniqueClassifications('a.1')

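# precomputed measures to load and combine ('maxsub' is an alternative for the second)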
measure1 = 'seq'
measure2 = 'seq'
#measure2 = 'maxsub'

# read matrices (readDistances below is used only for its domain list; matrix1 is reloaded from file right after)
domains, matrix1 = ff.readDistances('a.1.', 'rmsd')
matrix1 = ff.loadMatrixFromFile('a.1.', measure1)
#matrix2 = matrix1
#domains, matrix2 = ff.readDistances('a.1.', measure2)
matrix2 = ff.loadMatrixFromFile('a.1.', measure2)

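# SCOP classification labels for the domains serve as the clustering ground truth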
ground_truth = scop.getDomainLabels(domains)

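# convert the loaded matrices into pairwise distance matrices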
matrix1 = mf.calculateDistances(matrix1, matrix1)
matrix2 = mf.calculateDistances(matrix2, matrix2)

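# sweep the weight w1 given to the first distance matrix in the combination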
for w1 in [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]:

    corr = mf.calculateCorrelation(w1, matrix1, matrix2)

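    # run complete- and average-linkage agglomerative clustering on the combined matrix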
    for link in ['complete','average']:

        with open(path_to_results+'euclhierarchical_'+link+'_'+str(w1)+'_'+measure1+'_'+measure2,'w') as file:

            agglo = AgglomerativeClustering(affinity='precomputed', n_clusters=n, linkage=link).fit(corr)
            labels = agglo.labels_
            metrics = ce.clusterEvaluation(corr, labels, ground_truth)

            print(w1)
Example #2
        matrix2 = rs.loadMatrixFromFile(sample, measure2)
        matrix3 = rs.loadMatrixFromFile(sample, measure3)

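        # domain identifiers for this sample, later mapped to SCOP ground-truth labels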
        domains = rs.loadDomainListFromFile(sample)

        n_labels = scop.getUniqueClassifications(sample_for_domains)

        ground_truth = scop.getDomainLabels(domains)
        ground_truth = list(map(int, ground_truth))

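        # rescale each matrix to [0, 1] before converting to distances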
        matrix1 = mf.minMaxScale(matrix1)
        matrix2 = mf.minMaxScale(matrix2)
        matrix3 = mf.minMaxScale(matrix3)

        matrix1 = mf.calculateDistances(matrix1)
        matrix2 = mf.calculateDistances(matrix2)
        matrix3 = mf.calculateDistances(matrix3)

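        # sweep w1; the complementary weight goes to the third matrix while w2 is held at zero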
        for w1 in np.arange(0.05, 1.05, 0.05):

            w2 = 0
            w3 = 1 - w1

            corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1,
                                                 w2, w3)

            # Hierarchical
            for link in ['complete', 'average']:
                agglomerative = AgglomerativeClustering(
                    affinity='precomputed',
                    # remaining arguments assumed by analogy with the first
                    # example: n_labels clusters, fitted on the combined matrix
                    n_clusters=n_labels,
                    linkage=link).fit(corr)
Example #3

from sklearn.cluster import AgglomerativeClustering
import numpy as np

# load protein data before loop
path_to_results = '/home/pedro/Desktop/scop/clustering_results/'
measure1 = 'rmsd'

sample = 'a.1.'
sample_for_domains = 'a.1'
X = rs.loadMatrixFromFile(sample, measure1)

mean = 1.79197547637771
std_dev = 0.669382812243833

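# mean and std_dev feed only the commented-out rescaling line below; min-max scaling is used instead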
#X = (X - (mean/std_dev))
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)

ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)

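# run k-medoids with n_labels clusters; the third argument is assumed to be an iteration cap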
M, C = kmedoids.kMedoids(X, n_labels, 100)

print('medoids:')
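# Continuation sketch (assumption: kMedoids returns the medoid indices in M and
# a dict C mapping each cluster label to the indices of its member domains)
print(M)
print('clusters:')
for label in C:
    print(label, C[label])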