from sklearn import metrics
from sklearn.cluster import AgglomerativeClustering

# mf (matrix helpers) and km (k-medoids) are project-local modules.
def fitnessFunction(individual, algorithm, n_labels, ground_truth, m1, m2, m3):
    w1 = individual[0]
    w2 = individual[1]
    # w3 = individual[2]
    w3 = 0
    corr = mf.calculateCorrelationMatrix(m1, m2, m3, w1, w2, w3)
    # Note: affinity= was renamed to metric= in scikit-learn >= 1.2.
    if algorithm == 'complete':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    # Alternative fitness measures that were tried:
    # fitness = metrics.homogeneity_score(labels, ground_truth)
    # fitness = metrics.adjusted_rand_score(labels, ground_truth)
    # fitness = sum(individual)
    # Renamed to calinski_harabasz_score in scikit-learn >= 0.23.
    fitness = metrics.calinski_harabaz_score(corr, labels)
    # Drop references to the large matrices so they can be garbage-collected.
    corr = None
    m1 = None
    m2 = None
    m3 = None
    return fitness
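# A minimal, self-contained sketch of what fitnessFunction optimizes, assuming
# mf.calculateCorrelationMatrix is (roughly) a weighted sum of the three
# precomputed distance matrices; combine_distances below is a hypothetical
# stand-in for it, and the toy matrices are random.
import numpy as np
from sklearn.cluster import AgglomerativeClustering
from sklearn.metrics import calinski_harabasz_score

def combine_distances(m1, m2, m3, w1, w2, w3):
    # Hypothetical stand-in for mf.calculateCorrelationMatrix.
    return w1 * m1 + w2 * m2 + w3 * m3

def manhattan(p):
    # Pairwise Manhattan distances between the rows of p.
    return np.abs(p[:, None, :] - p[None, :, :]).sum(axis=2)

rng = np.random.default_rng(0)
pts = rng.random((30, 5))
m1, m2, m3 = manhattan(pts), manhattan(pts ** 2), manhattan(np.sqrt(pts))

corr = combine_distances(m1, m2, m3, 0.6, 0.4, 0.0)
# metric='precomputed' in scikit-learn >= 1.2 (affinity= in older releases).
labels = AgglomerativeClustering(metric='precomputed', n_clusters=3,
                                 linkage='average').fit(corr).labels_
print(calinski_harabasz_score(corr, labels))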
# Fitness callback for the GA weight search; matrix1/2/3, algorithm, n_labels,
# ground_truth, writer and current_iteration come from the enclosing scope.
def fitness(indv):
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    if algorithm == 'complete':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    metrics = ce.clusterEvaluation(corr, labels, ground_truth)
    # Log the weights and all evaluation metrics for this iteration.
    writer.write(str(current_iteration) + ': ' + str(w1) + ' ' + str(w2) + ' '
                 + str(w3) + ' ' + ' '.join(str(x) for x in metrics) + '\n')
def fitness(indv):
    w1, w2, w3 = indv.solution
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)
    if algorithm == 'complete':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='complete').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'average':
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage='average').fit(corr)
        labels = agglomerative.labels_
    elif algorithm == 'kmedoids':
        _, clusters = km.kMedoids(corr, n_labels, 100)
        labels = km.sortLabels(clusters)
    metrics = ce.clusterEvaluation(corr, labels, ground_truth)
    # The GA maximizes this value: the first evaluation metric, scaled,
    # returned as a float.
    return float(metrics[0]) * 100
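# The indv.solution access suggests these callbacks are registered with the
# gaft GA framework; if so, the wiring could look like this sketch (ranges,
# operator choices and parameters are illustrative, not taken from the project):
from gaft import GAEngine
from gaft.components import BinaryIndividual, Population
from gaft.operators import TournamentSelection, UniformCrossover, FlipBitMutation

# One gene per weight, each constrained to [0, 1].
indv_template = BinaryIndividual(ranges=[(0, 1), (0, 1), (0, 1)], eps=0.01)
population = Population(indv_template=indv_template, size=50).init()

engine = GAEngine(population=population,
                  selection=TournamentSelection(),
                  crossover=UniformCrossover(pc=0.8, pe=0.5),
                  mutation=FlipBitMutation(pm=0.1))

@engine.fitness_register
def fitness(indv):
    w1, w2, w3 = indv.solution
    # ... cluster with (w1, w2, w3) as above and return a float fitness ...
    return float(w1 + w2 + w3)  # placeholder

engine.run(ng=100)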
matrix2 = mf.calculateDistances(matrix2)
matrix3 = mf.calculateDistances(matrix3)

# Sweep w1 over (0, 1]; w2 is pinned to 0 so matrix2 is ignored and
# w3 takes the remaining weight.
for w1 in np.arange(0.05, 1.05, 0.05):
    w2 = 0
    w3 = 1 - w1
    corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3, w1, w2, w3)

    # Hierarchical
    for link in ['complete', 'average']:
        agglomerative = AgglomerativeClustering(affinity='precomputed',
                                                n_clusters=n_labels,
                                                linkage=link).fit(corr)
        labels = agglomerative.labels_
        metrics = ce.clusterEvaluation(corr, labels, ground_truth)
        print(metrics)
        ce.saveResultsWithWeights(measure1, measure2, w1,
                                  'hierarchical_' + link, sample, metrics)

    # K-Medoids
    medoids, clusters = km.kMedoids(corr, n_labels, 100)
    labels = km.sortLabels(clusters)
    metrics = ce.clusterEvaluation(corr, labels, ground_truth)
    ce.saveResultsWithWeights(measure1, measure2, w1, 'kmedoids', sample, metrics)
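# The sweep above pins w2 = 0; to search all three weights on the simplex
# (w1 + w2 + w3 = 1), the grid could be extended like this hypothetical sketch:
for w1 in np.arange(0.0, 1.05, 0.05):
    for w2 in np.arange(0.0, 1.05 - w1, 0.05):
        w3 = 1.0 - w1 - w2
        # corr = mf.calculateCorrelationMatrix(matrix1, matrix2, matrix3,
        #                                      w1, w2, w3)
        # ... cluster and evaluate as above ...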
##################################
# USAGE ##########################
##################################
# readBin('/path/to/testcase', removeEquivalents=1)
# k_means_plusplus(#ofClusters, distanceMeasure)
# k_medoids(#ofClusters, distanceMeasure)
# distanceMeasure:
#   1 - Hamming
#   2 - Jaccard
#   3 - Euclidean (rounded)

kMean = KMeansPP.KMeansPP()
kMean.readBin('../test_result/MSTP_bin_result.txt')
kMean.k_means_plusplus(10, 2)
print(kMean.clusters)
reduced = greedy_pick(kMean.centroids)
mut_score(reduced, kMean.dataset)

kMed = KMedoids.KMedoids()
kMed.readBin('../test_result/MSTP_bin_result.txt')
kMed.k_medoids(9, 2)
print(kMed.clusters)
reduced = greedy_pick(kMed.medoids)
mut_score(reduced, kMed.dataset)
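# For reference, the three distanceMeasure options correspond to standard
# distances; a sketch using scipy on binary vectors (assuming the readBin
# output is 0/1 coverage data):
import numpy as np
from scipy.spatial.distance import hamming, jaccard, euclidean

a = np.array([1, 0, 1, 1, 0])
b = np.array([1, 1, 0, 1, 0])
print(hamming(a, b))           # 1 - Hamming (fraction of differing positions)
print(jaccard(a, b))           # 2 - Jaccard dissimilarity
print(round(euclidean(a, b)))  # 3 - Euclidean, rounded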
mean = 1.79197547637771
std_dev = 0.669382812243833
# Z-score standardization (kept for reference; min-max scaling is used instead):
# X = (X - mean) / std_dev
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)
ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)
M, C = kmedoids.kMedoids(X, n_labels, 100)

print('medoids:')
for point_idx in M:
    print(X[point_idx])
print('')

print('clustering result:')
for label in C:
    for point_idx in C[label]:
        print('label {0}: {1}'.format(label, X[point_idx]))
# Z-score standardization (kept for reference; min-max scaling is used instead):
# X = (X - mean) / std_dev
X = mf.minMaxScale(X)
X = mf.calculateDistances(X)

domains = rs.loadDomainListFromFile(sample_for_domains)

# read existing labels
n_labels = scop.getUniqueClassifications(sample_for_domains)
ground_truth = scop.getDomainLabels(domains)
ground_truth = list(map(int, ground_truth))

X = np.asmatrix(X)
medoids, clusters = kmedoids.kMedoids(X, n_labels, 100)

print('clustering result:')
for label in clusters:
    for point_idx in clusters[label]:
        print('label {0}, {1}: {2}'.format(label, ground_truth[point_idx],
                                           X[point_idx]))

# cl.clusterEvaluationNoLabels(X, clusters)

# Invert the clusters dict so each point index maps to its cluster label.
index_list = []
new_dict = {}
for label in clusters:
    for point_idx in clusters[label]:
        new_dict[point_idx] = label
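# A sketch of how the inverted dict could be flattened into a label vector and
# scored against ground_truth with scikit-learn (assumes every point index in
# range(len(ground_truth)) was assigned to a cluster):
from sklearn import metrics

flat_labels = [new_dict[i] for i in range(len(ground_truth))]
print(metrics.adjusted_rand_score(ground_truth, flat_labels))
print(metrics.homogeneity_score(ground_truth, flat_labels))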