def main():
    """Load a saved clustering, prune it, evaluate each node and print it.

    Reads ``json/<METRIC>.json``, rebuilds the cluster tree with
    ``clusters.read_json``, prunes it down to ``len(level1_set)`` nodes,
    runs ``evaluate_cluster`` on every pruned node, and finally prints the
    full tree with the item labels.
    """
    json_path = "json/{}.json".format(METRIC)
    with open(json_path) as handle:
        payload = json.load(handle)

    created_clusters = clusters.read_json(payload)
    labels = make_vectors.get_list("data/itemIndex.txt")

    # Only the hierarchy and the first-level set are used below; the
    # second-level set is unpacked solely to match the returned triple.
    hierarchy, level1_set, _level2_set = get_hierarchy("data/itemHierarchy.csv")

    # Prune so that exactly len(level1_set) nodes remain.
    target_node_count = len(level1_set)
    pruned_nodes = prune(created_clusters, target_node_count)

    # Evaluate which items belong to which of the pruned nodes.
    for pruned_node in pruned_nodes:
        evaluate_cluster(pruned_node, labels, hierarchy)

    clusters.printhclust(created_clusters, labels)
# Cluster the column labels ("words") of titles_vectors.txt under three
# distance functions, printing each tree and saving a dendrogram per metric.
import clusters


def _show_word_clusters(tree, names, heading, image_path):
    """Print *heading*, print *tree* as text, and save its dendrogram."""
    print(heading)
    clusters.printhclust(tree, labels=names)
    clusters.drawdendrogram(tree, names, jpeg=image_path)


docs, words, data = clusters.readfile('titles_vectors.txt')

# The rotated matrix is clustered with `words` as labels.
# NOTE(review): presumably rotatematrix transposes rows/columns so that each
# row describes one word -- confirm against clusters.rotatematrix.
rdata = clusters.rotatematrix(data)

clust = clusters.hcluster(rdata, distance=clusters.pearson)
_show_word_clusters(
    clust, words, 'clusters by pearson correlation', 'wordsclustpearson.jpg'
)

clust = clusters.hcluster(rdata, distance=clusters.tanimoto)
_show_word_clusters(
    clust, words, 'clusters by tanimoto coefficient', 'wordsclusttanimoto.jpg'
)

clust = clusters.hcluster(rdata, distance=clusters.euclidean)
_show_word_clusters(
    clust, words, 'clusters by euclidean distance', 'wordsclusteuclidean.jpg'
)
# Cluster the rows ("docs") of titles_vectors.txt under four distance
# functions. For each one the tree is printed to stdout and a dendrogram
# image is written next to the script.
import clusters

docs, words, data = clusters.readfile('titles_vectors.txt')

# One entry per run: (distance function, console heading, dendrogram file).
# Keeping heading and metric side by side prevents them drifting apart; the
# cosine run previously announced itself as "euclidean distance".
DOC_CLUSTER_RUNS = (
    (clusters.pearson, 'clusters by pearson correlation',
     'docsclust_pearson.jpg'),
    (clusters.tanimoto, 'clusters by tanimoto coefficient',
     'docsclust_tanimoto.jpg'),
    (clusters.euclidean, 'clusters by euclidean distance',
     'docsclust_euclidean.jpg'),
    (clusters.cosine, 'clusters by cosine distance',
     'docsclust_cosine.jpg'),
)

for distance_fn, heading, image_path in DOC_CLUSTER_RUNS:
    clust = clusters.hcluster(data, distance=distance_fn)
    print(heading)
    clusters.printhclust(clust, labels=docs)
    clusters.drawdendrogram(clust, docs, jpeg=image_path)
# Hierarchical clustering of dataset_vectors.txt, run three times: once with
# hcluster's defaults, then with clusters.find_by_min and
# clusters.find_by_max passed as the second positional argument.
# NOTE(review): the output file names suggest centroid / min / max linkage
# respectively -- confirm against clusters.hcluster.
import clusters


def _show_linkage_result(tree, names, image_path):
    """Print the shared heading and *tree*, then save its dendrogram."""
    print('clusters by euclidean distance')
    clusters.printhclust(tree, labels=names)
    clusters.drawdendrogram(tree, names, jpeg=image_path)


row_names, column_names, data = clusters.readfile('dataset_vectors.txt')

clust = clusters.hcluster(data)
_show_linkage_result(clust, row_names, 'hcluster_euclidean_centroid.jpg')
print()  # blank line between reports

clust = clusters.hcluster(data, clusters.find_by_min)
_show_linkage_result(clust, row_names, 'hcluster_euclidean_min.jpg')
print()  # blank line between reports

clust = clusters.hcluster(data, clusters.find_by_max)
_show_linkage_result(clust, row_names, 'hcluster_euclidean_max.jpg')
import clusters, data_processing '''Import Dataset''' data = data_processing.open_csv_file('dataset.csv') '''Create a list of countries in the order of the similarity matrix''' countries_list = data_processing.get_country_names(data) '''Create numerical attributes matrix''' attr_matrix = data_processing.create_attribute_matrix(data) data_processing.str_to_float(attr_matrix) '''hierachical clustering: euclidean distance''' num_cluster = 3 resulting_clusters = clusters.hcluster(attr_matrix, distance=clusters.euclidean) print('clusters by euclidean distance') clusters.printhclust(resulting_clusters, labels=countries_list) clusters.drawdendrogram(resulting_clusters, countries_list, jpeg='Euclidean Cluster.jpg') '''hierachical clustering: tanimoto coefficient''' resulting_clusters = clusters.hcluster(attr_matrix, distance=clusters.tanimoto) print('clusters by tanimoto coefficient') clusters.printhclust(resulting_clusters, labels=countries_list) clusters.drawdendrogram(resulting_clusters, countries_list, jpeg='Tanimoto Cluster.jpg') print() '''hierachical clustering: pearson similarity''' resulting_clusters = clusters.hcluster(attr_matrix, distance=clusters.pearson) print('clusters by pearson correlation') clusters.printhclust(resulting_clusters, labels=countries_list) clusters.drawdendrogram(resulting_clusters, countries_list,