Exemple #1
0
def main():
    """Load the clustering stored for METRIC, prune it and evaluate the nodes.

    Reads ``json/<METRIC>.json``, rebuilds the clusters with
    ``clusters.read_json``, prunes them down to ``len(level1_set)`` nodes,
    runs ``evaluate_cluster`` on every node returned by ``prune`` and finally
    prints ``created_clusters`` with ``clusters.printhclust``.
    """
    source_path = "json/{}.json".format(METRIC)
    with open(source_path) as handle:
        parsed = json.load(handle)
    # The JSON document is fully parsed at this point, so the clusters can be
    # rebuilt after the file has been closed.
    created_clusters = clusters.read_json(parsed)

    labels = make_vectors.get_list("data/itemIndex.txt")
    hierarchy, level1_set, _level2_set = get_hierarchy(
        "data/itemHierarchy.csv")

    # Prune so that there are as many nodes as level-1 entries, then evaluate
    # which items belong to which of those nodes.
    wanted_nodes = len(level1_set)
    for node in prune(created_clusters, wanted_nodes):
        evaluate_cluster(node, labels, hierarchy)

    clusters.printhclust(created_clusters, labels)
import clusters

# Cluster the rotated data matrix (labelled with `words`) once per distance
# function, printing each tree and saving its dendrogram.
docs, words, data = clusters.readfile('titles_vectors.txt')
rdata = clusters.rotatematrix(data)

# (heading printed before the tree, distance function, dendrogram file name)
for heading, metric, image in (
        ('clusters by pearson correlation', clusters.pearson,
         'wordsclustpearson.jpg'),
        ('clusters by tanimoto coefficient', clusters.tanimoto,
         'wordsclusttanimoto.jpg'),
        ('clusters by euclidean distance', clusters.euclidean,
         'wordsclusteuclidean.jpg'),
):
    clust = clusters.hcluster(rdata, distance=metric)
    print(heading)
    clusters.printhclust(clust, labels=words)
    clusters.drawdendrogram(clust, words, jpeg=image)
Exemple #3
0
import clusters

# Hierarchically cluster the rows read from 'titles_vectors.txt' (labelled
# with `docs`) once per distance function. Each run prints the resulting tree
# and saves a dendrogram image named after the distance function used.
docs, words, data = clusters.readfile('titles_vectors.txt')

# Pearson correlation
clust = clusters.hcluster(data, distance=clusters.pearson)
print('clusters by pearson correlation')
clusters.printhclust(clust, labels=docs)
clusters.drawdendrogram(clust, docs, jpeg='docsclust_pearson.jpg')

# Tanimoto coefficient
clust = clusters.hcluster(data, distance=clusters.tanimoto)
print('clusters by tanimoto coefficient')
clusters.printhclust(clust, labels=docs)
clusters.drawdendrogram(clust, docs, jpeg='docsclust_tanimoto.jpg')

# Euclidean distance
clust = clusters.hcluster(data, distance=clusters.euclidean)
print('clusters by euclidean distance')
clusters.printhclust(clust, labels=docs)
clusters.drawdendrogram(clust, docs, jpeg='docsclust_euclidean.jpg')

# Cosine: the heading must name the function actually passed as `distance`
# so the printed output matches 'docsclust_cosine.jpg'.
clust = clusters.hcluster(data, distance=clusters.cosine)
print('clusters by cosine distance')
clusters.printhclust(clust, labels=docs)
clusters.drawdendrogram(clust, docs, jpeg='docsclust_cosine.jpg')
import clusters

# Cluster 'dataset_vectors.txt' three times: with hcluster's defaults, then
# with clusters.find_by_min and clusters.find_by_max as the second argument.
# Each run prints the tree and saves a dendrogram image.
row_names, column_names, data = clusters.readfile('dataset_vectors.txt')


def _show_clustering(tree, image_name):
    """Print the heading and tree, then save the dendrogram to image_name."""
    print('clusters by euclidean distance')
    clusters.printhclust(tree, labels=row_names)
    clusters.drawdendrogram(tree, row_names, jpeg=image_name)


# Default settings (presumably centroid linkage, judging by the file name;
# TODO confirm against clusters.hcluster).
clust = clusters.hcluster(data)
_show_clustering(clust, 'hcluster_euclidean_centroid.jpg')

print()
clust = clusters.hcluster(data, clusters.find_by_min)
_show_clustering(clust, 'hcluster_euclidean_min.jpg')

print()
clust = clusters.hcluster(data, clusters.find_by_max)
_show_clustering(clust, 'hcluster_euclidean_max.jpg')
import clusters, data_processing
# Import dataset
data = data_processing.open_csv_file('dataset.csv')
# List of countries, in the order of the similarity matrix
countries_list = data_processing.get_country_names(data)
# Numerical attributes matrix
attr_matrix = data_processing.create_attribute_matrix(data)
# The return value is discarded, so this presumably converts the entries of
# attr_matrix in place (TODO confirm against data_processing.str_to_float).
data_processing.str_to_float(attr_matrix)

# Hierarchical clustering: euclidean distance
num_cluster = 3  # NOTE(review): not used by the statements visible here
resulting_clusters = clusters.hcluster(attr_matrix,
                                       distance=clusters.euclidean)
print('clusters by euclidean distance')
clusters.printhclust(resulting_clusters, labels=countries_list)
clusters.drawdendrogram(resulting_clusters,
                        countries_list,
                        jpeg='Euclidean Cluster.jpg')

# Hierarchical clustering: tanimoto coefficient
resulting_clusters = clusters.hcluster(attr_matrix, distance=clusters.tanimoto)
print('clusters by tanimoto coefficient')
clusters.printhclust(resulting_clusters, labels=countries_list)
clusters.drawdendrogram(resulting_clusters,
                        countries_list,
                        jpeg='Tanimoto Cluster.jpg')
print()

# Hierarchical clustering: pearson similarity
resulting_clusters = clusters.hcluster(attr_matrix, distance=clusters.pearson)
print('clusters by pearson correlation')
clusters.printhclust(resulting_clusters, labels=countries_list)
clusters.drawdendrogram(resulting_clusters,
                        countries_list,