def __main__():
    """Cluster the contents of ``blogdata.txt`` and draw the dendrogram.

    Loads the file through ``clusters.read_file``, runs
    ``clusters.hcluster`` with ``euclidean_distance`` as the distance
    function, and hands the result plus the first set of labels to
    ``clusters.draw_dendogram`` with ``ex3dendrogram.jpg`` as the ``jpeg``
    argument.
    """
    # read_file yields three values; the middle one is not needed here.
    labels, _unused, dataset = clusters.read_file("blogdata.txt")

    tree = clusters.hcluster(dataset, distance=euclidean_distance)
    clusters.draw_dendogram(tree, labels, jpeg="ex3dendrogram.jpg")
    # Author's observation (unverified): this distance seems to weigh
    # against groupings that have similar word-use rates but different
    # word-use counts.
def __main__():
    """Cluster the contents of ``entrydata.txt`` and draw the dendrogram.

    Loads the file through ``clusters.read_file``, runs
    ``clusters.hcluster`` with its default distance function, and passes
    the result plus the first set of labels to ``clusters.draw_dendogram``
    with ``ex2dend.jpg`` as the ``jpeg`` argument.
    """
    # read_file yields three values; the middle one is not needed here.
    labels, _unused, dataset = clusters.read_file("entrydata.txt")

    tree = clusters.hcluster(dataset)
    clusters.draw_dendogram(tree, labels, jpeg="ex2dend.jpg")
def __main__():
    """Cluster the contents of ``zebo.txt`` and draw the dendrogram.

    Loads the file through ``clusters.read_file``, runs
    ``clusters.hcluster`` with ``manhattan_distance`` as the distance
    function, and passes the result plus the first set of labels to
    ``clusters.draw_dendogram`` with ``ex4dend.jpg`` as the ``jpeg``
    argument.
    """
    # read_file yields three values; the middle one is not needed here.
    labels, _unused, dataset = clusters.read_file("zebo.txt")

    tree = clusters.hcluster(dataset, distance=manhattan_distance)
    clusters.draw_dendogram(tree, labels, jpeg="ex4dend.jpg")
def __main__():
    """Build a tag matrix for "programming" and draw its dendrogram.

    Calls ``build_tag_list`` with ``"programming"``, feeds the result to
    ``build_tag_matrix``, clusters the returned data with ``hcluster``
    (default distance), and passes the result plus the URL labels to
    ``draw_dendogram`` with ``delicious.jpg`` as the ``jpeg`` argument.
    """
    seed_tags = build_tag_list("programming")

    # build_tag_matrix yields three values; the first is not needed here.
    _unused, url_labels, dataset = build_tag_matrix(seed_tags)

    tree = hcluster(dataset)
    draw_dendogram(tree, url_labels, jpeg="delicious.jpg")