コード例 #1
0
def __main__():
    """Hierarchically cluster blogs by word counts and draw a dendrogram.

    Loads blogdata.txt, clusters the rows with Euclidean distance, and
    writes the resulting tree to ex3dendrogram.jpg.
    """
    names, _vocab, rows = clusters.read_file("blogdata.txt")
    tree = clusters.hcluster(rows, distance=euclidean_distance)
    clusters.draw_dendogram(tree, names, jpeg="ex3dendrogram.jpg")


# I think this weighs against groupings that have similar word use rates but different word use counts.
コード例 #2
0
def __main__():
    """Cluster RSS entries hierarchically and save the dendrogram.

    Reads entrydata.txt, builds a hierarchical clustering with the
    default distance metric, and renders the tree to ex2dend.jpg.
    """
    labels, _words, matrix = clusters.read_file('entrydata.txt')
    tree = clusters.hcluster(matrix)
    clusters.draw_dendogram(tree, labels, jpeg="ex2dend.jpg")
コード例 #3
0
def __main__():
    """Cluster the Zebo wants data and draw the dendrogram.

    Loads zebo.txt, clusters with Manhattan distance, and writes the
    tree image to ex4dend.jpg.
    """
    item_labels, _people, rows = clusters.read_file('zebo.txt')
    tree = clusters.hcluster(rows, distance=manhattan_distance)
    clusters.draw_dendogram(tree, item_labels, jpeg="ex4dend.jpg")
コード例 #4
0
def __main__():
    """Hierarchically cluster the Zebo data using Manhattan distance.

    Reads zebo.txt and emits the clustering tree as ex4dend.jpg.
    """
    row_labels, _cols, data_matrix = clusters.read_file('zebo.txt')
    hierarchy = clusters.hcluster(data_matrix, distance=manhattan_distance)
    clusters.draw_dendogram(hierarchy, row_labels, jpeg="ex4dend.jpg")
コード例 #5
0
def __main__():
    """Cluster RSS entry data and render the result as a dendrogram image.

    Uses entrydata.txt with the default distance metric; output goes
    to ex2dend.jpg.
    """
    entry_names, _vocab, counts = clusters.read_file('entrydata.txt')
    root = clusters.hcluster(counts)
    clusters.draw_dendogram(root, entry_names, jpeg="ex2dend.jpg")
コード例 #6
0
"""
Exercise 6

After completing exercise five, create a function that runs K-means
clustering over different values of k. How does the total distance
change as the number of clusters increases? At what point does the
improvement from having more clusters become very small?
"""
import clusters
from matplotlib import pyplot
# Load the blog word-count dataset once at import time; only the data matrix
# is used below (the row/column label lists are intentionally discarded).
trash, other_trash, DATA = clusters.read_file("blogdata.txt")


def run_experiment(k):
    """Run k-means once on the blog data for the given k.

    Returns the first element of clusters.k_cluster's result, which
    run_experiments plots as "Error" — presumably the total distance.
    """
    outcome = clusters.k_cluster(DATA, k=k)
    return outcome[0]


def run_experiments():
    """Plot k-means clustering error for k = 2..24 on the blog data.

    Configures an interactive matplotlib figure (K value vs. error) and,
    for each k, appends the result of run_experiment(k) and re-plots the
    accumulated points.

    NOTE(review): this snippet is truncated — the body of the final
    ``if i == 2:`` is missing (cut off in the source), so the function
    does not parse as written. Recover the original body before use.
    """
    ks = []
    errors = []
    pyplot.xlabel("K Value")
    pyplot.ylabel("Error")
    pyplot.xlim([0, 25])
    pyplot.ylim([0, 100])
    pyplot.title("Plot of K Value and Error")
    pyplot.ion()  # interactive mode so the plot refreshes as points arrive
    for i in range(2, 25):
        ks.append(i)
        errors.append(run_experiment(i))
        pyplot.plot(ks, errors, "b.")  # blue dot per (k, error) pair
        if i == 2:
コード例 #7
0
def __main__():
    """Build a hierarchical clustering of blogs and save the dendrogram.

    Reads blogdata.txt, clusters with Euclidean distance, and writes the
    tree to ex3dendrogram.jpg.
    """
    blog_labels, _words, count_rows = clusters.read_file("blogdata.txt")
    root = clusters.hcluster(count_rows, distance=euclidean_distance)
    clusters.draw_dendogram(root, blog_labels, jpeg="ex3dendrogram.jpg")

# I think this weighs against groupings that have similar word use rates but different word use counts.