def createDendrogram():
    """Cluster the rows of ``blogdata.txt`` and save the tree in two forms.

    A JPEG dendrogram is written to ``Dendrogram.jpg`` and the text output of
    ``clusters.printclust`` is captured into ``ASCII.txt``.

    ``printclust`` is called without an output-file argument, so its output
    (presumably written to standard output) is captured by temporarily
    pointing ``sys.stdout`` at the output file.
    """
    blogs, _colnames, data = clusters.readfile('blogdata.txt')
    cluster = clusters.hcluster(data)
    clusters.drawdendrogram(cluster, blogs, jpeg='Dendrogram.jpg')

    saved_stdout = sys.stdout
    # The with-block closes the file even if printclust raises.
    with open("ASCII.txt", 'w') as f:
        sys.stdout = f
        try:
            clusters.printclust(cluster, labels=blogs)
        finally:
            # Always put the real stdout back: leaving sys.stdout bound to a
            # closed file (or closing sys.stderr) breaks every later print
            # and hides tracebacks in the rest of the program.
            sys.stdout = saved_stdout
def main():
    """Print an ASCII dendrogram for the blogs described in ``blogdata.txt``."""
    # readfile hands back the blog titles, the words in the blogs
    # (10%-50% boundaries) and the list of frequency info.
    titles, _vocabulary, frequencies = clusters.readfile('blogdata.txt')

    # hcluster returns a tree of nodes exposing id, left and right.
    tree = clusters.hcluster(frequencies)

    # Walk the tree and print an ASCII approximation of a dendrogram;
    # the distance measure is Pearson's r.
    clusters.printclust(tree, labels=titles)
# NOTE(review): the statements below are collapsed onto one physical line and the
# line opens with the tail of a function whose `def` is not visible here: that tail
# adds (v1[i] - v2[i])**2 to d for every index of v1 and returns math.sqrt(d).
# After the disabled getBlogs()/main() calls, the script reads 'similarblogdata.txt'
# with clusters.readfile, prints the three returned values, prints the names and row
# lengths of any two adjacent data rows whose lengths differ, builds a hierarchical
# clustering (printed as text and drawn to 'sblogclust.jpg'), runs clusters.kcluster
# with k=5, 10 and 20 (each result passed to printkclustValues, defined elsewhere),
# and finally draws the clusters.scaledown coordinates to 'sblogs2d.jpg'.
# The code is left untouched because the enclosing function header is out of view.
for i in range(len(v1)): d += (v1[i] - v2[i])**2 return math.sqrt(d) #getBlogs() #main() blognames, words, data = clusters.readfile('similarblogdata.txt') print(blognames) print(words) print(data) for i in range(len(data[1:])): if len(data[i + 1]) != len(data[i]): print(blognames[i + 1]) print(len(data[i + 1])) print(blognames[i]) print(len(data[i])) clust = clusters.hcluster(data) clusters.printclust(clust, labels=blognames) clusters.drawdendrogram(clust, blognames, jpeg='sblogclust.jpg') kclust = clusters.kcluster(data, k=5) printkclustValues(kclust) kclust = clusters.kcluster(data, k=10) printkclustValues(kclust) kclust = clusters.kcluster(data, k=20) printkclustValues(kclust) coords = clusters.scaledown(data) clusters.draw2d(coords, blognames, jpeg='sblogs2d.jpg')
#!/usr/local/bin/python
# Shawn Jones
#
# All code here stolen shamelessly from
# "Programming Collective Intelligence, Chapter 3".
#
# Reads 'blogdata1V2.txt', builds a hierarchical clustering of its rows, prints
# the tree as text and saves it as 'blogclust.jpg'.
# (The interpreter line comes first: a shebang is only honoured on line 1.)

import sys

# Put ../libs at the front of the import path before importing clusters.
# NOTE(review): the relative path is resolved against the current working
# directory, so this presumably expects to be run from the script's folder.
sys.path.insert(0, '../libs')

import clusters

blognames, words, data = clusters.readfile('blogdata1V2.txt')
clust = clusters.hcluster(data)

# Print the ASCII dendrogram.
clusters.printclust(clust, labels=blognames)

# Save the JPEG dendrogram.
clusters.drawdendrogram(clust, blognames, jpeg='blogclust.jpg')
def printclust():
    """Reload the ``clusters`` module, then print the current cluster tree.

    Relies on the module-level names ``clust`` (the tree) and ``blognames``
    (its labels) being defined before this is called.
    """
    # reload() hands back the freshly re-executed module object.
    refreshed = reload(clusters)
    refreshed.printclust(clust, labels=blognames)
import clusters

# Cluster the contents of 'titlesdata.txt' three times, once per distance
# function, printing each tree as text and saving it as a JPEG dendrogram.
docs, words, data = clusters.readfile('titlesdata.txt')

# The rotated matrix is what gets clustered; `words` supplies the labels.
rdata = clusters.rotatematrix(data)

# One entry per run, in the original order:
# (distance function, heading printed before the tree, output image).
for _distance, _heading, _jpeg in (
    (clusters.pearson, 'clusters by pearson correlation', 'wordsclustpearson.jpg'),
    (clusters.tanimoto, 'clusters by tanimoto coefficient', 'wordsclusttanimoto.jpg'),
    (clusters.euclidean, 'clusters by euclidean distance', 'wordsclusteuclidean.jpg'),
):
    clust = clusters.hcluster(rdata, distance=_distance)
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print(_heading)
    clusters.printclust(clust, labels=words)
    clusters.drawdendrogram(clust, words, jpeg=_jpeg)
import clusters

# Read 'blogdata1 (copy).txt' and build a hierarchical clustering of its rows.
name, word, data = clusters.readfile('blogdata1 (copy).txt')
cluster = clusters.hcluster(data)

# Text rendering of the tree, labelled with the names read from the file.
clusters.printclust(cluster, labels=name)

# Image rendering of the same tree.
clusters.drawdendrogram(cluster, name, jpeg='BlogCluster.jpg')
import clusters

# Load the contents of 'blogdata.txt' and cluster the data rows hierarchically.
blognames, words, data = clusters.readfile('blogdata.txt')
cl = clusters.hcluster(data)

# ASCII diagram
clusters.printclust(cl, labels=blognames)

# Drawing the dendrogram
clusters.drawdendrogram(cl, blognames, jpeg='blogcluster.jpg')
import clusters

# Cluster the rows read from 'tfidf.txt'; `variable` holds the resulting tree.
blog, words, data = clusters.readfile('tfidf.txt')
variable = clusters.hcluster(data)

# Print the ASCII dendrogram.
clusters.printclust(variable, labels=blog)

# Save the JPEG dendrogram.
clusters.drawdendrogram(variable, blog, jpeg='clusterblogtfidf.jpg')
def createAsciiDendogram():
    """Print a text dendrogram for the data stored in ``blogVector.txt``."""
    labels, _words, rows = clusters.readfile('blogVector.txt')
    tree = clusters.hcluster(rows)
    clusters.printclust(tree, labels=labels)
import clusters

# Read 'tfidf.txt' and build the hierarchical cluster tree (kept in `variable`).
blog, words, data = clusters.readfile('tfidf.txt')
variable = clusters.hcluster(data)

# Print the ASCII dendrogram.
clusters.printclust(variable, labels=blog)

# Save the JPEG dendrogram.
clusters.drawdendrogram(variable, blog, jpeg='clusterblogtfidf.jpg')
def generateAscii():
    """Print a text dendrogram for the data stored in ``blogdata.txt``."""
    titles, _words, matrix = clusters.readfile('blogdata.txt')
    hierarchy = clusters.hcluster(matrix)
    clusters.printclust(hierarchy, labels=titles)
# NOTE(review): the statements below are collapsed onto one physical line. After
# importing clusters (as cl), numpy and random and reading 'blogdata.txt', the line
# opens a ''' string that is never closed within this chunk, so it cannot be told
# from here which of the following statements are live and which are disabled.
# Those statements cover: hierarchical clustering of the data and of the rotated
# matrix (drawn to 'blogclust.jpg' and 'wordclust.jpg'), k-means with k = 4 through
# cl.kcluster and cl.kcluster_np (printing each cluster's size and blog names with
# Python 2 print statements), and a tanimoto-distance clustering of the 'zebo' file.
# The code is left untouched until the end of the ''' block is confirmed.
import clusters as cl import numpy as np import random blognames, words, data = cl.readfile('blogdata.txt') ''' clust = cl.hcluster(data) cl.printclust(clust,labels=blognames) cl.drawdendrogram(clust,blognames,jpeg='blogclust.jpg') rdata = cl.rotatematrix(data) wordclust = cl.hcluster(rdata) cl.printclust(wordclust,labels=words) cl.drawdendrogram(wordclust,words,jpeg='wordclust.jpg') k = 4 kclust = cl.kcluster(data,k=k) l = [[blognames[r] for r in kclust[i]] for i in range(k)] for ll in l: print len(ll),ll kclust = cl.kcluster_np(data,k=k) l = [[blognames[r] for r in kclust[i]] for i in range(k)] for ll in l: print len(ll),ll wants,people,data = cl.readfile('zebo') clust = cl.hcluster(data,distance=cl.tanimoto) cl.drawdendrogram(clust,wants)
def draw_acii_dendogram():
    """Print a text dendrogram for the data stored in ``Outputs/blogdata.txt``."""
    blog_labels, _words, vectors = clusters.readfile('Outputs/blogdata.txt')
    root = clusters.hcluster(vectors)
    clusters.printclust(root, labels=blog_labels)