def createDendrogram():
    """Cluster the blogs in 'blogdata.txt' and save both dendrogram renderings.

    The graphical dendrogram is written to 'Dendrogram.jpg'. The text
    rendering that ``clusters.printclust`` prints to standard output is
    captured into 'ASCII.txt'.
    """
    blogs, _colnames, data = clusters.readfile('blogdata.txt')
    cluster = clusters.hcluster(data)
    clusters.drawdendrogram(cluster, blogs, jpeg='Dendrogram.jpg')

    # printclust prints to stdout, so stdout is pointed at the file only for
    # the duration of that call and is always put back afterwards. Leaving
    # sys.stdout bound to a closed file would make every later print fail.
    saved_stdout = sys.stdout
    with open("ASCII.txt", 'w') as f:
        sys.stdout = f
        try:
            clusters.printclust(cluster, labels=blogs)
        finally:
            sys.stdout = saved_stdout
    # sys.stderr is deliberately left open: closing it would silently discard
    # every later traceback and warning from the process.
Ejemplo n.º 2
0
def main():
    """Read 'blogdata.txt', cluster its rows and print the tree as text."""
    # readfile hands back the blog titles, the words used as columns, and the
    # per-blog frequency rows; the word list is not needed here.
    titles, _vocabulary, frequencies = clusters.readfile('blogdata.txt')

    # hcluster is called with its default distance measure and returns the
    # root of the cluster tree; printclust walks that tree and prints an
    # ASCII approximation of a dendrogram labelled with the blog titles.
    tree = clusters.hcluster(frequencies)
    clusters.printclust(tree, labels=titles)
Ejemplo n.º 3
0
    for i in range(len(v1)):
        d += (v1[i] - v2[i])**2
    return math.sqrt(d)


#getBlogs()
#main()

# Load the data set and dump the three parts for a quick visual check.
blognames, words, data = clusters.readfile('similarblogdata.txt')
print(blognames)
print(words)
print(data)

# Sanity check: report every pair of neighbouring rows whose lengths differ,
# later row first, printing the blog name followed by its row length.
for i in range(len(data) - 1):
    if len(data[i + 1]) == len(data[i]):
        continue
    for row in (i + 1, i):
        print(blognames[row])
        print(len(data[row]))

# Hierarchical clustering: text rendering to stdout, picture to a JPEG.
clust = clusters.hcluster(data)
clusters.printclust(clust, labels=blognames)
clusters.drawdendrogram(clust, blognames, jpeg='sblogclust.jpg')

# k-means with increasing cluster counts; kclust ends up holding the k=20 run.
for k in (5, 10, 20):
    kclust = clusters.kcluster(data, k=k)
    printkclustValues(kclust)

# Project the rows to 2-D coordinates and draw them labelled by blog name.
coords = clusters.scaledown(data)
clusters.draw2d(coords, blognames, jpeg='sblogs2d.jpg')
Ejemplo n.º 4
0
#Shawn Jones

#!/usr/local/bin/python

# all code here stolen shamelessly from 

# "Programming Collective Intelligence, Chapter 3"

import sys

sys.path.insert(0, '../libs')

import clusters

# Load the data set; the first and third results are used below as the
# dendrogram labels and the rows to cluster (the middle one is unused here).
blognames,words,data=clusters.readfile('blogdata1V2.txt')

# Build the cluster tree from the data rows using hcluster's defaults.
clust = clusters.hcluster(data)

# Print the ASCII dendrogram to standard output, labelled with the blog names.

clusters.printclust(clust, labels=blognames)

# Save the same tree as a JPEG dendrogram in 'blogclust.jpg'.

clusters.drawdendrogram(clust, blognames, jpeg='blogclust.jpg')

Ejemplo n.º 5
0
Archivo: run.py Proyecto: wz125/courses
def printclust():
  """Reload the ``clusters`` module, then print the current cluster tree.

  Operates on the module-level ``clust`` and ``blognames``; takes no
  arguments and returns nothing.
  """
  try:
    reload_module = reload  # builtin on Python 2
  except NameError:
    # Python 3 dropped the builtin; importlib.reload is its replacement.
    from importlib import reload as reload_module
  # Reload first so edits made to clusters.py since the import take effect.
  reload_module(clusters)
  clusters.printclust(clust, labels=blognames)
Ejemplo n.º 6
0
import clusters

# Cluster the columns of 'titlesdata.txt' under three distance measures and,
# for each one, print the tree as text and save it as a JPEG dendrogram.
docs, words, data = clusters.readfile('titlesdata.txt')
# NOTE(review): rotatematrix presumably transposes the data so each row is a
# word (the trees below are labelled with `words`) - confirm in clusters.py.
rdata = clusters.rotatematrix(data)

# One entry per run: heading to print, distance function, output image.
for heading, distance, jpeg_name in (
        ('clusters by pearson correlation', clusters.pearson,
         'wordsclustpearson.jpg'),
        ('clusters by tanimoto coefficient', clusters.tanimoto,
         'wordsclusttanimoto.jpg'),
        ('clusters by euclidean distance', clusters.euclidean,
         'wordsclusteuclidean.jpg'),
):
    clust = clusters.hcluster(rdata, distance=distance)
    # Parenthesised single-argument print gives identical output on
    # Python 2 and Python 3; the bare print statement only parses on 2.
    print(heading)
    clusters.printclust(clust, labels=words)
    clusters.drawdendrogram(clust, words, jpeg=jpeg_name)
Ejemplo n.º 7
0
import clusters

# Load the data set; `name` supplies the labels and `data` the rows to
# cluster (`word` is not used in this script).
name, word, data = clusters.readfile('blogdata1 (copy).txt')
# Build the cluster tree using hcluster's default settings.
cluster = clusters.hcluster(data)

# Print the tree as text to standard output, labelled with `name`.
clusters.printclust(cluster, labels=name)

# Draw the same tree into 'BlogCluster.jpg'.
clusters.drawdendrogram(cluster, name, jpeg='BlogCluster.jpg')
Ejemplo n.º 8
0
import clusters

# Load the data set; `blognames` labels the tree and `data` holds the rows to
# cluster (`words` is not used in this script).
blognames, words, data = clusters.readfile('blogdata.txt')
# Build the cluster tree using hcluster's default settings.
cl = clusters.hcluster(data)
clusters.printclust(cl, labels=blognames)  # ASCII diagram printed to stdout
clusters.drawdendrogram(cl, blognames,
                        jpeg='blogcluster.jpg')  # draw the dendrogram as a JPEG
Ejemplo n.º 9
0
import clusters
# Load 'tfidf.txt'; `blog` labels the tree and `data` holds the rows to
# cluster (`words` is not used in this script).
blog, words, data = clusters.readfile('tfidf.txt')
# Build the cluster tree using hcluster's default settings.
variable = clusters.hcluster(data)

# Print the ASCII dendrogram to standard output, labelled with `blog`.
clusters.printclust(variable, labels=blog)

# Save the same tree as a JPEG dendrogram in 'clusterblogtfidf.jpg'.
clusters.drawdendrogram(variable, blog, jpeg='clusterblogtfidf.jpg')
Ejemplo n.º 10
0
def createAsciiDendogram():
	"""Print a text dendrogram for the data stored in 'blogVector.txt'."""
	# Only the labels and the data rows are needed; the word list is ignored.
	titles, _vocabulary, vectors = clusters.readfile('blogVector.txt')
	# Cluster with hcluster's defaults and print the tree straight away.
	clusters.printclust(clusters.hcluster(vectors), labels=titles)
Ejemplo n.º 11
0
import clusters
# Load 'tfidf.txt'; `blog` labels the tree and `data` holds the rows to
# cluster (`words` is not used in this script).
blog,words,data=clusters.readfile('tfidf.txt')
# Build the cluster tree using hcluster's default settings.
variable = clusters.hcluster(data)

# Print the ASCII dendrogram to standard output, labelled with `blog`.
clusters.printclust(variable, labels=blog)

# Save the same tree as a JPEG dendrogram in 'clusterblogtfidf.jpg'.
clusters.drawdendrogram(variable, blog, jpeg='clusterblogtfidf.jpg')
Ejemplo n.º 12
0
def generateAscii():
    """Cluster the rows of 'blogdata.txt' and print the tree as ASCII text."""
    loaded = clusters.readfile('blogdata.txt')
    # readfile yields three values; the middle one (the words) is unused.
    row_labels, _column_labels, rows = loaded
    tree = clusters.hcluster(rows)
    clusters.printclust(tree, labels=row_labels)
import clusters as cl
import numpy as np
import random

# Load 'blogdata.txt' through the `cl` alias of the clusters module.
# NOTE(review): presumably row labels, column labels and the data matrix, as
# the target names suggest - confirm against clusters.readfile.
blognames, words, data = cl.readfile('blogdata.txt')
'''
clust = cl.hcluster(data)
cl.printclust(clust,labels=blognames)
cl.drawdendrogram(clust,blognames,jpeg='blogclust.jpg')

rdata = cl.rotatematrix(data)
wordclust = cl.hcluster(rdata)
cl.printclust(wordclust,labels=words)
cl.drawdendrogram(wordclust,words,jpeg='wordclust.jpg')

k = 4
kclust = cl.kcluster(data,k=k)
l = [[blognames[r] for r in kclust[i]] for i in range(k)]
for ll in l:
    print len(ll),ll


kclust = cl.kcluster_np(data,k=k)
l = [[blognames[r] for r in kclust[i]] for i in range(k)]
for ll in l:
    print len(ll),ll


wants,people,data = cl.readfile('zebo')
clust = cl.hcluster(data,distance=cl.tanimoto)
cl.drawdendrogram(clust,wants)
Ejemplo n.º 14
0
def draw_acii_dendogram():
    """Print a text dendrogram for the data in 'Outputs/blogdata.txt'."""
    # The word list returned in the middle position is not needed here.
    labels, _terms, matrix = clusters.readfile('Outputs/blogdata.txt')
    # Build the tree with hcluster's defaults, then print it with the labels.
    hierarchy = clusters.hcluster(matrix)
    clusters.printclust(hierarchy, labels=labels)