Example #1
0
def main(argv):
    """Run a search for the command-line query and print its clustered results.

    Args:
        argv: Sequence of strings. ``argv[0]`` is skipped; the remaining
            items are joined with single spaces to form the query string.

    The query is passed to ``bossapiv2.search`` together with the literal 20
    (presumably the number of results requested -- confirm against
    bossapiv2). If the search returns nothing, "0 results" is printed.
    Otherwise the results go through ``bosstextproc.textprocess``, are
    clustered with ``cluster.hcluster`` (Pearson distance, threshold 1.15),
    reordered with ``cluster.sortBySmallestId`` and printed with
    ``cluster.printclusters``.

    Returns:
        None.
    """
    # str.join builds the same string as the former reduce() fold for one or
    # more words, but yields "" instead of raising TypeError when no query
    # words are supplied, and does not depend on the Python 2 builtin reduce.
    query = " ".join(argv[1:])
    results = bossapiv2.search(query, 20)

    if len(results) == 0:
        # Single-argument call form prints identically on Python 2 and 3.
        print("0 results")
        return

    wordlist, wordvectors = bosstextproc.textprocess(results)
    clusts = cluster.hcluster(
        rows=wordvectors,
        distance=distance.pearson,
        threshold=1.15,
    )
    clusts = cluster.sortBySmallestId(clusts)
    cluster.printclusters(clusts, results, wordlist)
Example #2
0
def main(argv):
    """Run a search for the command-line query and print an evaluated clustering.

    Args:
        argv: Sequence of strings. ``argv[0]`` is skipped; the remaining
            items are joined with single spaces to form the query string.

    The query is passed to ``bossapiv2.search`` together with the literal 20
    (presumably the number of results requested -- confirm against
    bossapiv2). If the search returns nothing, "0 results" is printed.
    Otherwise the results go through ``bosstextproc.textprocess``, the word
    vectors are clustered with ``cluster.hcluster``, the first element of
    that result is split by ``cluster.divide`` with threshold 1.15, and the
    outcome is printed with ``cluster.printclusters_eval``.

    Returns:
        None.
    """
    # str.join builds the same string as the former reduce() fold for one or
    # more words, but yields "" instead of raising TypeError when no query
    # words are supplied, and does not depend on the Python 2 builtin reduce.
    query = " ".join(argv[1:])
    results = bossapiv2.search(query, 20)

    if len(results) == 0:
        # Single-argument call form prints identically on Python 2 and 3.
        print("0 results")
        return

    wordlist, wordvectors = bosstextproc.textprocess(results)
    clusts = cluster.hcluster(rows=wordvectors)
    # Only the first element returned by hcluster is handed to divide().
    clusts = cluster.divide(clusts[0], threshold=1.15)
    cluster.printclusters_eval(clusts, results, wordlist)
Example #3
0
    os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg')
]
# Build one 512-dimensional colour-histogram feature vector per image
# (8 bins per colour channel -> 8 * 8 * 8 = 512 values).
features = zeros([len(imlist), 512])
for i, f in enumerate(imlist):
    # Force three channels so reshape(-1, 3) really yields one row per pixel;
    # a greyscale or CMYK file would otherwise fail or be silently mis-grouped.
    # NOTE(review): assumes `Image` is the PIL/Pillow Image module -- confirm.
    im = array(Image.open(f).convert('RGB'))
    # Multi-dimensional histogram over the pixel triples. `density=True`
    # replaces the `normed=True` keyword, which NumPy has removed.
    h, edges = histogramdd(im.reshape(-1, 3),
                           8,
                           density=True,
                           range=[(0, 255), (0, 255), (0, 255)])
    features[i] = h.flatten()
# time1 is taken before this block, outside the visible part of the script.
time2 = time.time()
feature_time = time2 - time1
print("feature time:%ss" % feature_time)
# Hierarchically cluster the feature vectors and time that step.
tree = cluster.hcluster(features)
time3 = time.time()
tree_time = time3 - time2
print("tree time:%ss" % tree_time)
# Extract clusters, using the tree's own `distance` attribute as the
# (arbitrary) threshold.
clusters = tree.extract_clusters(tree.distance)
print(len(clusters))
time4 = time.time()
extract_cluster_time = time4 - time3
print("extract cluster time:%ss" % extract_cluster_time)
# Make sure an output directory exists for the clusters.
# NOTE(review): c_id is never incremented in the visible code, so every
# iteration targets ".../test_cluster/0/"; the loop body may be truncated
# here -- confirm against the full script.
c_id = 0
for c in clusters:
    path_cluster = "../test_traindata/test_cluster/%s/" % c_id
    if (not os.path.exists(path_cluster)):
        os.makedirs(path_cluster)
Example #4
0
# clusterTest.py

import cluster

# Unpack the three values cluster.readfile returns for 'blogdata.txt'.
# NOTE(review): presumably row labels, column labels and the data rows --
# confirm against cluster.readfile. `words` is not used below.
blognames, words, data = cluster.readfile('blogdata.txt')
# Build the hierarchical clustering, passing only the data.
clust = cluster.hcluster(data)

# Print the clustering, passing the blog names as the `labels` keyword.
cluster.printclust(clust, labels=blognames)
Example #5
0
import cluster

# Unpack the three values cluster.readfile returns for 'blogdata.txt'.
# NOTE(review): presumably row labels, column labels and the data rows --
# confirm against cluster.readfile. `sWords` is not used below.
sBlogNames, sWords, sData = cluster.readfile('blogdata.txt')

# Build the hierarchical clustering, passing only the data.
sClust = cluster.hcluster(sData)
# Print the clustering, passing the blog names as the `aLabels` keyword.
cluster.printclust(sClust, aLabels=sBlogNames)
# Draw the dendrogram, passing the clustering, the blog names and the
# target file name 'blogclust.jpg'.
cluster.drawdendrogram(sClust, sBlogNames, 'blogclust.jpg')