def main(argv):
    """Run a search for the command-line query and print clustered results.

    Args:
        argv: Command-line argument list. ``argv[0]`` is skipped; the
            remaining items are joined with single spaces to form the query.

    Prints "0 results" when no query terms were supplied or when the search
    returns an empty result set. Otherwise the results are converted to word
    vectors, clustered with ``cluster.hcluster`` (Pearson distance,
    threshold 1.15), ordered by ``cluster.sortBySmallestId`` and printed
    with ``cluster.printclusters``.
    """
    terms = argv[1:]
    if not terms:
        # The previous reduce() call had no initial value and raised
        # TypeError on an empty argument list. With nothing to search for,
        # report it the same way as an empty result set.
        print("0 results")
        return

    query = " ".join(terms)
    # NOTE(review): the second argument (20) is presumably the number of
    # results requested -- confirm against bossapiv2.search.
    results = bossapiv2.search(query, 20)
    if len(results) == 0:
        print("0 results")
        return

    wordlist, wordvectors = bosstextproc.textprocess(results)
    clusts = cluster.hcluster(
        rows=wordvectors, distance=distance.pearson, threshold=1.15
    )
    clusts = cluster.sortBySmallestId(clusts)
    cluster.printclusters(clusts, results, wordlist)
def main(argv):
    """Run a search for the command-line query and print evaluated clusters.

    Args:
        argv: Command-line argument list. ``argv[0]`` is skipped; the
            remaining items are joined with single spaces to form the query.

    Prints "0 results" when no query terms were supplied or when the search
    returns an empty result set. Otherwise the results are converted to word
    vectors, clustered with ``cluster.hcluster``, the first element of that
    result is split with ``cluster.divide`` at threshold 1.15, and the
    outcome is printed with ``cluster.printclusters_eval``.
    """
    terms = argv[1:]
    if not terms:
        # The previous reduce() call had no initial value and raised
        # TypeError on an empty argument list. With nothing to search for,
        # report it the same way as an empty result set.
        print("0 results")
        return

    query = " ".join(terms)
    # NOTE(review): the second argument (20) is presumably the number of
    # results requested -- confirm against bossapiv2.search.
    results = bossapiv2.search(query, 20)
    if len(results) == 0:
        print("0 results")
        return

    wordlist, wordvectors = bosstextproc.textprocess(results)
    clusts = cluster.hcluster(rows=wordvectors)
    # Only the first element returned by hcluster is divided further.
    clusts = cluster.divide(clusts[0], threshold=1.15)
    cluster.printclusters_eval(clusts, results, wordlist)
os.path.join(path, f) for f in os.listdir(path) if f.endswith('.jpg') ] # extract feature vector (8 bins per color channel) features = zeros([len(imlist), 512]) for i, f in enumerate(imlist): im = array(Image.open(f)) # multi-dimensional histogram h, edges = histogramdd(im.reshape(-1, 3), 8, normed=True, range=[(0, 255), (0, 255), (0, 255)]) features[i] = h.flatten() time2 = time.time() feature_time = time2 - time1 print("feature time:%ss" % feature_time) tree = cluster.hcluster(features) time3 = time.time() tree_time = time3 - time2 print("tree time:%ss" % tree_time) # visualize clusters with some (arbitrary) threshold clusters = tree.extract_clusters(tree.distance) print(len(clusters)) time4 = time.time() extract_cluster_time = time4 - time3 print("extract cluster time:%ss" % extract_cluster_time) # plot images for clusters with more than 3 elements c_id = 0 for c in clusters: path_cluster = "../test_traindata/test_cluster/%s/" % c_id if (not os.path.exists(path_cluster)): os.makedirs(path_cluster)
# clusterTest.py
#
# Small driver script: read 'blogdata.txt' through cluster.readfile, feed the
# data portion to cluster.hcluster, and print the result with
# cluster.printclust using the blog names as labels.
import cluster

# readfile yields three values; `words` is unpacked but not used below.
blognames, words, data = cluster.readfile('blogdata.txt')

clust = cluster.hcluster(data)
cluster.printclust(clust, labels=blognames)
# Driver script: read 'blogdata.txt' through cluster.readfile, cluster the
# data portion with cluster.hcluster, print the result, then hand it to
# cluster.drawdendrogram together with the target name 'blogclust.jpg'.
import cluster

# readfile yields three values; `sWords` is unpacked but not used below.
sBlogNames, sWords, sData = cluster.readfile('blogdata.txt')

sClust = cluster.hcluster(sData)
cluster.printclust(sClust, aLabels=sBlogNames)
# NOTE(review): presumably writes the dendrogram image to 'blogclust.jpg' --
# confirm against cluster.drawdendrogram.
cluster.drawdendrogram(sClust, sBlogNames, 'blogclust.jpg')