def compare_true_templates():
    """Cluster the documents, report, push two cluster centers apart, report again.

    Documents come from doc.get_docs_nested(get_data_dir(sys.argv[2])) and are
    clustered with cluster.BestPerfectCONFIRM (lr=0.05).  A
    metric.KnownClusterAnalyzer prints and draws the result, the center of
    cluster 0 is pushed away from the center of cluster 1, and a freshly
    built analyzer draws and prints the clustering a second time.
    """
    data_dir = get_data_dir(sys.argv[2])
    documents = doc.get_docs_nested(data_dir)

    # Alternative clusterer previously tried here: cluster.PerfectCONFIRM(documents)
    clusterer = cluster.BestPerfectCONFIRM(documents, lr=0.05)
    clusterer.cluster()

    before = metric.KnownClusterAnalyzer(clusterer)
    before.print_all()
    before.draw_centers()

    # Modify the first center in place using the second one as the reference.
    first_center = before.clusters[0].center
    first_center.push_away(before.clusters[1].center)

    # Single-argument print(...) emits the same bytes as the Python 2 print
    # statement; print("") yields the same bare newline as an empty print.
    print("PUSHING APART!")
    print("")
    print("")

    # A new analyzer is constructed so the report is computed after the push.
    after = metric.KnownClusterAnalyzer(clusterer)
    after.draw_centers()
    after.print_all()
    print("")
    print("")
def get_acc_v_measure(clusters):
    """Return the (accuracy, v_measure) pair computed for ``clusters``.

    metric.KnownClusterAnalyzer is handed an object rather than the cluster
    list itself, so ``clusters`` is wrapped in a tiny adapter whose only
    member is get_clusters().
    """
    class _ClusterSource:
        # Adapter exposing the given clusters through get_clusters().
        def get_clusters(self):
            return clusters

    analyzer = metric.KnownClusterAnalyzer(_ClusterSource())
    accuracy = analyzer.accuracy()
    v_measure = analyzer.v_measure()
    return accuracy, v_measure
def print_cluster_analysis(clusters):
    """Print the cluster-only reports of metric.KnownClusterAnalyzer.

    ``clusters`` is wrapped in an adapter that offers just get_clusters();
    the analyzer then prints, in order: general info, the label confusion
    matrix, the label/cluster matrix and the metric info.
    """
    class _ClusterSource:
        # Adapter exposing the given clusters through get_clusters().
        def get_clusters(self):
            return clusters

    analyzer = metric.KnownClusterAnalyzer(_ClusterSource())
    # Report order is kept exactly as before.
    for report in (
        analyzer.print_general_info,
        analyzer.print_label_conf_mat,
        analyzer.print_label_cluster_mat,
        analyzer.print_metric_info,
    ):
        report()
def print_analysis(instances, clusters):
    """Print the full set of metric.KnownClusterAnalyzer reports.

    ``instances`` and ``clusters`` are exposed to the analyzer through an
    adapter providing get_docs() and get_clusters() respectively.  Reports
    are printed in this order: general info, histogram info, label confusion
    matrix, label/cluster matrix, label info, metric info.
    """
    class _ClusteringSource:
        # Adapter exposing the two collections through accessor methods.
        def get_clusters(self):
            return clusters

        def get_docs(self):
            return instances

    analyzer = metric.KnownClusterAnalyzer(_ClusteringSource())
    # Report order is kept exactly as before.
    for report in (
        analyzer.print_general_info,
        analyzer.print_histogram_info,
        analyzer.print_label_conf_mat,
        analyzer.print_label_cluster_mat,
        analyzer.print_label_info,
        analyzer.print_metric_info,
    ):
        report()
def main(args):
    """Re-cluster a saved clustering and print an analysis of the result.

    The clustering stored at ``args[1]`` is loaded with utils.load_obj, passed
    through reclusterWithOPTICS, wrapped in a BaseCONFIRM built from all
    cluster members, and reported via metric.KnownClusterAnalyzer.print_all().
    The module-level QueryCount is printed last.

    Args:
        args: argv-style sequence; exactly two entries are required, the
            second being the path of the saved clustering.

    Raises:
        SystemExit: with status 1, after printing the usage text, when
            ``args`` does not contain exactly two entries.
    """
    if len(args) != 2:
        print("Usage: mds.py clustering.pkl")
        # NOTE(review): text kept verbatim, although no "C" argument is
        # accepted by this entry point -- confirm and update the wording.
        print(" C is the cluster in clustering.pkl to display")
        # A usage error must not report success to the calling shell.
        sys.exit(1)

    path = args[1]
    print("Loading")
    # NOTE(review): load_obj presumably unpickles the file -- only feed it
    # files from a trusted source.
    clustering = utils.load_obj(path)
    # Captured up front so the figure stays correct even if the reclustering
    # helper modifies the loaded list in place.
    original_count = len(clustering)

    clusters = clustering
    # Second argument forwarded to reclusterWithOPTICS; add more values here
    # to apply several reclustering passes in sequence.
    for optics_arg in (5,):
        clusters = reclusterWithOPTICS(clusters, optics_arg)

    # Flatten all members into one list.  Unlike reduce(+) this is linear and
    # yields an empty list (instead of raising) when there are no clusters.
    all_members = [member for group in clusters for member in group.members]
    confirm = BaseCONFIRM(all_members)
    confirm.clusters = clusters

    print("Original Number of Clusters: %d" % original_count)
    print("Final Number of Clusters: %d" % len(clusters))

    analyzer = metric.KnownClusterAnalyzer(confirm)
    analyzer.print_all()
    print("User Queries: %s" % (QueryCount,))
def double_cluster_known():
    """Cluster with cluster.TemplateSorter twice and print an analysis.

    Documents come from doc.get_docs_nested(get_data_dir(sys.argv[2])) and
    the threshold from float(sys.argv[3]).  The first pass clusters and
    prunes; the second pass repeats go() with the centers of the first-pass
    clusters supplied as ``templates`` and prunes again.  The final state is
    drawn and printed through metric.KnownClusterAnalyzer.
    """
    docs = doc.get_docs_nested(get_data_dir(sys.argv[2]))
    epsilon = float(sys.argv[3])

    organizer = cluster.TemplateSorter(docs)
    organizer.go(epsilon)
    organizer.prune_clusters()
    first_pass = organizer.get_clusters()
    print("Initial Clustering Complete")
    print("Reclustering...")

    # Seed the second pass with the centers found by the first one.
    centers = [found.center for found in first_pass]
    organizer.go(epsilon, templates=centers)
    organizer.prune_clusters()
    # print("") emits the same bare newline as an empty Python 2 print.
    print("")
    print("")

    # The analyzer is given the clustering object, not a bare cluster list:
    # the other call sites in this file always hand it something exposing
    # get_clusters(), building an adapter when they only have a list.
    # NOTE(review): assumes TemplateSorter offers whatever else print_all()
    # reads (e.g. get_docs()) -- confirm against cluster.TemplateSorter.
    analyzer = metric.KnownClusterAnalyzer(organizer)
    analyzer.draw_centers()
    analyzer.print_all()