def test_dbscan_clustering_with_tweets(self):
    """Run DBSCAN over documents fetched from ``ws`` and plot the outcome.

    Documents dated between 2011-01-25 12:00 and 2011-01-26 12:30 are
    requested with ``limit=100``, added to a ``DBSCANClusterer`` built with
    ``filter_terms=False`` and clustered with ``pca=True``.  The clusters
    are then dumped under the name "dbscan_with_tweets" and plotted.

    Nothing is asserted: the test passes as long as no call raises.
    """
    # NOTE(review): this method reached review with its line breaks lost, so
    # which plotting calls were commented out is a best guess -- confirm that
    # plot_scatter() and plot_growth_timeline(cumulative=True) are meant to
    # be live.
    window_start = datetime.datetime(2011, 1, 25, 12, 0, 0)
    window_end = datetime.datetime(2011, 1, 26, 12, 30, 0)
    documents = ws.get_documents_by_date(window_start, window_end, limit=100)

    clusterer = DBSCANClusterer(filter_terms=False)
    clusterer.add_documents(documents)
    # Positional arguments are epsilon and min_pts, in that order.
    clusterer.run(0.02, 2, pca=True)
    clusterer.dump_clusters_to_file("dbscan_with_tweets")

    clusterer.plot_scatter()
    # Non-cumulative alternative that was left disabled:
    #   plot_growth_timeline(cumulative=False, plot_method="matplotlib")
    clusterer.plot_growth_timeline(cumulative=True)
# Hand-written 2-D points used as a stand-in term-document matrix for the
# module-level clusterer below.
points = [
    [1, 1],
    [1.5, 1],
    [1.8, 1.5],
    [2.1, 1],
    [3.1, 2],
    [4.1, 2],
    [5.1, 2],
    [10, 10],
    [11, 10.5],
    [9.5, 11],
    [9.9, 11.4],
    [15.0, 17.0],
    [15.0, 17.0],
    [7.5, -5.0],
]

dbscan = DBSCANClusterer()
# HACK: test-only shortcut. In normal usage never set td_matrix yourself and
# never populate a dummy document_dict.
dbscan.td_matrix = points
# One placeholder document per point, keyed '0', '1', ... in insertion order.
# Derived from len(points) so the two structures cannot get out of step.
dbscan.document_dict = OrderedDict(
    (str(index), 'dummy') for index in range(len(points))
)


class Test_Dbscan_clustering(unittest.TestCase):
    """Smoke test for DBSCANClusterer using the module-level point fixture."""

    def test_dbscan_cluster(self):
        """Cluster the fixture points and print every cluster's members.

        Nothing is asserted: the test passes as long as ``dbscan.run`` and
        the iteration over its result do not raise.
        """
        # NOTE(review): epsilon and min_pts are read from module scope but
        # are not defined in the part of the file visible here -- confirm
        # they are assigned elsewhere, otherwise this raises NameError.
        clusters = dbscan.run(epsilon, min_pts)

        # Single-argument print(...) and .items() behave the same on
        # Python 2 and also run on Python 3.
        print('\n========== Results of Clustering =============')
        for cluster, members in clusters.items():
            print('\n--------Cluster %d---------' % cluster)
            for point in members:
                print(point)