from retrieval import article import sys __author__="Dan" __date__ ="$16-Mar-2010 21:40:24$" if __name__ == "__main__": todays_date = datetime.datetime.today() two_week_difference = datetime.timedelta(days=-14) week_difference = datetime.timedelta(days=-7) two_day_difference = datetime.timedelta(days=-3) week_ago_date_mysql = (todays_date + week_difference).strftime("%Y-%m-%d") days_ago_date_mysql = (todays_date + two_day_difference).strftime("%Y-%m-%d") article_list = article.db_get_all_articles("NOT isnull(clusterid) AND EXISTS (SELECT articleid FROM articleswithterms WHERE articleid = articles.id) AND `datepublished` >= '%(weekago)s'" % {"weekago" : week_ago_date_mysql}) models,inv_index = articleunigram.db_load_models(article_list) mergers_query = "SELECT * FROM proposedmergers WHERE `approved` = 1" db = database.connect_to_database() cur = db.cursor(cursorclass = MySQLdb.cursors.DictCursor) clusterer = ClusterMaker(inv_index = inv_index) cur.execute(mergers_query) result = cur.fetchall() proposed_mergers = dict() for row in result: clusters_query = "SELECT `clusterid` FROM proposedmergers_clusters WHERE `mergerid` = %d" % (row['id']) cur.execute(clusters_query) clusters = cur.fetchall() proposed_mergers[row['id']] = [clustermodel.ClusterModel(inv_index = inv_index,id = cl['clusterid'],load_full_article = True) for cl in clusters] for p in proposed_mergers.items():
print "Setting cluster type to " + str(self.cluster_type) def get_cluster_info(self,e = None): if e: self.selected_cluster = int(self.lb_clusters.GetSelection(int(e.GetSelection()))) print str(self.selected_cluster) def start_clustering(self,e = None): if e: clusterer = clustering.ClusterMaker(cluster_type = self.cluster_type) cluster_thread = clustering.ClustererThread(clusterer,self.models,self.inv_index,float(self.tc_threshold.GetValue()),self.cluster_method,on_change=self.update) cluster_thread.start() # clusterer.cluster_articles(self.models,self.inv_index,float(self.tc_threshold.GetValue()),self.cluster_method,on_change=self.update) # clusterer.run(self.models,self.inv_index,float(self.tc_threshold.GetValue()),self.cluster_method,on_change=self.update,name = "whatever") def update(self,clusters=None,new_cluster=None,remove_model=None): if clusters: self.tc_num_clusters.SetValue(str(len(clusters))) self.tc_num_clusters.Update() self.clusters = clusters if new_cluster: self.lb_clusters.AppendAndEnsureVisible(str(new_cluster)) self.lb_clusters.Update() theVar = 1 app = wx.App(False) man_articles = article.db_get_all_articles("NOT trainingcluster = 0") models,inv_index = articleunigram.db_load_models(man_articles) frame = ClusterMonitor(None,'Article Browser',models,inv_index) app.MainLoop()