예제 #1
0
 def run(self):
     """Print every stored cluster together with the titles of its news.

     Loads all clusters through ``DBProxy`` and, for each one, prints a
     ``Cluster <position>`` header, one indented line per title obtained
     from ``cluster.newss.all()``, and a blank separator line.
     """
     from dbfrontend import DBProxy
     db = DBProxy()
     clusters = db.get_all_clusters()
     # enumerate() yields the position directly. The previous
     # clusters.index(cluster) rescanned the collection on every pass and
     # returned the position of the first *equal* element, so clusters that
     # compare equal would all have been labelled with the same number.
     for position, cluster in enumerate(clusters):
         # Single-argument, parenthesised print produces the same output on
         # Python 2 and Python 3.
         print("Cluster " + str(position))
         for n in cluster.newss.all():
             print("   " + n.title)
         print("")
예제 #2
0
 def run(self):
     """Dump all clusters and the titles of their news items to stdout.

     For each cluster returned by ``DBProxy.get_all_clusters()`` this prints
     a ``Cluster <position>`` line, then every title from
     ``cluster.newss.all()`` indented by three spaces, then an empty line.
     """
     from dbfrontend import DBProxy
     db = DBProxy()
     clusters = db.get_all_clusters()
     # Number the clusters with enumerate() rather than looking each one up
     # with clusters.index(cluster): the lookup is a linear scan per cluster
     # and reports the first equal element, which mislabels clusters that
     # compare equal to an earlier one.
     for number, cluster in enumerate(clusters):
         # One argument per print call keeps the parenthesised form
         # equivalent under both Python 2 and Python 3.
         print("Cluster " + str(number))
         for n in cluster.newss.all():
             print("   " + n.title)
         print("")
예제 #3
0
    def run(self):
        """Fetch news for RSS entries that are not in the database yet.

        Steps:
          1. Read the RSS entries via ``self.fetch_rss_entries()``.
          2. Discard entries whose ``url`` already belongs to a stored news
             item, and keep only the first entry for each remaining URL.
          3. Fetch and parse the surviving entries with
             ``self.fetch_and_parse_news()``.
          4. Drop falsy results and store the rest with ``db.add_list()``.

        Progress is reported on stdout.
        """
        from dbfrontend import DBProxy
        # NOTE(review): sanitizer is not referenced in this method; the import
        # is kept in case loading the module has side effects -- confirm.
        import sanitizer
        rss_entries = self.fetch_rss_entries()
        db = DBProxy()

        # Track known URLs in a set so each membership test is O(1). The
        # earlier version searched a list of database URLs and rebuilt a list
        # of accepted URLs for every entry, which is quadratic in the feed
        # size. Assumes URLs are hashable (presumably strings).
        seen_urls = set(n.url for n in db.get_all_news())
        unique_rss_entries = []
        for entry in rss_entries:
            if entry.url not in seen_urls:
                # First occurrence wins, exactly as before.
                seen_urls.add(entry.url)
                unique_rss_entries.append(entry)

        print("There are " + str(len(unique_rss_entries)) + " news entries")
        news = self.fetch_and_parse_news(unique_rss_entries)
        # Filter out falsy results (presumably entries that could not be
        # fetched or parsed) *before* reporting, so the count matches what is
        # actually stored rather than including the discarded placeholders.
        news = [n for n in news if n]
        print("Fetched " + str(len(news)) + " news")
        db.add_list(news)
예제 #4
0
    def run(self):
        """Download and store the news behind not-yet-stored RSS entries.

        The RSS entries come from ``self.fetch_rss_entries()``. Entries whose
        ``url`` matches a news item already returned by
        ``DBProxy.get_all_news()`` are skipped, as are repeated URLs within
        the feed (the first entry for a URL is kept). The remaining entries
        are handed to ``self.fetch_and_parse_news()``; falsy results are
        dropped and the rest is saved with ``db.add_list()``.

        Two progress lines are printed to stdout.
        """
        from dbfrontend import DBProxy
        # NOTE(review): nothing in this method uses sanitizer; the import is
        # left in place because removing it could drop import-time side
        # effects -- confirm before deleting.
        import sanitizer
        rss_entries = self.fetch_rss_entries()
        db = DBProxy()
        db_news = db.get_all_news()

        # One set covers both "already in the database" and "already accepted
        # from this feed". This replaces a list lookup per entry plus a list
        # rebuilt on every iteration (quadratic) with constant-time checks.
        # Assumes URLs are hashable (presumably strings).
        known_urls = set(n.url for n in db_news)
        unique_rss_entries = []
        for entry in rss_entries:
            if entry.url in known_urls:
                continue
            known_urls.add(entry.url)
            unique_rss_entries.append(entry)

        print("There are " + str(len(unique_rss_entries)) + " news entries")
        news = self.fetch_and_parse_news(unique_rss_entries)
        # Remove falsy results first so the reported number is the number of
        # items that will really be stored; counting before the filter
        # included the discarded entries in the "Fetched" total.
        news = [n for n in news if n]
        print("Fetched " + str(len(news)) + " news")
        db.add_list(news)
예제 #5
0
 def run(self, distance_function, reduce_function, threshold):
     """Rebuild the stored clusters from all news items.

     Existing clusters are deleted, every news item receives a weighted
     term vector in its ``vector`` attribute, and the items are then handed
     to ``self.group()``. The result of that call is saved with
     ``db.add_list()`` and returned.

     Args:
         distance_function: Forwarded unchanged to ``self.group()``.
         reduce_function: Called on each item's ``clean_body`` and
             ``clean_title``; its result must be iterable over keys and
             provide ``get(key, default)``.
         threshold: Forwarded unchanged to ``self.group()``.

     Returns:
         Whatever ``self.group()`` returned.
     """
     from dbfrontend import DBProxy
     proxy = DBProxy()
     # Old clusters are removed before the news items are loaded.
     proxy.delete_all_clusters()
     articles = proxy.get_all_news()
     title_weight = NewsGroup.TITLE_WEIGHT
     body_weight = NewsGroup.BODY_WEIGHT
     for article in articles:
         body_terms = reduce_function(article.clean_body)
         title_terms = reduce_function(article.clean_title)
         # Every key that occurs in the body or in the title gets a
         # component; a key missing on one side contributes 0 for it.
         vocabulary = set(body_terms) | set(title_terms)
         article.vector = dict(
             (term,
              body_weight * body_terms.get(term, 0)
              + title_weight * title_terms.get(term, 0))
             for term in vocabulary
         )
     grouped = self.group(proxy, articles, threshold, distance_function)
     proxy.add_list(grouped)
     return grouped