class StatFactory:
    """Produce statistics so they can be displayed."""

    def __init__(self):
        """Attach a ``WebDataStorageManager`` instance as ``self.db``."""
        self.db = WebDataStorageManager()

    def get_number_article_per_source(self, source):
        """Return the number of entries stored for *source*.

        The count is the length of whatever ``self.db`` returns for the
        "Url" column of the "ARTICLE" table when queried with
        ``Source=source``.
        """
        matching_urls = self.db.get_stored_content_from_column(
            "ARTICLE", "Url", Source=source
        )
        return len(matching_urls)

    def get_number_article_per_category(self, category):
        """Placeholder, not implemented yet: always returns ``None``."""
        return None

    def get_number_article_per_source_and_category(self, source, category):
        """Placeholder, not implemented yet: always returns ``None``."""
        return None
# Script overview: fetch the news feed page through WebRequester, ask LeMondeExtractor for the article URL list, create the "ARTICLE" table (any failure is reported as "already existing"), then read the stored "Url" column and start comparing it with the fresh list. NOTE(review): the final `if` statement has no visible body in this chunk - confirm what follows before editing.
from lemonde_extractor import LeMondeExtractor from database_manager import WebDataStorageManager from web_client import WebRequester # Configuration table_name = "ARTICLE" # Instances creation req = WebRequester() lemonde = LeMondeExtractor() db = WebDataStorageManager() # get articles url list from websites news_feed_page = req.give_page_content(lemonde.get_news_feed()) urls_article_list = lemonde.get_article_webpage_list(news_feed_page) # Database initialisation try: db.create_table(table_name, 'Source','Url','Content','Category') print "Table "+ table_name + " created." except: print "Table already existing, no creation needed." # get urls of already stored articles stored_url_list = db.get_stored_content_from_column(table_name, "Url") # remove article from list if already present in database. It must not be downloaded again for stored_url in stored_url_list: if stored_url[0] in urls_article_list:
def __init__(self):
    """Create a ``WebDataStorageManager`` and keep it as ``self.db``."""
    storage = WebDataStorageManager()
    self.db = storage