Example #1
0
class StatFactory:
    """
    Produces statistics about the stored articles so they can be displayed.

    All figures are read through a WebDataStorageManager instance that is
    created when the factory is constructed.
    """
    def __init__(self):
        # Storage backend queried by the statistic methods.
        self.db = WebDataStorageManager()

    def get_number_article_per_source(self, source):
        """
        Return the number of entries the storage returns for ``source``.

        The "Url" column of the "ARTICLE" table is queried with the filter
        ``Source=source`` and the length of the result is returned
        (presumably one entry per stored article -- confirm against the
        storage manager).
        """
        matching_urls = self.db.get_stored_content_from_column(
            "ARTICLE", "Url", Source=source)
        return len(matching_urls)

    def get_number_article_per_category(self, category):
        """Placeholder: not implemented yet, always returns None."""
        return None

    def get_number_article_per_source_and_category(self, source, category):
        """Placeholder: not implemented yet, always returns None."""
        return None
Example #2
0
lemonde = LeMondeExtractor()
db = WebDataStorageManager()

# Fetch the news feed page and extract the list of article URLs from it.
news_feed_page = req.give_page_content(lemonde.get_news_feed())
urls_article_list = lemonde.get_article_webpage_list(news_feed_page)

# Database initialisation.
# `except Exception` (rather than a bare `except:`) lets KeyboardInterrupt and
# SystemExit propagate instead of being reported as an existing table.
# NOTE(review): any other create_table failure is still reported as
# "already existing"; narrow this to the storage layer's specific error
# type once it is known.
try:
    db.create_table(table_name, 'Source', 'Url', 'Content', 'Category')
except Exception:
    print("Table already existing, no creation needed.")
else:
    print("Table " + table_name + " created.")

# Get the URLs of the already stored articles (the URL is item 0 of each row).
stored_url_list = db.get_stored_content_from_column(table_name, "Url")


# Remove articles already present in the database: they must not be
# downloaded again. The list is filtered in place (same list object) and,
# unlike list.remove(), every occurrence of a stored URL is dropped.
known_urls = set(row[0] for row in stored_url_list)
urls_article_list[:] = [url for url in urls_article_list
                        if url not in known_urls]

# Articles acquisition process.
article_stored = 0
if not urls_article_list:
    print("No new article to download")
for article_url in urls_article_list:
    try:
        webpage = req.give_page_content(article_url)
        article_content = lemonde.get_article_text(webpage)