# Example #1
def parseFeed(rssUrl):
    """Poll the RSS feed at *rssUrl*, rank every article found, and send
    one batched alert for all new articles.

    Each ranked article is a list of the form
    [polledtime, pubtime, title, url, category, rssUrl, count, sub_position, False]
    where *count* is the 1-based position of the entry in the feed and
    *sub_position* ranks articles extracted from one entry's description.
    """
    d = feedparser.parse(rssUrl)

    # Loop-invariant values hoisted out of the entry loop: the poll time
    # and the feed category do not change per entry.
    polledtime = datetime.now()
    category = getCategory(d, rssUrl)

    # Accumulate across ALL entries so a single batched alert is sent.
    # (Bug fix: this list was previously re-created on every loop
    # iteration, and alerts were sent once per entry, discarding
    # articles found in earlier iterations.)
    new_articles = list()

    for count, entry in enumerate(d.entries, start=1):
        article_title = entry.title
        article_url = parseFeedUrl(entry.links[0].href)

        # Feed timestamps follow the RFC-822 style used by RSS,
        # e.g. "Mon, 01 Jan 2024 12:00:00 GMT".
        pubtime = datetime.strptime(entry.updated, "%a, %d %b %Y %H:%M:%S %Z")

        if article_url.startswith(("http://www.guardian.co.uk", "http://www.guardiannews.com")):
            # Direct Guardian link: the entry itself is a single article.
            ranked_article = [polledtime, pubtime, article_title, article_url, category, rssUrl, count, 1, False]
            addNewArticle(new_articles, ranked_article)
        else:
            # Aggregated entry: individual articles are embedded in the
            # description HTML. processDescriptionWithSoup yields items
            # shaped (sub_position, url_fragment, title) — presumably;
            # verify against its definition.
            description = processDescriptionWithSoup(entry.description, 2)
            for item in description:
                sub_position = item[0]
                article_url = parseUrlFromDescription(item[1])
                article_title = item[2]
                ranked_article = [polledtime, pubtime, article_title, article_url, category, rssUrl, count, sub_position, False]
                addNewArticle(new_articles, ranked_article)

    if new_articles:
        logging.info("Found new headlines.  Sending alerts")
        notifications.send_alerts(rssUrl, new_articles)
# Example #2
def checkDeath(rssUrl):
    """Delete stored articles that no longer appear in the feed at *rssUrl*
    and send a single death alert for the batch removed.
    """
    logging.debug("Checking feed for dead headlines: %s", rssUrl)
    stored_articles = models.get_articles_for_feed(rssUrl)
    logging.debug("Stored article count: %d", len(stored_articles))

    # Build the full set of URLs currently live in the feed: the
    # top-level entry links plus any articles embedded in each entry's
    # description HTML. A set gives O(1) membership tests below instead
    # of the previous O(n) list scan per stored article.
    d = feedparser.parse(rssUrl)
    live_urls = set()
    for entry in d.entries:
        live_urls.add(parseFeedUrl(entry.link))
        description = processDescriptionWithSoup(entry.description, 2)
        for item in description:
            # item[1] is presumed to carry the article URL fragment —
            # matches parseFeed's handling; confirm against the helper.
            live_urls.add(parseUrlFromDescription(item[1]))
    logging.debug("Live URL count: %d", len(live_urls))

    # Any stored article whose URL vanished from the feed is dead:
    # remember it for the alert and delete it from storage.
    dead_articles = list()
    for article in stored_articles:
        if article.url not in live_urls:
            logging.debug("Article no longer in feed: %s", article.url)
            dead_articles.append(article)
            article.delete()
    logging.debug("Dead article count: %d", len(dead_articles))

    if dead_articles:
        logging.info("Headline died.  Sending alerts")
        notifications.send_death_alert(rssUrl, dead_articles)