Ejemplo n.º 1
0
def _extract_domain(link):
    """Return the host part of an http(s) URL, or ``link`` itself if none is found."""
    try:
        match = re.search(r'https?://(.*?)/', link)
    except TypeError:
        # Non-string link: keep the historical fallback of storing it as-is.
        return link
    return match.group(1) if match else link


def _strip_post_footer(description):
    """Remove the text of the first ``<p>The post ...</p>`` paragraph, if any."""
    try:
        match = re.search(r'<p>(The post.*?)</p>', description)
    except TypeError:
        # Non-string description: nothing to strip, store it unchanged.
        return description
    if match is None:
        return description
    # Only the captured text is removed; the surrounding <p></p> tags are
    # left in place, exactly as the previous implementation did.
    return description.replace(match.group(1), '')


def feed_update():
    """Fetch every stored feed and save entries whose title is not yet known.

    Each ``Feed`` is requested with its stored etag. Feeds answering with
    HTTP 304 (not modified) are skipped. For every other feed, each entry
    whose title does not match an existing ``Article`` title is saved as a
    new ``Article``; the feed's etag is then refreshed and the feed saved.

    Entries are read for ``title``, ``link``, ``description`` and
    ``published_parsed``, plus ``author`` when the feed itself has no author.

    Returns:
        A tuple ``(feed_updated_list, article_updated_list)``. The first
        item lists the titles of the feeds that were processed; the second
        is a parallel list holding, per processed feed, the titles of the
        articles created for it.
    """
    feed_updated_list = []
    article_updated_list = []
    # Existing article titles, loaded on first use and kept current as new
    # articles are saved, so duplicates inside a single run are caught too.
    known_titles = None

    for feed in Feed.objects.all():
        try:
            parsed = feedparser.parse(feed.url, etag=feed.etag)
        except Exception:
            # Single-argument form prints the same text on Python 2 and 3.
            print("%s %s" % (feed.title,
                             "Does not have an etag!!!!!!!!!!!!!!!!!!"))
            # Without a parse result there is nothing to process; falling
            # through would reuse the previous feed's result.
            continue

        # ``status`` is absent when no HTTP response was received; treat
        # that like any other non-304 result instead of raising.
        if getattr(parsed, 'status', None) == 304:
            continue

        if known_titles is None:
            known_titles = set(
                existing.title for existing in Article.objects.all())

        new_titles = []
        feed_updated_list.append(feed.title)
        for entry in parsed.entries:
            if entry.title in known_titles:
                continue
            new_titles.append(entry.title)
            known_titles.add(entry.title)

            article = Article()
            article.title = entry.title
            article.url = entry.link
            article.domain = _extract_domain(entry.link)
            # A feed-level author overrides the per-entry author.
            if feed.author:
                article.author = feed.author
            else:
                article.author = entry.author
            article.authorSlug = slugify(article.author)
            article.description = _strip_post_footer(entry.description)
            published = datetime.datetime(*(entry.published_parsed[0:6]))
            article.publication_date = published.strftime('%Y-%m-%d')
            article.feed = feed
            article.practiceArea = feed.practiceArea
            article.practiceAreaSlug = feed.practiceArea.replace(
                " ", "_").lower()
            article.save()

        article_updated_list.append(new_titles)
        # Not every response carries an etag; keep the stored one if so.
        etag = getattr(parsed, 'etag', None)
        if etag is not None:
            feed.etag = etag
        feed.save()

    return feed_updated_list, article_updated_list