Ejemplo n.º 1
0
def parse(rss_url):
    parsed_data = feedparser.parse(rss_url)
    for rss_entry in parsed_data['items']:
        url = aParser.remove_query_from_url(rss_entry['link'])
        yield model.NewsData(url=url,
                             title=html.unescape(rss_entry['title']),
                             pub_date=dt.fromtimestamp(mktime(rss_entry['published_parsed'])),
                             summary=rss_entry.get('summary', None))
Ejemplo n.º 2
0
import django
import os

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "news_project.settings")
django.setup()

from prjparser import aParser
from db.models import News

for news in News.objects.iterator():
    url = aParser.remove_query_from_url(news.url)
    if len(url) != len(news.url):
        if not News.objects.filter(url=url)[:1].exists():
            print("update news id{}\t{}".format(str(news.id), news.url))
            news.url = url
            news.save()
        else:
            print("delete news id{}\t{}".format(str(news.id), news.url))
            news.delete()