Example #1
import datetime
import time
import HTMLParser  # Python 2 stdlib; on Python 3 use html.unescape() instead

import feedparser

# Assumed import path for the Feed and Entry models; adjust to the app's layout.
from .models import Entry, Feed


def refresh_feed(feed_id):
    # Used to unescape HTML entities in titles
    html_parser = HTMLParser.HTMLParser()

    feed = Feed.objects.get(pk=feed_id)

    parsed = feedparser.parse(feed.uri)
    parsed_feed = parsed.feed

    feed.title = html_parser.unescape(parsed_feed.title)
    feed.save()

    for parsed_entry in parsed.entries:
        # Prefer the entry's id as a stable identifier, falling back to its link.
        uuid = getattr(parsed_entry, "id", parsed_entry.link)

        # Reuse the existing Entry for this uuid, or create a new one bound to the feed.
        try:
            entry = Entry.objects.get(uuid=uuid)
        except Entry.DoesNotExist:
            entry = Entry()
            entry.feed = feed
            entry.uuid = uuid

        entry.link = parsed_entry.link
        entry.title = html_parser.unescape(parsed_entry.title)
        entry.author = getattr(parsed_entry, "author", None)

        # Convert feedparser's struct_time into a datetime for the published field.
        timestamp = time.mktime(parsed_entry.updated_parsed)
        entry.published = datetime.datetime.fromtimestamp(timestamp)

        # Prefer the full content when available, otherwise fall back to the summary.
        if hasattr(parsed_entry, "content"):
            entry.content = parsed_entry.content[0].value
        elif hasattr(parsed_entry, "summary"):
            entry.content = parsed_entry.summary

        entry.save()
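
For context, refresh_feed expects Django models shaped roughly like the sketch below. The field names follow the attribute accesses in the snippet above; the field types, options, and the models-module location are assumptions rather than the original project's definitions.

# Hypothetical models.py sketch inferred from refresh_feed(); field types and
# options are assumptions, not the original definitions.
from django.db import models


class Feed(models.Model):
    uri = models.URLField(unique=True)
    title = models.TextField(blank=True)


class Entry(models.Model):
    feed = models.ForeignKey(Feed, on_delete=models.CASCADE)
    uuid = models.CharField(max_length=255, unique=True)
    link = models.URLField()
    title = models.TextField()
    author = models.CharField(max_length=255, null=True, blank=True)
    published = models.DateTimeField()
    content = models.TextField(blank=True)
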
Example #2
import datetime
import logging
import time
import HTMLParser  # Python 2 stdlib; on Python 3 use html.unescape() instead

import feedparser

# Assumed import path for the Feed and Entry models; adjust to the app's layout.
from .models import Entry, Feed

logger = logging.getLogger(__name__)


def refresh_feed(feed_id):
    # Used to unescape HTML entities in titles
    html_parser = HTMLParser.HTMLParser()
    feed = Feed.objects.get(pk=feed_id)
    parsed = feedparser.parse(feed.uri)

    # feedparser sets `bozo` when the feed could not be parsed cleanly.
    if parsed.bozo:
        logger.warning('feedparser reported a bozo error; skipping feed %s', feed.uri)
        return

    parsed_feed = parsed.feed
    # Fall back to the feed URI when the parsed feed carries no title.
    title = getattr(parsed_feed, 'title', feed.uri)
    feed.title = html_parser.unescape(title)
    feed.save()

    for parsed_entry in parsed.entries:
        uuid = getattr(parsed_entry, 'id', parsed_entry.link)

        try:
            entry = Entry.objects.get(uuid=uuid)
        except Entry.DoesNotExist:
            entry = Entry()
            entry.feed = feed
            entry.uuid = uuid

        entry.link = parsed_entry.link
        entry.title = html_parser.unescape(parsed_entry.title)
        entry.author = getattr(parsed_entry, 'author', None)

        timestamp = time.mktime(parsed_entry.updated_parsed)
        entry.published = datetime.datetime.fromtimestamp(timestamp)

        if hasattr(parsed_entry, 'content'):
            entry.content = parsed_entry.content[0].value
        elif hasattr(parsed_entry, 'summary'):
            entry.content = parsed_entry.summary

        entry.save()
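
Either variant can then be driven by a small loop over the stored feeds, for example from a management command or a scheduled job. The helper below is a minimal usage sketch, not part of the original snippets; the catch-all error handling is an assumption.

def refresh_all_feeds():
    # Refresh every known feed, logging per-feed failures instead of raising.
    for feed in Feed.objects.all():
        try:
            refresh_feed(feed.pk)
        except Exception:
            logger.exception('failed to refresh feed %s', feed.uri)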