Example #1
0
def parse(content, url):
    """
    Produce a python representation of the given RSS/atom feed content.

    The representation is the one returned by feedparser.parse and is
    documented at: http://feedparser.org

    On top of what feedparser returns, this function guarantees that:
      * the feed has an 'id' (falls back to url),
      * the feed has a link with rel == 'self' (a FakeLink pointing
        at url is appended when none is present),
      * every entry left in the feed has an 'id'; entries for which
        find_best_entry_id cannot derive one are removed.

    content - string containing RSS/atom/etc xml document
    url - the url that the content was retrieved from.

    returns: the (modified in place) feedparser result.

    raises: InvalidFeedError if no feed could be parsed.
    """

    # feedparser is handed these as header defaults since only the
    # body of the document is available here, not the real response.
    fake_headers = {
        'content-location': url,
        'content-type': 'text/xml; charset=utf-8',
    }
    ff = feedparser.parse(content, header_defaults=fake_headers)

    if ff is None or 'feed' not in ff:
        raise InvalidFeedError()

    # make sure the feed has an id...
    if 'id' not in ff.feed:
        ff.feed['id'] = url

    # make sure the feed has a self referential link
    ff.feed.setdefault('links', [])
    # getattr with a default: a link that carries no rel at all must
    # not blow up the scan with an AttributeError.
    has_self_ref = any(getattr(link, 'rel', None) == 'self'
                       for link in ff.feed.links)
    if not has_self_ref:
        ff.feed.links.append(FakeLink(rel='self', href=url, title=''))

    # make sure every entry has an id, dropping the ones that cannot
    # be given one.
    entries = ff.get('entries', [])
    kept = []
    for e in entries:
        if e.get('id', None) is None:
            eid = find_best_entry_id(e)
            if eid is None:
                # throw this entry out, it has no
                # id, title, summary or content
                # that is recognizable...
                continue
            e['id'] = eid
        kept.append(e)

    if len(kept) != len(entries):
        # replace the contents in place so the list object held by
        # the feedparser result stays the same.
        entries[:] = kept

    return ff
Example #2
0
def _feed_info(request, query):
    """
    Fetch query as a url and check that it parses as a feed.

    request - object whose context.config is used to create the
              http client.
    query - the url to fetch, or None.

    returns: a dict with 'url' (the query) and 'title' (the feed
    title, '' when absent) if the url answers with status 200 and
    feedparser parses it without recording a 'bozo_exception';
    None in every other case, including when query is None or when
    an error occurs while fetching/parsing (the error is logged).
    """
    if query is None:
        return None

    client = http.create_client(request.context.config)
    try:
        headers = {'Connection': 'close'}
        response, content = client.request(query, headers=headers)

        if response.status != 200:
            return None

        ff = feedparser.parse(content)
        if ff and 'feed' in ff and 'bozo_exception' not in ff:
            return {'url': query,
                    'title': ff.feed.get('title', '')}
        return None

    except Exception:
        # Best effort: any failure means "not a usable feed".
        # Exception (rather than a bare except) lets KeyboardInterrupt
        # and SystemExit propagate instead of being swallowed.
        log.error("Error verifying feed at %s: %s" % (query, traceback.format_exc()))
        return None
    finally:
        http.close_all(client)