def task_rss(self, grab, task):
        """Collect authors, tags and titles from a fetched RSS feed.

        The response body is passed through ``remove_bom`` and parsed with
        feedparser. A summary dict is stored under ``task.data['content']``
        and the updated data is handed to ``self.save_blog``.

        The summary dict has the keys:
        * authors -- unique lower-cased entry authors (arbitrary order)
        * tags -- unique lower-cased, non-empty tag terms (arbitrary order)
        * titles -- lower-cased entry titles, in feed order
        * entries -- number of entries in the parsed feed
        * error -- ``str()`` of the feed's ``bozo_exception``, or '' if the
          parsed feed has no such key

        :param grab: object whose ``response`` provides ``url`` and ``body``
        :param task: task object; its ``data`` mapping is expected to have
            been filled by a previous handler
        """
        logger.info("Get rss: {}".format(grab.response.url))

        all_authors = set()
        all_tags = set()
        all_titles = []

        feed = feedparser.parse(remove_bom(grab.response.body))
        feed_entries = len(feed['entries'])

        for entry in feed['entries']:
            if 'author' in entry:
                all_authors.add(entry['author'].lower())
            if 'tags' in entry:
                for tag in entry['tags']:
                    # A tag may carry only a label/scheme, leaving its term
                    # empty or None; test the term *before* lower-casing so
                    # such tags are skipped instead of raising.
                    term = tag.get('term')
                    if term:
                        all_tags.add(term.lower())
            if 'title' in entry:
                all_titles.append(entry['title'].lower())

        # A parse problem is logged and also recorded in the saved summary.
        feed_parsing_error = ''
        if 'bozo_exception' in feed:
            logger.error(
                "Error parsing feed: {}".format(feed['bozo_exception']))
            feed_parsing_error = str(feed['bozo_exception'])

        content = {
            'authors': list(all_authors),
            'tags': list(all_tags),
            'titles': list(all_titles),
            'entries': feed_entries,
            'error': feed_parsing_error,
        }

        # we put this data into the task object in previous handler
        data = task.data
        data['content'] = content
        self.save_blog(data)
Beispiel #2
0
def parse_feed(grab, teaser_size=1000):
    """
    Parse the feed held in a Grab response and each of its entries.

    Every entry is run through ``parse_entry`` (``teaser_size`` is forwarded
    unchanged). An entry whose parsing raises is logged and left out of the
    result; the remaining entries are still processed.

    Returns dict with keys:
    * feed -- the object returned by ``feedparser.parse``
    * entries -- list of ``parse_entry`` results, in feed order
    """

    # Strip the BOM first: per the original author, feedparser sometimes
    # crashes with a segmentation fault when it is left in place.
    raw_body = remove_bom(grab.response.body)
    feed = feedparser.parse(raw_body)

    parsed = []
    for item in feed.entries:
        try:
            details = parse_entry(item, feed, teaser_size=teaser_size)
        except Exception as err:
            log.error('Entry parsing error', exc_info=err)
        else:
            parsed.append(details)

    return {'feed': feed, 'entries': parsed}
Beispiel #3
0
def parse_feed(grab, teaser_size=1000):
    """
    Build a parsed view of a feed downloaded with Grab.

    The result maps ``'feed'`` to the feedparser result and ``'entries'``
    to the list of values produced by ``parse_entry`` for each feed entry
    (called with the entry, the feed and ``teaser_size``). Entries that fail
    to parse are reported through ``log.error`` and skipped.
    """

    # The BOM has to go before parsing; the original author observed
    # feedparser occasionally dying with a segmentation fault otherwise.
    body = remove_bom(grab.response.body)
    parsed_feed = feedparser.parse(body)

    result = {'feed': parsed_feed, 'entries': []}
    for raw_entry in parsed_feed.entries:
        try:
            details = parse_entry(
                raw_entry, parsed_feed, teaser_size=teaser_size)
        except Exception as error:
            # Best effort: one broken entry must not discard the whole feed.
            log.error('Entry parsing error', exc_info=error)
            continue
        result['entries'].append(details)

    return result