Esempio n. 1
0
 def from_feedparser_entry(entry):
     """Build an Item from a feedparser entry dict.

     Prefers the first video enclosure; falls back to the entry link
     (with a special case for youtube.com links, which stopped carrying
     enclosures).  Raises FeedparserEntryError when the entry has neither
     an enclosure nor a link, and EntryMissingDataError when the link is
     disallowed or a required field (title/description) is missing.
     """
     # XXX Added some hacks to get a decent item out of YouTube after they
     # stopped having enclosures (2008-1-21).
     enclosure = feedutil.get_first_video_enclosure(entry)
     if enclosure is None:
         if 'link' not in entry:
             # BUG FIX: message typo "ngo link" -> "no link"
             raise exceptions.FeedparserEntryError(
                 "No video enclosure and no link")
         if entry['link'].find('youtube.com') == -1:
             if not filetypes.isAllowedFilename(entry['link']):
                 raise exceptions.EntryMissingDataError('Link is invalid')
     rv = Item()
     try:
         rv.name = feedutil.to_utf8(entry['title'])
         if enclosure is not None:
             rv.url = feedutil.to_utf8(enclosure['href'])
             # split off the front if there's additional data in the
             # MIME type (e.g. "video/mp4; codecs=...")
             if 'type' in enclosure:
                 rv.mime_type = feedutil.to_utf8(enclosure['type']).split(
                     ';', 1)[0]
             else:
                 rv.mime_type = 'video/unknown'
         elif entry['link'].find('youtube.com') != -1:
             # YouTube pages don't expose a direct file; assume flash video
             rv.url = entry['link']
             rv.mime_type = 'video/x-flv'
         else:
             rv.url = entry['link']
             rv.mime_type = filetypes.guessMimeType(rv.url)
         if enclosure is not None and 'text' in enclosure:
             rv.description = feedutil.to_utf8(enclosure['text'])
         elif 'description' in entry:
             rv.description = feedutil.to_utf8(entry['description'])
         elif 'media_description' in entry:
             rv.description = feedutil.to_utf8(entry['media_description'])
         elif entry.get('link', '').find('youtube.com') != -1:
             # NOTE(review): rv.description has not been assigned on this
             # path, so this re.search raises AttributeError (caught below
             # and re-raised as EntryMissingDataError).  Presumably it was
             # meant to scrape some summary field of the entry -- confirm
             # intended source before changing.
             match = re.search(r'<div><span>(.*?)</span></div>',
                               rv.description, re.S)
             if match:
                 rv.description = feedutil.to_utf8(
                     saxutils.unescape(match.group(1)))
         rv.description  # this will raise an AttributeError if it wasn't set
     except (AttributeError, KeyError) as e:
         # Missing title/link/description all funnel into one error type
         raise exceptions.EntryMissingDataError(e.args[0])
     # BUG FIX: the constructed Item was never returned
     return rv
Esempio n. 2
0
 def from_feedparser_entry(entry):
     """Build an Item from a feedparser entry dict.

     Prefers the first video enclosure; falls back to the entry link
     (with a special case for youtube.com links, which stopped carrying
     enclosures).  Raises FeedparserEntryError when the entry has neither
     an enclosure nor a link, and EntryMissingDataError when the link is
     disallowed or a required field (title/description) is missing.
     """
     # XXX Added some hacks to get a decent item out of YouTube after they
     # stopped having enclosures (2008-1-21).
     enclosure = feedutil.get_first_video_enclosure(entry)
     if enclosure is None:
         if 'link' not in entry:
             # BUG FIX: message typo "ngo link" -> "no link"
             raise exceptions.FeedparserEntryError(
                 "No video enclosure and no link")
         if entry['link'].find('youtube.com') == -1:
             if not filetypes.isAllowedFilename(entry['link']):
                 raise exceptions.EntryMissingDataError('Link is invalid')
     rv = Item()
     try:
         rv.name = feedutil.to_utf8(entry['title'])
         if enclosure is not None:
             rv.url = feedutil.to_utf8(enclosure['href'])
             # split off the front if there's additional data in the
             # MIME type (e.g. "video/mp4; codecs=...")
             if 'type' in enclosure:
                 rv.mime_type = feedutil.to_utf8(enclosure['type']
                                                 ).split(';', 1)[0]
             else:
                 rv.mime_type = 'video/unknown'
         elif entry['link'].find('youtube.com') != -1:
             # YouTube pages don't expose a direct file; assume flash video
             rv.url = entry['link']
             rv.mime_type = 'video/x-flv'
         else:
             rv.url = entry['link']
             rv.mime_type = filetypes.guessMimeType(rv.url)
         if enclosure is not None and 'text' in enclosure:
             rv.description = feedutil.to_utf8(enclosure['text'])
         elif 'description' in entry:
             rv.description = feedutil.to_utf8(entry['description'])
         elif 'media_description' in entry:
             rv.description = feedutil.to_utf8(entry['media_description'])
         elif entry.get('link', '').find('youtube.com') != -1:
             # NOTE(review): rv.description has not been assigned on this
             # path, so this re.search raises AttributeError (caught below
             # and re-raised as EntryMissingDataError).  Presumably it was
             # meant to scrape some summary field of the entry -- confirm
             # intended source before changing.
             match = re.search(r'<div><span>(.*?)</span></div>',
                               rv.description, re.S)
             if match:
                 rv.description = feedutil.to_utf8(
                     saxutils.unescape(match.group(1)))
         rv.description # this will raise an AttributeError if it wasn't set
     except (AttributeError, KeyError) as e:
         # Missing title/link/description all funnel into one error type
         raise exceptions.EntryMissingDataError(e.args[0])
     # BUG FIX: the constructed Item was never returned
     return rv
Esempio n. 3
0
def get_channels_and_items(feed_url):
    """Return a (channel, items) pair for feed_url.

    Lookup order: the cache, then the Channel table, then fetching and
    parsing the feed itself (wrapping the result in FakeChannel/FakeItem
    objects).  Raises FeedFetchingError when the feed cannot be fetched
    or parsed.
    """
    feed_sha1 = sha.sha(feed_url).hexdigest()
    feed_key = 'share_feed-' + feed_sha1
    items_key = 'share_feed_items-' + feed_sha1

    # check the cache.  If not...
    cached_channel = cache.cache.get(feed_key)
    cached_items = cache.cache.get(items_key)
    # BUG FIX: the two cache keys can expire independently; a hit on the
    # channel alone used to hand the caller items=None.
    if cached_channel and cached_items is not None:
        channel = cached_channel
        items = cached_items
    else:
        # check to see if we have that feed in our database...
        try:
            channel = Channel.objects.get(url=feed_url)
        except Channel.DoesNotExist:
            ## parse the feed
            try:
                parsed = feedparser.parse(feed_url)
            except Exception:
                logging.exception('error parsing %s' % feed_url)
                raise FeedFetchingError('feedparser error while parsing')

            #ok, so this doesn't work...
            if hasattr(parsed, 'status') and parsed.status != 200:
                raise FeedFetchingError(
                    "Got a non-200 status while parsing feed")

            if parsed.bozo:  # this didn't work either
                raise FeedFetchingError('Feed is unparsable')

            ## generate fake channel
            thumbnail_url = (settings.STATIC_BASE_URL +
                             'images/generic_feed_thumb.png')

            channel = FakeChannel(parsed.feed.get('title', ''),
                                  parsed.feed.get('subtitle', ''), feed_url,
                                  parsed.feed.get('link', ''), thumbnail_url)

            items = []
            # why not limit the number of items here to 4?
            # Because we might need to check to see if a particular
            # item is in this feed if it's being faked...
            for entry in parsed.entries:
                # prefer the enclosure href; fall back to the entry link
                enclosure = feedutil.get_first_video_enclosure(entry)
                link = None
                if enclosure is not None and 'href' in enclosure:
                    link = enclosure['href']
                if not link:
                    if not entry.get('link'):
                        continue
                    link = entry.link

                updated_datetime = None
                if entry.get('updated_parsed'):
                    # BUG FIX: struct_time[:7] includes the weekday, which
                    # datetime() would treat as microseconds; only the
                    # first six fields are date/time components.
                    updated_datetime = datetime.datetime(
                        *entry.updated_parsed[:6])
                # skip entries with no usable title/description at all
                if 'title' not in entry and not ('summary' in entry
                                                 or 'description' in entry):
                    continue
                item = FakeItem(
                    link, entry.title,
                    entry.get('summary', entry.get('description',
                                                   '')), updated_datetime,
                    thumbnail_url)  # just use whatever thumbnail the channel
                # has?
                items.append(item)
        else:
            items = channel.items.all()[:4]

        cache.cache.set(feed_key, channel)
        cache.cache.set(items_key, items)

    return channel, items
Esempio n. 4
0
def get_channels_and_items(feed_url):
    """Return a (channel, items) pair for feed_url.

    Lookup order: the cache, then the Channel table, then fetching and
    parsing the feed itself (wrapping the result in FakeChannel/FakeItem
    objects).  Raises FeedFetchingError when the feed cannot be fetched
    or parsed.
    """
    feed_sha1 = sha.sha(feed_url).hexdigest()
    feed_key = 'share_feed-' + feed_sha1
    items_key = 'share_feed_items-' + feed_sha1

    # check the cache.  If not...
    cached_channel = cache.cache.get(feed_key)
    cached_items = cache.cache.get(items_key)
    # BUG FIX: the two cache keys can expire independently; a hit on the
    # channel alone used to hand the caller items=None.
    if cached_channel and cached_items is not None:
        channel = cached_channel
        items = cached_items
    else:
        # check to see if we have that feed in our database...
        try:
            channel = Channel.objects.get(url=feed_url)
        except Channel.DoesNotExist:
            ## parse the feed
            try:
                parsed = feedparser.parse(feed_url)
            except Exception:
                logging.exception('error parsing %s' % feed_url)
                raise FeedFetchingError('feedparser error while parsing')

            #ok, so this doesn't work...
            if hasattr(parsed, 'status') and parsed.status != 200:
                raise FeedFetchingError(
                    "Got a non-200 status while parsing feed")

            if parsed.bozo: # this didn't work either
                raise FeedFetchingError('Feed is unparsable')

            ## generate fake channel
            thumbnail_url = (
                settings.STATIC_BASE_URL + 'images/generic_feed_thumb.png')

            channel = FakeChannel(
                parsed.feed.get('title', ''),
                parsed.feed.get('subtitle', ''),
                feed_url,
                parsed.feed.get('link', ''),
                thumbnail_url)

            items = []
            # why not limit the number of items here to 4?
            # Because we might need to check to see if a particular
            # item is in this feed if it's being faked...
            for entry in parsed.entries:
                # prefer the enclosure href; fall back to the entry link
                enclosure = feedutil.get_first_video_enclosure(entry)
                link = None
                if enclosure is not None and 'href' in enclosure:
                    link = enclosure['href']
                if not link:
                    if not entry.get('link'):
                        continue
                    link = entry.link

                updated_datetime = None
                if entry.get('updated_parsed'):
                    # BUG FIX: struct_time[:7] includes the weekday, which
                    # datetime() would treat as microseconds; only the
                    # first six fields are date/time components.
                    updated_datetime = datetime.datetime(
                        *entry.updated_parsed[:6])
                # skip entries with no usable title/description at all
                if 'title' not in entry and not (
                    'summary' in entry or 'description' in entry):
                    continue
                item = FakeItem(
                    link,
                    entry.title,
                    entry.get('summary', entry.get('description', '')),
                    updated_datetime,
                    thumbnail_url) # just use whatever thumbnail the channel
                                   # has?
                items.append(item)
        else:
            items = channel.items.all()[:4]

        cache.cache.set(feed_key, channel)
        cache.cache.set(items_key, items)

    return channel, items