def from_feedparser_entry(entry):
    """Build an Item from a single feedparser entry.

    Prefers the entry's first video enclosure; otherwise falls back to
    the entry link, with special handling for YouTube feeds.
    XXX Added some hacks to get a decent item out of YouTube after they
    stopped having enclosures (2008-1-21).

    Raises FeedparserEntryError when there is neither an enclosure nor
    a link, and EntryMissingDataError when a required field is missing
    or the link is not an allowed video filename.
    """
    enclosure = feedutil.get_first_video_enclosure(entry)
    if enclosure is None:
        if 'link' not in entry:
            # fixed typo in message: "ngo link" -> "no link"
            raise exceptions.FeedparserEntryError(
                "No video enclosure and no link")
        # YouTube links are handled specially below; any other bare
        # link must at least look like a video file.
        if entry['link'].find('youtube.com') == -1:
            if not filetypes.isAllowedFilename(entry['link']):
                raise exceptions.EntryMissingDataError('Link is invalid')
    rv = Item()
    try:
        rv.name = feedutil.to_utf8(entry['title'])
        if enclosure is not None:
            rv.url = feedutil.to_utf8(enclosure['href'])
            # split off the front if there's additional data in the
            # MIME type (e.g. "video/mp4; codecs=...")
            if 'type' in enclosure:
                rv.mime_type = feedutil.to_utf8(
                    enclosure['type']).split(';', 1)[0]
            else:
                rv.mime_type = 'video/unknown'
        elif entry['link'].find('youtube.com') != -1:
            rv.url = entry['link']
            rv.mime_type = 'video/x-flv'
        else:
            rv.url = entry['link']
            rv.mime_type = filetypes.guessMimeType(rv.url)
        if enclosure is not None and 'text' in enclosure:
            rv.description = feedutil.to_utf8(enclosure['text'])
        elif 'description' in entry:
            rv.description = feedutil.to_utf8(entry['description'])
        elif 'media_description' in entry:
            rv.description = feedutil.to_utf8(entry['media_description'])
        elif entry.get('link', '').find('youtube.com') != -1:
            # NOTE(review): rv.description is never assigned on this
            # path, so the access below raises AttributeError, which
            # becomes EntryMissingDataError -- the same net effect as
            # the final check.  Presumably this once scraped a YouTube
            # HTML snippet; confirm intent before removing.
            match = re.search(r'<div><span>(.*?)</span></div>',
                              rv.description, re.S)
            if match:
                rv.description = feedutil.to_utf8(
                    saxutils.unescape(match.group(1)))
        rv.description  # raises AttributeError if it was never set
    except (AttributeError, KeyError) as e:
        raise exceptions.EntryMissingDataError(e.args[0])
    # bug fix: the original fell off the end and returned None
    return rv
def from_feedparser_entry(entry):
    """Populate an Item from one feedparser entry.

    Uses the first video enclosure when present, otherwise the entry
    link; YouTube links get a hard-coded flv MIME type because YouTube
    dropped enclosures (2008-1-21).

    Raises FeedparserEntryError / EntryMissingDataError on bad entries.
    """
    enclosure = feedutil.get_first_video_enclosure(entry)
    if enclosure is None:
        if 'link' not in entry:
            raise exceptions.FeedparserEntryError(
                "No video enclosure and ngo link")
        # non-YouTube links must look like an allowed video filename
        if ('youtube.com' not in entry['link']
                and not filetypes.isAllowedFilename(entry['link'])):
            raise exceptions.EntryMissingDataError('Link is invalid')
    item = Item()
    try:
        item.name = feedutil.to_utf8(entry['title'])
        if enclosure is not None:
            item.url = feedutil.to_utf8(enclosure['href'])
            if 'type' in enclosure:
                # keep only the media type, dropping ";"-separated params
                full_type = feedutil.to_utf8(enclosure['type'])
                item.mime_type = full_type.split(';', 1)[0]
            else:
                item.mime_type = 'video/unknown'
        elif 'youtube.com' in entry['link']:
            item.url = entry['link']
            item.mime_type = 'video/x-flv'
        else:
            item.url = entry['link']
            item.mime_type = filetypes.guessMimeType(item.url)
        if enclosure is not None and 'text' in enclosure:
            item.description = feedutil.to_utf8(enclosure['text'])
        elif 'description' in entry:
            item.description = feedutil.to_utf8(entry['description'])
        elif 'media_description' in entry:
            item.description = feedutil.to_utf8(entry['media_description'])
        elif 'youtube.com' in entry.get('link', ''):
            match = re.search(r'<div><span>(.*?)</span></div>',
                              item.description, re.S)
            if match:
                unescaped = saxutils.unescape(match.group(1))
                item.description = feedutil.to_utf8(unescaped)
        # attribute access doubles as the "description was set" check
        item.description
    except (AttributeError, KeyError) as e:
        raise exceptions.EntryMissingDataError(e.args[0])
    # NOTE(review): the built item is discarded -- no value is
    # returned; confirm callers really expect None here.
def get_channels_and_items(feed_url):
    """Return a (channel, items) pair for feed_url.

    Checks the cache first, then the Channel table; for an unknown feed
    it fetches and parses the URL with feedparser and wraps the result
    in FakeChannel/FakeItem objects.  Results are written to the cache.

    Raises FeedFetchingError when the feed cannot be fetched or parsed.
    """
    feed_sha1 = sha.sha(feed_url).hexdigest()
    feed_key = 'share_feed-' + feed_sha1
    items_key = 'share_feed_items-' + feed_sha1
    # check the cache.  Require BOTH entries: the channel can outlive
    # the item list in the cache, and we must not hand back items=None.
    cached_channel = cache.cache.get(feed_key)
    cached_items = cache.cache.get(items_key)
    if cached_channel and cached_items is not None:
        channel = cached_channel
        items = cached_items
    else:
        # check to see if we have that feed in our database...
        try:
            channel = Channel.objects.get(url=feed_url)
        except Channel.DoesNotExist:
            ## parse the feed
            try:
                parsed = feedparser.parse(feed_url)
            except Exception:
                logging.exception('error parsing %s' % feed_url)
                raise FeedFetchingError('feedparser error while parsing')
            # feedparser only sets .status for HTTP fetches
            if hasattr(parsed, 'status') and parsed.status != 200:
                raise FeedFetchingError(
                    "Got a non-200 status while parsing feed")
            if parsed.bozo:
                raise FeedFetchingError('Feed is unparsable')
            ## generate fake channel
            thumbnail_url = (settings.STATIC_BASE_URL
                             + 'images/generic_feed_thumb.png')
            channel = FakeChannel(parsed.feed.get('title', ''),
                                  parsed.feed.get('subtitle', ''),
                                  feed_url,
                                  parsed.feed.get('link', ''),
                                  thumbnail_url)
            items = []
            # Don't limit the number of items to 4 here: we might need
            # to check whether a particular item is in this feed when
            # it's being faked.
            for entry in parsed.entries:
                enclosure = feedutil.get_first_video_enclosure(entry)
                link = None
                if enclosure is not None and 'href' in enclosure:
                    link = enclosure['href']
                if not link:
                    if not entry.get('link'):
                        continue
                    link = entry.link
                updated_datetime = None
                if entry.get('updated_parsed'):
                    # [:6] -- the 7th struct_time field is tm_wday,
                    # which must not be passed as the microsecond arg
                    updated_datetime = datetime.datetime(
                        *entry.updated_parsed[:6])
                if 'title' not in entry:
                    # entry.title is accessed unconditionally below
                    # (summary/description already fall back to '')
                    continue
                item = FakeItem(
                    link,
                    entry.title,
                    entry.get('summary', entry.get('description', '')),
                    updated_datetime,
                    thumbnail_url)  # just reuse the channel thumbnail
                items.append(item)
        else:
            items = channel.items.all()[:4]
        cache.cache.set(feed_key, channel)
        cache.cache.set(items_key, items)
    return channel, items
def get_channels_and_items(feed_url):
    """Look up (channel, items) for feed_url.

    Order of preference: cache, then the Channel table, then a live
    feedparser fetch wrapped in FakeChannel/FakeItem.  The result is
    stored back into the cache.

    Raises FeedFetchingError on fetch/parse problems.
    """
    digest = sha.sha(feed_url).hexdigest()
    channel_key = 'share_feed-' + digest
    items_key = 'share_feed_items-' + digest
    # cache lookup first
    channel = cache.cache.get(channel_key)
    items = cache.cache.get(items_key)
    if not channel:
        # not cached -- try the database next
        try:
            channel = Channel.objects.get(url=feed_url)
        except Channel.DoesNotExist:
            ## fall back to fetching and parsing the feed
            try:
                parsed = feedparser.parse(feed_url)
            except Exception:
                logging.exception('error parsing %s' % feed_url)
                raise FeedFetchingError('feedparser error while parsing')
            #ok, so this doesn't work...
            if hasattr(parsed, 'status') and parsed.status != 200:
                raise FeedFetchingError(
                    "Got a non-200 status while parsing feed")
            if parsed.bozo:  # this didn't work either
                raise FeedFetchingError('Feed is unparsable')
            ## build a stand-in channel object
            thumbnail_url = '%simages/generic_feed_thumb.png' % (
                settings.STATIC_BASE_URL,)
            feed_meta = parsed.feed
            channel = FakeChannel(feed_meta.get('title', ''),
                                  feed_meta.get('subtitle', ''),
                                  feed_url,
                                  feed_meta.get('link', ''),
                                  thumbnail_url)
            # No 4-item cap here: we may later need to check whether a
            # particular item belongs to this (faked) feed.
            items = []
            for entry in parsed.entries:
                enclosure = feedutil.get_first_video_enclosure(entry)
                link = None
                if enclosure is not None:
                    link = enclosure.get('href')
                if not link:
                    if not entry.get('link'):
                        continue
                    link = entry.link
                when = None
                if entry.get('updated_parsed'):
                    when = datetime.datetime(*entry.updated_parsed[:7])
                has_text = 'summary' in entry or 'description' in entry
                if 'title' not in entry and not has_text:
                    continue
                summary = entry.get('summary',
                                    entry.get('description', ''))
                # the channel thumbnail doubles as the item thumbnail
                items.append(FakeItem(link, entry.title, summary,
                                      when, thumbnail_url))
        else:
            items = channel.items.all()[:4]
        cache.cache.set(channel_key, channel)
        cache.cache.set(items_key, items)
    return channel, items