def get_feed_data(self):
    """Collect channel metadata from the parsed feed in ``cleaned_data``.

    Reads the parsed feed object stored under ``cleaned_data['url']``
    (presumably a feedparser result -- confirm against the form's URL
    field) and the channel name stored under ``cleaned_data['name']``.

    Returns a dict with the keys ``url``, ``name``, ``website_url``,
    ``publisher``, ``description`` and ``thumbnail_url``.  Feed values that
    cannot be looked up are stored as ``None``.
    """
    data = {}
    parsed = self.cleaned_data['url']
    data['url'] = parsed.url

    def try_to_get(feed_key):
        # Best-effort lookup: a missing key or an unsubscriptable feed
        # yields None instead of an exception.
        try:
            return to_utf8(parsed['feed'][feed_key])
        except (KeyError, TypeError):
            return None

    data['name'] = self.cleaned_data['name']
    data['website_url'] = try_to_get('link')
    # The feed may have no link at all; matching a regex against None
    # raises TypeError, so only normalize when there is a value.
    if data['website_url'] is not None:
        match = YOUTUBE_USER_URL_RE.match(data['website_url'])
        if match:
            data['website_url'] = 'http://www.youtube.com/user/%s' % (
                match.groups()[0])
    data['publisher'] = try_to_get('publisher')
    data['description'] = try_to_get('description')
    try:
        data['thumbnail_url'] = to_utf8(parsed['feed'].image.href)
    except AttributeError:
        data['thumbnail_url'] = None

    # Special hack for YouTube titles.
    # It's really a PITA to have to strip out 'YouTube :: ' from
    # the title all the time, and it certainly doesn't look good
    # to have channel names like that in Miro.  So we reverse that
    # and put it at the end.
    youtube_re = YOUTUBE_TITLE_RE.match(data['name'])
    if youtube_re:
        data['name'] = u'%s :: YouTube' % (
            youtube_re.groupdict()['realtitle'])
    return data
def from_feedparser_entry(entry):
    """Start building an ``Item`` from a feedparser entry.

    The URL and MIME type come from the first video enclosure when there is
    one.  Otherwise they fall back to the entry's link: links containing
    'youtube.com' are accepted as-is, any other link must pass
    ``filetypes.isAllowedFilename``.

    Raises:
        exceptions.FeedparserEntryError: there is neither a video enclosure
            nor a link.
        exceptions.EntryMissingDataError: the link is not an allowed
            filename, or a KeyError/AttributeError occurred while reading
            the entry.
    """
    # XXX Added some hacks to get a decent item out of YouTube after they
    # stopped having enclosures (2008-1-21).
    enclosure = feedutil.get_first_video_enclosure(entry)
    if enclosure is None:
        if 'link' not in entry:
            raise exceptions.FeedparserEntryError(
                "No video enclosure and no link")
        if (entry['link'].find('youtube.com') == -1
                and not filetypes.isAllowedFilename(entry['link'])):
            raise exceptions.EntryMissingDataError('Link is invalid')
    rv = Item()
    try:
        rv.name = feedutil.to_utf8(entry['title'])
        if enclosure is not None:
            rv.url = feedutil.to_utf8(enclosure['href'])
            # split off the front if there's additional data in the
            # MIME type
            if 'type' in enclosure:
                rv.mime_type = feedutil.to_utf8(
                    enclosure['type']).split(';', 1)[0]
            else:
                rv.mime_type = 'video/unknown'
        elif entry['link'].find('youtube.com') != -1:
            rv.url = entry['link']
            rv.mime_type = 'video/x-flv'
        else:
            rv.url = entry['link']
            rv.mime_type = filetypes.guessMimeType(rv.url)

        # Description sources, in order of preference.
        if enclosure is not None and 'text' in enclosure:
            rv.description = feedutil.to_utf8(enclosure['text'])
        elif 'description' in entry:
            rv.description = feedutil.to_utf8(entry['description'])
        elif 'media_description' in entry:
            rv.description = feedutil.to_utf8(entry['media_description'])
        elif entry.get('link', '').find('youtube.com') != -1:
            # NOTE(review): this branch reads rv.description before any
            # branch above has assigned it -- confirm what it is meant to
            # search.
            match = re.search(r'<div><span>(.*?)</span></div>',
                              rv.description, re.S)
            if match:
                rv.description = feedutil.to_utf8(
                    saxutils.unescape(match.group(1)))
        rv.description  # this will raise an AttributeError if it wasn't set
    except (AttributeError, KeyError) as e:
        raise exceptions.EntryMissingDataError(e.args[0])
    # NOTE(review): as written the function falls off the end here and
    # returns None even though it has built ``rv`` -- this copy looks
    # truncated; confirm.
def from_feedparser_entry(entry):
    """Start building an ``Item`` from a feedparser entry.

    The URL and MIME type come from the first video enclosure when there is
    one.  Otherwise they fall back to the entry's link: links containing
    'youtube.com' are accepted as-is, any other link must pass
    ``filetypes.isAllowedFilename``.

    Raises:
        exceptions.FeedparserEntryError: there is neither a video enclosure
            nor a link.
        exceptions.EntryMissingDataError: the link is not an allowed
            filename, or a KeyError/AttributeError occurred while reading
            the entry.
    """
    # XXX Added some hacks to get a decent item out of YouTube after they
    # stopped having enclosures (2008-1-21).
    enclosure = feedutil.get_first_video_enclosure(entry)
    if enclosure is None:
        if 'link' not in entry:
            raise exceptions.FeedparserEntryError(
                "No video enclosure and no link")
        if (entry['link'].find('youtube.com') == -1
                and not filetypes.isAllowedFilename(entry['link'])):
            raise exceptions.EntryMissingDataError('Link is invalid')
    rv = Item()
    try:
        rv.name = feedutil.to_utf8(entry['title'])
        if enclosure is not None:
            rv.url = feedutil.to_utf8(enclosure['href'])
            # split off the front if there's additional data in the
            # MIME type
            if 'type' in enclosure:
                rv.mime_type = feedutil.to_utf8(
                    enclosure['type']).split(';', 1)[0]
            else:
                rv.mime_type = 'video/unknown'
        elif entry['link'].find('youtube.com') != -1:
            rv.url = entry['link']
            rv.mime_type = 'video/x-flv'
        else:
            rv.url = entry['link']
            rv.mime_type = filetypes.guessMimeType(rv.url)

        # Description sources, in order of preference.
        if enclosure is not None and 'text' in enclosure:
            rv.description = feedutil.to_utf8(enclosure['text'])
        elif 'description' in entry:
            rv.description = feedutil.to_utf8(entry['description'])
        elif 'media_description' in entry:
            rv.description = feedutil.to_utf8(entry['media_description'])
        elif entry.get('link', '').find('youtube.com') != -1:
            # NOTE(review): this branch reads rv.description before any
            # branch above has assigned it -- confirm what it is meant to
            # search.
            match = re.search(r'<div><span>(.*?)</span></div>',
                              rv.description, re.S)
            if match:
                rv.description = feedutil.to_utf8(
                    saxutils.unescape(match.group(1)))
        rv.description  # this will raise an AttributeError if it wasn't set
    except (AttributeError, KeyError) as e:
        raise exceptions.EntryMissingDataError(e.args[0])
    # NOTE(review): as written the function falls off the end here and
    # returns None even though it has built ``rv`` -- this copy looks
    # truncated; confirm.
def try_to_get(feed_key):
    """Return ``parsed['feed'][feed_key]`` run through ``to_utf8``.

    Returns None when the lookup or the conversion raises KeyError or
    TypeError.  ``parsed`` is taken from the enclosing scope.
    """
    try:
        feed = parsed['feed']
        raw_value = feed[feed_key]
        return to_utf8(raw_value)
    except (KeyError, TypeError):
        return None
class Item(Thumbnailable):
    """A single media item belonging to a ``Channel``.

    Stored in the ``cg_channel_item`` table and ordered newest first
    (by ``date``, then ``id``).
    """

    channel = models.ForeignKey(Channel, related_name='items')
    url = models.URLField(max_length=255)
    name = models.CharField(max_length=255)
    description = models.TextField()
    mime_type = models.CharField(max_length=50)
    thumbnail_url = models.CharField(max_length=255, blank=True, null=True)
    size = models.IntegerField()
    guid = models.CharField(max_length=255)
    date = models.DateTimeField()

    class Meta:
        db_table = 'cg_channel_item'
        ordering = ['-date', '-id']

    THUMBNAIL_DIR = 'item-thumbnails'
    THUMBNAIL_SIZES = [
        (97, 65),
        (200, 134),
    ]

    def get_url(self):
        """Return the site-relative URL of this item."""
        return '/items/%i' % self.id

    def get_absolute_url(self):
        """Return ``get_url()`` passed through ``util.make_absolute_url``."""
        return util.make_absolute_url(self.get_url())

    def get_guid(self):
        """Return ``self.guid``, or None if the attribute is missing."""
        try:
            return self.guid
        except AttributeError:
            return None

    def get_missing_image_url(self, width, height):
        """Return the owning channel's thumbnail URL for the given size."""
        return self.channel.thumb_url(width, height)

    def thumb(self):
        """Return a safe-marked ``<img>`` tag for the 97x65 thumbnail.

        Double quotes in the name are replaced by single quotes so they
        cannot terminate the ``alt`` attribute.
        """
        url = self.thumb_url(97, 65)
        # NOTE(review): the height attribute (68) differs from the
        # requested thumbnail height (65) -- confirm which is intended.
        return util.mark_safe(
            '<img width="97" height="68" src="%s" alt="%s">' %
            (url, self.name.replace('"', "'")))

    def download_url(self):
        """Return ``settings.DOWNLOAD_URL`` plus this item's query data."""
        # NOTE(review): requests a 200x133 thumbnail while THUMBNAIL_SIZES
        # lists (200, 134) -- confirm which is intended.
        data = {
            'title1': self.name,
            'description1': self.description,
            'length1': str(self.size),
            'type1': self.mime_type,
            'thumbnail1': self.thumb_url(200, 133),
            'url1': self.url
        }
        return settings.DOWNLOAD_URL + util.format_get_data(data)

    def linked_name(self):
        """Return an ``<a>`` tag linking the item name to its download URL."""
        return '<a href="%s">%s</a>' % (self.download_url(), self.name)

    def update_search_data(self):
        """Not implemented; always raises NotImplementedError."""
        raise NotImplementedError  # not doing this right now
        # Everything below is unreachable and kept only for reference.
        if self.search_data is None:
            #self.search_data = search.ItemSearchData()
            self.search_data.item_id = self.id
        self.search_data.text = ' '.join([self.description, self.url])
        self.search_data.important_text = self.name
        self.search_data.save()

    def download_thumbnail(self, redownload=False):
        """Fetch this item's thumbnail and hand it to ``save_thumbnail``.

        Does nothing when there is no thumbnail URL, or when a thumbnail
        already exists and ``redownload`` is false.  A copy previously
        stored under ``settings.IMAGE_DOWNLOAD_CACHE_DIR`` is reused unless
        ``redownload`` is true; a fresh download is written to that cache
        first.
        """
        if self.thumbnail_url is None:
            return
        if self.thumbnail_exists() and not redownload:
            return

        util.ensure_dir_exists(settings.IMAGE_DOWNLOAD_CACHE_DIR)
        cache_path = os.path.join(settings.IMAGE_DOWNLOAD_CACHE_DIR,
                                  util.hash_string(self.thumbnail_url))
        if os.path.exists(cache_path) and not redownload:
            # Close the cached file once it has been saved so the handle
            # is not leaked.
            image_file = open(cache_path, 'rb')
            try:
                self.save_thumbnail(image_file)
            finally:
                image_file.close()
            return

        # Collapse doubled slashes everywhere except in the first eight
        # characters, which hold the scheme separator.
        url = self.thumbnail_url[:8] + self.thumbnail_url[8:].replace(
            '//', '/')
        image_file = try_to_download_thumb(url)
        if image_file is None:
            return
        util.copy_obj(cache_path, image_file)
        self.save_thumbnail(image_file)

    @staticmethod
    def from_feedparser_entry(entry):
        """Build an unsaved ``Item`` from a feedparser entry.

        The URL and MIME type come from the first video enclosure when
        there is one.  Otherwise they fall back to the entry's link: links
        containing 'youtube.com' are accepted as-is, any other link must
        pass ``filetypes.isAllowedFilename``.

        ``size`` falls back to 0, ``guid`` to '' and ``date`` to None when
        the entry does not provide usable values.

        Raises:
            exceptions.FeedparserEntryError: there is neither a video
                enclosure nor a link.
            exceptions.EntryMissingDataError: the link is not an allowed
                filename, or a KeyError/AttributeError occurred while
                reading the name, URL, MIME type or description.
        """
        # XXX Added some hacks to get a decent item out of YouTube after
        # they stopped having enclosures (2008-1-21).
        enclosure = feedutil.get_first_video_enclosure(entry)
        if enclosure is None:
            if 'link' not in entry:
                raise exceptions.FeedparserEntryError(
                    "No video enclosure and no link")
            if (entry['link'].find('youtube.com') == -1
                    and not filetypes.isAllowedFilename(entry['link'])):
                raise exceptions.EntryMissingDataError('Link is invalid')
        rv = Item()
        try:
            rv.name = feedutil.to_utf8(entry['title'])
            if enclosure is not None:
                rv.url = feedutil.to_utf8(enclosure['href'])
                # split off the front if there's additional data in the
                # MIME type
                if 'type' in enclosure:
                    rv.mime_type = feedutil.to_utf8(
                        enclosure['type']).split(';', 1)[0]
                else:
                    rv.mime_type = 'video/unknown'
            elif entry['link'].find('youtube.com') != -1:
                rv.url = entry['link']
                rv.mime_type = 'video/x-flv'
            else:
                rv.url = entry['link']
                rv.mime_type = filetypes.guessMimeType(rv.url)

            # Description sources, in order of preference.
            if enclosure is not None and 'text' in enclosure:
                rv.description = feedutil.to_utf8(enclosure['text'])
            elif 'description' in entry:
                rv.description = feedutil.to_utf8(entry['description'])
            elif 'media_description' in entry:
                rv.description = feedutil.to_utf8(
                    entry['media_description'])
            elif entry.get('link', '').find('youtube.com') != -1:
                # NOTE(review): this branch reads rv.description before
                # any branch above has assigned it -- confirm what it is
                # meant to search.
                match = re.search(r'<div><span>(.*?)</span></div>',
                                  rv.description, re.S)
                if match:
                    rv.description = feedutil.to_utf8(
                        saxutils.unescape(match.group(1)))
            rv.description  # this will raise an AttributeError if it wasn't set
        except (AttributeError, KeyError) as e:
            raise exceptions.EntryMissingDataError(e.args[0])

        if enclosure is not None:
            try:
                rv.size = int(feedutil.to_utf8(enclosure['length']))
            except (KeyError, ValueError):
                rv.size = 0
        else:
            # No enclosure means no length to read; ``size`` is a required
            # integer column, so store 0 instead of leaving it unset.
            rv.size = 0

        try:
            rv.guid = feedutil.to_utf8(entry['id'])
        except KeyError:
            rv.guid = ''

        try:
            updated_parsed = entry['updated_parsed']
            if updated_parsed is None:
                # I think this is a feedparser bug, if you can't parse the
                # updated time, why set the attribute?
                raise KeyError('updated_parsed')
            rv.date = feedutil.struct_time_to_datetime(updated_parsed)
        except KeyError:
            rv.date = None

        rv.thumbnail_url = feedutil.get_thumbnail_url(entry)
        return rv