def from_feedparser_entry(entry, channel):
    """Build a podcastItem for *channel* from a feedparser entry.

    The download URL is taken from the entry's enclosures: the first
    enclosure by default, or - when there are several - the first
    audio/video enclosure that has href, length and type and whose URL
    normalizes successfully.  If the entry has no enclosures, the entry
    link is used when its file extension maps to a known file type.

    Returns the populated episode.
    Raises ValueError if no usable URL could be determined.
    """
    episode = podcastItem(channel)

    summary = entry.get('summary', '')
    # The fallback title is computed eagerly, exactly as a .get() default
    episode.title = entry.get(
        'title', util.get_first_line(util.remove_html_tags(summary)))
    episode.link = entry.get('link', '')
    episode.description = util.remove_html_tags(
        entry.get('summary', entry.get('link', entry.get('title', ''))))
    episode.guid = entry.get('id', '')

    if entry.get('updated_parsed', None):
        episode.pubDate = util.updated_parsed_to_rfc2822(entry.updated_parsed)

    if episode.title == '':
        log('Warning: Episode has no title, adding anyways.. (Feed Is Buggy!)',
            sender=episode)

    enclosure = None
    if hasattr(entry, 'enclosures') and len(entry.enclosures) > 0:
        enclosure = entry.enclosures[0]
        if len(entry.enclosures) > 1:
            # Several enclosures: prefer the first complete audio/video one
            for e in entry.enclosures:
                is_complete = (hasattr(e, 'href') and hasattr(e, 'length')
                               and hasattr(e, 'type'))
                if not is_complete:
                    continue
                if not e.type.startswith(('audio/', 'video/')):
                    continue
                if util.normalize_feed_url(e.href) is not None:
                    log('Selected enclosure: %s', e.href, sender=episode)
                    enclosure = e
                    break
        episode.url = util.normalize_feed_url(enclosure.get('href', ''))
    elif hasattr(entry, 'link'):
        extension = util.file_extension_from_url(entry.link)
        file_type = util.file_type_by_extension(extension)
        if file_type is not None:
            log('Adding episode with link to file type "%s".', file_type,
                sender=episode)
            episode.url = entry.link

    if not episode.url:
        raise ValueError('Episode has an invalid URL')

    if not episode.pubDate:
        metainfo = episode.get_metainfo()
        if 'pubdate' in metainfo:
            episode.pubDate = metainfo['pubdate']

    if hasattr(enclosure, 'length'):
        try:
            episode.length = int(enclosure.length)
        except (TypeError, ValueError):
            # Feed supplied a non-numeric length; mark it as unknown
            episode.length = -1

    # For episodes with a small length amount, try to find it via HTTP HEAD
    if episode.length <= 100:
        metainfo = episode.get_metainfo()
        if 'length' in metainfo:
            episode.length = metainfo['length']

    if hasattr(enclosure, 'type'):
        episode.mimetype = enclosure.type

    if episode.title == '':
        # Last resort: name the episode after the file in its URL
        filename, _extension = os.path.splitext(os.path.basename(episode.url))
        episode.title = filename

    return episode
def from_podcastparser_entry(cls, entry, channel):
    """Create an episode for *channel* from a podcastparser entry dict.

    Metadata is copied from the entry, then the first acceptable
    enclosure provides the download URL.  If no enclosure is usable,
    the entry link is tried instead.

    Returns the episode, or None when no usable URL was found.
    """
    episode = cls(channel)
    episode.guid = entry['guid']
    episode.title = entry['title']
    episode.link = entry['link']
    episode.description = entry['description']

    html_description = entry.get('description_html')
    if html_description:
        episode.description_html = html_description
    # TODO: This really should be handled in podcastparser and not here.
    elif util.is_html(entry['description']):
        episode.description_html = entry['description']
        episode.description = util.remove_html_tags(entry['description'])

    episode.total_time = entry['total_time']
    episode.published = entry['published']
    episode.payment_url = entry['payment_url']

    # True as soon as any enclosure carries audio or video
    media_available = any(
        candidate['mime_type'].startswith(('audio/', 'video/'))
        for candidate in entry['enclosures'])

    for candidate in entry['enclosures']:
        episode.mime_type = candidate['mime_type']

        if media_available:
            # Skip images in feeds if audio or video is available (bug 979)
            # This must (and does) also look in Media RSS enclosures (bug 1430)
            if episode.mime_type.startswith('image/'):
                continue
            # If we have audio or video available later on, skip
            # 'application/octet-stream' data types (fixes Linux Outlaws)
            if episode.mime_type == 'application/octet-stream':
                continue

        episode.url = util.normalize_feed_url(candidate['url'])
        if episode.url:
            episode.file_size = candidate['file_size']
            return episode

    # Brute-force detection of the episode link
    episode.url = util.normalize_feed_url(entry['link'])
    if not episode.url:
        return None

    for provider in (youtube, vimeo, escapist_videos):
        if provider.is_video_link(episode.url):
            return episode

    # Check if we can resolve this link to a audio/video file
    _, extension = util.filename_from_url(episode.url)
    if util.file_type_by_extension(extension) is None:
        return None

    # The link points to a audio or video file - use it!
    return episode
def from_podcastparser_entry(cls, entry, channel):
    """Create an episode for *channel* from a podcastparser entry dict.

    The first enclosure that passes the mime-type filters and has a
    normalizable URL is used.  Otherwise the entry link is accepted if
    it is a known video link or resolves to a known file type.

    Returns the episode, or None when no usable URL was found.
    """
    episode = cls(channel)
    for field in ("guid", "title", "link", "description",
                  "total_time", "published", "payment_url"):
        setattr(episode, field, entry[field])

    def _any_enclosure_is(prefix):
        return any(item["mime_type"].startswith(prefix)
                   for item in entry["enclosures"])

    audio_available = _any_enclosure_is("audio/")
    video_available = _any_enclosure_is("video/")
    media_available = audio_available or video_available

    for item in entry["enclosures"]:
        episode.mime_type = item["mime_type"]

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if media_available and episode.mime_type.startswith("image/"):
            continue

        # If we have audio or video available later on, skip
        # 'application/octet-stream' data types (fixes Linux Outlaws)
        if media_available and episode.mime_type == "application/octet-stream":
            continue

        episode.url = util.normalize_feed_url(item["url"])
        if not episode.url:
            continue

        episode.file_size = item["file_size"]
        return episode

    # Brute-force detection of the episode link
    episode.url = util.normalize_feed_url(entry["link"])
    if not episode.url:
        return None

    if any(site.is_video_link(episode.url)
           for site in (youtube, vimeo, escapist_videos)):
        return episode

    # Check if we can resolve this link to a audio/video file
    _name, extension = util.filename_from_url(episode.url)
    known_type = util.file_type_by_extension(extension)

    # The link points to a audio or video file - use it!
    return episode if known_type is not None else None
def normalize_feed_url(self, url):
    """Expand a "prefix:" shortcut in *url*, then normalize the result.

    The first entry of self.get_prefixes() whose "prefix:" starts the
    URL is applied: the remainder of the URL is substituted into that
    entry's expansion template.  The (possibly expanded) URL is then
    passed to util.normalize_feed_url() and its result is returned.
    """
    for prefix, expansion in self.get_prefixes().items():
        marker = prefix + ':'
        if not url.startswith(marker):
            continue

        expanded = expansion % (url[len(marker):],)
        logger.info('Expanding prefix {} -> {}'.format(url, expanded))
        url = expanded
        break

    return util.normalize_feed_url(url)
def normalize_feed_url(self, url):
    """Normalize *url* after expanding a leading "prefix:" shortcut.

    Only the first matching prefix from self.get_prefixes() is expanded;
    the expansion is logged at info level.  Returns whatever
    util.normalize_feed_url() returns for the resulting URL.
    """
    matches = ((prefix, template)
               for prefix, template in self.get_prefixes().items()
               if url.startswith(prefix + ':'))
    first_match = next(matches, None)

    if first_match is not None:
        prefix, template = first_match
        original = url
        remainder = url[len(prefix) + 1:]
        url = template % (remainder,)
        logger.info('Expanding prefix {} -> {}'.format(original, url))

    return util.normalize_feed_url(url)
def rewrite_url(self, new_url):
    """Replace this object's URL with the normalized form of *new_url*.

    When normalization yields None nothing is modified.  Otherwise the
    URL is stored, the cached HTTP etag / last-modified values are
    cleared and the object is saved.

    Returns the normalized URL, or None if it could not be normalized.
    """
    normalized = util.normalize_feed_url(new_url)
    if normalized is not None:
        self.url = normalized
        self.http_etag = None
        self.http_last_modified = None
        self.save()
    return normalized
def get_podcast(self, url):
    """Get a specific podcast by URL

    Returns a podcast object for the URL or None if the URL cannot be
    normalized or the podcast has not been subscribed to.
    """
    url = util.normalize_feed_url(url)
    if url is None:
        # normalize_feed_url() may return None for an unusable URL; there
        # is nothing to look up in that case.
        return None

    channel = PodcastChannel.load(self._db, url, create=False,
                                  download_dir=self._config.download_dir)
    if channel is None:
        return None
    return Podcast(channel, self)
def receive_clipboard_text(clipboard, text, second_try):
    """Clipboard callback: put a URL found in the clipboard into the entry.

    ``self`` is taken from the enclosing scope.  If *text* normalizes to
    a feed URL it is written into self.entry_url.  Otherwise, unless
    this already is the second attempt, text is requested from a new
    gtk.Clipboard() with this same callback.
    """
    # Heuristic: If there is a space in the clipboard
    # text, assume it's some arbitrary text, and no URL
    url = None
    if text is not None and ' ' not in text:
        url = util.normalize_feed_url(text)

    if url is not None:
        entry = self.entry_url
        entry.set_text(url)
        entry.set_position(-1)
        return

    if second_try:
        return

    clipboard = gtk.Clipboard()
    clipboard.request_text(receive_clipboard_text, True)
def receive_clipboard_text(clipboard, text, second_try):
    """Clipboard callback: put a URL found in the clipboard into the entry.

    ``self`` is taken from the enclosing scope.  If *text* normalizes to
    a feed URL it is written into self.entry_url.  Otherwise, unless
    this already is the second attempt, text is requested from the
    PRIMARY selection clipboard with this same callback.
    """
    # Heuristic: If space is present in clipboard text
    # normalize_feed_url will either fix to valid url or
    # return None if URL cannot be validated
    url = util.normalize_feed_url(text) if text is not None else None

    if url is not None:
        entry = self.entry_url
        entry.set_text(url)
        entry.set_position(-1)
        return

    if second_try:
        return

    clipboard = Gtk.Clipboard.get(Gdk.SELECTION_PRIMARY)
    clipboard.request_text(receive_clipboard_text, True)
def get_podcast(self, url):
    """Look up a subscribed podcast by its URL

    The URL is normalized before the lookup.  Returns a Podcast object,
    or None if the URL cannot be normalized or the podcast has not been
    subscribed to.
    """
    normalized = util.normalize_feed_url(url)
    if normalized is None:
        return None

    channel = self._model.load_podcast(normalized, create=False)
    return None if channel is None else Podcast(channel, self)
def del_channel(url):
    """Remove every saved channel whose URL equals the normalized *url*.

    Each removed channel is reported with a 'delete' message.  The
    remaining channels are saved only if at least one was removed;
    otherwise an error message is emitted.
    """
    target = util.normalize_feed_url(url)
    channels = load_channels(load_items=False)

    remaining = []
    for channel in channels:
        if channel.url == target:
            msg('delete', urllib.unquote(channel.url))
            continue
        remaining.append(channel)

    if len(remaining) < len(channels):
        save_channels(remaining)
        return

    msg('error', _('Could not remove podcast.'))
def create_podcast(self, url, title=None):
    """Subscribe to a new podcast

    Add a subscription for "url", optionally renaming the podcast to
    "title" and return the resulting object.

    Returns None if the URL cannot be normalized or the podcast could
    not be loaded.
    """
    url = util.normalize_feed_url(url)
    if url is None:
        # normalize_feed_url() may return None for an unusable URL;
        # do not try to create a subscription for it.
        return None

    podcast = self._model.load_podcast(
        url, create=True,
        max_episodes=self._config.max_episodes_per_feed,
        mimetype_prefs=self._config.mimetype_prefs)
    if podcast is None:
        return None

    if title is not None:
        podcast.rename(title)
    podcast.save()
    return Podcast(podcast, self)
def on_btn_add_clicked(self, widget):
    """Handle a click on the "add" button.

    Reads and strips the text of self.entry_url, invokes the close
    handler, and - if an add_podcast_list callback is set - calls it
    with a single (title, url) pair.
    """
    url = self.entry_url.get_text().strip()
    self.on_btn_close_clicked(widget)
    if self.add_podcast_list is not None:
        title = None  # FIXME: Add title GUI element
        self.add_podcast_list([(title, url)])
def create_podcast(self, url, title=None):
    """Subscribe to a new podcast

    Add a subscription for "url", optionally renaming the podcast to
    "title" and return the resulting object.

    Returns None if the URL cannot be normalized or the podcast could
    not be loaded.
    """
    url = util.normalize_feed_url(url)
    if url is None:
        # normalize_feed_url() may return None for an unusable URL;
        # do not try to create a subscription for it.
        return None

    podcast = PodcastChannel.load(
        self._db, url, create=True,
        max_episodes=self._config.max_episodes_per_feed,
        download_dir=self._config.download_dir,
        allow_empty_feeds=self._config.allow_empty_feeds)
    if podcast is None:
        return None

    if title is not None:
        podcast.set_custom_title(title)
    podcast.save()
    return Podcast(podcast, self)
def rewrite_url(self, url):
    """Point this podcast at a different feed URL. Use with care.

    The URL is normalized first; if that yields None nothing is changed
    and None is returned.  Otherwise the new URL is stored, the cached
    HTTP validators are cleared, the podcast is saved and the
    normalized URL is returned.

    See also: gPodder bug 1020
    """
    normalized = util.normalize_feed_url(url)
    if normalized is not None:
        self._podcast.url = normalized

        # Remove etag + last_modified to force a refresh next time
        self._podcast.http_etag = self._podcast.http_last_modified = None

        self._podcast.save()
    return normalized
def create_podcast(self, url, title=None):
    """Create a subscription for the given feed URL

    The URL is normalized, the podcast is loaded (and created) through
    the model, optionally renamed to "title", and saved.

    Returns the resulting Podcast object, or None if the URL cannot be
    normalized or the model returns no podcast.
    """
    normalized = util.normalize_feed_url(url)
    if normalized is None:
        return None

    podcast = self._model.load_podcast(
        normalized,
        create=True,
        max_episodes=self._config.max_episodes_per_feed,
    )
    if podcast is None:
        return None

    if title is not None:
        podcast.rename(title)
    podcast.save()
    return Podcast(podcast, self)
def create_podcast(self, url, title=None):
    """Subscribe to a new podcast

    Add a subscription for "url", optionally renaming the podcast to
    "title" and return the resulting object.

    Returns None if the URL cannot be normalized or the podcast could
    not be loaded.
    """
    url = util.normalize_feed_url(url)
    if url is None:
        # normalize_feed_url() may return None for an unusable URL;
        # do not try to create a subscription for it.
        return None

    podcast = PodcastChannel.load(
        self._db, url, create=True,
        max_episodes=self._config.max_episodes_per_feed,
        download_dir=self._config.download_dir,
        allow_empty_feeds=self._config.allow_empty_feeds,
        mimetype_prefs=self._config.mimetype_prefs)
    if podcast is None:
        return None

    if title is not None:
        podcast.set_custom_title(title)
    podcast.save()
    return Podcast(podcast, self)
def add_channel(url):
    """Subscribe to the feed at *url* and report the outcome via msg().

    The URL is normalized, the channel is fetched with a forced update,
    and - unless a channel with the same URL is already saved - it is
    appended to the saved channel list.  Every outcome (added, already
    added, load failure, nothing to add) is reported through msg().
    """
    url = util.normalize_feed_url(url)
    if url is None:
        # The messages below pass the URL to urllib.unquote(), which
        # cannot handle None - reject an unusable URL up front.
        msg('error', _('Could not add podcast.'))
        return

    try:
        channel = podcastChannel.get_by_url(url, force_update=True)
        podcastChannel.sync_cache()
    except Exception:
        # Best effort: any failure while fetching is reported, not raised
        msg('error', _('Could not load feed from URL: %s'), urllib.unquote(url))
        return

    if not channel:
        msg('error', _('Could not add podcast.'))
        return

    channels = load_channels(load_items=False)
    if channel.url in (c.url for c in channels):
        msg('error', _('Already added: %s'), urllib.unquote(url))
        return

    channels.append(channel)
    save_channels(channels)
    msg('add', urllib.unquote(url))
def from_feedparser_entry(cls, entry, channel):
    """Create an episode for *channel* from a feedparser entry.

    Metadata (guid, title, link, description, duration, publication
    date, payment URL) is copied from the entry.  The download URL is
    taken from the first usable source, tried in this order:

    1. enclosures,
    2. Media RSS content,
    3. any entry link that is a YouTube/Vimeo video link or that
       resolves to a known file type.

    Returns the populated episode, or None if no usable URL was found.
    """
    episode = cls(channel)
    episode.guid = entry.get('id', '')

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')

    if 'content' in entry and len(entry['content']) and \
            entry['content'][0].get('type', '') == 'text/html':
        episode.description = entry['content'][0].value
    else:
        episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    try:
        total_time = 0

        # Parse iTunes-specific podcast duration metadata
        itunes_duration = entry.get('itunes_duration', '')
        if itunes_duration:
            total_time = util.parse_time(itunes_duration)

        # Parse time from YouTube descriptions if it's a YouTube feed
        if youtube.is_youtube_guid(episode.guid):
            result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                               episode.description)
            if result:
                youtube_duration = result.group(1)
                total_time = util.parse_time(youtube_duration)

        episode.total_time = total_time
    except Exception:
        # Duration is optional metadata: on any parse problem leave
        # total_time untouched instead of rejecting the episode.
        pass

    episode.published = feedcore.get_pubdate(entry)

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    candidates = enclosures + media_rss_content

    audio_available = any(e.get('type', '').startswith('audio/')
                          for e in candidates)
    video_available = any(e.get('type', '').startswith('video/')
                          for e in candidates)
    media_available = audio_available or video_available

    # XXX: Make it possible for hooks/extensions to override this by
    # giving them a list of enclosures and the "self" object (podcast)
    # and letting them sort and/or filter the list of enclosures to
    # get the desired enclosure picked by the algorithm below.
    filter_and_sort_enclosures = lambda x: x

    # read the flattr auto-url, if exists
    # (.get() / membership test so a link without rel/href is ignored
    # instead of raising KeyError)
    payment_info = [link['href'] for link in entry.get('links', [])
                    if link.get('rel') == 'payment' and 'href' in link]
    if payment_info:
        episode.payment_url = sorted(payment_info,
                                     key=get_payment_priority)[0]

    # Enclosures
    for e in filter_and_sort_enclosures(enclosures):
        episode.mime_type = e.get('type', 'application/octet-stream')
        if episode.mime_type == '':
            # See Maemo bug 10036
            logger.warning('Fixing empty mimetype in ugly feed')
            episode.mime_type = 'application/octet-stream'

        if '/' not in episode.mime_type:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if episode.mime_type.startswith('image/') and media_available:
            continue

        # If we have audio or video available later on, skip
        # 'application/octet-stream' data types (fixes Linux Outlaws)
        if episode.mime_type == 'application/octet-stream' and media_available:
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.file_size = -1

        return episode

    # Media RSS content
    for m in filter_and_sort_enclosures(media_rss_content):
        episode.mime_type = m.get('type', 'application/octet-stream')
        if '/' not in episode.mime_type:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if episode.mime_type.startswith('image/') and media_available:
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(m.get('filesize', 0)) or -1
        except (TypeError, ValueError):
            episode.file_size = -1

        try:
            episode.total_time = int(m.get('duration', 0)) or 0
        except (TypeError, ValueError):
            episode.total_time = 0

        return episode

    # Brute-force detection of any links
    for link in entry.get('links', ()):
        episode.url = util.normalize_feed_url(link.get('href', ''))
        if not episode.url:
            continue

        if (youtube.is_video_link(episode.url) or
                vimeo.is_video_link(episode.url)):
            return episode

        # Check if we can resolve this link to a audio/video file
        _filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(link, 'type'):
            extension = util.extension_from_mimetype(link.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    return None
def from_feedparser_entry(entry, channel):
    """Create a PodcastEpisode for *channel* from a feedparser entry.

    Title, link, description, guid and publication date are copied from
    the entry.  The download URL is taken from the first usable source,
    tried in this order:

    1. enclosures,
    2. Media RSS content,
    3. any entry link that is a YouTube video link or that resolves to
       a known file type,
    4. the first http:// link ending in ".mp3" found in the entry's
       content.

    Returns the populated episode, or None if no usable URL was found.
    """
    episode = PodcastEpisode(channel)

    episode.title = entry.get('title', '')
    episode.link = entry.get('link', '')
    episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    episode.guid = entry.get('id', '')
    if entry.get('updated_parsed', None):
        episode.pubDate = rfc822.mktime_tz(entry.updated_parsed + (0,))

    # Enclosures
    for e in entry.get('enclosures', ()):
        episode.mimetype = e.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.length = -1

        return episode

    # Media RSS content
    for m in entry.get('media_content', ()):
        episode.mimetype = m.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(m.fileSize) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            episode.length = -1

        return episode

    # Brute-force detection of any links
    for link in entry.get('links', ()):
        episode.url = util.normalize_feed_url(link.get('href', ''))
        if not episode.url:
            continue

        if youtube.is_video_link(episode.url):
            return episode

        # Check if we can resolve this link to a audio/video file
        _filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(link, 'type'):
            extension = util.extension_from_mimetype(link.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    # Scan MP3 links in description text
    mp3s = re.compile(r'http://[^"]*\.mp3')
    for content in entry.get('content', ()):
        match = mp3s.search(content.value)
        if match is not None:
            episode.url = match.group(0)
            return episode

    return None
def from_feedparser_entry(entry, channel, mimetype_prefs=''):
    """Create a PodcastEpisode for *channel* from a feedparser entry.

    mimetype_prefs is a comma-separated list of mime types; enclosures
    and Media RSS items are tried in that order of preference, with
    unlisted types last.

    The download URL is taken from the first usable source, tried in
    this order: enclosures, Media RSS content, entry links (YouTube
    video links or links resolving to a known file type), and finally
    the first http:// link ending in ".mp3" in the entry's content.

    An episode is always returned.  If no URL was found, the episode
    gets state gpodder.STATE_NORMAL and an empty URL.
    """
    episode = PodcastEpisode(channel)

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')

    if 'content' in entry and len(entry['content']) and \
            entry['content'][0].get('type', '') == 'text/html':
        episode.description = entry['content'][0].value
    else:
        episode.description = entry.get('summary', '')

    try:
        # Parse iTunes-specific podcast duration metadata
        total_time = util.parse_time(entry.get('itunes_duration', ''))
        episode.total_time = total_time
    except Exception:
        # Duration is optional metadata: on any parse problem leave
        # total_time untouched instead of rejecting the episode.
        pass

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    episode.guid = entry.get('id', '')
    if entry.get('updated_parsed', None):
        episode.pubDate = rfc822.mktime_tz(entry.updated_parsed + (0,))

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    candidates = enclosures + media_rss_content

    audio_available = any(e.get('type', '').startswith('audio/')
                          for e in candidates)
    video_available = any(e.get('type', '').startswith('video/')
                          for e in candidates)
    media_available = audio_available or video_available

    # Create the list of preferred mime types
    preferred_types = mimetype_prefs.split(',')

    def calculate_preference_value(enclosure):
        """Calculate preference value of an enclosure

        This is based on mime types and allows users to prefer
        certain mime types over others (e.g. MP3 over AAC, ...)
        """
        mimetype = enclosure.get('type', None)
        try:
            # If the mime type is found, return its (zero-based) index
            return preferred_types.index(mimetype)
        except ValueError:
            # If it is not found, assume it comes after all listed items
            return len(preferred_types)

    # Enclosures
    for e in sorted(enclosures, key=calculate_preference_value):
        episode.mimetype = e.get('type', 'application/octet-stream')
        if episode.mimetype == '':
            # See Maemo bug 10036
            log('Fixing empty mimetype in ugly feed', sender=episode)
            episode.mimetype = 'application/octet-stream'

        if '/' not in episode.mimetype:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if episode.mimetype.startswith('image/') and media_available:
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.length = -1

        return episode

    # Media RSS content
    for m in sorted(media_rss_content, key=calculate_preference_value):
        episode.mimetype = m.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if episode.mimetype.startswith('image/') and media_available:
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(m.fileSize) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            episode.length = -1

        return episode

    # Brute-force detection of any links
    for link in entry.get('links', ()):
        episode.url = util.normalize_feed_url(link.get('href', ''))
        if not episode.url:
            continue

        if youtube.is_video_link(episode.url):
            return episode

        # Check if we can resolve this link to a audio/video file
        _filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(link, 'type'):
            extension = util.extension_from_mimetype(link.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    # Scan MP3 links in description text
    mp3s = re.compile(r'http://[^"]*\.mp3')
    for content in entry.get('content', ()):
        match = mp3s.search(content.value)
        if match is not None:
            episode.url = match.group(0)
            return episode

    # don't return None : for non-podcast channels
    episode.state = gpodder.STATE_NORMAL
    episode.url = ''
    return episode