def from_feedparser_entry(cls, entry, channel):
    """Build an episode from a parsed feed entry.

    Metadata (guid, title, link, description, duration, publication
    date, payment URL) is copied from ``entry`` first.  Then a download
    URL is searched for, in this order:

    1. the entry's enclosures,
    2. its Media RSS content,
    3. any link that is a YouTube/Vimeo video link or resolves to a
       known file type.

    entry -- mapping-like feed entry (accessed via ``.get()``)
    channel -- passed unchanged to ``cls(channel)``

    Returns the new episode, or None if no usable URL was found.
    """
    episode = cls(channel)
    episode.guid = entry.get('id', '')

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')

    # Prefer the first content element if it is HTML, else the summary
    contents = entry.get('content') or []
    if contents and contents[0].get('type', '') == 'text/html':
        episode.description = contents[0].value
    else:
        episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    try:
        total_time = 0

        # Parse iTunes-specific podcast duration metadata
        itunes_duration = entry.get('itunes_duration', '')
        if itunes_duration:
            total_time = util.parse_time(itunes_duration)

        # Parse time from YouTube descriptions if it's a YouTube feed
        if youtube.is_youtube_guid(episode.guid):
            result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                               episode.description)
            if result:
                total_time = util.parse_time(result.group(1))

        episode.total_time = total_time
    except Exception:
        # Duration is best-effort metadata: on any parsing problem keep
        # whatever total_time the freshly created episode already has.
        # (Unlike a bare "except:", this lets KeyboardInterrupt and
        # SystemExit propagate.)
        pass

    episode.published = feedcore.get_pubdate(entry)

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    candidates = enclosures + media_rss_content

    # If any real audio/video is offered, images and untyped binary
    # data are not interesting as the episode's download.
    has_audio_or_video = any(
        c.get('type', '').startswith(('audio/', 'video/'))
        for c in candidates)

    # XXX: Make it possible for hooks/extensions to override this by
    # giving them a list of enclosures and the "self" object (podcast)
    # and letting them sort and/or filter the list of enclosures to
    # get the desired enclosure picked by the algorithm below.
    def filter_and_sort_enclosures(items):
        return items

    # read the flattr auto-url, if exists
    # Links lacking "rel" or "href" are skipped instead of raising
    # KeyError and aborting the whole entry.
    payment_info = [link['href'] for link in entry.get('links', [])
                    if link.get('rel') == 'payment' and 'href' in link]
    if payment_info:
        # min() returns the first lowest-priority item, exactly like
        # sorted(...)[0] (which is stable), without sorting everything.
        episode.payment_url = min(payment_info, key=get_payment_priority)

    # Enclosures
    for e in filter_and_sort_enclosures(enclosures):
        episode.mime_type = e.get('type', 'application/octet-stream')
        if episode.mime_type == '':
            # See Maemo bug 10036
            logger.warning('Fixing empty mimetype in ugly feed')
            episode.mime_type = 'application/octet-stream'

        if '/' not in episode.mime_type:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if has_audio_or_video and episode.mime_type.startswith('image/'):
            continue

        # If we have audio or video available later on, skip
        # 'application/octet-stream' data types (fixes Linux Outlaws)
        if (has_audio_or_video and
                episode.mime_type == 'application/octet-stream'):
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.file_size = -1

        return episode

    # Media RSS content
    for m in filter_and_sort_enclosures(media_rss_content):
        episode.mime_type = m.get('type', 'application/octet-stream')
        if '/' not in episode.mime_type:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if has_audio_or_video and episode.mime_type.startswith('image/'):
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(m.get('filesize', 0)) or -1
        except (TypeError, ValueError):
            episode.file_size = -1

        # Only let Media RSS override the duration when it actually
        # provides one; otherwise a duration parsed above would be
        # thrown away and replaced with 0.
        try:
            media_duration = int(m.get('duration', 0))
        except (TypeError, ValueError):
            media_duration = 0
        if media_duration > 0:
            episode.total_time = media_duration

        return episode

    # Brute-force detection of any links
    for l in entry.get('links', ()):
        episode.url = util.normalize_feed_url(l.get('href', ''))
        if not episode.url:
            continue

        if (youtube.is_video_link(episode.url) or
                vimeo.is_video_link(episode.url)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(l, 'type'):
            extension = util.extension_from_mimetype(l.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    return None
def from_feedparser_entry(entry, channel, mimetype_prefs=''):
    """Build a PodcastEpisode from a parsed feed entry.

    Metadata (title, link, description, duration, guid, publication
    date) is copied from ``entry`` first.  Then a download URL is
    searched for, in this order:

    1. the entry's enclosures (sorted by mime type preference),
    2. its Media RSS content (sorted the same way),
    3. any link that is a YouTube video link or resolves to a known
       file type,
    4. the first "http://....mp3" URL found in the entry's content.

    entry -- mapping-like feed entry (accessed via ``.get()``)
    channel -- passed unchanged to ``PodcastEpisode(channel)``
    mimetype_prefs -- comma-separated mime types, most preferred first

    Always returns an episode.  If nothing downloadable was found, its
    url is '' and its state is gpodder.STATE_NORMAL (needed for
    non-podcast channels).
    """
    episode = PodcastEpisode(channel)

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')

    # Prefer the first content element if it is HTML, else the summary
    contents = entry.get('content') or []
    if contents and contents[0].get('type', '') == 'text/html':
        episode.description = contents[0].value
    else:
        episode.description = entry.get('summary', '')

    try:
        # Parse iTunes-specific podcast duration metadata
        episode.total_time = util.parse_time(
            entry.get('itunes_duration', ''))
    except Exception:
        # Duration is best-effort metadata; keep the episode's existing
        # total_time.  (Unlike a bare "except:", this lets
        # KeyboardInterrupt and SystemExit propagate.)
        pass

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    episode.guid = entry.get('id', '')
    if entry.get('updated_parsed', None):
        # Append a zero timezone offset, as mktime_tz() needs a 10-tuple
        episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    candidates = enclosures + media_rss_content

    # If any real audio/video is offered, images are not interesting
    # as the episode's download.
    has_audio_or_video = any(
        c.get('type', '').startswith(('audio/', 'video/'))
        for c in candidates)

    # Create the list of preferred mime types.  Blank items are dropped:
    # ''.split(',') yields [''], which would otherwise rank enclosures
    # with an empty type ahead of everything else.
    preferred_types = [
        pref.strip() for pref in (mimetype_prefs or '').split(',')
        if pref.strip()]

    def calculate_preference_value(enclosure):
        """Calculate preference value of an enclosure

        This is based on mime types and allows users to prefer
        certain mime types over others (e.g. MP3 over AAC, ...)
        """
        mimetype = enclosure.get('type', None)
        try:
            # If the mime type is found, return its (zero-based) index
            return preferred_types.index(mimetype)
        except ValueError:
            # If it is not found, assume it comes after all listed items
            return len(preferred_types)

    # Enclosures
    for e in sorted(enclosures, key=calculate_preference_value):
        episode.mimetype = e.get('type', 'application/octet-stream')
        if episode.mimetype == '':
            # See Maemo bug 10036
            log('Fixing empty mimetype in ugly feed', sender=episode)
            episode.mimetype = 'application/octet-stream'

        if '/' not in episode.mimetype:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if has_audio_or_video and episode.mimetype.startswith('image/'):
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.length = -1

        return episode

    # Media RSS content
    for m in sorted(media_rss_content, key=calculate_preference_value):
        episode.mimetype = m.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if has_audio_or_video and episode.mimetype.startswith('image/'):
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        # NOTE(review): the parser presumably stores attribute names in
        # lowercase ("filesize"), in which case the camelCase lookup
        # never matches and the size is always reported as unknown.
        # Try the lowercase key first, keep camelCase as a fallback.
        raw_size = m.get('filesize', m.get('fileSize', 0))
        try:
            episode.length = int(raw_size) or -1
        except (TypeError, ValueError):
            episode.length = -1

        return episode

    # Brute-force detection of any links
    for l in entry.get('links', ()):
        episode.url = util.normalize_feed_url(l.get('href', ''))
        if not episode.url:
            continue

        if youtube.is_video_link(episode.url):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(l, 'type'):
            extension = util.extension_from_mimetype(l.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    # Scan MP3 links in description text
    mp3s = re.compile(r'http://[^"]*\.mp3')
    for content in entry.get('content', ()):
        match = mp3s.search(content.value)
        if match is not None:
            # The first match in the first matching content element wins
            episode.url = match.group(0)
            return episode

    # Don't return None: non-podcast channels still need an episode
    episode.state = gpodder.STATE_NORMAL
    episode.url = ''
    return episode