def file_type(self):
    """Return the media type of this episode.

    URLs recognised by the YouTube, Vimeo or Escapist video helpers are
    always reported as 'video'. Any other URL is classified by
    util.file_type_by_extension() from this episode's extension.
    """
    # Assume all links to the known video sites are video files
    video_sites = (youtube, vimeo, escapist_videos)
    if any(site.is_video_link(self.url) for site in video_sites):
        return 'video'

    return util.file_type_by_extension(self.extension())
def _convert_episode(self, episode):
    """Convert a downloaded YouTube FLV file to MP4 by running FFMPEG_CMD.

    Nothing happens unless the episode URL is a YouTube video link, the
    episode has a local file, and that file starts with the 'FLV'
    signature. A file that already carries the '.mp4' extension is first
    renamed to '.flv' so the converter can write the '.mp4' target.

    On success (and unless self.test is set) the source file is removed
    and the episode's download_filename is updated and saved. On failure
    a possibly half-written target file is removed.
    """
    if not youtube.is_video_link(episode.url):
        logger.debug('Not a YouTube video. Ignoring.')
        return

    filename = episode.local_filename(create=False)
    if filename is None:
        # No local file is known for this episode, so there is nothing
        # to inspect or convert
        logger.debug('No local file for this episode. Ignoring.')
        return

    dirname = os.path.dirname(filename)
    basename, ext = os.path.splitext(os.path.basename(filename))

    # Read the signature through a context manager so the handle is closed
    # right away; compare against bytes because the file is opened in
    # binary mode
    with open(filename, 'rb') as fp:
        signature = fp.read(3)

    if signature != b'FLV':
        logger.debug('Not a FLV file. Ignoring.')
        return

    if ext == '.mp4':
        # Move file out of place for conversion
        newname = os.path.join(dirname, basename+'.flv')
        os.rename(filename, newname)
        filename = newname

    target = os.path.join(dirname, basename+'.mp4')
    cmd = FFMPEG_CMD % {
        'infile': filename,
        'outfile': target
    }

    # The command is split into an argument list, so no shell is involved
    ffmpeg = subprocess.Popen(shlex.split(str(cmd)),
            stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = ffmpeg.communicate()

    if ffmpeg.returncode == 0:
        logger.info('FLV conversion successful.')
        if not self.test:
            os.remove(filename)
            episode.download_filename = basename+'.mp4'
            episode.save()
    else:
        logger.info('Error converting file. FFMPEG installed?')
        try:
            # Best-effort cleanup: the target may never have been created
            os.remove(target)
        except OSError:
            pass
def get_playback_url(self, fmt_id=None, allow_partial=False):
    """Return a local filename or streaming URL for playing this episode.

    The local file is preferred when it exists. With allow_partial=True,
    a '<filename>.partial' file takes precedence so that a download in
    progress can be previewed. When no local file exists the episode URL
    is used instead. Whatever was chosen is passed through
    youtube.get_real_download_url() (with fmt_id) if it is a YouTube
    video link.
    """
    local_path = self.local_filename(create=False)
    have_path = local_path is not None

    if allow_partial and have_path:
        partial_path = local_path + '.partial'
        if os.path.exists(partial_path):
            return partial_path

    if have_path and os.path.exists(local_path):
        playback = local_path
    else:
        playback = self.url

    if youtube.is_video_link(playback):
        playback = youtube.get_real_download_url(playback, fmt_id)

    return playback
def file_type(self):
    """Classify this episode's media.

    Returns "video" for URLs that the YouTube, Vimeo or Escapist helpers
    recognise as video links; otherwise returns whatever
    util.file_type_by_extension() reports for the episode's extension.
    """
    # Links to the known video sites are assumed to be video files
    is_hosted_video = (
        youtube.is_video_link(self.url)
        or vimeo.is_video_link(self.url)
        or escapist_videos.is_video_link(self.url)
    )
    if not is_hosted_video:
        return util.file_type_by_extension(self.extension())

    return "video"
def local_filename(self, create, force_update=False, check_only=False, template=None, return_wanted_filename=False):
    """Return the local path of this episode, generating a name if needed.

    create -- pass True only when the file is about to be created or
        downloaded; a new filename is then generated if none is stored.
        With create=False and no stored filename, None is returned.
    force_update -- look for a new (better) filename and move the
        current file to it if it differs. Useful when more information
        (e.g. the mimetype) becomes available during a download.
    check_only -- never rename or generate anything; only report the
        path for the stored filename (or None if there is none).
    template -- a filename used as the basis for the generated name; its
        extension is trusted. The generated name is stored via save().
    return_wanted_filename -- return the calculated filename without
        storing it (used by the "import external downloads" feature).
    """
    if self.download_filename is None and (check_only or not create):
        return None

    ext = self.extension(may_call_local_filename=False).encode("utf-8", "ignore")

    if check_only or not (force_update or not self.download_filename):
        # Nothing to generate or rename: report the stored filename
        return os.path.join(
            util.sanitize_encoding(self.channel.save_dir), util.sanitize_encoding(self.download_filename)
        )

    # Avoid and catch gPodder bug 1440 and similar situations
    if template == "":
        logger.warn("Empty template. Report this podcast URL %s", self.channel.url)
        template = None

    # Derive a base name, either from the URL or from the template
    if template is None:
        stem, _ = util.filename_from_url(self.url)
    else:
        # If template is specified, trust the template's extension
        stem, ext = os.path.splitext(template)
    candidate = util.sanitize_filename(stem, self.MAX_FILENAME_LENGTH)

    if "redirect" in candidate and template is None:
        # This looks like a redirection URL - force URL resolving!
        logger.warn("Looks like a redirection to me: %s", self.url)
        resolved = util.get_real_url(self.channel.authenticate_url(self.url))
        logger.info("Redirection resolved to: %s", resolved)
        stem, _ = util.filename_from_url(resolved)
        candidate = util.sanitize_filename(stem, self.MAX_FILENAME_LENGTH)

    # Use title for YouTube, Vimeo and Soundcloud downloads
    prefer_title = (
        youtube.is_video_link(self.url)
        or vimeo.is_video_link(self.url)
        or escapist_videos.is_video_link(self.url)
        or candidate == "stream"
    )
    if prefer_title:
        from_title = util.sanitize_filename(self.title, self.MAX_FILENAME_LENGTH)
        if from_title:
            candidate = from_title

    # If the basename is empty, use the md5 hexdigest of the URL
    if not candidate or candidate.startswith("redirect."):
        logger.error("Report this feed: Podcast %s, episode %s", self.channel.url, self.url)
        candidate = hashlib.md5(self.url).hexdigest()

    # Find a unique filename for this episode
    wanted_filename = self.find_unique_file_name(candidate, ext)

    if return_wanted_filename:
        # Hand back the calculated filename without updating the database
        return wanted_filename

    if self.download_filename and wanted_filename != self.download_filename:
        # A file may exist under the old name - move it to the new one
        destination = os.path.join(self.channel.save_dir, wanted_filename)
        source = os.path.join(self.channel.save_dir, self.download_filename)

        if os.path.exists(source) and not os.path.exists(destination):
            logger.info("Renaming %s => %s", source, destination)
            os.rename(source, destination)
        elif force_update and not os.path.exists(source):
            # With force_update the file might not exist yet, because the
            # downloading code calls this before saving the file
            logger.info("Choosing new filename: %s", destination)
        else:
            logger.warn("%s exists or %s does not", destination, source)
        logger.info('Updating filename of %s to "%s".', self.url, wanted_filename)
    elif self.download_filename is None:
        logger.info("Setting download filename: %s", wanted_filename)

    self.download_filename = wanted_filename
    self.save()

    return os.path.join(
        util.sanitize_encoding(self.channel.save_dir), util.sanitize_encoding(self.download_filename)
    )
def has_website_link(self):
    """Tell whether this episode has a website link worth showing.

    A link counts when it is non-empty and either differs from the media
    URL or is itself a YouTube video link.
    """
    if not self.link:
        return False

    return self.link != self.url or youtube.is_video_link(self.link)
def from_feedparser_entry(cls, entry, channel):
    """Build an episode for ``channel`` from a parsed feed ``entry``.

    The guid, title, link, description, duration, publication date and
    payment URL are copied from the entry. A media URL is then picked
    from, in this order: the entry's enclosures, its Media RSS content,
    and finally its plain links. A plain link is accepted when it is a
    YouTube or Vimeo video link, or when util.file_type_by_extension()
    reports a file type for it.

    Returns the populated episode, or None when no usable media URL
    could be found.
    """
    episode = cls(channel)
    episode.guid = entry.get('id', '')

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')
    if 'content' in entry and len(entry['content']) and \
            entry['content'][0].get('type', '') == 'text/html':
        episode.description = entry['content'][0].value
    else:
        episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    try:
        total_time = 0

        # Parse iTunes-specific podcast duration metadata
        itunes_duration = entry.get('itunes_duration', '')
        if itunes_duration:
            total_time = util.parse_time(itunes_duration)

        # Parse time from YouTube descriptions if it's a YouTube feed
        if youtube.is_youtube_guid(episode.guid):
            result = re.search(r'Time:<[^>]*>\n<[^>]*>([:0-9]*)<',
                    episode.description)
            if result:
                youtube_duration = result.group(1)
                total_time = util.parse_time(youtube_duration)

        episode.total_time = total_time
    except Exception:
        # The duration is optional metadata: a malformed value must not
        # prevent the episode from being created
        logger.debug('Cannot parse duration of episode %s', episode.guid,
                exc_info=True)

    episode.published = feedcore.get_pubdate(entry)

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    all_media = enclosures + media_rss_content
    audio_available = any(e.get('type', '').startswith('audio/')
            for e in all_media)
    video_available = any(e.get('type', '').startswith('video/')
            for e in all_media)

    # XXX: Make it possible for hooks/extensions to override this by
    # giving them a list of enclosures and the "self" object (podcast)
    # and letting them sort and/or filter the list of enclosures to
    # get the desired enclosure picked by the algorithm below.
    filter_and_sort_enclosures = lambda x: x

    # Read the flattr auto-url, if it exists. Links without a "rel" or
    # "href" key are skipped instead of raising KeyError.
    payment_info = [link['href'] for link in entry.get('links', [])
            if link.get('rel') == 'payment' and link.get('href')]
    if payment_info:
        episode.payment_url = sorted(payment_info,
                key=get_payment_priority)[0]

    # Enclosures
    for e in filter_and_sort_enclosures(enclosures):
        episode.mime_type = e.get('type', 'application/octet-stream')
        if episode.mime_type == '':
            # See Maemo bug 10036
            logger.warn('Fixing empty mimetype in ugly feed')
            episode.mime_type = 'application/octet-stream'

        if '/' not in episode.mime_type:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if episode.mime_type.startswith('image/') and \
                (audio_available or video_available):
            continue

        # If we have audio or video available later on, skip
        # 'application/octet-stream' data types (fixes Linux Outlaws)
        if episode.mime_type == 'application/octet-stream' and \
                (audio_available or video_available):
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.file_size = -1

        return episode

    # Media RSS content
    for m in filter_and_sort_enclosures(media_rss_content):
        episode.mime_type = m.get('type', 'application/octet-stream')
        if '/' not in episode.mime_type:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if episode.mime_type.startswith('image/') and \
                (audio_available or video_available):
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.file_size = int(m.get('filesize', 0)) or -1
        except (TypeError, ValueError):
            episode.file_size = -1

        try:
            episode.total_time = int(m.get('duration', 0)) or 0
        except (TypeError, ValueError):
            episode.total_time = 0

        return episode

    # Brute-force detection of any links
    for l in entry.get('links', ()):
        episode.url = util.normalize_feed_url(l.get('href', ''))
        if not episode.url:
            continue

        if (youtube.is_video_link(episode.url) or
                vimeo.is_video_link(episode.url)):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(l, 'type'):
            extension = util.extension_from_mimetype(l.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    return None
def local_filename(self, create, force_update=False, check_only=False, template=None, return_wanted_filename=False):
    """Get (and possibly generate) the local saving filename

    Arguments:

    create: when True, generate a new filename if none is stored yet.
        Pass this only when the file is about to be created/downloaded.
        With create=False and no stored filename the result is None.

    force_update: try to find a new (better) filename and move the
        current file there if the name changes. Useful when more
        information about the file (e.g. its mimetype) becomes
        available during the download.

    check_only: never generate or rename anything; only return the
        path built from the stored filename, or None if there is none.

    template: a filename to base the generated name on. Its extension
        replaces the episode's own. The generated filename is stored
        through save().

    return_wanted_filename: return the calculated filename right away,
        without storing it (for the "import external downloads"
        feature).
    """
    if self.download_filename is None and (check_only or not create):
        return None

    ext = self.extension(may_call_local_filename=False).encode(
            'utf-8', 'ignore')

    regenerate = not check_only and (
            force_update or not self.download_filename)

    if regenerate:
        # Avoid and catch gPodder bug 1440 and similar situations
        if template == '':
            logger.warn('Empty template. Report this podcast URL %s',
                    self.channel.url)
            template = None

        # Work out the raw name the new filename is built from
        if template is not None:
            # If template is specified, trust the template's extension
            raw_name, ext = os.path.splitext(template)
        else:
            raw_name, _ = util.filename_from_url(self.url)
        base = util.sanitize_filename(raw_name, self.MAX_FILENAME_LENGTH)

        if 'redirect' in base and template is None:
            # This looks like a redirection URL - force URL resolving!
            logger.warn('Looks like a redirection to me: %s', self.url)
            real_url = util.get_real_url(
                    self.channel.authenticate_url(self.url))
            logger.info('Redirection resolved to: %s', real_url)
            raw_name, _ = util.filename_from_url(real_url)
            base = util.sanitize_filename(raw_name,
                    self.MAX_FILENAME_LENGTH)

        # Use title for YouTube, Vimeo and Soundcloud downloads
        providers = (youtube, vimeo, escapist_videos)
        if (any(p.is_video_link(self.url) for p in providers)
                or base == 'stream'):
            title_based = util.sanitize_filename(self.title,
                    self.MAX_FILENAME_LENGTH)
            if title_based:
                base = title_based

        # If the basename is empty, use the md5 hexdigest of the URL
        if not base or base.startswith('redirect.'):
            logger.error('Report this feed: Podcast %s, episode %s',
                    self.channel.url, self.url)
            base = hashlib.md5(self.url).hexdigest()

        # Find a unique filename for this episode
        wanted_filename = self.find_unique_file_name(base, ext)

        if return_wanted_filename:
            # Return the calculated filename without touching the database
            return wanted_filename

        if self.download_filename:
            if wanted_filename != self.download_filename:
                # The old file exists, but a different name is wanted now;
                # there might be an old download lying around - move it!
                save_dir = self.channel.save_dir
                new_path = os.path.join(save_dir, wanted_filename)
                old_path = os.path.join(save_dir, self.download_filename)

                if os.path.exists(old_path) and \
                        not os.path.exists(new_path):
                    logger.info('Renaming %s => %s', old_path, new_path)
                    os.rename(old_path, new_path)
                elif force_update and not os.path.exists(old_path):
                    # With force_update the file might not exist yet when
                    # the downloading code calls this before saving it
                    logger.info('Choosing new filename: %s', new_path)
                else:
                    logger.warn('%s exists or %s does not',
                            new_path, old_path)
                logger.info('Updating filename of %s to "%s".',
                        self.url, wanted_filename)
        elif self.download_filename is None:
            logger.info('Setting download filename: %s', wanted_filename)

        self.download_filename = wanted_filename
        self.save()

    folder = util.sanitize_encoding(self.channel.save_dir)
    return os.path.join(folder,
            util.sanitize_encoding(self.download_filename))
def has_website_link(self):
    """Report whether the episode's link is usable as a website link.

    False when the link is empty. Otherwise the link qualifies if it is
    not the same as the media URL, or if it is a YouTube video link.
    """
    website = self.link
    if not website:
        return False

    differs_from_media = website != self.url
    return differs_from_media or youtube.is_video_link(website)
def from_feedparser_entry(entry, channel):
    """Create a PodcastEpisode for ``channel`` from a parsed feed ``entry``.

    Title, link, description, guid and publication date are copied from
    the entry. The media URL is taken from the first usable source among:
    the entry's enclosures, its Media RSS content, its plain links
    (YouTube video links, or links for which
    util.file_type_by_extension() reports a type), and finally any
    'http://...mp3' address found in the entry's content text.

    Returns the episode, or None when no media URL could be found.
    """
    episode = PodcastEpisode(channel)

    episode.title = entry.get('title', '')
    episode.link = entry.get('link', '')
    episode.description = entry.get('summary', '')

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    episode.guid = entry.get('id', '')
    if entry.get('updated_parsed', None):
        # mktime_tz() wants a 10-tuple; the appended 0 is the UTC offset
        episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

    # Enclosures
    for e in entry.get('enclosures', ()):
        episode.mimetype = e.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.length = -1

        return episode

    # Media RSS content
    for m in entry.get('media_content', ()):
        episode.mimetype = m.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(m.fileSize) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            episode.length = -1

        return episode

    # Brute-force detection of any links
    for l in entry.get('links', ()):
        episode.url = util.normalize_feed_url(l.get('href', ''))
        if not episode.url:
            continue

        if youtube.is_video_link(episode.url):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(l, 'type'):
            extension = util.extension_from_mimetype(l.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    # Scan MP3 links in description text
    mp3s = re.compile(r'http://[^"]*\.mp3')
    for content in entry.get('content', ()):
        html = content.value
        for match in mp3s.finditer(html):
            episode.url = match.group(0)
            return episode

    return None
def file_type(self):
    """Return 'video' for YouTube links, else the type for the extension.

    Non-YouTube episodes are classified by util.file_type_by_extension()
    using this episode's extension.
    """
    # Assume all YouTube links are video files
    return ('video' if youtube.is_video_link(self.url)
            else util.file_type_by_extension(self.extension()))
def from_feedparser_entry(entry, channel, mimetype_prefs=''):
    """Create a PodcastEpisode for ``channel`` from a parsed feed ``entry``.

    mimetype_prefs is a comma-separated string of MIME types. Enclosures
    and Media RSS items whose type appears earlier in that list are tried
    first, and unlisted types come last.

    The media URL is taken from the first usable source among: the
    enclosures, the Media RSS content, the plain links (YouTube video
    links, or links for which util.file_type_by_extension() reports a
    type), and any 'http://...mp3' address found in the content text.

    An episode is always returned. When no media URL is found, the
    episode gets state gpodder.STATE_NORMAL and an empty URL.
    """
    episode = PodcastEpisode(channel)

    # Replace multi-space and newlines with single space (Maemo bug 11173)
    episode.title = re.sub(r'\s+', ' ', entry.get('title', ''))
    episode.link = entry.get('link', '')
    if 'content' in entry and len(entry['content']) and \
            entry['content'][0].get('type', '') == 'text/html':
        episode.description = entry['content'][0].value
    else:
        episode.description = entry.get('summary', '')

    try:
        # Parse iTunes-specific podcast duration metadata
        episode.total_time = util.parse_time(
                entry.get('itunes_duration', ''))
    except Exception:
        # The duration is optional: leave total_time untouched when the
        # value cannot be parsed
        pass

    # Fallback to subtitle if summary is not available
    if not episode.description:
        episode.description = entry.get('subtitle', '')

    episode.guid = entry.get('id', '')
    if entry.get('updated_parsed', None):
        # mktime_tz() wants a 10-tuple; the appended 0 is the UTC offset
        episode.pubDate = rfc822.mktime_tz(entry.updated_parsed+(0,))

    enclosures = entry.get('enclosures', [])
    media_rss_content = entry.get('media_content', [])
    all_media = enclosures + media_rss_content
    audio_available = any(e.get('type', '').startswith('audio/')
            for e in all_media)
    video_available = any(e.get('type', '').startswith('video/')
            for e in all_media)

    # Create the list of preferred mime types
    preferred_types = mimetype_prefs.split(',')

    def calculate_preference_value(enclosure):
        """Calculate preference value of an enclosure

        This is based on mime types and allows users to prefer
        certain mime types over others (e.g. MP3 over AAC, ...)
        """
        mimetype = enclosure.get('type', None)
        try:
            # If the mime type is found, return its (zero-based) index
            return preferred_types.index(mimetype)
        except ValueError:
            # If it is not found, assume it comes after all listed items
            return len(preferred_types)

    # Enclosures
    for e in sorted(enclosures, key=calculate_preference_value):
        episode.mimetype = e.get('type', 'application/octet-stream')
        if episode.mimetype == '':
            # See Maemo bug 10036
            log('Fixing empty mimetype in ugly feed', sender=episode)
            episode.mimetype = 'application/octet-stream'

        if '/' not in episode.mimetype:
            continue

        # Skip images in feeds if audio or video is available (bug 979)
        # This must (and does) also look in Media RSS enclosures (bug 1430)
        if episode.mimetype.startswith('image/') and \
                (audio_available or video_available):
            continue

        episode.url = util.normalize_feed_url(e.get('href', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(e.length) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            # Missing or non-numeric length: size is unknown
            episode.length = -1

        return episode

    # Media RSS content
    for m in sorted(media_rss_content, key=calculate_preference_value):
        episode.mimetype = m.get('type', 'application/octet-stream')
        if '/' not in episode.mimetype:
            continue

        # Skip images in Media RSS if we have audio/video (bug 1444)
        if episode.mimetype.startswith('image/') and \
                (audio_available or video_available):
            continue

        episode.url = util.normalize_feed_url(m.get('url', ''))
        if not episode.url:
            continue

        try:
            episode.length = int(m.fileSize) or -1
        except (AttributeError, KeyError, TypeError, ValueError):
            episode.length = -1

        return episode

    # Brute-force detection of any links
    for l in entry.get('links', ()):
        episode.url = util.normalize_feed_url(l.get('href', ''))
        if not episode.url:
            continue

        if youtube.is_video_link(episode.url):
            return episode

        # Check if we can resolve this link to a audio/video file
        filename, extension = util.filename_from_url(episode.url)
        file_type = util.file_type_by_extension(extension)
        if file_type is None and hasattr(l, 'type'):
            extension = util.extension_from_mimetype(l.type)
            file_type = util.file_type_by_extension(extension)

        # The link points to a audio or video file - use it!
        if file_type is not None:
            return episode

    # Scan MP3 links in description text
    mp3s = re.compile(r'http://[^"]*\.mp3')
    for content in entry.get('content', ()):
        html = content.value
        for match in mp3s.finditer(html):
            episode.url = match.group(0)
            return episode

    # Don't return None: entries of non-podcast channels still get an
    # episode, with an empty URL
    episode.state = gpodder.STATE_NORMAL
    episode.url = ''
    return episode
def on_episode_downloaded(self, episode):
    """Run the conversion for a downloaded episode if it is a YouTube video.

    Episodes whose URL is not a YouTube video link are ignored.
    """
    if not youtube.is_video_link(episode.url):
        return

    self._convert_episode(episode)