def get_metadata(url):
    """Get file download metadata

    Returns a (size, type, name) tuple for the given download URL.
    Will use the network connection to determine the metadata via
    the HTTP header fields.

    - size is the raw 'content-length' header value, or '0' when the
      header is missing or empty.
    - type is the 'content-type' header value, or
      'application/octet-stream' when missing or empty.
    - name is the 'filename' parameter of the 'content-disposition'
      header; when that is unavailable, the basename of the URL's
      parent directory is used instead.
    """
    track_response = util.urlopen(url)
    try:
        headers = track_response.headers

        # .get() yields None for a missing header on both message-style and
        # dict-style header objects, so the "or" defaults always apply.
        filesize = headers.get('content-length') or '0'
        filetype = headers.get('content-type') or 'application/octet-stream'

        filename = util.get_header_param(headers, 'filename', 'content-disposition') \
            or os.path.basename(os.path.dirname(url))
    finally:
        # Always release the connection, even if header parsing fails
        track_response.close()

    return filesize, filetype, filename
def run(self):
    """Run this download task to completion.

    Downloads the episode via the resolved downloader into the temporary
    file, fixes up the local filename and mime type based on the response
    headers / redirect target / content-disposition, then moves the
    temporary file to its final location.

    Any failure is recorded in self.status / self.error_message instead
    of being raised.

    Returns True if the download finished successfully, False if it was
    skipped, cancelled/paused or failed.
    """
    # Speed calculation (re-)starts here
    self.__start_time = 0
    self.__start_blocks = 0

    # If the download has already been cancelled, skip it
    if self.status == DownloadTask.CANCELLED:
        util.delete_file(self.tempname)
        self.progress = 0.0
        self.speed = 0.0
        self.recycle()
        return False

    # We only start this download if its status is "downloading"
    if self.status != DownloadTask.DOWNLOADING:
        return False

    # We are downloading this file right now
    self.status = DownloadTask.DOWNLOADING
    self._notification_shown = False

    # Restore a reference to this task in the episode
    # when running a recycled task following a pause or failed
    # see #649
    if not self.episode.download_task:
        self.episode.download_task = self

    url = self.__episode.url
    try:
        if url == '':
            raise DownloadNoURLException()

        if self.downloader:
            downloader = self.downloader.custom_downloader(
                self._config, self.episode)
        else:
            downloader = registry.custom_downloader.resolve(
                self._config, None, self.episode)

        if downloader:
            logger.info('Downloading %s with %s', url, downloader)
        else:
            downloader = DefaultDownloader.custom_downloader(
                self._config, self.episode)

        headers, real_url = downloader.retrieve_resume(
            self.tempname, self.status_updated)

        new_mimetype = headers.get('content-type', self.__episode.mime_type)
        old_mimetype = self.__episode.mime_type
        _basename, ext = os.path.splitext(self.filename)
        if new_mimetype != old_mimetype or util.wrong_extension(ext):
            logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
            old_extension = self.__episode.extension()
            self.__episode.mime_type = new_mimetype
            # don't call local_filename because we'll get the old download name
            new_extension = self.__episode.extension(
                may_call_local_filename=False)

            # If the desired filename extension changed due to the new
            # mimetype, we force an update of the local filename to fix the
            # extension.
            if old_extension != new_extension or util.wrong_extension(ext):
                self.filename = self.__episode.local_filename(
                    create=True, force_update=True)

        # In some cases, the redirect of a URL causes the real filename to
        # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
        if real_url != url and not util.is_known_redirecter(real_url):
            realname, realext = util.filename_from_url(real_url)

            # Only update from redirect if the redirected-to filename has
            # a proper extension (this is needed for e.g. YouTube)
            if not util.wrong_extension(realext):
                real_filename = ''.join((realname, realext))
                self.filename = self.__episode.local_filename(
                    create=True, force_update=True, template=real_filename)
                logger.info(
                    'Download was redirected (%s). New filename: %s',
                    real_url, os.path.basename(self.filename))

        # Look at the Content-disposition header; use it if available
        disposition_filename = util.get_header_param(
            headers, 'filename', 'content-disposition')

        # Some servers do send the content-disposition header, but provide
        # an empty filename, resulting in an empty string here (bug 1440)
        if disposition_filename is not None and disposition_filename != '':
            # The server specifies a download filename - try to use it
            # filename_from_url to remove query string; see #591
            fn, ext = util.filename_from_url(disposition_filename)
            logger.debug(
                "converting disposition filename '%s' to local filename '%s%s'",
                disposition_filename, fn, ext)
            disposition_filename = fn + ext
            self.filename = self.__episode.local_filename(
                create=True, force_update=True, template=disposition_filename)
            new_mimetype, _encoding = mimetypes.guess_type(self.filename)
            if new_mimetype is not None:
                logger.info('Using content-disposition mimetype: %s',
                            new_mimetype)
                self.__episode.mime_type = new_mimetype

        # Re-evaluate filename and tempname to take care of podcast renames
        # while downloads are running (which will change both file names)
        self.filename = self.__episode.local_filename(create=False)
        self.tempname = os.path.join(os.path.dirname(self.filename),
                                     os.path.basename(self.tempname))

        shutil.move(self.tempname, self.filename)

        # Model- and database-related updates after a download has finished
        self.__episode.on_downloaded(self.filename)
    except DownloadCancelledException:
        logger.info('Download has been cancelled/paused: %s', self)
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
    except DownloadNoURLException:
        self.status = DownloadTask.FAILED
        self.error_message = _('Episode has no URL to download')
    except urllib.error.ContentTooShortError:
        self.status = DownloadTask.FAILED
        self.error_message = _('Missing content from server')
    except ConnectionError as ce:
        # special case request exception
        self.status = DownloadTask.FAILED
        logger.error('Download failed: %s', str(ce), exc_info=True)
        # The wrapped error only sometimes carries a connection pool; do not
        # let a missing attribute raise out of this handler.
        pool = getattr(ce.args[0], 'pool', None) if ce.args else None
        if pool is not None:
            d = {'host': pool.host, 'port': pool.port}
            # Translate first, then interpolate - otherwise the catalog
            # lookup is done on the already-formatted string and never hits
            self.error_message = _(
                "Couldn't connect to server %(host)s:%(port)s") % d
        else:
            d = {'error': str(ce)}
            self.error_message = _('Request Error: %(error)s') % d
    except RequestException as req_err:
        # extract MaxRetryError to shorten the exception message
        if req_err.args and isinstance(req_err.args[0], MaxRetryError):
            req_err = req_err.args[0]
        logger.error('%s while downloading "%s"', str(req_err),
                     self.__episode.title, exc_info=True)
        self.status = DownloadTask.FAILED
        d = {'error': str(req_err)}
        self.error_message = _('Request Error: %(error)s') % d
    except IOError as ioe:
        logger.error('%s while downloading "%s": %s', ioe.strerror,
                     self.__episode.title, ioe.filename, exc_info=True)
        self.status = DownloadTask.FAILED
        d = {'error': ioe.strerror, 'filename': ioe.filename}
        self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
    except gPodderDownloadHTTPError as gdhe:
        logger.error('HTTP %s while downloading "%s": %s',
                     gdhe.error_code, self.__episode.title, gdhe.error_message,
                     exc_info=True)
        self.status = DownloadTask.FAILED
        d = {'code': gdhe.error_code, 'message': gdhe.error_message}
        self.error_message = _('HTTP Error %(code)s: %(message)s') % d
    except Exception as e:
        self.status = DownloadTask.FAILED
        logger.error('Download failed: %s', str(e), exc_info=True)
        self.error_message = _('Error: %s') % (str(e), )

    if self.status == DownloadTask.DOWNLOADING:
        # Everything went well - we're done
        self.status = DownloadTask.DONE
        if self.total_size <= 0:
            self.total_size = util.calculate_size(self.filename)
            logger.info('Total size updated to %d', self.total_size)
        self.progress = 1.0
        gpodder.user_extensions.on_episode_downloaded(self.__episode)
        return True

    self.speed = 0.0

    # We finished, but not successfully (at least not really)
    return False