Example #1
0
 def custom_downloader(config, episode):
     url = episode.url
     # Resolve URL and start downloading the episode
     res = registry.download_url.resolve(config, None, episode, False)
     if res:
         url = res
     if url == episode.url:
         # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
         url = url.strip()
         url = util.iri_to_url(url)
     return DefaultDownload(config, episode, url)
Example #2
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        # Restore a reference to this task in the episode
        # when running a recycled task following a pause or failed
        # see #649
        if not self.episode.download_task:
            self.episode.download_task = self

        try:

            custom_downloader = registry.custom_downloader.resolve(
                self._config, None, self.episode)

            url = self.__episode.url
            if custom_downloader:
                logger.info('Downloading %s with %s', url, custom_downloader)
                headers, real_url = custom_downloader.retrieve_resume(
                    self.tempname, reporthook=self.status_updated)
            else:
                # Resolve URL and start downloading the episode
                res = registry.download_url.resolve(self._config, None,
                                                    self.episode)
                if res:
                    url = res
                if url == self.__episode.url:
                    # don't modify custom urls (#635 - vimeo breaks if * is unescaped)
                    url = url.strip()
                    url = util.iri_to_url(url)

                logger.info("Downloading %s", url)
                downloader = DownloadURLOpener(self.__episode.channel)

                # HTTP Status codes for which we retry the download
                retry_codes = (408, 418, 504, 598, 599)
                max_retries = max(0, self._config.auto.retries)

                # Retry the download on timeout (bug 1013)
                for retry in range(max_retries + 1):
                    if retry > 0:
                        logger.info('Retrying download of %s (%d)', url, retry)
                        time.sleep(1)

                    try:
                        headers, real_url = downloader.retrieve_resume(
                            url, self.tempname, reporthook=self.status_updated)
                        # If we arrive here, the download was successful
                        break
                    except urllib.error.ContentTooShortError as ctse:
                        if retry < max_retries:
                            logger.info('Content too short: %s - will retry.',
                                        url)
                            continue
                        raise
                    except socket.timeout as tmout:
                        if retry < max_retries:
                            logger.info('Socket timeout: %s - will retry.',
                                        url)
                            continue
                        raise
                    except gPodderDownloadHTTPError as http:
                        if retry < max_retries and http.error_code in retry_codes:
                            logger.info('HTTP error %d: %s - will retry.',
                                        http.error_code, url)
                            continue
                        raise

            new_mimetype = headers.get('content-type',
                                       self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype,
                            new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(
                        create=True, force_update=True, template=real_filename)
                    logger.info(
                        'Download was redirected (%s). New filename: %s',
                        real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, 'filename',
                                                    'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                # filename_from_url to remove query string; see #591
                fn, ext = util.filename_from_url(disposition_filename)
                logger.debug(
                    "converting disposition filename '%s' to local filename '%s%s'",
                    disposition_filename, fn, ext)
                disposition_filename = fn + ext
                self.filename = self.__episode.local_filename(
                    create=True,
                    force_update=True,
                    template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                                new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                                         os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s',
                         ioe.strerror,
                         self.__episode.title,
                         ioe.filename,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                         gdhe.error_code,
                         self.__episode.title,
                         gdhe.error_message,
                         exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e), )

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False
Example #3
0
    def run(self):
        # Speed calculation (re-)starts here
        self.__start_time = 0
        self.__start_blocks = 0

        # If the download has already been cancelled, skip it
        if self.status == DownloadTask.CANCELLED:
            util.delete_file(self.tempname)
            self.progress = 0.0
            self.speed = 0.0
            return False

        # We only start this download if its status is "downloading"
        if self.status != DownloadTask.DOWNLOADING:
            return False

        # We are downloading this file right now
        self.status = DownloadTask.DOWNLOADING
        self._notification_shown = False

        try:
            # Resolve URL and start downloading the episode
            fmt_ids = youtube.get_fmt_ids(self._config.youtube)
            url = youtube.get_real_download_url(self.__episode.url, fmt_ids)
            url = vimeo.get_real_download_url(url, self._config.vimeo.fileformat)
            url = escapist_videos.get_real_download_url(url)
            url = url.strip()

            url = util.iri_to_url(url)

            downloader = DownloadURLOpener(self.__episode.channel)

            # HTTP Status codes for which we retry the download
            retry_codes = (408, 418, 504, 598, 599)
            max_retries = max(0, self._config.auto.retries)

            # Retry the download on timeout (bug 1013)
            for retry in range(max_retries + 1):
                if retry > 0:
                    logger.info('Retrying download of %s (%d)', url, retry)
                    time.sleep(1)

                try:
                    headers, real_url = downloader.retrieve_resume(url,
                        self.tempname, reporthook=self.status_updated)
                    # If we arrive here, the download was successful
                    break
                except urllib.error.ContentTooShortError as ctse:
                    if retry < max_retries:
                        logger.info('Content too short: %s - will retry.',
                                url)
                        continue
                    raise
                except socket.timeout as tmout:
                    if retry < max_retries:
                        logger.info('Socket timeout: %s - will retry.', url)
                        continue
                    raise
                except gPodderDownloadHTTPError as http:
                    if retry < max_retries and http.error_code in retry_codes:
                        logger.info('HTTP error %d: %s - will retry.',
                                http.error_code, url)
                        continue
                    raise

            new_mimetype = headers.get('content-type', self.__episode.mime_type)
            old_mimetype = self.__episode.mime_type
            _basename, ext = os.path.splitext(self.filename)
            if new_mimetype != old_mimetype or util.wrong_extension(ext):
                logger.info('Updating mime type: %s => %s', old_mimetype, new_mimetype)
                old_extension = self.__episode.extension()
                self.__episode.mime_type = new_mimetype
                new_extension = self.__episode.extension()

                # If the desired filename extension changed due to the new
                # mimetype, we force an update of the local filename to fix the
                # extension.
                if old_extension != new_extension or util.wrong_extension(ext):
                    self.filename = self.__episode.local_filename(create=True, force_update=True)

            # In some cases, the redirect of a URL causes the real filename to
            # be revealed in the final URL (e.g. http://gpodder.org/bug/1423)
            if real_url != url and not util.is_known_redirecter(real_url):
                realname, realext = util.filename_from_url(real_url)

                # Only update from redirect if the redirected-to filename has
                # a proper extension (this is needed for e.g. YouTube)
                if not util.wrong_extension(realext):
                    real_filename = ''.join((realname, realext))
                    self.filename = self.__episode.local_filename(create=True,
                            force_update=True, template=real_filename)
                    logger.info('Download was redirected (%s). New filename: %s',
                            real_url, os.path.basename(self.filename))

            # Look at the Content-disposition header; use if if available
            disposition_filename = get_header_param(headers, 'filename', 'content-disposition')

            # Some servers do send the content-disposition header, but provide
            # an empty filename, resulting in an empty string here (bug 1440)
            if disposition_filename is not None and disposition_filename != '':
                # The server specifies a download filename - try to use it
                disposition_filename = os.path.basename(disposition_filename)
                self.filename = self.__episode.local_filename(create=True,
                        force_update=True, template=disposition_filename)
                new_mimetype, encoding = mimetypes.guess_type(self.filename)
                if new_mimetype is not None:
                    logger.info('Using content-disposition mimetype: %s',
                            new_mimetype)
                    self.__episode.mime_type = new_mimetype

            # Re-evaluate filename and tempname to take care of podcast renames
            # while downloads are running (which will change both file names)
            self.filename = self.__episode.local_filename(create=False)
            self.tempname = os.path.join(os.path.dirname(self.filename),
                    os.path.basename(self.tempname))
            shutil.move(self.tempname, self.filename)

            # Model- and database-related updates after a download has finished
            self.__episode.on_downloaded(self.filename)
        except DownloadCancelledException:
            logger.info('Download has been cancelled/paused: %s', self)
            if self.status == DownloadTask.CANCELLED:
                util.delete_file(self.tempname)
                self.progress = 0.0
                self.speed = 0.0
        except urllib.error.ContentTooShortError as ctse:
            self.status = DownloadTask.FAILED
            self.error_message = _('Missing content from server')
        except IOError as ioe:
            logger.error('%s while downloading "%s": %s', ioe.strerror,
                    self.__episode.title, ioe.filename, exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'error': ioe.strerror, 'filename': ioe.filename}
            self.error_message = _('I/O Error: %(error)s: %(filename)s') % d
        except gPodderDownloadHTTPError as gdhe:
            logger.error('HTTP %s while downloading "%s": %s',
                    gdhe.error_code, self.__episode.title, gdhe.error_message,
                    exc_info=True)
            self.status = DownloadTask.FAILED
            d = {'code': gdhe.error_code, 'message': gdhe.error_message}
            self.error_message = _('HTTP Error %(code)s: %(message)s') % d
        except Exception as e:
            self.status = DownloadTask.FAILED
            logger.error('Download failed: %s', str(e), exc_info=True)
            self.error_message = _('Error: %s') % (str(e),)

        if self.status == DownloadTask.DOWNLOADING:
            # Everything went well - we're done
            self.status = DownloadTask.DONE
            if self.total_size <= 0:
                self.total_size = util.calculate_size(self.filename)
                logger.info('Total size updated to %d', self.total_size)
            self.progress = 1.0
            gpodder.user_extensions.on_episode_downloaded(self.__episode)
            return True

        self.speed = 0.0

        # We finished, but not successfully (at least not really)
        return False