def _attempt_resumable_download(self, key, fp, headers, cb, num_cb,
                                    torrent, version_id, hash_algs):
        """
        Makes a single attempt at downloading key into fp, resuming a
        partial download when possible.

        A resume is attempted only when fp already holds data and the etag
        recorded for the current download matches the key's etag (with
        surrounding quote characters stripped). Otherwise the tracker info
        is re-saved, fp is truncated and the download starts at byte 0.

        Raises ResumableDownloadException (ABORT disposition) if fp already
        holds more bytes than the key's size. Exceptions raised by
        key.get_file() are not caught here.
        """
        bytes_on_disk = get_cur_file_size(fp, position_to_eof=True)

        resumable = (
            bytes_on_disk and self.etag_value_for_current_download and
            self.etag_value_for_current_download == key.etag.strip('"\''))

        if not resumable:
            if key.bucket.connection.debug >= 1:
                print('Starting new resumable download.')
            self._save_tracker_info(key)
            self.download_start_point = 0
            # A fresh download may be landing on top of a stale file, so
            # throw away whatever is currently there.
            fp.truncate(0)
        elif bytes_on_disk > key.size:
            # Local file cannot be a prefix of the remote object.
            raise ResumableDownloadException(
                '%s is larger (%d) than %s (%d).\nDeleting tracker file, so '
                'if you re-try this download it will start from scratch' %
                (fp.name, bytes_on_disk, str(storage_uri_for_key(key)),
                 key.size),
                ResumableTransferDisposition.ABORT)
        elif bytes_on_disk == key.size:
            # Every byte is already present; nothing left to fetch.
            if key.bucket.connection.debug >= 1:
                print('Download complete.')
            return
        else:
            if key.bucket.connection.debug >= 1:
                print('Resuming download.')
            # Work on a copy so the caller's headers dict is not modified.
            headers = headers.copy()
            headers['Range'] = 'bytes=%d-%d' % (bytes_on_disk, key.size - 1)
            # Wrap the callback so it is given the already-downloaded byte
            # count as an offset.
            cb = ByteTranslatingCallbackHandler(cb, bytes_on_disk).call
            self.download_start_point = bytes_on_disk

        # override_num_retries=0 turns off AWSAuthConnection-level retries,
        # which would otherwise restart the download from scratch.
        get_file_kwargs = {'override_num_retries': 0}
        if isinstance(key, GSKey):
            # hash_algs is only passed through for GSKey instances.
            get_file_kwargs['hash_algs'] = hash_algs
        key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                     **get_file_kwargs)
        fp.flush()
예제 #2
0
 def _save_tracker_info(self, key):
     """
     Records the key's etag for the current download and, if a tracker
     file name is configured, writes that etag to the tracker file.

     The etag is stored with surrounding quote characters stripped in
     self.etag_value_for_current_download. Nothing is written when
     self.tracker_file_name is empty.

     Raises ResumableDownloadException (ABORT disposition) if the tracker
     file cannot be written.
     """
     self.etag_value_for_current_download = key.etag.strip('"\'')
     if not self.tracker_file_name:
         return
     try:
         # The context manager guarantees the handle is closed (and the
         # etag flushed to disk) even if the write fails.
         with open(self.tracker_file_name, 'w') as f:
             f.write('%s\n' % self.etag_value_for_current_download)
     except IOError as e:
         raise ResumableDownloadException(
             'Couldn\'t write tracker file (%s): %s.\nThis can happen '
             'if you\'re using an incorrectly configured download tool\n'
             '(e.g., gsutil configured to save tracker files to an '
             'unwritable directory)' % (self.tracker_file_name, e.strerror),
             ResumableTransferDisposition.ABORT)
예제 #3
0
 def _check_final_md5(self, key, file_name):
     """
     Checks that etag from server agrees with md5 computed after the
     download completes. This is important, since the download could
     have spanned a number of hours and multiple processes (e.g.,
     gsutil runs), and the user could change some of the file and not
     realize they have inconsistent data.

     If the md5 does not match the etag (compared with surrounding quote
     characters stripped), the downloaded file is deleted and
     ResumableDownloadException (ABORT disposition) is raised.
     """
     # Binary mode: the digest must be computed over the raw bytes, with no
     # newline translation or text decoding. The context manager closes the
     # file on every path, including the successful one.
     with open(file_name, 'rb') as fp:
         if key.bucket.connection.debug >= 1:
             print('Checking md5 against etag.')
         hex_md5 = key.compute_md5(fp)[0]
     if hex_md5 != key.etag.strip('"\''):
         # The file is already closed here, so it can be removed safely.
         os.unlink(file_name)
         raise ResumableDownloadException(
             'File changed during download: md5 signature doesn\'t match '
             'etag (incorrect downloaded file deleted)',
             ResumableTransferDisposition.ABORT)
예제 #4
0
                else:
                    if debug >= 1:
                        print(
                            'Caught ResumableDownloadException (%s) - will '
                            'retry' % e.message)

            # At this point we had a re-tryable failure; see if made progress.
            if get_cur_file_size(fp) > had_file_bytes_before_attempt:
                progress_less_iterations = 0
            else:
                progress_less_iterations += 1

            if progress_less_iterations > self.num_retries:
                # Don't retry any longer in the current process.
                raise ResumableDownloadException(
                    'Too many resumable download attempts failed without '
                    'progress. You might try this download again later',
                    ResumableTransferDisposition.ABORT_CUR_PROCESS)

            # Close the key, in case a previous download died partway
            # through and left data in the underlying key HTTP buffer.
            key.close()

            sleep_time_secs = 2**progress_less_iterations
            if debug >= 1:
                print(
                    'Got retryable failure (%d progress-less in a row).\n'
                    'Sleeping %d seconds before re-trying' %
                    (progress_less_iterations, sleep_time_secs))
            time.sleep(sleep_time_secs)
    def get_file(self, key, fp, headers, cb=None, num_cb=10, torrent=False,
                 version_id=None, hash_algs=None):
        """
        Downloads a Key into a file, retrying for as long as attempts keep
        making progress.

        :type key: :class:`boto.s3.key.Key` or subclass
        :param key: The Key object to download from

        :type fp: file
        :param fp: File pointer into which data should be downloaded

        :type headers: dict
        :param headers: Headers to send when retrieving the file; a falsy
            value is replaced by an empty dict.

        :type cb: function
        :param cb: (optional) a callback function that will be called to report
             progress on the download.  The callback should accept two integer
             parameters, the first representing the number of bytes that have
             been successfully transmitted from the storage service and
             the second representing the total number of bytes that need
             to be transmitted.

        :type num_cb: int
        :param num_cb: (optional) If a callback is specified with the cb
             parameter this parameter determines the granularity of the callback
             by defining the maximum number of times the callback will be
             called during the file transfer.

        :type torrent: bool
        :param torrent: Flag for whether to get a torrent for the file

        :type version_id: string
        :param version_id: The version ID (optional)

        :type hash_algs: dictionary
        :param hash_algs: (optional) Dictionary of hash algorithms and
            corresponding hashing class that implements update() and digest().
            Defaults to {'md5': hashlib/md5.md5}.

        Raises ResumableDownloadException if a problem occurs during
            the transfer, including when more than self.num_retries
            consecutive attempts fail without the file growing.
        """

        debug = key.bucket.connection.debug
        headers = headers or {}

        # Retry budget: the constructor value wins; otherwise fall back to
        # the boto config file, and finally to 6.
        if self.num_retries is None:
            self.num_retries = config.getint('Boto', 'num_retries', 6)
        stalled_attempts = 0

        while True:  # Keep going as long as attempts make progress.
            size_before_attempt = get_cur_file_size(fp)
            try:
                self._attempt_resumable_download(key, fp, headers, cb, num_cb,
                                                 torrent, version_id, hash_algs)
                # Download succeeded, so remove the tracker file (if any).
                self._remove_tracker_file()
                # No checksum validation happens here, for consistency with
                # non-resumable downloads; validating the file contents is
                # the caller's responsibility.
                if debug >= 1:
                    print('Resumable download complete.')
                return
            except self.RETRYABLE_EXCEPTIONS as e:
                if debug >= 1:
                    print('Caught exception (%s)' % e.__repr__())
                if isinstance(e, IOError) and e.errno == errno.EPIPE:
                    # A broken pipe makes httplib close the socket right
                    # away (http://bugs.python.org/issue5542), so the key
                    # is fetched again directly before resuming.
                    retry_kwargs = {'override_num_retries': 0}
                    if isinstance(key, GSKey):
                        retry_kwargs['hash_algs'] = hash_algs
                    key.get_file(fp, headers, cb, num_cb, torrent, version_id,
                                 **retry_kwargs)
            except ResumableDownloadException as e:
                disposition = e.disposition
                if disposition == ResumableTransferDisposition.ABORT_CUR_PROCESS:
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s)' % e.message)
                    raise
                if disposition == ResumableTransferDisposition.ABORT:
                    if debug >= 1:
                        print('Caught non-retryable ResumableDownloadException '
                              '(%s); aborting and removing tracker file' %
                              e.message)
                    self._remove_tracker_file()
                    raise
                # Any other disposition is treated as retryable.
                if debug >= 1:
                    print('Caught ResumableDownloadException (%s) - will '
                          'retry' % e.message)

            # Reaching this point means the attempt failed in a retryable
            # way; reset the stall counter only if the file grew.
            made_progress = get_cur_file_size(fp) > size_before_attempt
            stalled_attempts = 0 if made_progress else stalled_attempts + 1

            if stalled_attempts > self.num_retries:
                # Give up for this process; a later run may still resume.
                raise ResumableDownloadException(
                    'Too many resumable download attempts failed without '
                    'progress. You might try this download again later',
                    ResumableTransferDisposition.ABORT_CUR_PROCESS)

            # Close the key in case the failed attempt left data in its
            # underlying HTTP buffer. key.close() attempts a final read,
            # which raises IncompleteRead if the connection is already
            # closed; that error is safe to ignore.
            try:
                key.close()
            except httplib.IncompleteRead:
                pass

            # Exponential back-off keyed on the number of stalled attempts.
            sleep_time_secs = 2 ** stalled_attempts
            if debug >= 1:
                print('Got retryable failure (%d progress-less in a row).\n'
                      'Sleeping %d seconds before re-trying' %
                      (stalled_attempts, sleep_time_secs))
            time.sleep(sleep_time_secs)