Exemple #1
0
    def _start_new_resumable_upload(self, key, headers=None):
        """
        Starts a new resumable upload.

        Raises ResumableUploadException if any errors occur.
        """
        conn = key.bucket.connection
        if conn.debug >= 1:
            print('Starting new resumable upload.')
        self.server_has_bytes = 0

        # Start a new resumable upload by sending a POST request with an
        # empty body and the "X-Goog-Resumable: start" header. Include any
        # caller-provided headers (e.g., Content-Type) EXCEPT Content-Length
        # (and raise an exception if they tried to pass one, since it's
        # a semantic error to specify it at this point, and if we were to
        # include one now it would cause the server to expect that many
        # bytes; the POST doesn't include the actual file bytes  We set
        # the Content-Length in the subsequent PUT, based on the uploaded
        # file size.
        post_headers = {}
        for k in headers:
            if k.lower() == 'content-length':
                raise ResumableUploadException(
                    'Attempt to specify Content-Length header (disallowed)',
                    ResumableTransferDisposition.ABORT)
            post_headers[k] = headers[k]
        post_headers[conn.provider.resumable_upload_header] = 'start'

        resp = conn.make_request(
            'POST', key.bucket.name, key.name, post_headers)
        # Get tracker URI from response 'Location' header.
        body = resp.read()

        # Check for various status conditions.
        if resp.status in [500, 503]:
            # Retry status 500 and 503 errors after a delay.
            raise ResumableUploadException(
                'Got status %d from attempt to start resumable upload. '
                'Will wait/retry' % resp.status,
                ResumableTransferDisposition.WAIT_BEFORE_RETRY)
        elif resp.status != 200 and resp.status != 201:
            raise ResumableUploadException(
                'Got status %d from attempt to start resumable upload. '
                'Aborting' % resp.status,
                ResumableTransferDisposition.ABORT)

        # Else we got 200 or 201 response code, indicating the resumable
        # upload was created.
        tracker_uri = resp.getheader('Location')
        if not tracker_uri:
            raise ResumableUploadException(
                'No resumable tracker URI found in resumable initiation '
                'POST response (%s)' % body,
                ResumableTransferDisposition.WAIT_BEFORE_RETRY)
        self._set_tracker_uri(tracker_uri)
        self._save_tracker_uri_to_file()
Exemple #2
0
    def _query_server_pos(self, conn, file_length):
        """
        Queries server to find out what bytes it currently has.

        Returns (server_start, server_end), where the values are inclusive.
        For example, (0, 2) would mean that the server has bytes 0, 1, *and* 2.

        Raises ResumableUploadException if problem querying server.
        """
        resp = self._query_server_state(conn, file_length)
        if resp.status == 200:
            # To handle the boundary condition where the server has the complete
            # file, we return (server_start, file_length-1). That way the
            # calling code can always simply read up through server_end. (If we
            # didn't handle this boundary condition here, the caller would have
            # to check whether server_end == file_length and read one fewer byte
            # in that case.)
            return (0, file_length - 1)  # Completed upload.
        if resp.status != 308:
            # This means the server didn't have any state for the given
            # upload ID, which can happen (for example) if the caller saved
            # the tracker URI to a file and then tried to restart the transfer
            # after that upload ID has gone stale. In that case we need to
            # start a new transfer (and the caller will then save the new
            # tracker URI to the tracker file).
            raise ResumableUploadException(
                'Got non-308 response (%s) from server state query' %
                resp.status, ResumableTransferDisposition.START_OVER)
        got_valid_response = False
        range_spec = resp.getheader('range')
        if range_spec:
            # Parse 'bytes=<from>-<to>' range_spec.
            m = re.search('bytes=(\d+)-(\d+)', range_spec)
            if m:
                server_start = long(m.group(1))
                server_end = long(m.group(2))
                got_valid_response = True
        else:
            # No Range header, which means the server does not yet have
            # any bytes. Note that the Range header uses inclusive 'from'
            # and 'to' values. Since Range 0-0 would mean that the server
            # has byte 0, omitting the Range header is used to indicate that
            # the server doesn't have any bytes.
            return self.SERVER_HAS_NOTHING
        if not got_valid_response:
            raise ResumableUploadException(
                'Couldn\'t parse upload server state query response (%s)' %
                str(resp.getheaders()), ResumableTransferDisposition.START_OVER)
        if conn.debug >= 1:
            print('Server has: Range: %d - %d.' % (server_start, server_end))
        return (server_start, server_end)
Exemple #3
0
    def track_progress_less_iterations(self, server_had_bytes_before_attempt,
                                       roll_back_md5=True, debug=0):
        """
        Records the outcome of a failed-but-retryable attempt and backs off.

        Resets self.progress_less_iterations when self.server_has_bytes grew
        past server_had_bytes_before_attempt; otherwise increments it and,
        if roll_back_md5 is true, restores self.digesters from
        self.digesters_before_attempt. Then sleeps for a random interval
        bounded by 2**progress_less_iterations seconds.

        Raises ResumableUploadException (ABORT_CUR_PROCESS) once the number
        of consecutive progress-less attempts exceeds self.num_retries.
        """
        # At this point we had a re-tryable failure; see if made progress.
        made_progress = (
            self.server_has_bytes > server_had_bytes_before_attempt)
        if made_progress:
            # Progress was made, so the counter starts over.
            self.progress_less_iterations = 0
        else:
            self.progress_less_iterations += 1
            if roll_back_md5:
                # Nothing new reached the server in this iteration, so undo
                # any hash updates it may have applied.
                self.digesters = self.digesters_before_attempt

        stalled_attempts = self.progress_less_iterations
        if stalled_attempts > self.num_retries:
            # Don't retry any longer in the current process.
            raise ResumableUploadException(
                'Too many resumable upload attempts failed without '
                'progress. You might try this upload again later',
                ResumableTransferDisposition.ABORT_CUR_PROCESS)

        # Use binary exponential backoff to desynchronize client requests.
        sleep_time_secs = random.random() * (2 ** stalled_attempts)
        if debug >= 1:
            print('Got retryable failure (%d progress-less in a row).\n'
                  'Sleeping %3.1f seconds before re-trying' %
                  (stalled_attempts, sleep_time_secs))
        time.sleep(sleep_time_secs)
Exemple #4
0
 def _save_tracker_uri_to_file(self):
     """
     Saves URI to tracker file if one was passed to constructor.
     """
     if not self.tracker_file_name:
         return
     f = None
     try:
         with os.fdopen(os.open(self.tracker_file_name,
                                os.O_WRONLY | os.O_CREAT, 0o600), 'w') as f:
           f.write(self.tracker_uri)
     except IOError as e:
         raise ResumableUploadException(
             'Couldn\'t write URI tracker file (%s): %s.\nThis can happen'
             'if you\'re using an incorrectly configured upload tool\n'
             '(e.g., gsutil configured to save tracker files to an '
             'unwritable directory)' %
             (self.tracker_file_name, e.strerror),
             ResumableTransferDisposition.ABORT)
Exemple #5
0
 def _check_final_md5(self, key, etag):
     """
     Checks that etag from server agrees with md5 computed before upload.
     This is important, since the upload could have spanned a number of
     hours and multiple processes (e.g., gsutil runs), and the user could
     change some of the file and not realize they have inconsistent data.
     """
     if key.bucket.connection.debug >= 1:
         print('Checking md5 against etag.')
     if key.md5 != etag.strip('"\''):
         # Call key.open_read() before attempting to delete the
         # (incorrect-content) key, so we perform that request on a
         # different HTTP connection. This is neededb because httplib
         # will return a "Response not ready" error if you try to perform
         # a second transaction on the connection.
         key.open_read()
         key.close()
         key.delete()
         raise ResumableUploadException(
             'File changed during upload: md5 signature doesn\'t match etag '
             '(incorrect uploaded object deleted)',
             ResumableTransferDisposition.ABORT)
Exemple #6
0
    def _attempt_resumable_upload(self, key, fp, file_length, headers, cb,
                                  num_cb):
        """
        Attempts a resumable upload.

        Returns (etag, generation, metageneration) from server upon success.

        Raises ResumableUploadException if any problems occur.
        """
        (server_start, server_end) = self.SERVER_HAS_NOTHING
        conn = key.bucket.connection
        if self.tracker_uri:
            # Try to resume existing resumable upload.
            try:
                (server_start, server_end) = (
                    self._query_server_pos(conn, file_length))
                self.server_has_bytes = server_start

                if server_end:
                  # If the server already has some of the content, we need to
                  # update the digesters with the bytes that have already been
                  # uploaded to ensure we get a complete hash in the end.
                  print('Catching up hash digest(s) for resumed upload')
                  fp.seek(0)
                  # Read local file's bytes through position server has. For
                  # example, if server has (0, 3) we want to read 3-0+1=4 bytes.
                  bytes_to_go = server_end + 1
                  while bytes_to_go:
                      chunk = fp.read(min(key.BufferSize, bytes_to_go))
                      if not chunk:
                          raise ResumableUploadException(
                              'Hit end of file during resumable upload hash '
                              'catchup. This should not happen under\n'
                              'normal circumstances, as it indicates the '
                              'server has more bytes of this transfer\nthan'
                              ' the current file size. Restarting upload.',
                              ResumableTransferDisposition.START_OVER)
                      for alg in self.digesters:
                          self.digesters[alg].update(chunk)
                      bytes_to_go -= len(chunk)

                if conn.debug >= 1:
                    print('Resuming transfer.')
            except ResumableUploadException as e:
                if conn.debug >= 1:
                    print('Unable to resume transfer (%s).' % e.message)
                self._start_new_resumable_upload(key, headers)
        else:
            self._start_new_resumable_upload(key, headers)

        # upload_start_point allows the code that instantiated the
        # ResumableUploadHandler to find out the point from which it started
        # uploading (e.g., so it can correctly compute throughput).
        if self.upload_start_point is None:
            self.upload_start_point = server_end

        total_bytes_uploaded = server_end + 1
        # Corner case: Don't attempt to seek if we've already uploaded the
        # entire file, because if the file is a stream (e.g., the KeyFile
        # wrapper around input key when copying between providers), attempting
        # to seek to the end of file would result in an InvalidRange error.
        if file_length < total_bytes_uploaded:
          fp.seek(total_bytes_uploaded)
        conn = key.bucket.connection

        # Get a new HTTP connection (vs conn.get_http_connection(), which reuses
        # pool connections) because httplib requires a new HTTP connection per
        # transaction. (Without this, calling http_conn.getresponse() would get
        # "ResponseNotReady".)
        http_conn = conn.new_http_connection(self.tracker_uri_host, conn.port,
                                             conn.is_secure)
        http_conn.set_debuglevel(conn.debug)

        # Make sure to close http_conn at end so if a local file read
        # failure occurs partway through server will terminate current upload
        # and can report that progress on next attempt.
        try:
            return self._upload_file_bytes(conn, http_conn, fp, file_length,
                                           total_bytes_uploaded, cb, num_cb,
                                           headers)
        except (ResumableUploadException, socket.error):
            resp = self._query_server_state(conn, file_length)
            if resp.status == 400:
                raise ResumableUploadException('Got 400 response from server '
                    'state query after failed resumable upload attempt. This '
                    'can happen for various reasons, including specifying an '
                    'invalid request (e.g., an invalid canned ACL) or if the '
                    'file size changed between upload attempts',
                    ResumableTransferDisposition.ABORT)
            else:
                raise
        finally:
            http_conn.close()
Exemple #7
0
    def _upload_file_bytes(self, conn, http_conn, fp, file_length,
                           total_bytes_uploaded, cb, num_cb, headers):
        """
        Makes one attempt to upload file bytes, using an existing resumable
        upload connection.

        Args:
            conn: Connection used to build the request and supplying the
                debug level.
            http_conn: HTTP connection the PUT is sent on.
            fp: File-like object positioned at the first byte to send.
            file_length: Total size of the file in bytes.
            total_bytes_uploaded: Number of bytes the server already has.
            cb: Optional progress callback, invoked as
                cb(total_bytes_uploaded, file_length).
            num_cb: Callback granularity: > 2 spreads calls across the
                transfer, < 0 calls after every buffer, otherwise cb is
                only called at the start and end.
            headers: Optional dict of headers to include in the PUT.

        Returns (etag, generation, metageneration) from server upon success.

        Raises ResumableUploadException if any problems occur.
        """
        buf = fp.read(self.BUFFER_SIZE)
        if cb:
            # The cb_count represents the number of full buffers to send between
            # cb executions.
            if num_cb > 2:
                # Floor division keeps cb_count an integer on Python 3 too;
                # with true division the equality check in the send loop
                # would rarely match and callbacks would be skipped.
                cb_count = file_length // self.BUFFER_SIZE // (num_cb - 2)
            elif num_cb < 0:
                cb_count = -1
            else:
                cb_count = 0
            buffers_since_cb = 0
            cb(total_bytes_uploaded, file_length)

        # Build resumable upload headers for the transfer. Don't send a
        # Content-Range header if the file is 0 bytes long, because the
        # resumable upload protocol uses an *inclusive* end-range (so, sending
        # 'bytes 0-0/1' would actually mean you're sending a 1-byte file).
        if not headers:
            put_headers = {}
        else:
            put_headers = headers.copy()
        if file_length:
            if total_bytes_uploaded == file_length:
                range_header = self._build_content_range_header(
                    '*', file_length)
            else:
                range_header = self._build_content_range_header(
                    '%d-%d' % (total_bytes_uploaded, file_length - 1),
                    file_length)
            put_headers['Content-Range'] = range_header
        # Set Content-Length to the total bytes we'll send with this PUT.
        put_headers['Content-Length'] = str(file_length - total_bytes_uploaded)
        http_request = AWSAuthConnection.build_base_http_request(
            conn, 'PUT', path=self.tracker_uri_path, auth_path=None,
            headers=put_headers, host=self.tracker_uri_host)
        http_conn.putrequest('PUT', http_request.path)
        for k in put_headers:
            http_conn.putheader(k, put_headers[k])
        http_conn.endheaders()

        # Turn off debug on http connection so upload content isn't included
        # in debug stream.
        http_conn.set_debuglevel(0)
        while buf:
            http_conn.send(buf)
            for alg in self.digesters:
                self.digesters[alg].update(buf)
            total_bytes_uploaded += len(buf)
            if cb:
                buffers_since_cb += 1
                if buffers_since_cb == cb_count or cb_count == -1:
                    cb(total_bytes_uploaded, file_length)
                    buffers_since_cb = 0
            buf = fp.read(self.BUFFER_SIZE)
        # Restore http connection debug level now that the body is sent.
        http_conn.set_debuglevel(conn.debug)
        if cb:
            cb(total_bytes_uploaded, file_length)
        if total_bytes_uploaded != file_length:
            # Abort so if the user retries they'll start a new resumable
            # upload rather than potentially attempting to pick back up
            # later where we left off. (Presumably the ABORT disposition is
            # what makes the caller discard the tracker file -- not visible
            # in this method.)
            raise ResumableUploadException(
                'File changed during upload: EOF at %d bytes of %d byte file.' %
                (total_bytes_uploaded, file_length),
                ResumableTransferDisposition.ABORT)
        resp = http_conn.getresponse()

        if resp.status == 200:
            # Success.
            return (resp.getheader('etag'),
                    resp.getheader('x-goog-generation'),
                    resp.getheader('x-goog-metageneration'))
        # Retry timeout (408) and status 500 and 503 errors after a delay.
        elif resp.status in [408, 500, 503]:
            disposition = ResumableTransferDisposition.WAIT_BEFORE_RETRY
        else:
            # Catch all for any other error codes.
            disposition = ResumableTransferDisposition.ABORT
        raise ResumableUploadException('Got response code %d while attempting '
                                       'upload (%s)' %
                                       (resp.status, resp.reason), disposition)