def _copy_via_put_post(self, src, dest, metadata=None):
    """Fallback copy method for older Swift implementations.

    Copies *src* to *dest* via PUT with X-Copy-From. If *metadata* is
    given, the copy goes through a temporary object whose metadata is
    replaced with a POST before it is renamed to *dest*.
    """
    copy_headers = CaseInsensitiveDict()
    copy_headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, src)

    target = dest
    if metadata is not None:
        # We can't do a direct copy, because during copy we can only update
        # the metadata, but not replace it. Therefore, we have to make a full
        # copy followed by a separate request to replace the metadata. To
        # avoid an inconsistent intermediate state, we use a temporary object
        # that the server auto-expires in case we crash midway.
        target = dest + TEMP_SUFFIX
        copy_headers['X-Delete-After'] = '600'

    try:
        self._copy_helper('PUT', '/%s%s' % (self.prefix, target), copy_headers)
    except HTTPError as exc:
        if exc.status == 404:
            raise NoSuchObject(src)
        raise

    if metadata is None:
        return

    # Replace metadata on the temporary copy
    meta_headers = CaseInsensitiveDict()
    self._add_meta_headers(meta_headers, metadata, chunksize=self.features.max_meta_len)
    self._copy_helper('POST', '/%s%s' % (self.prefix, target), meta_headers)

    # Rename temporary object to the final destination
    rename_headers = CaseInsensitiveDict()
    rename_headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, target)
    self._copy_helper('PUT', '/%s%s' % (self.prefix, dest), rename_headers)
def _extract_b2_metadata(self, response, obj_key):
    '''Extract metadata from HTTP response object.

    Collects the `*meta-NNN` headers of *response*, reassembles and
    decodes them into a dict, and verifies the stored metadata MD5.
    Raises `CorruptedObjectError` for unknown formats (or, for the
    bootstrap objects, on MD5 mismatch) and `BadDigestError` on MD5
    mismatch otherwise.
    '''
    headers = CaseInsensitiveDict()
    for k, v in response.headers.items():
        # we convert to lower case in order to do case-insensitive comparison
        if k.lower().startswith(info_header_prefix.lower() + 'meta-'):
            headers[k] = self._b2_url_decode(v)

    format_ = headers.get('%smeta-format' % info_header_prefix, 'raw')
    if format_ != 'raw2':  # Current metadata format
        raise CorruptedObjectError('invalid metadata format: %s' % format_)

    # Metadata is split over numbered header chunks; collect until a gap.
    parts = []
    for i in count():
        part = headers.get('%smeta-%03d' % (info_header_prefix, i), None)
        if part is None:
            break
        parts.append(part)

    # The reassembled payload is a percent-encoded Python dict literal.
    buffer = urllib.parse.unquote(''.join(parts))
    meta = literal_eval('{ %s }' % buffer)

    # Decode bytes values
    for (k, v) in meta.items():
        if not isinstance(v, bytes):
            continue
        try:
            meta[k] = base64.b64decode(v)
        except binascii.Error:
            # This should trigger a MD5 mismatch below
            meta[k] = None

    # Check MD5. There is a case to be made for treating a mismatch as a
    # `CorruptedObjectError` rather than a `BadDigestError`, because the MD5
    # sum is not calculated on-the-fly by the server but stored with the
    # object, and therefore does not actually verify what the server has
    # sent over the wire. However, it seems more likely for the data to get
    # accidentally corrupted in transit than to get accidentally corrupted
    # on the server (which hopefully checksums its storage devices).
    md5 = base64.b64encode(checksum_basic_mapping(meta)).decode('ascii')
    if md5 != headers.get('%smeta-md5' % info_header_prefix, None):
        log.warning('MD5 mismatch in metadata for %s', obj_key)

        # When trying to read file system revision 23 or earlier, we will
        # get a MD5 error because the checksum was calculated
        # differently. In order to get a better error message, we special
        # case the s3ql_passphrase and s3ql_metadata object (which are only
        # retrieved once at program start).
        if obj_key in ('s3ql_passphrase', 's3ql_metadata'):
            raise CorruptedObjectError('Meta MD5 for %s does not match' % obj_key)
        raise BadDigestError(400, 'bad_digest', 'Meta MD5 for %s does not match' % obj_key)

    return meta
def _get_access_token(self):
    '''Refresh the OAuth2 access token using the stored refresh token.

    On success, stores the new token in `self.access_token`. Raises
    `AuthenticationError` if the server reports an OAuth error, and
    `RuntimeError` if a non-2xx response carries no error field.
    '''
    log.info('Requesting new access token')

    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=utf-8'
    body = urllib.parse.urlencode({
        'client_id': OAUTH_CLIENT_ID,
        'client_secret': OAUTH_CLIENT_SECRET,
        'refresh_token': self.password,
        'grant_type': 'refresh_token'
    })

    conn = HTTPConnection('accounts.google.com', 443, proxy=self.proxy,
                          ssl_context=self.ssl_context)
    try:
        conn.send_request('POST', '/o/oauth2/token', headers=headers,
                          body=body.encode('utf-8'))
        resp = conn.read_response()
        json_resp = self._parse_json_response(resp, conn)

        if 'error' in json_resp:
            raise AuthenticationError(json_resp['error'])
        if resp.status > 299 or resp.status < 200:
            # Previously an `assert 'error' in json_resp` — which is stripped
            # under `python -O` and would then fall through to a confusing
            # KeyError on 'access_token'. Fail explicitly instead.
            raise RuntimeError('OAuth2 token request failed with status %d, '
                               'but response contains no error field' % resp.status)
        self.access_token[self.password] = json_resp['access_token']
    finally:
        conn.disconnect()
def open_write(self, key, metadata=None, is_compressed=False, extra_headers=None):
    """Return a file-like object for writing object *key*.

    The returned object buffers all data and only starts the upload
    when its `close` method is called.
    """
    log.debug('started with %s', key)

    hdrs = CaseInsensitiveDict()
    if extra_headers is not None:
        hdrs.update(extra_headers)
    self._add_meta_headers(hdrs, metadata if metadata is not None else dict())
    return ObjectW(key, self, hdrs)
def close(self): """Close object and upload data""" log.debug('started with %s', self.key) if self.closed: # still call fh.close, may have generated an error before self.fh.close() return self.fh.seek(0) upload_auth_token, upload_url = self.backend._get_upload_url() upload_url = urllib.parse.urlparse(upload_url) with HTTPConnection(upload_url.hostname, 443, ssl_context=self.backend.ssl_context) as conn_up: headers = CaseInsensitiveDict() headers['X-Bz-File-Name'] = self.backend.prefix + self.key headers['Content-Type'] = 'application/octet-stream' headers['Content-Length'] = self.obj_size headers['X-Bz-Content-Sha1'] = self.sha1.hexdigest() if self.meta is None: self.meta = dict() self.backend._add_meta_headers(headers, self.meta) self.backend._do_request('POST', upload_url.path + '?' + upload_url.query, conn_up, headers=headers, body=self.fh, auth_token=upload_auth_token, body_size=self.obj_size) self.fh.close() self.closed = True
def copy(self, src, dest, metadata=None):
    """Copy object *src* to *dest* via the GS rewrite API, optionally
    replacing its metadata."""
    log.debug('started with %s, %s', src, dest)
    if metadata is not None and not isinstance(metadata, dict):
        raise TypeError('*metadata*: expected dict or None, got %s' % type(metadata))

    headers = CaseInsensitiveDict()
    body = None
    if metadata is not None:
        headers['Content-Type'] = 'application/json; charset="utf-8"'
        body = json.dumps({'metadata': _wrap_user_meta(metadata)}).encode()

    quote = urllib.parse.quote
    path = '/storage/v1/b/%s/o/%s/rewriteTo/b/%s/o/%s' % (
        quote(self.bucket_name, safe=''),
        quote(self.prefix + src, safe=''),
        quote(self.bucket_name, safe=''),
        quote(self.prefix + dest, safe=''))

    try:
        resp = self._do_request('POST', path, headers=headers, body=body)
    except RequestError as exc:
        mapped = _map_request_error(exc, src)
        if mapped:
            raise mapped
        raise

    result = self._parse_json_response(resp)
    # A single same-bucket rewrite call is expected to finish the copy.
    assert result['done']
    assert 'rewriteToken' not in result
def test_put_separate(conn):
    payload = DUMMY_DATA

    # Upload without Content-MD5: accepted, but server cannot verify
    conn.send_request('PUT', '/allgood', body=BodyFollowing(len(payload)))
    conn.write(payload)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'Ok, but no MD5'

    # Upload with a correct Content-MD5: server verifies the digest
    headers = CaseInsensitiveDict()
    headers['Content-MD5'] = b64encode(hashlib.md5(payload).digest()).decode('ascii')
    conn.send_request('PUT', '/allgood', body=BodyFollowing(len(payload)),
                      headers=headers)
    conn.write(payload)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'MD5 matched'

    # Upload with a wrong Content-MD5: rejected
    headers['Content-MD5'] = 'nUzaJEag3tOdobQVU/39GA=='
    conn.send_request('PUT', '/allgood', body=BodyFollowing(len(payload)),
                      headers=headers)
    conn.write(payload)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 400
    assert resp.reason.startswith('MD5 mismatch')
def update_meta(self, key, metadata):
    """Replace the metadata of object *key* with *metadata*."""
    log.debug('started with %s', key)
    meta_headers = CaseInsensitiveDict()
    self._add_meta_headers(meta_headers, metadata,
                           chunksize=self.features.max_meta_len)
    self._do_request('POST', '/%s%s' % (self.prefix, key), headers=meta_headers)
    self.conn.discard()
def _get_conn(self):
    '''Obtain connection to server and authentication token.

    Tries the legacy Swift auth paths in order; on success stores
    `self.auth_token` / `self.auth_prefix` and returns a fresh
    connection to the storage URL announced by the server.
    '''
    log.debug('started')

    if 'no-ssl' in self.options:
        ssl_context = None
    else:
        ssl_context = self.ssl_context

    headers = CaseInsensitiveDict()
    headers['X-Auth-User'] = self.login
    headers['X-Auth-Key'] = self.password

    with HTTPConnection(self.hostname, self.port, proxy=self.proxy,
                        ssl_context=ssl_context) as conn:
        conn.timeout = int(self.options.get('tcp-timeout', 20))

        # Probe both common v1 auth endpoints; 404/412 means "wrong path".
        for auth_path in ('/v1.0', '/auth/v1.0'):
            log.debug('GET %s', auth_path)
            conn.send_request('GET', auth_path, headers=headers)
            resp = conn.read_response()

            if resp.status in (404, 412):
                log.debug('auth to %s failed, trying next path', auth_path)
                conn.discard()
                continue
            elif resp.status == 401:
                raise AuthorizationError(resp.reason)
            elif resp.status > 299 or resp.status < 200:
                raise HTTPError(resp.status, resp.reason, resp.headers)

            # Pylint can't infer SplitResult Types
            #pylint: disable=E1103
            self.auth_token = resp.headers['X-Auth-Token']
            o = urlsplit(resp.headers['X-Storage-Url'])
            self.auth_prefix = urllib.parse.unquote(o.path)

            # Pick the SSL context matching the storage URL scheme.
            if o.scheme == 'https':
                ssl_context = self.ssl_context
            elif o.scheme == 'http':
                ssl_context = None
            else:
                # fall through to scheme used for authentication
                pass

            conn = HTTPConnection(o.hostname, o.port, proxy=self.proxy,
                                  ssl_context=ssl_context)
            conn.timeout = int(self.options.get('tcp-timeout', 20))
            return conn

        raise RuntimeError('No valid authentication path found')
def copy(self, src, dest, metadata=None, extra_headers=None):
    """Copy object *src* to *dest*, optionally replacing metadata.

    Validates the server's XML reply unless the 'dumb-copy' option is set.
    """
    log.debug('started with %s, %s', src, dest)

    headers = CaseInsensitiveDict()
    if extra_headers is not None:
        headers.update(extra_headers)
    headers[self.hdr_prefix + 'copy-source'] = \
        urllib.parse.quote('/%s/%s%s' % (self.bucket_name, self.prefix, src))

    directive = 'COPY' if metadata is None else 'REPLACE'
    headers[self.hdr_prefix + 'metadata-directive'] = directive
    if metadata is not None:
        self._add_meta_headers(headers, metadata)

    try:
        resp = self._do_request('PUT', '/%s%s' % (self.prefix, dest), headers=headers)
    except NoSuchKeyError:
        raise NoSuchObject(src)

    # When copying, S3 may return error despite a 200 OK status
    # http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
    # https://doc.s3.amazonaws.com/proposals/copy.html
    if self.options.get('dumb-copy', False):
        self.conn.discard()
        return

    body = self.conn.readall()
    root = self._parse_xml_response(resp, body)

    # Some S3 implemenentations do not have a namespace on CopyObjectResult.
    tag = root.tag
    if tag in (self.xml_ns_prefix + 'CopyObjectResult', 'CopyObjectResult'):
        return
    if tag in (self.xml_ns_prefix + 'Error', 'Error'):
        raise get_S3Error(root.findtext('Code'), root.findtext('Message'),
                          resp.headers)
    log.error('Unexpected server reply to copy operation:\n%s',
              self._dump_response(resp, body))
    raise RuntimeError('Copy response has %s as root tag' % tag)
def _do_request(self, method, path, subres=None, query_string=None, headers=None, body=None):
    '''Send request, read and return response object.

    This method modifies the *headers* dictionary.

    Raises `HTTPError` for non-2xx replies, and delegates 401 replies to
    `_do_authentication_expired` (which raises AuthenticationExpired).
    '''
    log.debug('started with %r, %r, %r, %r, %r, %r', method, path, subres,
              query_string, headers, body)

    if headers is None:
        headers = CaseInsensitiveDict()

    if isinstance(body, (bytes, bytearray, memoryview)):
        headers['Content-MD5'] = md5sum_b64(body)

    if self.conn is None:
        log.debug('no active connection, calling _get_conn()')
        self.conn = self._get_conn()

    # Construct full path
    path = urllib.parse.quote('%s/%s%s' % (self.auth_prefix, self.container_name, path))
    if query_string:
        s = urllib.parse.urlencode(query_string, doseq=True)
        if subres:
            path += '?%s&%s' % (subres, s)
        else:
            path += '?%s' % s
    elif subres:
        path += '?%s' % subres

    headers['X-Auth-Token'] = self.auth_token
    try:
        resp = self._do_request_inner(method, path, body=body, headers=headers)
    except Exception as exc:
        if is_temp_network_error(exc) or isinstance(exc, ssl.SSLError):
            # We probably can't use the connection anymore
            self.conn.disconnect()
        raise

    # Success
    if resp.status >= 200 and resp.status <= 299:
        return resp

    # Expired auth token
    if resp.status == 401:
        self._do_authentication_expired(resp.reason)  # raises AuthenticationExpired

    # Drain any response body before raising. (For HEAD the server must not
    # send one, but draining is harmless; the original code had separate
    # HEAD / non-HEAD branches that raised the identical exception.)
    self.conn.discard()
    raise HTTPError(resp.status, resp.reason, resp.headers)
def _copy_via_copy(self, src, dest, metadata=None):
    """Copy for more modern Swift implementations that know the
    X-Fresh-Metadata option and the native COPY method."""
    hdrs = CaseInsensitiveDict()
    hdrs['Destination'] = '/%s/%s%s' % (self.container_name, self.prefix, dest)
    if metadata is not None:
        # Replace (not merge) metadata on the destination object.
        self._add_meta_headers(hdrs, metadata,
                               chunksize=self.features.max_meta_len)
        hdrs['X-Fresh-Metadata'] = 'true'
    response = self._do_request('COPY', '/%s%s' % (self.prefix, src), headers=hdrs)
    self._assert_empty_response(response)
def _get_access_token(self):
    '''Refresh the OAuth2 access token using the stored refresh token.

    On success, stores the new token in `self.access_token`. Raises
    `AuthenticationError` if the server reports an OAuth error, and
    `HTTPError` / `RuntimeError` for transport or malformed responses.
    '''
    log.info('Requesting new access token')

    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'application/x-www-form-urlencoded; charset=utf-8'

    body = urlencode({'client_id': OAUTH_CLIENT_ID,
                      'client_secret': OAUTH_CLIENT_SECRET,
                      'refresh_token': self.password,
                      'grant_type': 'refresh_token'})

    conn = HTTPConnection('accounts.google.com', 443, proxy=self.proxy,
                          ssl_context=self.ssl_context)
    try:
        conn.send_request('POST', '/o/oauth2/token', headers=headers,
                          body=body.encode('utf-8'))
        resp = conn.read_response()

        if resp.status > 299 or resp.status < 200:
            raise HTTPError(resp.status, resp.reason, resp.headers)

        # Only accept a JSON reply; extract the charset if one is declared.
        content_type = resp.headers.get('Content-Type', None)
        if content_type:
            hit = re.match(r'application/json(?:; charset="(.+)")?$',
                           resp.headers['Content-Type'], re.IGNORECASE)
        else:
            hit = None
        if not hit:
            log.error('Unexpected server reply when refreshing access token:\n%s',
                      self._dump_response(resp))
            raise RuntimeError('Unable to parse server response')

        charset = hit.group(1) or 'utf-8'
        body = conn.readall().decode(charset)
        resp_json = json.loads(body)

        if not isinstance(resp_json, dict):
            log.error('Invalid json server response. Expected dict, got:\n%s', body)
            raise RuntimeError('Unable to parse server response')

        if 'error' in resp_json:
            raise AuthenticationError(resp_json['error'])

        if 'access_token' not in resp_json:
            log.error('Unable to find access token in server response:\n%s', body)
            raise RuntimeError('Unable to parse server response')

        # Token cache is keyed by the refresh token (stored in self.password).
        self.access_token[self.password] = resp_json['access_token']
    finally:
        conn.disconnect()
def copy(self, src, dest, metadata=None, extra_headers=None):
    """Copy object *src* to *dest*, optionally replacing metadata."""
    log.debug('started with %s, %s', src, dest)

    request_headers = CaseInsensitiveDict()
    if extra_headers is not None:
        request_headers.update(extra_headers)
    source_path = '/%s/%s%s' % (self.bucket_name, self.prefix, src)
    request_headers[self.hdr_prefix + 'copy-source'] = urllib.parse.quote(source_path)

    if metadata is None:
        request_headers[self.hdr_prefix + 'metadata-directive'] = 'COPY'
    else:
        request_headers[self.hdr_prefix + 'metadata-directive'] = 'REPLACE'
        self._add_meta_headers(request_headers, metadata)

    try:
        resp = self._do_request('PUT', '/%s%s' % (self.prefix, dest),
                                headers=request_headers)
    except NoSuchKeyError:
        raise NoSuchObject(src)

    # When copying, S3 may return error despite a 200 OK status
    # http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectCOPY.html
    # https://doc.s3.amazonaws.com/proposals/copy.html
    if self.options.get('dumb-copy', False):
        self.conn.discard()
        return

    reply_body = self.conn.readall()
    root = self._parse_xml_response(resp, reply_body)

    # Some S3 implemenentations do not have a namespace on CopyObjectResult.
    if root.tag in [self.xml_ns_prefix + 'CopyObjectResult', 'CopyObjectResult']:
        return
    elif root.tag in [self.xml_ns_prefix + 'Error', 'Error']:
        raise get_S3Error(root.findtext('Code'), root.findtext('Message'),
                          resp.headers)
    else:
        log.error('Unexpected server reply to copy operation:\n%s',
                  self._dump_response(resp, reply_body))
        raise RuntimeError('Copy response has %s as root tag' % root.tag)
def copy(self, src, dest, metadata=None):
    """Copy object *src* to *dest*, optionally replacing its metadata.

    With metadata, the copy goes through a temporary object whose
    metadata is replaced before it is renamed to *dest*.
    """
    log.debug('started with %s, %s', src, dest)
    for key in (dest, src):
        if key.endswith(TEMP_SUFFIX):
            raise ValueError('Keys must not end with %s' % TEMP_SUFFIX)

    put_headers = CaseInsensitiveDict()
    put_headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, src)

    upload_key = dest
    if metadata is not None:
        # We can't do a direct copy, because during copy we can only update
        # the metadata, but not replace it. Therefore, we have to make a full
        # copy followed by a separate request to replace the metadata. To
        # avoid an inconsistent intermediate state, we use a temporary object
        # that the server auto-expires in case we crash midway.
        upload_key = dest + TEMP_SUFFIX
        put_headers['X-Delete-After'] = '600'

    try:
        self._copy_helper('PUT', '/%s%s' % (self.prefix, upload_key), put_headers)
    except HTTPError as exc:
        if exc.status == 404:
            raise NoSuchObject(src)
        raise

    if metadata is None:
        return

    # Replace metadata on the temporary copy
    meta_headers = CaseInsensitiveDict()
    self._add_meta_headers(meta_headers, metadata)
    self._copy_helper('POST', '/%s%s' % (self.prefix, upload_key), meta_headers)

    # Rename temporary object to the final destination
    move_headers = CaseInsensitiveDict()
    move_headers['X-Copy-From'] = '/%s/%s%s' % (self.container_name, self.prefix, upload_key)
    self._copy_helper('PUT', '/%s%s' % (self.prefix, dest), move_headers)
def _do_request(self, method, path, query_string=None, headers=None, body=None):
    '''Send request, read and return response object.

    Attaches the cached OAuth2 bearer token if available; on a 401
    reply, refreshes the token (once, under a lock) and retries the
    request a single time.
    '''
    log.debug('started with %s %s, qs=%s', method, path, query_string)

    if headers is None:
        headers = CaseInsensitiveDict()

    # Expect-100 handshake is used when the body is streamed separately.
    expect100 = isinstance(body, BodyFollowing)
    headers['host'] = self.hostname

    if query_string:
        s = urllib.parse.urlencode(query_string, doseq=True)
        path += '?%s' % s

    # If we have an access token, try to use it.
    token = self.access_token.get(self.refresh_token, None)
    if token is not None:
        headers['Authorization'] = 'Bearer ' + token
        self.conn.send_request(method, path, body=body, headers=headers,
                               expect100=expect100)
        resp = self.conn.read_response()
        if ((expect100 and resp.status == 100)
                or (not expect100 and 200 <= resp.status <= 299)):
            return resp
        elif resp.status != 401:
            raise self._parse_error_response(resp)
        # 401: drain the error body, then fall through to token refresh.
        self.conn.discard()

    # If we reach this point, then the access token must have
    # expired, so we try to get a new one. We use a lock to prevent
    # multiple threads from refreshing the token simultaneously.
    with self._refresh_lock:
        # Don't refresh if another thread has already done so while
        # we waited for the lock.
        if token is None or self.access_token.get(self.refresh_token, None) == token:
            self._get_access_token()

    # Try request again. If this still fails, propagate the error
    # (because we have just refreshed the access token).
    # FIXME: We can't rely on this if e.g. the system hibernated
    # after refreshing the token, but before reaching this line.
    headers['Authorization'] = 'Bearer ' + self.access_token[self.refresh_token]
    self.conn.send_request(method, path, body=body, headers=headers,
                           expect100=expect100)
    resp = self.conn.read_response()
    if ((expect100 and resp.status == 100)
            or (not expect100 and 200 <= resp.status <= 299)):
        return resp
    else:
        raise self._parse_error_response(resp)
def lookup(self, key):
    """Return metadata for object *key*."""
    log.debug('started with %s', key)

    encoded = self._encode_key(key)
    range_headers = CaseInsensitiveDict()
    range_headers['Range'] = "bytes=0-1"  # Only get first byte
    resp, data = self._do_request(
        'GET', '/file/%s/%s%s' % (self.bucket_name, self.prefix, encoded),
        self.conn_download, headers=range_headers)
    # NOTE: metadata is extracted under the encoded key, as before.
    return self._extractmeta(resp, encoded)
def open_write(self, key, metadata=None, is_compressed=False):
    '''Return a file-like object for writing object *key*.

    The returned object buffers all data and only starts the uploads
    when its `close` method is called.
    '''
    log.debug('started with %s', key)

    upload_headers = CaseInsensitiveDict()
    self._add_b2_metadata_to_headers(
        upload_headers, metadata if metadata is not None else dict())
    return ObjectW(key, self, upload_headers)
def open_write(self, key, metadata=None, is_compressed=False):
    """Return a file-like object for writing object *key*.

    The returned object buffers all data and only starts the upload
    when its `close` method is called.
    """
    log.debug('started with %s', key)
    if key.endswith(TEMP_SUFFIX):
        raise ValueError('Keys must not end with %s' % TEMP_SUFFIX)

    hdrs = CaseInsensitiveDict()
    self._add_meta_headers(hdrs, metadata if metadata is not None else dict(),
                           chunksize=self.features.max_meta_len)
    return ObjectW(key, self, hdrs)
def test_put(conn):
    payload = DUMMY_DATA

    # Upload without a Content-MD5 header succeeds
    conn.send_request('PUT', '/allgood', body=payload)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 204
    assert resp.length == 0
    assert resp.reason == 'MD5 matched'

    # Upload with a wrong Content-MD5 header is rejected
    bad_headers = CaseInsensitiveDict()
    bad_headers['Content-MD5'] = 'nUzaJEag3tOdobQVU/39GA=='
    conn.send_request('PUT', '/allgood', body=payload, headers=bad_headers)
    resp = conn.read_response()
    conn.discard()
    assert resp.status == 400
    assert resp.reason.startswith('MD5 mismatch')
def _authorize_account(self):
    '''Authorize API calls.

    Performs the b2_authorize_account handshake and stores account id,
    API/download URLs and the authorization token on self. Raises
    `RuntimeError` on failure or insufficient key permissions.
    '''
    authorize_host = 'api.backblazeb2.com'
    authorize_url = api_url_prefix + 'b2_authorize_account'

    # HTTP basic auth from application key id and key.
    id_and_key = self.b2_application_key_id + ':' + self.b2_application_key
    basic_auth_string = 'Basic ' + str(
        base64.b64encode(bytes(id_and_key, 'UTF-8')), encoding='UTF-8')

    with HTTPConnection(authorize_host, 443, ssl_context=self.ssl_context) as connection:
        headers = CaseInsensitiveDict()
        headers['Authorization'] = basic_auth_string

        connection.send_request('GET', authorize_url, headers=headers, body=None)
        response = connection.read_response()
        response_body = connection.readall()

        if response.status != 200:
            raise RuntimeError('Authorization failed.')

        j = json.loads(response_body.decode('utf-8'))

        self.account_id = j['accountId']

        # A bucket-restricted key must be restricted to *our* bucket.
        allowed_info = j.get('allowed')
        if allowed_info.get('bucketId'):
            self.bucket_id = allowed_info.get('bucketId')
            if allowed_info.get('bucketName') != self.bucket_name:
                raise RuntimeError(
                    'Provided API key can not access desired bucket.')

        if not self._check_key_capabilities(allowed_info):
            raise RuntimeError(
                'Provided API key does not have the required capabilities.'
            )

        self.api_url = urlparse(j['apiUrl'])
        self.download_url = urlparse(j['downloadUrl'])
        self.authorization_token = j['authorizationToken']
def update_meta(self, key, metadata):
    """Replace the metadata of object *key* with *metadata*."""
    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'application/json; charset="utf-8"'
    # NOTE(review): body includes 'acl': [] — presumably resets object ACLs
    # on update; verify against the Objects: update API semantics.
    payload = json.dumps({
        'metadata': _wrap_user_meta(metadata),
        'acl': []
    }).encode()

    object_path = '/storage/v1/b/%s/o/%s' % (
        urllib.parse.quote(self.bucket_name, safe=''),
        urllib.parse.quote(self.prefix + key, safe=''))
    try:
        resp = self._do_request('PUT', object_path, headers=headers, body=payload)
    except RequestError as exc:
        mapped = _map_request_error(exc, key)
        if mapped:
            raise mapped
        raise
    self._parse_json_response(resp)
def copy(self, src, dest, metadata=None):
    """Copy object *src* to *dest*, optionally replacing its metadata."""
    log.debug('started with %s, %s', src, dest)
    if metadata is not None and not isinstance(metadata, dict):
        raise TypeError('*metadata*: expected dict or None, got %s' % type(metadata))

    headers = CaseInsensitiveDict()
    headers[self.hdr_prefix + 'copy-source'] = \
        '/%s/%s%s' % (self.bucket_name, self.prefix, src)

    directive = 'COPY' if metadata is None else 'REPLACE'
    headers[self.hdr_prefix + 'metadata-directive'] = directive
    if metadata is not None:
        self._add_meta_headers(headers, metadata)

    try:
        self._do_request('PUT', '/%s%s' % (self.prefix, dest), headers=headers)
        self.conn.discard()
    except s3c.NoSuchKeyError:
        raise NoSuchObject(src)
def _get_conn(self):
    '''Obtain connection to server and authentication token.

    Authenticates against Keystone (v3 when the 'domain' option is set,
    v2 otherwise), then scans the service catalog for an object-store
    endpoint in the configured region and returns a connection to it.
    '''
    log.debug('started')

    if 'no-ssl' in self.options:
        ssl_context = None
    else:
        ssl_context = self.ssl_context

    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'application/json'
    headers['Accept'] = 'application/json; charset="utf-8"'

    # Login may be given as "tenant:user".
    if ':' in self.login:
        (tenant, user) = self.login.split(':')
    else:
        tenant = None
        user = self.login

    domain = self.options.get('domain', None)
    if domain:
        if not tenant:
            raise ValueError("Tenant is required when Keystone v3 is used")

        # In simple cases where there's only one domain, the project domain
        # will be the same as the authentication domain, but this option
        # allows for them to be different
        project_domain = self.options.get('project-domain', domain)

        auth_body = {
            'auth': {
                'identity': {
                    'methods': ['password'],
                    'password': {
                        'user': {
                            'name': user,
                            'domain': {
                                'id': domain
                            },
                            'password': self.password
                        }
                    }
                },
                'scope': {
                    'project': {
                        'id': tenant,
                        'domain': {
                            'id': project_domain
                        }
                    }
                }
            }
        }
        auth_url_path = '/v3/auth/tokens'
    else:
        # If a domain is not specified, assume v2
        auth_body = {
            'auth': {
                'passwordCredentials': {
                    'username': user,
                    'password': self.password
                }
            }}
        auth_url_path = '/v2.0/tokens'
        if tenant:
            auth_body['auth']['tenantName'] = tenant

    with HTTPConnection(self.hostname, port=self.port, proxy=self.proxy,
                        ssl_context=ssl_context) as conn:
        conn.timeout = int(self.options.get('tcp-timeout', 20))
        conn.send_request('POST', auth_url_path, headers=headers,
                          body=json.dumps(auth_body).encode('utf-8'))
        resp = conn.read_response()

        if resp.status == 401:
            raise AuthorizationError(resp.reason)
        elif resp.status > 299 or resp.status < 200:
            raise HTTPError(resp.status, resp.reason, resp.headers)

        cat = json.loads(conn.read().decode('utf-8'))

        # v3 returns the token in a header, v2 in the body.
        if self.options.get('domain', None):
            self.auth_token = resp.headers['X-Subject-Token']
            service_catalog = cat['token']['catalog']
        else:
            self.auth_token = cat['access']['token']['id']
            service_catalog = cat['access']['serviceCatalog']

        # Regions that have an object-store but are not the requested one,
        # collected for the error message below.
        avail_regions = []
        for service in service_catalog:
            if service['type'] != 'object-store':
                continue
            for endpoint in service['endpoints']:
                if endpoint['region'] != self.region:
                    avail_regions.append(endpoint['region'])
                    continue

                if 'publicURL' in endpoint:
                    # The publicURL nomenclature is found in v2 catalogs
                    o = urlsplit(endpoint['publicURL'])
                else:
                    # Whereas v3 catalogs do 'interface' == 'public' and
                    # 'url' for the URL itself
                    if endpoint['interface'] != 'public':
                        continue
                    o = urlsplit(endpoint['url'])

                self.auth_prefix = urllib.parse.unquote(o.path)
                if o.scheme == 'https':
                    ssl_context = self.ssl_context
                elif o.scheme == 'http':
                    ssl_context = None
                else:
                    # fall through to scheme used for authentication
                    pass

                self._detect_features(o.hostname, o.port, ssl_context)

                conn = HTTPConnection(o.hostname, o.port, proxy=self.proxy,
                                      ssl_context=ssl_context)
                conn.timeout = int(self.options.get('tcp-timeout', 20))
                return conn

        # Only list the alternatives when the list is reasonably short.
        if len(avail_regions) < 10:
            raise DanglingStorageURLError(self.container_name,
                'No accessible object storage service found in region %s'
                ' (available regions: %s)' % (self.region, ', '.join(avail_regions)))
        else:
            raise DanglingStorageURLError(self.container_name,
                'No accessible object storage service found in region %s' % self.region)
def _extractmeta(self, resp, obj_key):
    '''Extract metadata from HTTP response object.

    Supports the legacy 'raw' and 'pickle' formats as well as the
    current 'raw2' format (numbered, percent-encoded header chunks
    holding a dict literal, verified against a stored MD5).
    '''
    format_ = resp.headers.get('%smeta-format' % self.hdr_prefix, 'raw')
    if format_ in ('raw', 'pickle'):
        # Legacy formats: one header per metadata key.
        meta = CaseInsensitiveDict()
        pattern = re.compile(r'^%smeta-(.+)$' % re.escape(self.hdr_prefix),
                             re.IGNORECASE)
        for fname in resp.headers:
            hit = pattern.search(fname)
            if hit:
                meta[hit.group(1)] = resp.headers[fname]

        if format_ == 'raw':
            return meta

        # format_ == pickle
        buf = ''.join(meta[x] for x in sorted(meta)
                      if x.lower().startswith('data-'))
        if 'md5' in meta and md5sum_b64(buf.encode('us-ascii')) != meta['md5']:
            log.warning('MD5 mismatch in metadata for %s', obj_key)
            raise BadDigestError('BadDigest',
                                 'Meta MD5 for %s does not match' % obj_key)
        try:
            return safe_unpickle(b64decode(buf), encoding='latin1')
        except binascii.Error:
            raise CorruptedObjectError('Corrupted metadata, b64decode failed')
        except pickle.UnpicklingError as exc:
            raise CorruptedObjectError('Corrupted metadata, pickle says: %s' % exc)

    elif format_ != 'raw2':  # Current
        raise RuntimeError('Unknown metadata format %s for key %s'
                           % (format_, obj_key))

    parts = []
    for i in count():
        # Headers is an email.message object, so indexing it
        # would also give None instead of KeyError
        part = resp.headers.get('%smeta-%03d' % (self.hdr_prefix, i), None)
        if part is None:
            break
        parts.append(part)
    buf = unquote(''.join(parts))
    meta = literal_eval('{ %s }' % buf)

    # Decode bytes values
    for (k, v) in meta.items():
        if not isinstance(v, bytes):
            continue
        try:
            meta[k] = b64decode(v)
        except binascii.Error:
            # This should trigger a MD5 mismatch below
            meta[k] = None

    # Check MD5. There is a case to be made for treating a mismatch as a
    # `CorruptedObjectError` rather than a `BadDigestError`, because the MD5
    # sum is not calculated on-the-fly by the server but stored with the
    # object, and therefore does not actually verify what the server has
    # sent over the wire. However, it seems more likely for the data to get
    # accidentally corrupted in transit than to get accidentally corrupted
    # on the server (which hopefully checksums its storage devices).
    md5 = b64encode(checksum_basic_mapping(meta)).decode('ascii')
    if md5 != resp.headers.get('%smeta-md5' % self.hdr_prefix, None):
        log.warning('MD5 mismatch in metadata for %s', obj_key)
        raise BadDigestError('BadDigest',
                             'Meta MD5 for %s does not match' % obj_key)

    return meta
def write_fh(self, fh, key: str, md5: bytes, metadata: Optional[Dict[str, Any]] = None,
             size: Optional[int] = None):
    '''Write data from byte stream *fh* into *key*.

    *fh* must be seekable. If *size* is None, *fh* must also implement
    `fh.fileno()` so that the size can be determined through `os.fstat`.

    *md5* must be the (binary) md5 checksum of the data.
    '''
    # JSON part of the multipart upload: object name and metadata.
    metadata = json.dumps({
        'metadata': _wrap_user_meta(metadata if metadata else {}),
        'md5Hash': b64encode(md5).decode(),
        'name': self.prefix + key,
    })

    # Google Storage uses Content-Length to read the object data, so we
    # don't have to worry about the boundary occurring in the object data.
    boundary = 'foo_bar_baz'
    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'multipart/related; boundary=%s' % boundary

    body_prefix = '\n'.join(
        ('--' + boundary,
         'Content-Type: application/json; charset=UTF-8',
         '',
         metadata,
         '--' + boundary,
         'Content-Type: application/octet-stream',
         '',
         '')).encode()
    body_suffix = ('\n--%s--\n' % boundary).encode()

    body_size = len(body_prefix) + len(body_suffix)
    if size is not None:
        body_size += size
    else:
        body_size += os.fstat(fh.fileno()).st_size

    path = '/upload/storage/v1/b/%s/o' % (urllib.parse.quote(
        self.bucket_name, safe=''), )
    query_string = {'uploadType': 'multipart'}
    try:
        # BodyFollowing triggers the expect-100 handshake in _do_request.
        resp = self._do_request('POST', path, query_string=query_string,
                                headers=headers, body=BodyFollowing(body_size))
    except RequestError as exc:
        exc = _map_request_error(exc, key)
        if exc:
            raise exc
        raise
    assert resp.status == 100

    fh.seek(0)
    # Re-compute the MD5 while streaming to detect concurrent modification.
    md5_run = hashlib.md5()
    try:
        self.conn.write(body_prefix)
        while True:
            buf = fh.read(BUFSIZE)
            if not buf:
                break
            self.conn.write(buf)
            md5_run.update(buf)
        self.conn.write(body_suffix)
    except ConnectionClosed:
        # Server closed connection while we were writing body data -
        # but we may still be able to read an error response
        try:
            resp = self.conn.read_response()
        except ConnectionClosed:
            # No server response available
            pass
        else:
            log.warning('Server broke connection during upload, signaled '
                        '%d %s', resp.status, resp.reason)
        # Re-raise first ConnectionClosed exception
        raise

    if md5_run.digest() != md5:
        raise ValueError('md5 passed to write_fd does not match fd data')

    resp = self.conn.read_response()
    # If we're really unlucky, then the token has expired while we were uploading data.
    if resp.status == 401:
        self.conn.discard()
        raise AccessTokenExpired()
    elif resp.status != 200:
        exc = self._parse_error_response(resp)
        raise _map_request_error(exc, key) or exc
    self._parse_json_response(resp)
def _get_conn(self):
    '''Obtain connection to server and authentication token'''

    log.debug('started')

    if 'no-ssl' in self.options:
        ssl_context = None
    else:
        ssl_context = self.ssl_context

    headers = CaseInsensitiveDict()
    headers['Content-Type'] = 'application/json'
    headers['Accept'] = 'application/json; charset="utf-8"'

    # Login may be given as "tenant:user" (Keystone v2 tenant-scoped auth)
    # or as a bare user name.
    if ':' in self.login:
        (tenant, user) = self.login.split(':')
    else:
        tenant = None
        user = self.login

    auth_body = {
        'auth': {
            'passwordCredentials': {
                'username': user,
                'password': self.password
            }
        }
    }
    if tenant:
        auth_body['auth']['tenantName'] = tenant

    # Authenticate against Keystone v2 to obtain a token and the service
    # catalog, then pick the object-store endpoint for our region.
    with HTTPConnection(self.hostname, port=self.port, proxy=self.proxy,
                        ssl_context=ssl_context) as conn:
        conn.timeout = int(self.options.get('tcp-timeout', 20))

        conn.send_request('POST', '/v2.0/tokens', headers=headers,
                          body=json.dumps(auth_body).encode('utf-8'))
        resp = conn.read_response()

        if resp.status == 401:
            raise AuthorizationError(resp.reason)
        elif resp.status > 299 or resp.status < 200:
            raise HTTPError(resp.status, resp.reason, resp.headers)

        cat = json.loads(conn.read().decode('utf-8'))
        self.auth_token = cat['access']['token']['id']

        # Collect non-matching regions so we can give a helpful error
        # message if our region is not in the catalog.
        avail_regions = []
        for service in cat['access']['serviceCatalog']:
            if service['type'] != 'object-store':
                continue
            for endpoint in service['endpoints']:
                if endpoint['region'] != self.region:
                    avail_regions.append(endpoint['region'])
                    continue

                o = urlsplit(endpoint['publicURL'])
                self.auth_prefix = urllib.parse.unquote(o.path)
                if o.scheme == 'https':
                    ssl_context = self.ssl_context
                elif o.scheme == 'http':
                    ssl_context = None
                else:
                    # fall through to scheme used for authentication
                    pass

                self._detect_features(o.hostname, o.port, ssl_context)

                # Storage requests go to the endpoint host, not the
                # authentication host.
                conn = HTTPConnection(o.hostname, o.port, proxy=self.proxy,
                                      ssl_context=ssl_context)
                conn.timeout = int(self.options.get('tcp-timeout', 20))
                return conn

    # Only list the available regions when the list is short enough to be
    # readable.
    if len(avail_regions) < 10:
        raise DanglingStorageURLError(
            self.container_name,
            'No accessible object storage service found in region %s'
            ' (available regions: %s)' % (self.region, ', '.join(avail_regions)))
    else:
        raise DanglingStorageURLError(
            self.container_name,
            'No accessible object storage service found in region %s' % self.region)
def _do_request(self, connection, method, path, headers=None, body=None, download_body=True):
    '''Send request over *connection*, read and return response object.

    *body* may be None, a bytes-like object, or a file-like object (in
    which case its size is taken from `os.fstat` and it is streamed).
    When *download_body* is False and the request succeeds, the response
    body is left on the connection for the caller to read.

    Raises `HTTPError` for 404 / HEAD errors / non-JSON error bodies, and
    `B2Error` for Backblaze JSON error documents.
    '''
    log.debug('started with %s %s', method, path)

    if headers is None:
        headers = CaseInsensitiveDict()

    if self.authorization_token is None:
        self._authorize_account()

    if 'Authorization' not in headers:
        headers['Authorization'] = self.authorization_token

    # Test-mode headers let the B2 test API simulate token expiry / cap
    # errors.
    if self.test_mode_expire_some_tokens:
        headers['X-Bz-Test-Mode'] = 'expire_some_account_authorization_tokens'

    if self.test_mode_force_cap_exceeded:
        headers['X-Bz-Test-Mode'] = 'force_cap_exceeded'

    log.debug('REQUEST: %s %s %s', connection.hostname, method, path)

    if body is None or isinstance(body, (bytes, bytearray, memoryview)):
        connection.send_request(method, path, headers=headers, body=body)
    else:
        # Streamed upload: size must be announced up-front.
        body_length = os.fstat(body.fileno()).st_size
        connection.send_request(method, path, headers=headers,
                                body=BodyFollowing(body_length))
        copyfileobj(body, connection, BUFSIZE)

    response = connection.read_response()

    if download_body is True or response.status != 200:
        # Backblaze always returns a json with error information in body
        response_body = connection.readall()
    else:
        # Caller will read the body itself before reusing the connection.
        response_body = None

    content_length = response.headers.get('Content-Length', '0')
    log.debug('RESPONSE: %s %s %s %s', response.method, response.status,
              response.reason, content_length)

    if (
        response.status == 404  # File not found
        or (response.status != 200 and method == 'HEAD')
    ):
        # HEAD responses do not have a body -> we have to raise a HTTPError with the code
        raise HTTPError(response.status, response.reason, response.headers)

    if response.status != 200:
        # Parse Backblaze's JSON error document. Fall back to a plain
        # HTTPError when the body is empty or not valid JSON; previously
        # this path crashed (TypeError on subscripting None, or an
        # unhandled ValueError from json.loads), masking the real error.
        json_error_response = None
        if response_body:
            try:
                json_error_response = json.loads(response_body.decode('utf-8'))
            except ValueError:
                json_error_response = None
        if json_error_response is None:
            raise HTTPError(response.status, response.reason, response.headers)

        raise B2Error(json_error_response['status'], json_error_response['code'],
                      json_error_response['message'], response.headers)

    return response, response_body
def _send_request(self, method, path, headers, subres=None, query_string=None, body=None):
    '''Add authentication and send request

    Returns the response object.
    '''

    if not isinstance(headers, CaseInsensitiveDict):
        headers = CaseInsensitiveDict(headers)

    # Sign the request (adds Authorization and related headers).
    self._authorize_request(method, path, headers, subres)

    # Construct full path
    if not self.hostname.startswith(self.bucket_name):
        path = '/%s%s' % (self.bucket_name, path)
    path = urllib.parse.quote(path)
    if query_string:
        s = urllib.parse.urlencode(query_string, doseq=True)
        if subres:
            path += '?%s&%s' % (subres, s)
        else:
            path += '?%s' % s
    elif subres:
        path += '?%s' % subres

    # We can probably remove the assertions at some point and
    # call self.conn.read_response() directly
    def read_response():
        # Sanity check: the response we read must belong to the request
        # we just sent (dugong pipelines requests on one connection).
        resp = self.conn.read_response()
        assert resp.method == method
        assert resp.path == path
        return resp

    use_expect_100c = not self.options.get('disable-expect100', False)
    try:
        log.debug('sending %s %s', method, path)
        if body is None or isinstance(body, (bytes, bytearray, memoryview)):
            self.conn.send_request(method, path, body=body, headers=headers)
        else:
            # Streamed upload from a file-like object; size must be
            # announced up-front via BodyFollowing.
            body_len = os.fstat(body.fileno()).st_size
            self.conn.send_request(method, path, expect100=use_expect_100c,
                                   headers=headers, body=BodyFollowing(body_len))

            if use_expect_100c:
                # With Expect: 100-continue, the server either tells us to
                # go ahead (100) or sends the error response right away.
                resp = read_response()
                if resp.status != 100:  # Error
                    return resp

            try:
                copyfileobj(body, self.conn, BUFSIZE)
            except ConnectionClosed:
                # Server closed connection while we were writing body data -
                # but we may still be able to read an error response
                try:
                    resp = read_response()
                except ConnectionClosed:  # No server response available
                    pass
                else:
                    if resp.status >= 400:  # Got error response
                        return resp
                    log.warning(
                        'Server broke connection during upload, but signaled '
                        '%d %s', resp.status, resp.reason)

                # Re-raise first ConnectionClosed exception
                raise

        return read_response()
    except Exception as exc:
        if is_temp_network_error(exc):
            # We probably can't use the connection anymore
            self.conn.disconnect()
        raise
def _do_request(self, method, path, subres=None, query_string=None, headers=None, body=None):
    '''Send request, read and return response object

    Transparently follows up to 10 chained redirects (via the Location
    header or an S3 <Endpoint> element in the error body), switching
    HEAD to GET when necessary to read a redirect body.
    '''

    log.debug('started with %s %s?%s, qs=%s', method, path, subres, query_string)

    if headers is None:
        headers = CaseInsensitiveDict()

    if isinstance(body, (bytes, bytearray, memoryview)):
        headers['Content-MD5'] = md5sum_b64(body)

    redirect_count = 0
    this_method = method
    while True:
        resp = self._send_request(this_method, path, headers=headers, subres=subres,
                                  query_string=query_string, body=body)

        # Anything outside 3xx terminates the redirect loop.
        if (resp.status < 300 or resp.status > 399):
            break

        # Assume redirect
        redirect_count += 1
        if redirect_count > 10:
            raise RuntimeError('Too many chained redirections')

        # First try location header...
        new_url = resp.headers['Location']
        if new_url:
            # Discard body
            self.conn.discard()

            # Pylint can't infer SplitResult Types
            #pylint: disable=E1103
            o = urlsplit(new_url)
            if o.scheme:
                # Don't let a redirect silently downgrade (or upgrade)
                # the transport security we were configured with.
                if self.ssl_context and o.scheme != 'https':
                    raise RuntimeError('Redirect to non-https URL')
                elif not self.ssl_context and o.scheme != 'http':
                    raise RuntimeError('Redirect to non-http URL')
            if o.hostname != self.hostname or o.port != self.port:
                self.hostname = o.hostname
                self.port = o.port
                self.conn.disconnect()
                self.conn = self._get_conn()
            else:
                raise RuntimeError('Redirect to different path on same host')

        # ..but endpoint may also be hidden in message body.
        # If we have done a HEAD request, we have to change to GET
        # to actually retrieve the body.
        elif resp.method == 'HEAD':
            log.debug('Switching from HEAD to GET to read redirect body')
            this_method = 'GET'

        # Try to read new URL from request body
        else:
            tree = self._parse_xml_response(resp)
            new_url = tree.findtext('Endpoint')

            if not new_url:
                raise get_S3Error(tree.findtext('Code'), tree.findtext('Message'),
                                  resp.headers)

            self.hostname = new_url
            self.conn.disconnect()
            self.conn = self._get_conn()

            # Update method
            this_method = method

        log.info('_do_request(): redirected to %s', self.conn.hostname)

        # A streamed body must be rewound before the request is re-sent.
        if body and not isinstance(body, (bytes, bytearray, memoryview)):
            body.seek(0)

    # At the end, the request should have gone out with the right
    # method
    if this_method != method:
        raise RuntimeError('Dazed and confused - HEAD fails but GET works?')

    # Success
    if resp.status >= 200 and resp.status <= 299:
        return resp

    # Error
    self._parse_error_response(resp)
def _do_request(self, method, path, conn, headers=None, body=None, auth_token=None,
                download_body=True, body_size=None):
    """Send request, read and return response object

    This method modifies the *headers* dictionary.
    conn must by a HTTPConnection

    When download_body is True, need to receive data before making new connection
    """

    def _debug_body(b):
        # Compact representation of the request body for the debug log.
        if isinstance(b, str):
            return b
        elif b is None:
            return "None"
        else:
            return 'byte_body'

    def _debug_hostname(c):
        # Best-effort hostname for the debug log; tolerate partially
        # initialized connection objects. Was a bare `except:`, which
        # would also have swallowed KeyboardInterrupt/SystemExit.
        try:
            return c.hostname
        except Exception:
            return "None"

    log.debug('started with %r, %r, %r, %r, %r', method, _debug_hostname(conn), path,
              headers, _debug_body(body))

    if headers is None:
        headers = CaseInsensitiveDict()

    if auth_token is None:
        headers['Authorization'] = self.auth_token
    else:
        headers['Authorization'] = auth_token

    if self.test_string:
        headers['X-Bz-Test-Mode'] = self.test_string

    try:
        if isinstance(body, io.FileIO):
            if body_size is None:
                raise ValueError("Body size is necessary when uploading from file")
            # Streamed upload: announce size up-front, then copy in chunks.
            conn.send_request(method, path, headers=headers,
                              body=BodyFollowing(body_size))
            while True:
                buf = body.read(BUFSIZE)
                if not buf:
                    break
                conn.write(buf)
        else:
            conn.send_request(method, path, headers=headers, body=body)

        resp = conn.read_response()

        if download_body or resp.status != 200:
            body = conn.readall()
        else:
            # caller need to download body itself before making new request
            body = None
    except Exception as exc:
        if is_temp_network_error(exc):
            # We probably can't use the connection anymore
            conn.disconnect()
        raise

    if resp.status == 200 or resp.status == 206:
        return resp, body

    try:
        # error code is in body
        j = json.loads(str(body, encoding='UTF-8'))
    except ValueError:
        raise HTTPError(resp.status, resp.reason, resp.headers)

    # Expired auth token
    if resp.status == 401:
        if j['code'] == 'expired_auth_token':
            log.info('BackBlaze auth token seems to have expired, requesting new one.')
            self.conn_api.disconnect()
            self.conn_download.disconnect()
            # Force constructing a new connection with a new token, otherwise
            # the connection will be reestablished with the same token.
            self.conn_api = None
            self.conn_download = None
            self._login()
            raise AuthenticationExpired(j['message'])
        else:
            raise AuthorizationError(j['message'])

    # File not found
    if resp.status == 404:
        raise NoSuchObject(path)

    # Backend error. Pass the *response* headers (which may carry e.g.
    # Retry-After), not the request headers we sent — the previous code
    # passed the request dict here, unlike the newer B2 _do_request which
    # uses response.headers.
    raise B2Error(j['status'], j['code'], j['message'], headers=resp.headers)