def switch_host_with_param(request, param_name):
    """Rewrite ``request.url`` to target the endpoint named in the body.

    The request body is expected to be JSON; if it contains *param_name*,
    the URL's scheme and netloc are replaced with those of the named
    endpoint while the original path and query string are preserved
    (any fragment is dropped).
    """
    payload = json.loads(request.data.decode('utf-8'))
    endpoint = payload.get(param_name)
    if endpoint:
        target = urlsplit(endpoint)
        current = urlsplit(request.url)
        request.url = urlunsplit(
            (target.scheme, target.netloc, current.path, current.query, '')
        )
def _get_new_endpoint(original_endpoint, new_endpoint, use_new_scheme=True):
    """Merge *new_endpoint*'s host (and, optionally, scheme) with the path
    and query of *original_endpoint* and return the combined URL."""
    target = urlsplit(new_endpoint)
    source = urlsplit(original_endpoint)
    scheme = target.scheme if use_new_scheme else source.scheme
    final_endpoint = urlunsplit(
        (scheme, target.netloc, source.path, source.query, '')
    )
    logger.debug('Updating URI from %s to %s' % (original_endpoint, final_endpoint))
    return final_endpoint
def _switch_hosts(request, new_endpoint, use_new_scheme=True):
    """Point *request* at *new_endpoint*'s host in place, keeping the
    original path and query (and the original scheme unless
    *use_new_scheme* is true)."""
    target = urlsplit(new_endpoint)
    source = urlsplit(request.url)
    scheme = target.scheme if use_new_scheme else source.scheme
    final_endpoint = urlunsplit(
        (scheme, target.netloc, source.path, source.query, '')
    )
    # Log before mutating so both old and new URLs appear in the message.
    logger.debug('Updating URI from %s to %s' % (request.url, final_endpoint))
    request.url = final_endpoint
def _apply_signing_changes(self, aws_request, signed_crt_request):
    """Copy signing results from the signed CRT request onto *aws_request*,
    including the signed query string the CRT signer produced."""
    # Let the base class apply its own signing changes first.
    super()._apply_signing_changes(aws_request, signed_crt_request)
    signed_query = urlsplit(signed_crt_request.path).query
    parts = urlsplit(aws_request.url)
    # urlsplit() returns an immutable tuple, so rebuild the URL with the
    # query component (index 3) swapped for the signed query string.
    aws_request.url = urlunsplit(
        (parts.scheme, parts.netloc, parts.path, signed_query, parts.fragment)
    )
def _get_new_endpoint(original_endpoint, new_endpoint, use_new_scheme=True):
    """Return *original_endpoint* rehosted onto *new_endpoint*.

    The netloc always comes from *new_endpoint*; so does the scheme unless
    *use_new_scheme* is False.  Path and query are kept from the original
    endpoint and any fragment is dropped.
    """
    new_parts = urlsplit(new_endpoint)
    old_parts = urlsplit(original_endpoint)
    if use_new_scheme:
        scheme = new_parts.scheme
    else:
        scheme = old_parts.scheme
    final_endpoint = urlunsplit(
        (scheme, new_parts.netloc, old_parts.path, old_parts.query, "")
    )
    logger.debug("Updating URI from %s to %s" % (original_endpoint, final_endpoint))
    return final_endpoint
def add_auth(self, request):
    """Sign *request* with HMAC-v1 and attach the Authorization header."""
    split = urlsplit(request.url)
    logger.debug('Method: %s' % request.method)
    signature = self.get_signature(request.method, split, request.headers)
    auth_value = "AWS %s:%s" % (self.credentials.access_key, signature)
    request.headers['Authorization'] = auth_value
def _crt_request_from_aws_request(self, aws_request):
    """Translate a botocore AWSRequest into an awscrt HttpRequest."""
    url_parts = urlsplit(aws_request.url)
    crt_path = url_parts.path or '/'
    # Explicit request params take precedence over any query string
    # already embedded in the URL.
    if aws_request.params:
        encoded = ['%s=%s' % (name, str(val))
                   for name, val in aws_request.params.items()]
        crt_path = crt_path + '?' + '&'.join(encoded)
    elif url_parts.query:
        crt_path = '%s?%s' % (crt_path, url_parts.query)
    crt_headers = awscrt.http.HttpHeaders(aws_request.headers.items())
    # CRT requires body (if it exists) to be an I/O stream.
    crt_body_stream = None
    if aws_request.body:
        if hasattr(aws_request.body, 'seek'):
            crt_body_stream = aws_request.body
        else:
            crt_body_stream = BytesIO(aws_request.body)
    return awscrt.http.HttpRequest(
        method=aws_request.method,
        path=crt_path,
        headers=crt_headers,
        body_stream=crt_body_stream,
    )
def _urljoin(endpoint_url, url_path, host_prefix): p = urlsplit(endpoint_url) # <part> - <index> # scheme - p[0] # netloc - p[1] # path - p[2] # query - p[3] # fragment - p[4] if not url_path or url_path == '/': # If there's no path component, ensure the URL ends with # a '/' for backwards compatibility. if not p[2]: new_path = '/' else: new_path = p[2] elif p[2].endswith('/') and url_path.startswith('/'): new_path = p[2][:-1] + url_path else: new_path = p[2] + url_path new_netloc = p[1] if host_prefix is not None: new_netloc = host_prefix + new_netloc reconstructed = urlunsplit((p[0], new_netloc, new_path, p[3], p[4])) return reconstructed
def switch_host_with_param(request, param_name):
    """If the JSON body of *request* names an endpoint under *param_name*,
    retarget ``request.url`` at that endpoint's scheme and host while
    keeping the original path and query string."""
    body = json.loads(request.data.decode('utf-8'))
    if not body.get(param_name):
        return
    new_parts = urlsplit(body[param_name])
    old_parts = urlsplit(request.url)
    request.url = urlunsplit((
        new_parts.scheme,
        new_parts.netloc,
        old_parts.path,
        old_parts.query,
        '',
    ))
def _crt_request_from_aws_request(self, aws_request):
    """Build an awscrt HttpRequest mirroring the given AWSRequest."""
    url_parts = urlsplit(aws_request.url)
    crt_path = url_parts.path
    if url_parts.query:
        crt_path = '%s?%s' % (crt_path, url_parts.query)
    # Header values may arrive as bytes; CRT wants text, so decode
    # anything that is not already a str.
    headers_list = [
        (name, value if isinstance(value, str) else str(value, 'utf-8'))
        for name, value in aws_request.headers.items()
    ]
    crt_headers = awscrt.http.HttpHeaders(headers_list)
    # CRT requires body (if it exists) to be an I/O stream.
    crt_body_stream = None
    if aws_request.body:
        crt_body_stream = (
            aws_request.body
            if hasattr(aws_request.body, 'seek')
            else BytesIO(aws_request.body)
        )
    return awscrt.http.HttpRequest(
        method=aws_request.method,
        path=crt_path,
        headers=crt_headers,
        body_stream=crt_body_stream,
    )
def _is_s3_accelerate(self, endpoint_url, s3_config): # Accelerate has been explicitly configured. if s3_config is not None and s3_config.get('use_accelerate_endpoint'): return True # Accelerate mode is turned on automatically if an endpoint url is # provided that matches the accelerate scheme. if endpoint_url is None: return False # Accelerate is only valid for Amazon endpoints. netloc = urlsplit(endpoint_url).netloc if not netloc.endswith('amazonaws.com'): return False # The first part of the url should always be s3-accelerate. parts = netloc.split('.') if parts[0] != 's3-accelerate': return False # Url parts between 's3-accelerate' and 'amazonaws.com' which # represent different url features. feature_parts = parts[1:-2] # There should be no duplicate url parts. if len(feature_parts) != len(set(feature_parts)): return False # Remaining parts must all be in the whitelist. return all(p in S3_ACCELERATE_WHITELIST for p in feature_parts)
def authenticate_presign_url_signv2(method, path, headers, data, url, query_params, request_dict):
    """Validate a SigV2-presigned URL against a locally computed signature.

    Recomputes the HMAC-v1 query-auth signature with the test credentials
    and compares it to the 'Signature' query parameter, then checks the
    'Expires' parameter against the current time.  Returns an XML error
    response on mismatch or expiry, or None when the URL is valid.
    """
    # Calculating Signature
    aws_request = create_request_object(request_dict)
    credentials = Credentials(access_key=TEST_AWS_ACCESS_KEY_ID, secret_key=TEST_AWS_SECRET_ACCESS_KEY)
    auth = HmacV1QueryAuth(credentials=credentials, expires=query_params['Expires'][0])
    split = urlsplit(aws_request.url)
    string_to_sign = auth.get_string_to_sign(method=method, split=split, headers=aws_request.headers)
    signature = auth.get_signature(string_to_sign=string_to_sign)
    # Comparing the signature in url with signature we calculated
    # (the query value is percent-encoded, so unquote before comparing).
    query_sig = urlparse.unquote(query_params['Signature'][0])
    if query_sig != signature:
        return requests_error_response_xml_signature_calculation(code=403, code_string='SignatureDoesNotMatch', aws_access_token=TEST_AWS_ACCESS_KEY_ID, string_to_sign=string_to_sign, signature=signature, message='The request signature we calculated does not match the signature you provided. \ Check your key and signing method.')
    # Checking whether the url is expired or not
    if int(query_params['Expires'][0]) < time.time():
        return requests_error_response_xml_signature_calculation(code=403, code_string='AccessDenied', message='Request has expired', expires=query_params['Expires'][0])
def _modify_request_before_signing(self, request):
    """Prepare *request* for SigV4 presigning.

    Strips the auto-set form-encoded content-type, moves both body params
    and the SigV4 auth params into the query string (operation params
    first, auth params last, as the spec requires), and rebuilds the URL
    with the combined query string.
    """
    # We automatically set this header, so if it's the auto-set value we
    # want to get rid of it since it doesn't make sense for presigned urls.
    content_type = request.headers.get('content-type')
    blacklisted_content_type = (
        'application/x-www-form-urlencoded; charset=utf-8'
    )
    if content_type == blacklisted_content_type:
        del request.headers['content-type']
    # Note that we're not including X-Amz-Signature.
    # From the docs: "The Canonical Query String must include all the query
    # parameters from the preceding table except for X-Amz-Signature.
    signed_headers = self.signed_headers(self.headers_to_sign(request))
    auth_params = {
        'X-Amz-Algorithm': 'AWS4-HMAC-SHA256',
        'X-Amz-Credential': self.scope(request),
        'X-Amz-Date': request.context['timestamp'],
        'X-Amz-Expires': self._expires,
        'X-Amz-SignedHeaders': signed_headers,
    }
    if self.credentials.token is not None:
        auth_params['X-Amz-Security-Token'] = self.credentials.token
    # Now parse the original query string to a dict, inject our new query
    # params, and serialize back to a query string.
    url_parts = urlsplit(request.url)
    # parse_qs makes each value a list, but in our case we know we won't
    # have repeated keys so we know we have single element lists which we
    # can convert back to scalar values.
    query_dict = dict(
        [(k, v[0]) for k, v in
         parse_qs(url_parts.query, keep_blank_values=True).items()])
    # The spec is particular about this. It *has* to be:
    # https://<endpoint>?<operation params>&<auth params>
    # You can't mix the two types of params together, i.e just keep doing
    # new_query_params.update(op_params)
    # new_query_params.update(auth_params)
    # percent_encode_sequence(new_query_params)
    operation_params = ''
    if request.data:
        # We also need to move the body params into the query string. To
        # do this, we first have to convert it to a dict.
        query_dict.update(self._get_body_as_dict(request))
        request.data = ''
    if query_dict:
        operation_params = percent_encode_sequence(query_dict) + '&'
    new_query_string = (operation_params +
                        percent_encode_sequence(auth_params))
    # url_parts is a tuple (and therefore immutable) so we need to create
    # a new url_parts with the new query string.
    # <part>   - <index>
    # scheme   - 0
    # netloc   - 1
    # path     - 2
    # query    - 3  <-- we're replacing this.
    # fragment - 4
    p = url_parts
    new_url_parts = (p[0], p[1], p[2], new_query_string, p[4])
    request.url = urlunsplit(new_url_parts)
def add_auth(self, request):
    """Compute an HMAC-v1 signature for *request* and set its
    Authorization header."""
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug('HTTP request method: %s', request.method)
    sig = self.get_signature(request.method, url_parts, request.headers)
    request.headers['Authorization'] = "AWS %s:%s" % (
        self.credentials.access_key, sig)
def _inject_signature(self, request, signature):
    """Fold the access key, signature, and signing-relevant request
    headers into the query string of ``request.url``."""
    query_dict = {
        'AWSAccessKeyId': self.credentials.access_key,
        'Signature': signature,
    }
    for header_key in request.headers:
        lk = header_key.lower()
        if header_key == 'Date':
            # For query string requests, Expires replaces the Date header.
            query_dict['Expires'] = request.headers['Date']
        elif lk.startswith('x-amz-') or lk in ('content-md5', 'content-type'):
            # Only headers relevant to signing belong in the query string:
            # anything x-amz-*, Content-MD5, or Content-Type.
            query_dict[lk] = request.headers[lk]
    # Serialize the collected values into an encoded query string.
    new_query_string = percent_encode_sequence(query_dict)
    parts = urlsplit(request.url)
    if parts[3]:
        # Keep any pre-existing query string ahead of the injected one.
        new_query_string = '%s&%s' % (parts[3], new_query_string)
    request.url = urlunsplit(
        (parts[0], parts[1], parts[2], new_query_string, parts[4]))
def _inject_signature(self, request, signature):
    """Rewrite ``request.url`` so that the signature, access key, and any
    signing-relevant headers are carried as query parameters."""
    query_dict = {}
    query_dict['AWSAccessKeyId'] = self.credentials.access_key
    query_dict['Signature'] = signature
    for header_key in request.headers:
        lowered = header_key.lower()
        if header_key == 'Date':
            # Query-string auth uses Expires in place of the Date header.
            query_dict['Expires'] = request.headers['Date']
            continue
        relevant = (lowered.startswith('x-amz-')
                    or lowered in ['content-md5', 'content-type'])
        if relevant:
            query_dict[lowered] = request.headers[lowered]
    encoded = percent_encode_sequence(query_dict)
    scheme, netloc, path, existing_query, fragment = urlsplit(request.url)
    if existing_query:
        # Preserve the original query string in front of the new params.
        encoded = '%s&%s' % (existing_query, encoded)
    request.url = urlunsplit((scheme, netloc, path, encoded, fragment))
def create_request_from_raw_request(raw_request):
    """Parse a raw HTTP request into an AWSRequest.

    Raises a plain Exception when the raw request cannot be parsed.
    """
    request = AWSRequest()
    raw = RawHTTPRequest(raw_request)
    if raw.error_code is not None:
        raise Exception(raw.error_message)
    request.method = raw.command
    request.context['timestamp'] = DATE.strftime('%Y%m%dT%H%M%SZ')
    for key, val in raw.headers.items():
        request.headers[key] = val
    request.data = raw.rfile.read()
    host = raw.headers.get('host', '')
    # For whatever reason, the BaseHTTPRequestHandler encodes
    # the first line of the response as 'iso-8859-1',
    # so we need decode this into utf-8.
    if isinstance(raw.path, six.text_type):
        raw.path = raw.path.encode('iso-8859-1').decode('utf-8')
    url = 'https://%s%s' % (host, raw.path)
    if '?' in url:
        # Split any query string off into request.params.
        split_url = urlsplit(url)
        request.url = split_url.path
        request.params = dict(parse_qsl(split_url.query))
    else:
        request.url = url
    return request
def fix_s3_host(event_name, endpoint, request, auth, **kwargs):
    """
    Rewrite DNS-compatible path-style S3 requests to virtual-host style.

    Runs just before signing.  When the first path component names a
    bucket that satisfies the DNS naming conventions (and the region is
    allowed), the bucket is moved from the path into the host so 301
    redirects for CNAME-able buckets are avoided.  Also records the
    original path as ``auth.auth_path`` for signing.
    """
    parts = urlsplit(request.url)
    auth.auth_path = parts.path
    path_parts = parts.path.split('/')
    if len(path_parts) <= 1:
        return
    bucket_name = path_parts[1]
    logger.debug('Checking for DNS compatible bucket for: %s', request.url)
    if not (check_dns_name(bucket_name)
            and _allowed_region(endpoint.region_name)):
        logger.debug('Not changing URI, bucket is not DNS compatible: %s',
                     bucket_name)
        return
    # Bucket-level operations need the auth_path terminated with '/'.
    if len(path_parts) == 2:
        if auth.auth_path[-1] != '/':
            auth.auth_path += '/'
    path_parts.remove(bucket_name)
    host = bucket_name + '.' + endpoint.service.global_endpoint
    new_uri = urlunsplit(
        (parts.scheme, host, '/'.join(path_parts), parts.query, ''))
    request.url = new_uri
    logger.debug('URI updated to: %s', new_uri)
def retrieve_saml_assertion(self, config):
    """Authenticate against the Okta API and return the SAML assertion.

    POSTs the username/password to the Okta authn endpoint, exchanges the
    returned session token for the SAML response, and extracts the
    assertion from it.
    """
    self._validate_config_values(config)
    endpoint = config['saml_endpoint']
    hostname = urlsplit(endpoint).netloc
    auth_url = 'https://%s/api/v1/authn' % hostname
    username = config['saml_username']
    password = self._password_prompter("Password: ")
    logger.info(
        'Sending HTTP POST with username (%s) and password to Okta API '
        'endpoint: %s', username, auth_url)
    headers = {'Content-Type': 'application/json',
               'Accept': 'application/json'}
    body = json.dumps({'username': username, 'password': password})
    response = self._requests_session.post(
        auth_url, headers=headers, data=body)
    session_token = json.loads(response.text)['sessionToken']
    saml_url = endpoint + '?sessionToken=%s' % session_token
    response = self._requests_session.get(saml_url)
    logger.info('Received HTTP response of status code: %s',
                response.status_code)
    r = self._extract_saml_assertion_from_response(response.text)
    logger.info('Received the following SAML assertion: \n%s', r,
                extra={'is_saml_assertion': True})
    return r
def calc_signature(self, request, params):
    """Compute the AWS SigV2 signature for *request*.

    Returns a tuple of (encoded query string, base64 signature).
    """
    logger.debug("Calculating signature using v2 auth.")
    split = urlsplit(request.url)
    path = split.path or '/'
    string_to_sign = '%s\n%s\n%s\n' % (request.method, split.netloc, path)
    lhmac = hmac.new(self.credentials.secret_key.encode('utf-8'),
                     digestmod=sha256)
    # Any previous 'Signature' param is excluded so retries never sign
    # a stale signature.
    pairs = [
        quote(key.encode('utf-8'), safe='') + '=' +
        quote(six.text_type(params[key]).encode('utf-8'), safe='-_~')
        for key in sorted(params) if key != 'Signature'
    ]
    qs = '&'.join(pairs)
    string_to_sign += qs
    logger.debug('String to sign: %s', string_to_sign)
    lhmac.update(string_to_sign.encode('utf-8'))
    b64 = base64.b64encode(lhmac.digest()).strip().decode('utf-8')
    return (qs, b64)
def calc_signature(self, request, params):
    """Produce (query_string, signature) for AWS SigV2 signing of
    *request* using the given *params*."""
    logger.debug("Calculating signature using v2 auth.")
    url_parts = urlsplit(request.url)
    path = url_parts.path
    if not path:
        path = '/'
    string_to_sign = '%s\n%s\n%s\n' % (
        request.method, url_parts.netloc, path)
    mac = hmac.new(self.credentials.secret_key.encode('utf-8'),
                   digestmod=sha256)
    encoded = []
    for key in sorted(params):
        if key == 'Signature':
            # A stale signature from a prior attempt must never be
            # signed again on retry.
            continue
        text_value = six.text_type(params[key])
        encoded.append('%s=%s' % (
            quote(key.encode('utf-8'), safe=''),
            quote(text_value.encode('utf-8'), safe='-_~')))
    qs = '&'.join(encoded)
    string_to_sign += qs
    logger.debug('String to sign: %s', string_to_sign)
    mac.update(string_to_sign.encode('utf-8'))
    b64 = base64.b64encode(mac.digest()).strip().decode('utf-8')
    return (qs, b64)
def fix_s3_host(event_name, endpoint, request, auth, **kwargs):
    """
    Rewrite DNS-compatible path-style S3 requests to virtual-host style.

    Runs just before signing.  SigV4-signed requests are left untouched
    (after recording auth_path).  When the first path component names a
    DNS-compatible bucket in an allowed region, the bucket is moved from
    the path into the host so 301 redirects for CNAME-able buckets are
    avoided.
    """
    parts = urlsplit(request.url)
    auth.auth_path = parts.path
    path_parts = parts.path.split('/')
    # Virtual-host rewriting is skipped entirely under SigV4 signing.
    if isinstance(auth, botocore.auth.SigV4Auth):
        return
    if len(path_parts) <= 1:
        return
    bucket_name = path_parts[1]
    logger.debug('Checking for DNS compatible bucket for: %s', request.url)
    if not (check_dns_name(bucket_name)
            and _allowed_region(endpoint.region_name)):
        logger.debug('Not changing URI, bucket is not DNS compatible: %s',
                     bucket_name)
        return
    # Bucket-level operations need the auth_path terminated with '/'.
    if len(path_parts) == 2:
        if auth.auth_path[-1] != '/':
            auth.auth_path += '/'
    path_parts.remove(bucket_name)
    host = bucket_name + '.' + endpoint.service.global_endpoint
    new_uri = urlunsplit(
        (parts.scheme, host, '/'.join(path_parts), parts.query, ''))
    request.url = new_uri
    logger.debug('URI updated to: %s', new_uri)
def add_auth(self, request):
    """Attach an HMAC-v1 Authorization header to *request*."""
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug('HTTP request method: %s', request.method)
    signature = self.get_signature(request.method, url_parts,
                                   request.headers)
    auth_value = "AWS %s:%s" % (self.credentials.access_key, signature)
    request.headers['Authorization'] = auth_value
def get_parsed_query_string(self, request):
    """Return the query string of ``request.url`` as a dict of scalars."""
    raw = parse_qs(urlsplit(request.url).query)
    # parse_qs wraps every value in a list; keys are expected to be
    # unique here, so unwrap each single-element list back to a scalar.
    return {key: values[0] for key, values in raw.items()}
def _switch_hosts(request, new_endpoint, use_new_scheme=True):
    """Retarget *request* at *new_endpoint*'s netloc (and scheme, unless
    *use_new_scheme* is False), preserving the original path and query."""
    new_parts = urlsplit(new_endpoint)
    old_parts = urlsplit(request.url)
    if use_new_scheme:
        scheme = new_parts.scheme
    else:
        scheme = old_parts.scheme
    final_endpoint = urlunsplit((
        scheme,
        new_parts.netloc,
        old_parts.path,
        old_parts.query,
        '',
    ))
    logger.debug('Updating URI from %s to %s' % (request.url, final_endpoint))
    request.url = final_endpoint
def test_query_string(self):
    """A sub-resource query string (?uploads) must affect the signature."""
    split = urlsplit('/quotes/nelson?uploads')
    headers = HTTPHeaders.from_pairs(
        [('Date', 'Thu, 17 Nov 2005 18:49:58 GMT')])
    sig = self.hmacv1.get_signature('PUT', split, headers)
    self.assertEqual(sig, 'P7pBz3Z4p3GxysRSJ/gR8nk7D4o=')
def add_auth(self, request):
    """Sign *request* with HMAC-v1 and set the Authorization header.

    Raises NoCredentialsError when no credentials are configured.
    """
    if self.credentials is None:
        raise NoCredentialsError
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug("HTTP request method: %s", request.method)
    request.headers["Authorization"] = "AWS %s:%s" % (
        self.credentials.access_key,
        self.get_signature(request.method, url_parts, request.headers),
    )
def canonical_query_string(self, request):
    """Return the canonical query string for signing.

    Explicit ``request.params`` take precedence; otherwise the url is
    re-split and its query component canonicalized.
    """
    if request.params:
        return self._canonical_query_string_params(request.params)
    return self._canonical_query_string_url(urlsplit(request.url))
def switch_to_virtual_host_style(request, signature_version,
                                 default_endpoint_url=None, **kwargs):
    """
    This is a handler to force virtual host style s3 addressing no matter
    the signature version (which is taken in consideration for the default
    case). If the bucket is not DNS compatible an InvalidDNSName is thrown.

    :param request: A AWSRequest object that is about to be sent.
    :param signature_version: The signature version to sign with
    :param default_endpoint_url: The endpoint to use when switching to a
        virtual style. If None is supplied, the virtual host will be
        constructed from the url of the request.
    """
    if request.auth_path is not None:
        # The auth_path has already been applied (this may be a
        # retried request). We don't need to perform this
        # customization again.
        return
    elif _is_get_bucket_location_request(request):
        # For the GetBucketLocation response, we should not be using
        # the virtual host style addressing so we can avoid any sigv4
        # issues.
        logger.debug("Request is GetBucketLocation operation, not checking "
                     "for DNS compatibility.")
        return
    parts = urlsplit(request.url)
    # Record the original (path-style) path for signing.
    request.auth_path = parts.path
    path_parts = parts.path.split("/")
    # Retrieve what the endpoint we will be prepending the bucket name to.
    if default_endpoint_url is None:
        default_endpoint_url = parts.netloc
    if len(path_parts) > 1:
        bucket_name = path_parts[1]
        if not bucket_name:
            # If the bucket name is empty we should not be checking for
            # dns compatibility.
            return
        logger.debug("Checking for DNS compatible bucket for: %s",
                     request.url)
        if check_dns_name(bucket_name):
            # If the operation is on a bucket, the auth_path must be
            # terminated with a '/' character.
            if len(path_parts) == 2:
                if request.auth_path[-1] != "/":
                    request.auth_path += "/"
            path_parts.remove(bucket_name)
            # At the very least the path must be a '/', such as with the
            # CreateBucket operation when DNS style is being used. If this
            # is not used you will get an empty path which is incorrect.
            path = "/".join(path_parts) or "/"
            global_endpoint = default_endpoint_url
            host = bucket_name + "." + global_endpoint
            new_tuple = (parts.scheme, host, path, parts.query, "")
            new_uri = urlunsplit(new_tuple)
            request.url = new_uri
            logger.debug("URI updated to: %s", new_uri)
        else:
            # Virtual hosting was demanded but the bucket can't support it.
            raise InvalidDNSNameError(bucket_name=bucket_name)
def _canonical_host(self, url): url_parts = urlsplit(url) default_ports = {'http': 80, 'https': 443} if any(url_parts.scheme == scheme and url_parts.port == port for scheme, port in default_ports.items()): # No need to include the port if it's the default port. return url_parts.hostname # Strip out auth if it's present in the netloc. return url_parts.netloc.rsplit('@', 1)[-1]
def _prepend_to_host(self, url, prefix): url_components = urlsplit(url) parts = url_components.netloc.split('.') parts = [prefix] + parts new_netloc = '.'.join(parts) new_components = (url_components.scheme, new_netloc, url_components.path, url_components.query, '') new_url = urlunsplit(new_components) return new_url
def canonical_request(self, request):
    """Assemble the SigV4 canonical request string for *request*."""
    pieces = [request.method.upper()]
    pieces += [normalize_url_path(urlsplit(request.url).path)]
    pieces += [self.canonical_query_string(request)]
    signable_headers = self.headers_to_sign(request)
    # Canonical headers are followed by a blank line, hence the '\n'.
    pieces += [self.canonical_headers(signable_headers) + '\n']
    pieces += [self.signed_headers(signable_headers)]
    pieces += [self.payload(request)]
    return '\n'.join(pieces)
def add_auth(self, request):
    """Sign *request* with HMAC-v1 (honouring ``request.auth_path``) and
    inject the signature into the request.

    Raises NoCredentialsError when no credentials are configured.
    """
    if self.credentials is None:
        raise NoCredentialsError
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug('HTTP request method: %s', request.method)
    signature = self.get_signature(
        request.method, url_parts, request.headers,
        auth_path=request.auth_path)
    self._inject_signature(request, signature)
def quote_source_header(params, **kwargs):
    """URL-quote the path portion of the x-amz-copy-source header.

    Any user-supplied extras (say '?versionId=myversionid') are passed
    through unquoted.
    """
    headers = params['headers']
    if headers and 'x-amz-copy-source' in headers:
        parts = urlsplit(headers['x-amz-copy-source'])
        # Only the path gets quoted; scheme, netloc, query and fragment
        # pass through unchanged.
        quoted_path = quote(parts[2].encode('utf-8'), '/~')
        headers['x-amz-copy-source'] = urlunsplit(
            (parts[0], parts[1], quoted_path, parts[3], parts[4]))
def _convert_to_crt_http_request(self, botocore_http_request):
    """Convert a botocore HTTP request into a CRT request, ensuring a
    host header is present and dropping any Content-MD5 header."""
    # Logic that does CRTUtils.crt_request_from_aws_request
    crt_request = self._crt_request_from_aws_request(botocore_http_request)
    if crt_request.headers.get("host") is None:
        # If host is not set, derive it from the URL before handing the
        # request to CRT s3.
        netloc = urlsplit(botocore_http_request.url).netloc
        crt_request.headers.set("host", netloc)
    if crt_request.headers.get('Content-MD5') is not None:
        crt_request.headers.remove("Content-MD5")
    return crt_request
def test_bucket_operations(self):
    """Standard bucket sub-resources given as query strings must be
    carried through into the canonical resource."""
    operations = ('acl', 'cors', 'lifecycle', 'policy', 'notification',
                  'logging', 'tagging', 'requestPayment', 'versioning',
                  'website')
    for operation in operations:
        url = '/quotes?%s' % operation
        canonical = self.hmacv1.canonical_resource(urlsplit(url))
        self.assertEqual(canonical, '/quotes?%s' % operation)
def _modify_request_before_signing(self, request):
    """Prepare *request* for presigning as a GET.

    Clears the headers, forces the method to GET, moves operation params
    (from the query string and form-encoded body) ahead of the SigV4
    auth params in the query string, and rebuilds the URL.
    """
    # This is our chance to add additional query params we need
    # before we go about calculating the signature.
    request.headers = {}
    request.method = 'GET'
    # Note that we're not including X-Amz-Signature.
    # From the docs: "The Canonical Query String must include all the query
    # parameters from the preceding table except for X-Amz-Signature.
    auth_params = {
        'X-Amz-Algorithm': 'AWS4-HMAC-SHA256',
        'X-Amz-Credential': self.scope(request),
        'X-Amz-Date': self.timestamp,
        'X-Amz-Expires': self._expires,
        'X-Amz-SignedHeaders': 'host',
    }
    if self.credentials.token is not None:
        auth_params['X-Amz-Security-Token'] = self.credentials.token
    # Now parse the original query string to a dict, inject our new query
    # params, and serialize back to a query string.
    url_parts = urlsplit(request.url)
    # parse_qs makes each value a list, but in our case we know we won't
    # have repeated keys so we know we have single element lists which we
    # can convert back to scalar values.
    query_dict = dict(
        [(k, v[0]) for k, v in parse_qs(url_parts.query).items()])
    # The spec is particular about this. It *has* to be:
    # https://<endpoint>?<operation params>&<auth params>
    # You can't mix the two types of params together, i.e just keep doing
    # new_query_params.update(op_params)
    # new_query_params.update(auth_params)
    # percent_encode_sequence(new_query_params)
    operation_params = ''
    if request.data:
        # We also need to move the body params into the query string.
        # request.data will be populated, for example, with query services
        # which normally form encode the params into the body.
        # This means that request.data is a dict() of the operation params.
        query_dict.update(request.data)
        request.data = ''
    if query_dict:
        operation_params = percent_encode_sequence(query_dict) + '&'
    new_query_string = (operation_params +
                        percent_encode_sequence(auth_params))
    # url_parts is a tuple (and therefore immutable) so we need to create
    # a new url_parts with the new query string.
    # <part>   - <index>
    # scheme   - 0
    # netloc   - 1
    # path     - 2
    # query    - 3  <-- we're replacing this.
    # fragment - 4
    p = url_parts
    new_url_parts = (p[0], p[1], p[2], new_query_string, p[4])
    request.url = urlunsplit(new_url_parts)
def retrieve_saml_assertion(self, config):
    """Authenticate against the Okta API and return the SAML assertion.

    Handles both the direct-success flow and the MFA_REQUIRED flow (via
    OktaMFA); any other status aborts with an exception.

    NOTE(review): the original source was corrupted (redacted) between the
    password prompt and the status check; the POST/parse sequence below is
    reconstructed from the sibling implementation of this method in this
    file — confirm against the upstream source.
    """
    self._validate_config_values(config)
    endpoint = config['saml_endpoint']
    hostname = urlsplit(endpoint).netloc
    auth_url = 'https://%s/api/v1/authn' % hostname
    username = config['saml_username']
    password = self._password_prompter("Password: ")
    logger.info(
        'Sending HTTP POST with username (%s) and password to Okta API '
        'endpoint: %s', username, auth_url)
    response = self._requests_session.post(
        auth_url,
        headers={'Content-Type': 'application/json',
                 'Accept': 'application/json'},
        data=json.dumps({'username': username, 'password': password}))
    parsed = json.loads(response.text)
    if parsed['status'] == "SUCCESS":
        session_token = parsed['sessionToken']
    elif parsed['status'] == "MFA_REQUIRED":
        okta_mfa = OktaMFA(parsed)
        session_token = okta_mfa.get_session_token()
    else:
        # Fail loudly: the original logged (with a malformed logging call
        # that passed an argument without a placeholder) and then fell
        # through to an unbound 'session_token' NameError.
        logger.info('Cannot proceed with authentication. Status: %s',
                    parsed['status'])
        raise Exception(
            'Cannot proceed with authentication. Status: %s'
            % parsed['status'])
    saml_url = endpoint + '?sessionToken=%s' % session_token
    response = self._requests_session.get(saml_url)
    logger.info('Received HTTP response of status code: %s',
                response.status_code)
    r = self._extract_saml_assertion_from_response(response.text)
    logger.info('Received the following SAML assertion: \n%s', r,
                extra={'is_saml_assertion': True})
    return r
def _modify_request_before_signing(self, request):
    """Prepare *request* for presigning as a GET.

    Clears the headers, forces the method to GET, merges operation params
    (query string plus any form-encoded body dict) ahead of the SigV4
    auth params, and rebuilds the URL with the combined query string.
    """
    # This is our chance to add additional query params we need
    # before we go about calculating the signature.
    request.headers = {}
    request.method = 'GET'
    # Note that we're not including X-Amz-Signature.
    # From the docs: "The Canonical Query String must include all the query
    # parameters from the preceding table except for X-Amz-Signature.
    auth_params = {
        'X-Amz-Algorithm': 'AWS4-HMAC-SHA256',
        'X-Amz-Credential': self.scope(request),
        'X-Amz-Date': request.context['timestamp'],
        'X-Amz-Expires': self._expires,
        'X-Amz-SignedHeaders': 'host',
    }
    if self.credentials.token is not None:
        auth_params['X-Amz-Security-Token'] = self.credentials.token
    # Now parse the original query string to a dict, inject our new query
    # params, and serialize back to a query string.
    url_parts = urlsplit(request.url)
    # parse_qs makes each value a list, but in our case we know we won't
    # have repeated keys so we know we have single element lists which we
    # can convert back to scalar values.
    query_dict = dict([(k, v[0])
                       for k, v in parse_qs(url_parts.query).items()])
    # The spec is particular about this. It *has* to be:
    # https://<endpoint>?<operation params>&<auth params>
    # You can't mix the two types of params together, i.e just keep doing
    # new_query_params.update(op_params)
    # new_query_params.update(auth_params)
    # percent_encode_sequence(new_query_params)
    operation_params = ''
    if request.data:
        # We also need to move the body params into the query string.
        # request.data will be populated, for example, with query services
        # which normally form encode the params into the body.
        # This means that request.data is a dict() of the operation params.
        query_dict.update(request.data)
        request.data = ''
    if query_dict:
        operation_params = percent_encode_sequence(query_dict) + '&'
    new_query_string = (operation_params +
                        percent_encode_sequence(auth_params))
    # url_parts is a tuple (and therefore immutable) so we need to create
    # a new url_parts with the new query string.
    # <part>   - <index>
    # scheme   - 0
    # netloc   - 1
    # path     - 2
    # query    - 3  <-- we're replacing this.
    # fragment - 4
    p = url_parts
    new_url_parts = (p[0], p[1], p[2], new_query_string, p[4])
    request.url = urlunsplit(new_url_parts)
def _canonical_host(self, url): url_parts = urlsplit(url) default_ports = { 'http': 80, 'https': 443 } if any(url_parts.scheme == scheme and url_parts.port == port for scheme, port in default_ports.items()): # No need to include the port if it's the default port. return url_parts.hostname # Strip out auth if it's present in the netloc. return url_parts.netloc.rsplit('@', 1)[-1]
def test_duplicate_headers(self):
    """Repeated amz headers must still produce the known-good signature."""
    pairs = [
        ('Date', 'Thu, 17 Nov 2005 18:49:58 GMT'),
        ('Content-Md5', 'c8fdb181845a4ca6b8fec737b3581d76'),
        ('Content-Type', 'text/html'),
        ('X-Amz-Meta-Author', '*****@*****.**'),
        ('X-Amz-Meta-Author', '*****@*****.**'),
        ('X-Amz-Magic', 'abracadabra'),
    ]
    headers = HTTPHeaders.from_pairs(pairs)
    url_parts = urlsplit('/quotes/nelson')
    signature = self.hmacv1.get_signature('PUT', url_parts, headers)
    self.assertEqual(signature, 'kIdMxyiYB+F+83zYGR6sSb3ICcE=')
def authenticate_presign_url_signv2(method, path, headers, data, url, query_params, request_dict):
    """Validate a SigV2-presigned URL by recomputing its signature.

    Recomputes the HMAC-v1 query-auth signature from *request_dict* and the
    test credentials, compares it with the "Signature" query param, and also
    checks the "Expires" timestamp.  Returns an XML error response on
    mismatch/expiry, or None on success.  When S3_SKIP_SIGNATURE_VALIDATION
    is set, mismatches and expiry are logged but not rejected.

    NOTE(review): *path*, *headers*, *data* and *url* are unused here --
    presumably kept for a uniform authenticator signature; confirm callers.
    """
    # Calculating Signature
    aws_request = create_request_object(request_dict)
    credentials = Credentials(
        access_key=TEST_AWS_ACCESS_KEY_ID,
        secret_key=TEST_AWS_SECRET_ACCESS_KEY,
        # Token only present for temporary credentials.
        token=query_params.get("X-Amz-Security-Token", None),
    )
    auth = HmacV1QueryAuth(credentials=credentials, expires=query_params["Expires"][0])
    split = urlsplit(aws_request.url)
    string_to_sign = auth.get_string_to_sign(method=method, split=split, headers=aws_request.headers)
    signature = auth.get_signature(string_to_sign=string_to_sign)
    # Comparing the signature in url with signature we calculated.
    # The query param value is percent-encoded in the URL; decode it first.
    query_sig = urlparse.unquote(query_params["Signature"][0])
    if config.S3_SKIP_SIGNATURE_VALIDATION:
        if query_sig != signature:
            LOGGER.warning(
                "Signatures do not match, but not raising an error, as S3_SKIP_SIGNATURE_VALIDATION=1"
            )
        # Force the comparison below to succeed.
        signature = query_sig
    if query_sig != signature:
        return requests_error_response_xml_signature_calculation(
            code=403,
            code_string="SignatureDoesNotMatch",
            aws_access_token=TEST_AWS_ACCESS_KEY_ID,
            string_to_sign=string_to_sign,
            signature=signature,
            message="The request signature we calculated does not match the signature you provided. \ Check your key and signing method.",
        )
    # Checking whether the url is expired or not ("Expires" is an epoch
    # timestamp in seconds).
    if int(query_params["Expires"][0]) < time.time():
        if config.S3_SKIP_SIGNATURE_VALIDATION:
            LOGGER.warning(
                "Signature is expired, but not raising an error, as S3_SKIP_SIGNATURE_VALIDATION=1"
            )
        else:
            return requests_error_response_xml_signature_calculation(
                code=403,
                code_string="AccessDenied",
                message="Request has expired",
                expires=query_params["Expires"][0],
            )
def headers_to_sign(self, request):
    """
    Select the headers from the request that need to be included
    in the StringToSign.
    """
    signed = HTTPHeaders()
    for name, value in request.headers.items():
        signed[name.lower()] = value
    # Host is mandatory for signing; derive it from the URL when absent.
    if 'host' not in signed:
        signed['host'] = urlsplit(request.url).netloc
    return signed
def canonical_request(self, request):
    """Build the SigV4 canonical-request string for *request*.

    Joins method, normalized path, canonical query string, canonical
    headers, signed-header list and the payload checksum with newlines.
    """
    lines = [request.method.upper()]
    lines.append(self._normalize_url_path(urlsplit(request.url).path))
    lines.append(self.canonical_query_string(request))
    signed_header_map = self.headers_to_sign(request)
    # Canonical headers section is itself newline-terminated.
    lines.append(self.canonical_headers(signed_header_map) + '\n')
    lines.append(self.signed_headers(signed_header_map))
    # Prefer a caller-provided payload hash over recomputing it.
    if 'X-Amz-Content-SHA256' in request.headers:
        lines.append(request.headers['X-Amz-Content-SHA256'])
    else:
        lines.append(self.payload(request))
    return '\n'.join(lines)
def _prepend_to_host(self, url, prefix): url_components = urlsplit(url) parts = url_components.netloc.split('.') parts = [prefix] + parts new_netloc = '.'.join(parts) new_components = ( url_components.scheme, new_netloc, url_components.path, url_components.query, '' ) new_url = urlunsplit(new_components) return new_url
def assert_presigned_url_matches(self, actual_url, expected_match):
    """Verify generated presigned URL matches expected dict.

    This method compares an actual URL against a dict of expected
    values.  The reason that the "expected_match" is a dict instead
    of the expected presigned URL is because the query params
    are unordered so we can't guarantee an expected query param
    ordering.
    """
    url_parts = urlsplit(actual_url)
    self.assertEqual(url_parts.netloc, expected_match['hostname'])
    self.assertEqual(url_parts.path, expected_match['path'])
    # Compare query params as a parsed dict to sidestep ordering.
    actual_query = self.parse_query_string(url_parts.query)
    self.assertEqual(actual_query, expected_match['query_params'])
def fix_s3_host(event_name, endpoint, request, auth, **kwargs):
    """
    This handler looks at S3 requests just before they are signed.  If there
    is a bucket name on the path (true for everything except ListAllBuckets)
    it checks to see if that bucket name conforms to the DNS naming
    conventions.  If it does, it alters the request to use ``virtual
    hosting`` style addressing rather than ``path-style`` addressing.  This
    allows us to avoid 301 redirects for all bucket names that can be
    CNAME'd.
    """
    if request.auth_path is not None:
        # The auth_path has already been applied (this may be a
        # retried request).  We don't need to perform this
        # customization again.
        return
    elif _is_get_bucket_location_request(request):
        # For the GetBucketLocation response, we should not be using
        # the virtual host style addressing so we can avoid any sigv4
        # issues.
        logger.debug("Request is GetBucketLocation operation, not checking "
                     "for DNS compatibility.")
        return
    parts = urlsplit(request.url)
    # Record the original path so a retried request detects (and skips)
    # this rewrite via the auth_path check above.
    request.auth_path = parts.path
    path_parts = parts.path.split('/')
    # SigV4 requests are left in path-style form (see GetBucketLocation
    # comment above); only non-SigV4 auth gets the virtual-host rewrite.
    if isinstance(auth, botocore.auth.SigV4Auth):
        return
    if len(path_parts) > 1:
        # parts.path starts with '/', so path_parts[0] is '' and the bucket
        # is the second element.
        bucket_name = path_parts[1]
        logger.debug('Checking for DNS compatible bucket for: %s',
                     request.url)
        if check_dns_name(bucket_name) and _allowed_region(endpoint.region_name):
            # If the operation is on a bucket, the auth_path must be
            # terminated with a '/' character.
            if len(path_parts) == 2:
                if request.auth_path[-1] != '/':
                    request.auth_path += '/'
            # remove() drops the first occurrence, i.e. the bucket segment
            # (path_parts[0] is the empty pre-slash segment).
            path_parts.remove(bucket_name)
            # NOTE(review): for a bucket-only path this leaves path_parts as
            # [''], so the rewritten URL has an empty path -- confirm
            # downstream signing tolerates that.
            global_endpoint = 's3.amazonaws.com'
            host = bucket_name + '.' + global_endpoint
            new_tuple = (parts.scheme, host, '/'.join(path_parts),
                         parts.query, '')
            new_uri = urlunsplit(new_tuple)
            request.url = new_uri
            logger.debug('URI updated to: %s', new_uri)
        else:
            logger.debug('Not changing URI, bucket is not DNS compatible: %s',
                         bucket_name)
def add_auth(self, request):
    """Attach an HMAC-v1 ``Authorization`` header to *request*.

    :raises NoCredentialsError: when no credentials are configured.
    """
    if self.credentials is None:
        raise NoCredentialsError
    logger.debug("Calculating signature using hmacv1 auth.")
    url_parts = urlsplit(request.url)
    logger.debug("HTTP request method: %s", request.method)
    signature = self.get_signature(
        request.method, url_parts, request.headers,
        auth_path=request.auth_path)
    # We have to do this because request.headers is not
    # a normal dictionary.  It has the (unintuitive) behavior
    # of aggregating repeated setattr calls for the same
    # key value.  For example:
    # headers['foo'] = 'a'; headers['foo'] = 'b'
    # list(headers) will print ['foo', 'foo'].
    # So drop any existing entry before setting the new one.
    if "Authorization" in request.headers:
        del request.headers["Authorization"]
    request.headers["Authorization"] = (
        "AWS %s:%s" % (self.credentials.access_key, signature))
def _verify_presigned_url_addressing(region, bucket, key, s3_config,
                                     is_secure=True,
                                     customer_provided_endpoint=None,
                                     expected_url=None,
                                     signature_version=None):
    """Generate a presigned GET URL and check its scheme://host/path."""
    client = _create_s3_client(region=region, is_secure=is_secure,
                               endpoint_url=customer_provided_endpoint,
                               s3_config=s3_config,
                               signature_version=signature_version)
    presigned = client.generate_presigned_url(
        'get_object', {'Bucket': bucket, 'Key': key})
    # We're not trying to verify the params for URL presigning,
    # those are tested elsewhere.  We just care about the hostname/path.
    scheme, netloc, url_path = urlsplit(presigned)[:3]
    assert_equal('%s://%s%s' % (scheme, netloc, url_path), expected_url)
def test_put(self):
    """Canonical string and signature for a PUT with amz metadata headers."""
    header_dict = {
        'Date': 'Thu, 17 Nov 2005 18:49:58 GMT',
        'Content-Md5': 'c8fdb181845a4ca6b8fec737b3581d76',
        'Content-Type': 'text/html',
        'X-Amz-Meta-Author': '*****@*****.**',
        'X-Amz-Magic': 'abracadabra',
    }
    headers = HTTPHeaders.from_dict(header_dict)
    url_parts = urlsplit('/quotes/nelson')
    expected_canonical = (
        "PUT\nc8fdb181845a4ca6b8fec737b3581d76\ntext/html\n"
        "Thu, 17 Nov 2005 18:49:58 GMT\nx-amz-magic:abracadabra\n"
        "x-amz-meta-author:[email protected]\n/quotes/nelson")
    self.assertEqual(
        self.hmacv1.canonical_string('PUT', url_parts, headers),
        expected_canonical)
    self.assertEqual(
        self.hmacv1.get_signature('PUT', url_parts, headers),
        'jZNOcbfWmD/A/f3hSvVzXZjM2HU=')