def test_use_StorageBucket():
    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert bucket.private  # because it's the default
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket?access=public")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert not bucket.private
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = StorageBucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region == "eu-west-2"
    assert bucket.base_url == "https://s3-eu-west-2.amazonaws.com/some-bucket"

    bucket = StorageBucket("http://s3.example.com/buck/prfx")
    assert bucket.name == "buck"
    assert bucket.endpoint_url == "http://s3.example.com"
    assert bucket.region is None
    assert bucket.prefix == "prfx"
    assert bucket.base_url == "http://s3.example.com/buck"

    # Just check that __repr__ works at all
    assert repr(bucket)
def test_exists_s3(botomock):
    """exists() returns True when the S3 API returns 200."""

    def return_200(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        return {"ResponseMetadata": {"HTTPStatusCode": 200}}

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(return_200):
        assert bucket.exists()
def test_exists_s3_non_client_error_raises(botomock):
    """exists() raises StorageError when the S3 API raises a non-client error."""

    def raise_conn_error(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        raise EndpointConnectionError(endpoint_url="https://s3.amazonaws.com/")

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_conn_error), pytest.raises(StorageError):
        bucket.exists()
def test_region_checking():
    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.region is None

    # a known and classic one
    bucket = StorageBucket("https://s3-us-west-2.amazonaws.com/some-bucket")
    assert bucket.region == "us-west-2"

    with pytest.raises(ValueError):
        StorageBucket("https://s3-unheardof.amazonaws.com/some-bucket")
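# A hedged sketch (not the actual StorageBucket code) of how a region such as
# "us-west-2" could be pulled out of an "s3-<region>.amazonaws.com" hostname
# and validated against a set of known regions, raising ValueError otherwise,
# which is the behavior the test above checks. The S3_REGIONS set below is a
# hypothetical stand-in for whatever region list the real implementation uses.
import re
from urllib.parse import urlparse

S3_REGIONS = {"us-east-1", "us-west-2", "eu-west-2"}  # assumed, not exhaustive


def guess_s3_region(url):
    netloc = urlparse(url).netloc
    match = re.match(r"^s3-(.+)\.amazonaws\.com$", netloc)
    if not match:
        # Plain "s3.amazonaws.com" carries no explicit region.
        return None
    region = match.group(1)
    if region not in S3_REGIONS:
        raise ValueError(f"Not a known S3 region: {region!r}")
    return region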
def test_exists_s3_forbidden_raises(botomock):
    """exists() raises StorageError when the S3 API raises a 403 ClientError."""

    def raise_forbidden(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        parsed_response = {"Error": {"Code": "403", "Message": "Forbidden"}}
        raise ClientError(parsed_response, operation_name)

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_forbidden), pytest.raises(StorageError):
        bucket.exists()
def get_bucket_info(user, try_symbols=None, preferred_bucket_name=None):
    """Return a StorageBucket instance to upload to. It exposes 'name',
    'endpoint_url' and 'region'; only 'name' is guaranteed to be set.
    """
    if try_symbols is None:
        # If it wasn't explicitly passed, we need to figure this out by
        # looking at the user who uploads.
        # Namely, we're going to see if the user has the permission
        # 'upload.upload_symbols'. If the user does, it means the user intends
        # to *not* upload Try build symbols.
        # This is based on the axiom that, if the upload is made with an
        # API token, that API token can't have *both* the
        # 'upload.upload_symbols' permission *and* the
        # 'upload.upload_try_symbols' permission.
        # If the user uploads via the web, they can check a checkbox that is
        # off by default. In that case they aren't using an API token, so they
        # might have BOTH permissions. The default then falls on this NOT
        # being a Try upload.
        try_symbols = not user.has_perm("upload.upload_symbols")

    if try_symbols:
        url = settings.UPLOAD_TRY_SYMBOLS_URL
    else:
        url = settings.UPLOAD_DEFAULT_URL

    exceptions = settings.UPLOAD_URL_EXCEPTIONS

    if preferred_bucket_name:
        # If the user has indicated a preferred bucket name, check that they
        # have permission to use it.
        for url, _ in get_possible_bucket_urls(user):
            if preferred_bucket_name in url:
                return StorageBucket(url, try_symbols=try_symbols)
        raise NoPossibleBucketName(preferred_bucket_name)
    else:
        if user.email.lower() in exceptions:
            # easy
            exception = exceptions[user.email.lower()]
        else:
            # match against every possible wildcard
            exception = None  # assume no match
            for email_or_wildcard in exceptions:
                if fnmatch.fnmatch(user.email.lower(), email_or_wildcard.lower()):
                    # a match!
                    exception = exceptions[email_or_wildcard]
                    break

        if exception:
            url = exception

    return StorageBucket(url, try_symbols=try_symbols)
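# Illustration of the wildcard matching in get_bucket_info() above: keys in
# settings.UPLOAD_URL_EXCEPTIONS can be exact email addresses or fnmatch-style
# patterns, matched case-insensitively against the uploader's email.
# The addresses below are made up for the example.
import fnmatch

assert fnmatch.fnmatch("jane.doe@example.com", "*@example.com")
assert not fnmatch.fnmatch("jane.doe@example.net", "*@example.com")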
def test_exists_s3_not_found(botomock):
    """exists() returns False when the S3 API raises a 404 ClientError."""

    def raise_not_found(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        parsed_response = {
            "Error": {
                "Code": "404",
                "Message": "The specified bucket does not exist",
            }
        }
        raise ClientError(parsed_response, operation_name)

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_not_found):
        assert not bucket.exists()
def upload_microsoft_symbol(symbol, debugid, file_path, download_obj):
    filename = os.path.splitext(symbol)[0]
    uri = f"{symbol}/{debugid}/{filename}.sym"
    key_name = os.path.join(settings.SYMBOL_FILE_PREFIX, uri)

    bucket_info = StorageBucket(settings.UPLOAD_DEFAULT_URL)
    s3_client = bucket_info.client
    bucket_name = bucket_info.name

    # upload_file_upload() creates an instance but doesn't save it.
    file_upload = upload_file_upload(
        s3_client, bucket_name, key_name, file_path, microsoft_download=True
    )

    # upload_file_upload() might return None, which means it decided there
    # is no need to make an upload of this specific file.
    if file_upload:
        download_obj.skipped = False
        download_obj.file_upload = file_upload
        metrics.incr("download_microsoft_download_file_upload_upload", 1)
    else:
        download_obj.skipped = True
        logger.info(f"Skipped key {key_name}")
        metrics.incr("download_microsoft_download_file_upload_skip", 1)
    download_obj.completed_at = timezone.now()
    download_obj.save()

    # We need to inform the symbolicate app that some new symbols
    # were uploaded.
    symbol_key = (symbol, debugid)
    invalidate_symbolicate_cache([symbol_key])
def _check_storage_urls():
    """If you use MinIO to functionally test S3, the buckets you create are
    ephemeral and disappear after a restart, so make sure they exist.
    The real production S3 buckets are expected to already exist.
    """
    _all_possible_urls = set(
        list(settings.SYMBOL_URLS)
        + [settings.UPLOAD_DEFAULT_URL, settings.UPLOAD_TRY_SYMBOLS_URL]
        + list(settings.UPLOAD_URL_EXCEPTIONS.values())
    )
    for url in _all_possible_urls:
        if not url or "minio" not in urlparse(url).netloc:
            continue
        bucket = StorageBucket(url)
        try:
            bucket.client.head_bucket(Bucket=bucket.name)
        except ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                bucket.client.create_bucket(Bucket=bucket.name)
                logger.info(f"Created minio bucket {bucket.name!r}")
            else:
                # The most common problem is that the S3 endpoint doesn't
                # match the configured AWS credentials.
                # If that's the case, this will raise a 403 Forbidden
                # ClientError.
                raise
def test_check_storage_urls_storageerror(exception, settings):
    fake_bucket = StorageBucket(url=settings.SYMBOL_URLS[0])
    error = StorageError(bucket=fake_bucket, backend_error=exception)
    with patch("tecken.storage.StorageBucket.exists", side_effect=error):
        errors = dockerflow_extra.check_storage_urls(None)
    assert len(errors) == 2
    for error in errors:
        assert str(exception) in error.msg
        assert error.id == "tecken.health.E002"
def test_init(url, expected):
    """The URL is processed during initialization."""
    bucket = StorageBucket(url)
    assert bucket.backend == expected["backend"]
    assert bucket.base_url == expected["base_url"]
    assert bucket.endpoint_url == expected["endpoint_url"]
    assert bucket.name == expected["name"]
    assert bucket.prefix == expected["prefix"]
    assert bucket.private == expected["private"]
    assert bucket.region == expected["region"]
    assert repr(bucket)
def test_storageerror_msg():
    """The StorageError message includes the URL and the backend error message."""
    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket?access=public")
    parsed_response = {"Error": {"Code": "403", "Message": "Forbidden"}}
    backend_error = ClientError(parsed_response, "HeadBucket")
    error = StorageError(bucket, backend_error)
    expected = (
        "s3 backend (https://s3.amazonaws.com/some-bucket?access=public)"
        " raised ClientError: An error occurred (403) when calling the HeadBucket"
        " operation: Forbidden"
    )
    assert str(error) == expected
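# A minimal sketch of how StorageError.__str__ could compose the message the
# test above expects, assuming the bucket keeps the original URL in a `url`
# attribute and the backend name in `backend`; the real implementation may
# differ in details.
def storage_error_str(bucket, backend_error):
    return (
        f"{bucket.backend} backend ({bucket.url}) raised "
        f"{type(backend_error).__name__}: {backend_error}"
    )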
def check_url(url, setting_key):
    if url in checked:
        return
    bucket = StorageBucket(url)
    if not bucket.private:
        return
    try:
        if not bucket.exists():
            errors.append(
                checks.Error(
                    f"Unable to connect to {url} (bucket={bucket.name!r}), "
                    f"because bucket not found",
                    id="tecken.health.E001",
                )
            )
    except StorageError as error:
        errors.append(
            checks.Error(
                f"Unable to connect to {url} (bucket={bucket.name!r}), "
                f"due to {error.backend_msg}",
                id="tecken.health.E002",
            )
        )
    else:
        checked.append(url)
def test_StorageBucket_client():
    mock_session = mock.Mock()
    client_kwargs_calls = []
    client_args_calls = []

    def get_client(*args, **kwargs):
        client_args_calls.append(args)
        client_kwargs_calls.append(kwargs)
        return mock.Mock()

    mock_session.client.side_effect = get_client

    def new_session():
        return mock_session

    with mock.patch("tecken.storage.boto3.session.Session", new=new_session):
        bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
        client = bucket.client
        client_again = bucket.client
        assert client_again is client
        # Only 1 session should have been created
        assert len(mock_session.mock_calls) == 1
        assert "endpoint_url" not in client_kwargs_calls[-1]

        # make a client that requires an endpoint_url
        bucket = StorageBucket("http://s3.example.com/buck/prefix")
        bucket.client
        assert client_kwargs_calls[-1]["endpoint_url"] == "http://s3.example.com"

        # make a client that requires a different region
        bucket = StorageBucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
        bucket.client
        assert client_kwargs_calls[-1]["region_name"] == "eu-west-2"
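# A hedged sketch of the lazy, cached `client` behavior the test above
# exercises: the boto3 client is created once per bucket, endpoint_url is only
# passed when the URL is a non-AWS endpoint, and region_name only when the URL
# implies a region. The class below is a hypothetical stand-in, not the actual
# StorageBucket implementation.
import boto3


class _LazyClientSketch:
    def __init__(self, endpoint_url=None, region=None):
        self.endpoint_url = endpoint_url
        self.region = region
        self._client = None

    @property
    def client(self):
        if self._client is None:
            options = {}
            if self.endpoint_url:
                options["endpoint_url"] = self.endpoint_url
            if self.region:
                options["region_name"] = self.region
            session = boto3.session.Session()
            self._client = session.client("s3", **options)
        return self._client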
def check_url(url, setting_key):
    if url in checked:
        return
    bucket = StorageBucket(url)
    if not bucket.private:
        return
    if bucket.is_google_cloud_storage:
        try:
            bucket.client.get_bucket(bucket.name)
            checked.append(url)
        except google_BadRequest as exception:
            errors.append(
                checks.Error(
                    f"Unable to connect to {url} (bucket={bucket.name!r}), "
                    f"because bucket not found due to {exception}",
                    id="tecken.health.E003",
                )
            )
    else:
        try:
            bucket.client.head_bucket(Bucket=bucket.name)
        except ClientError as exception:
            if exception.response["Error"]["Code"] in ("403", "404"):
                errors.append(
                    checks.Error(
                        f"Unable to connect to {url} (bucket={bucket.name!r}, "
                        f"found in settings.{setting_key}) due to "
                        f"ClientError ({exception.response!r})",
                        id="tecken.health.E002",
                    )
                )
            else:
                raise
        except EndpointConnectionError:
            errors.append(
                checks.Error(
                    f"Unable to connect to {url} (bucket={bucket.name!r}, "
                    f"found in settings.{setting_key}) due to "
                    f"EndpointConnectionError",
                    id="tecken.health.E001",
                )
            )
        else:
            checked.append(url)
def _get_sources(self):
    for url in self.urls:
        # The URL is expected to have the bucket name as the first
        # part of the pathname.
        # In the future we might expand to a more elaborate scheme.
        yield StorageBucket(url, file_prefix=self.file_prefix)
def caching_vs_boto(request):
    """
    Measure the time it takes to do lots of reads with our caches,
    and contrast that with how long it takes to do boto queries.
    """
    if not settings.BENCHMARKING_ENABLED and not request.user.is_superuser:
        raise PermissionDenied("benchmarking disabled")

    form = forms.CachingVsBotoForm(
        request.GET, all_measure=["boto", "default", "store"]
    )
    if not form.is_valid():
        return http.JsonResponse({"errors": form.errors}, status=400)

    # Benchmarking parameters.
    iterations = form.cleaned_data["iterations"]
    symbol_path = form.cleaned_data["symbol_path"]
    measure = form.cleaned_data["measure"]

    # Setting up for boto lookup.
    s3_key = f"{settings.SYMBOL_FILE_PREFIX}/{symbol_path}"
    s3_info = StorageBucket(settings.SYMBOL_URLS[0])
    s3_client = s3_info.client
    bucket_name = s3_info.name

    def lookup_boto(key):
        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
        for obj in response.get("Contents", []):
            if obj["Key"] == key:
                # It exists!
                return True
        return False

    context = {}

    # Run it once.
    found = lookup_boto(s3_key)

    # Prime the caches with this finding.
    cache_key = hashlib.md5(
        force_bytes(f"benchmarking:caching_vs_boto:{s3_key}")
    ).hexdigest()
    cache_configs = ("default", "store")
    for cache_config in cache_configs:
        if cache_config in measure:
            caches[cache_config].set(cache_key, found, 60)

    # Now run it 'iterations' times and measure.
    times = {key: [] for key in measure}
    for _ in range(iterations):
        if "boto" in measure:
            with metrics.timer("benchmarking_cachingvsboto_boto"):
                t0 = time.time()
                lookup_boto(s3_key)
                t1 = time.time()
                times["boto"].append(t1 - t0)
        for cache_config in cache_configs:
            if cache_config not in measure:
                continue
            with metrics.timer(f"benchmarking_cachingvsboto_{cache_config}"):
                t0 = time.time()
                caches[cache_config].get(cache_key)
                t1 = time.time()
                times[cache_config].append(t1 - t0)

    def summarize(numbers):
        return {
            "calls": len(numbers),
            "sum": sum(numbers),
            "mean": statistics.mean(numbers),
            "median": statistics.median(numbers),
        }

    context["found_in_s3"] = found
    context["measure"] = measure
    context["results"] = {key: summarize(times[key]) for key in measure}
    return http.JsonResponse(context)
def test_google_cloud_storage_client(gcsmock):
    bucket = StorageBucket("https://storage.googleapis.com/foo-bar-bucket")
    assert bucket.name == "foo-bar-bucket"
    client = bucket.get_storage_client()
    assert isinstance(client, google_Client)
def test_init_file_prefix(url, file_prefix, prefix):
    """A file_prefix is optionally combined with the URL prefix."""
    bucket = StorageBucket(url, file_prefix=file_prefix)
    assert bucket.prefix == prefix
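# A minimal sketch, under assumption, of how a file_prefix could be combined
# with the prefix parsed from the URL path: empty parts are dropped and the
# rest joined with "/". The actual StorageBucket implementation may differ.
def combine_prefixes(url_prefix, file_prefix):
    return "/".join(part for part in (url_prefix, file_prefix) if part)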
def test_init_unknown_backend_raises():
    """An exception is raised if the backend can't be determined from the URL."""
    with pytest.raises(ValueError):
        StorageBucket("https://unknown-backend.example.com/some-bucket")
def test_init_unknown_region_raises():
    """An exception is raised by an S3 URL with an unknown region."""
    with pytest.raises(ValueError):
        StorageBucket("https://s3-unheardof.amazonaws.com/some-bucket")
def test_google_cloud_storage_client_with_prefix():
    bucket = StorageBucket("https://storage.googleapis.com/foo-bar-bucket/myprefix")
    assert bucket.name == "foo-bar-bucket"
    assert bucket.prefix == "myprefix"