Example #1
def test_use_StorageBucket():
    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert bucket.private  # because it's the default
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = StorageBucket(
        "https://s3.amazonaws.com/some-bucket?access=public")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert not bucket.private
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = StorageBucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region == "eu-west-2"
    assert bucket.base_url == "https://s3-eu-west-2.amazonaws.com/some-bucket"

    bucket = StorageBucket("http://s3.example.com/buck/prfx")
    assert bucket.name == "buck"
    assert bucket.endpoint_url == "http://s3.example.com"
    assert bucket.region is None
    assert bucket.prefix == "prfx"
    assert bucket.base_url == "http://s3.example.com/buck"

    # Just check that __repr__ works at all
    assert repr(bucket)
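
A minimal sketch of the URL parsing these assertions imply; it is not the actual StorageBucket implementation and it skips the region validation shown in Example #4.

from urllib.parse import urlparse

def parse_bucket_url(url):
    # Region is encoded in hosts like s3-eu-west-2.amazonaws.com
    parsed = urlparse(url)
    host = parsed.netloc
    region = None
    endpoint_url = None
    if host.endswith(".amazonaws.com"):
        if host.startswith("s3-"):
            region = host[len("s3-"):-len(".amazonaws.com")]
    else:
        # Non-AWS hosts need an explicit endpoint_url
        endpoint_url = f"{parsed.scheme}://{host}"
    # First path component is the bucket name, the rest is the prefix
    name, _, prefix = parsed.path.lstrip("/").partition("/")
    private = "access=public" not in parsed.query
    base_url = f"{parsed.scheme}://{host}/{name}"
    return name, endpoint_url, region, private, prefix, base_url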
Example #2
def test_exists_s3(botomock):
    """exists() returns True when then S3 API returns 200."""
    def return_200(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        return {"ReponseMetadata": {"HTTPStatusCode": 200}}

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(return_200):
        assert bucket.exists()
Example #3
def test_exists_s3_non_client_error_raises(botomock):
    """exists() raises StorageError when the S3 API raises a non-client error."""
    def raise_conn_error(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        raise EndpointConnectionError(endpoint_url="https://s3.amazonaws.com/")

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_conn_error), pytest.raises(StorageError):
        bucket.exists()
Example #4
def test_region_checking():
    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.region is None

    # a known and classic one
    bucket = StorageBucket("https://s3-us-west-2.amazonaws.com/some-bucket")
    assert bucket.region == "us-west-2"

    with pytest.raises(ValueError):
        StorageBucket("https://s3-unheardof.amazonaws.com/some-bucket")
Example #5
def test_exists_s3_forbidden_raises(botomock):
    """exists() raises StorageError when the S3 API raises a 403 ClientError."""
    def raise_forbidden(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        parsed_response = {"Error": {"Code": "403", "Message": "Forbidden"}}
        raise ClientError(parsed_response, operation_name)

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_forbidden), pytest.raises(StorageError):
        bucket.exists()
Example #6
def get_bucket_info(user, try_symbols=None, preferred_bucket_name=None):
    """return an object that has 'bucket', 'endpoint_url',
    'region'.
    Only 'bucket' is mandatory in the response object.
    """

    if try_symbols is None:
        # If it wasn't explicitly passed, we need to figure this out by
        # looking at the user who uploads.
        # Namely, we're going to see if the user has the permission
        # 'upload.upload_symbols'. If the user does, it means the user intends
        # to *not* upload Try build symbols.
        # This is based on the axiom that, if the upload is made with an
        # API token, that API token can't have *both* the
        # 'upload.upload_symbols' permission *and* the
        # 'upload.upload_try_symbols' permission.
        # If the user uploads via the web the user has a choice to check
        # a checkbox that is off by default. If doing so, the user isn't
        # using an API token, so the user might have BOTH permissions.
        # Then the default falls on this NOT being a Try upload.
        try_symbols = not user.has_perm("upload.upload_symbols")

    if try_symbols:
        url = settings.UPLOAD_TRY_SYMBOLS_URL
    else:
        url = settings.UPLOAD_DEFAULT_URL

    exceptions = settings.UPLOAD_URL_EXCEPTIONS
    if preferred_bucket_name:
        # If the user has indicated a preferred bucket name, check that they have
        # permission to use it.
        for url, _ in get_possible_bucket_urls(user):
            if preferred_bucket_name in url:
                return StorageBucket(url, try_symbols=try_symbols)
        raise NoPossibleBucketName(preferred_bucket_name)
    else:
        if user.email.lower() in exceptions:
            # easy
            exception = exceptions[user.email.lower()]
        else:
            # match against every possible wildcard
            exception = None  # assume no match
            for email_or_wildcard in exceptions:
                if fnmatch.fnmatch(user.email.lower(),
                                   email_or_wildcard.lower()):
                    # a match!
                    exception = exceptions[email_or_wildcard]
                    break
        if exception:
            url = exception

    return StorageBucket(url, try_symbols=try_symbols)
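
A standalone illustration of the fnmatch wildcard matching used above for the email exceptions; the UPLOAD_URL_EXCEPTIONS mapping here is hypothetical.

import fnmatch

UPLOAD_URL_EXCEPTIONS = {
    "*@example.com": "https://s3.amazonaws.com/private-example-bucket",
}

def find_exception_url(email, exceptions):
    # Case-insensitive wildcard match, mirroring the loop in get_bucket_info()
    for email_or_wildcard, url in exceptions.items():
        if fnmatch.fnmatch(email.lower(), email_or_wildcard.lower()):
            return url
    return None

print(find_exception_url("Jane.Doe@example.com", UPLOAD_URL_EXCEPTIONS))
# -> https://s3.amazonaws.com/private-example-bucket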
Example #7
def test_exists_s3_not_found(botomock):
    """exists() returns False when the S3 API raises a 404 ClientError."""
    def raise_not_found(self, operation_name, api_params):
        assert operation_name == "HeadBucket"
        parsed_response = {
            "Error": {
                "Code": "404",
                "Message": "The specified bucket does not exist"
            }
        }
        raise ClientError(parsed_response, operation_name)

    bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
    with botomock(raise_not_found):
        assert not bucket.exists()
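
Taken together, Examples #2, #3, #5, and #7 suggest exists() behaves roughly as follows; this is a sketch under those assumptions, not the project's implementation (StorageError is the exception class used in the other examples).

from botocore.exceptions import ClientError, EndpointConnectionError

def exists(bucket):
    try:
        # Examples #2 and #7 show a HeadBucket call being made
        bucket.client.head_bucket(Bucket=bucket.name)
    except ClientError as exception:
        if exception.response["Error"]["Code"] == "404":
            # Example #7: a missing bucket means exists() is False
            return False
        # Example #5: other client errors (e.g. 403) become StorageError
        raise StorageError(bucket, exception)
    except EndpointConnectionError as exception:
        # Example #3: connection problems also surface as StorageError
        raise StorageError(bucket, exception)
    return True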
Example #8
def upload_microsoft_symbol(symbol, debugid, file_path, download_obj):
    filename = os.path.splitext(symbol)[0]
    uri = f"{symbol}/{debugid}/{filename}.sym"
    key_name = os.path.join(settings.SYMBOL_FILE_PREFIX, uri)

    bucket_info = StorageBucket(settings.UPLOAD_DEFAULT_URL)
    s3_client = bucket_info.client
    bucket_name = bucket_info.name

    # upload_file_upload() creates an instance but doesn't save it
    file_upload = upload_file_upload(s3_client,
                                     bucket_name,
                                     key_name,
                                     file_path,
                                     microsoft_download=True)
    # upload_file_upload() might return None, which means it decided
    # there was no need to upload this specific file.
    if file_upload:
        download_obj.skipped = False
        download_obj.file_upload = file_upload
        metrics.incr("download_microsoft_download_file_upload_upload", 1)
    else:
        download_obj.skipped = True
        logger.info(f"Skipped key {key_name}")
        metrics.incr("download_microsoft_download_file_upload_skip", 1)
    download_obj.completed_at = timezone.now()
    download_obj.save()

    # We need to inform the symbolicate app that some new symbols
    # were uploaded.
    symbol_key = (symbol, debugid)
    invalidate_symbolicate_cache([symbol_key])
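
For reference, a standalone illustration of how key_name is composed above; the SYMBOL_FILE_PREFIX value and the symbol/debugid pair are hypothetical.

import os

SYMBOL_FILE_PREFIX = "v1"
symbol, debugid = "xul.pdb", "44E4EC8C2F41492B9369D6B9A059577C2"

filename = os.path.splitext(symbol)[0]
uri = f"{symbol}/{debugid}/{filename}.sym"
key_name = os.path.join(SYMBOL_FILE_PREFIX, uri)
print(key_name)  # v1/xul.pdb/44E4EC8C2F41492B9369D6B9A059577C2/xul.sym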
Example #9
 def _check_storage_urls():
     """If you use minio to functionally test S3, since it's
     ephemeral the buckets you create disappear after a restart.
     Make sure they exist. That's what we expect to happen with the
     real production S3 buckets.
     """
     _all_possible_urls = set(
         list(settings.SYMBOL_URLS)
         + [settings.UPLOAD_DEFAULT_URL, settings.UPLOAD_TRY_SYMBOLS_URL]
         + list(settings.UPLOAD_URL_EXCEPTIONS.values())
     )
     for url in _all_possible_urls:
         if not url or "minio" not in urlparse(url).netloc:
             continue
         bucket = StorageBucket(url)
         try:
             bucket.client.head_bucket(Bucket=bucket.name)
         except ClientError as exception:
             if exception.response["Error"]["Code"] == "404":
                 bucket.client.create_bucket(Bucket=bucket.name)
                 logger.info(f"Created minio bucket {bucket.name!r}")
             else:
                 # The most common problem is that the S3 endpoint doesn't
                 # match the configured AWS credentials. If that's the case,
                 # this will raise a 403 Forbidden ClientError.
                 raise
Example #10
def test_check_storage_urls_storageerror(exception, settings):
    fake_bucket = StorageBucket(url=settings.SYMBOL_URLS[0])
    error = StorageError(bucket=fake_bucket, backend_error=exception)
    with patch("tecken.storage.StorageBucket.exists", side_effect=error):
        errors = dockerflow_extra.check_storage_urls(None)
    assert len(errors) == 2
    for error in errors:
        assert str(exception) in error.msg
        assert error.id == "tecken.health.E002"
Example #11
def test_init(url, expected):
    """The URL is processed during initialization."""
    bucket = StorageBucket(url)
    assert bucket.backend == expected["backend"]
    assert bucket.base_url == expected["base_url"]
    assert bucket.endpoint_url == expected["endpoint_url"]
    assert bucket.name == expected["name"]
    assert bucket.prefix == expected["prefix"]
    assert bucket.private == expected["private"]
    assert bucket.region == expected["region"]
    assert repr(bucket)
Example #12
def test_storageerror_msg():
    """The StorageError message includes the URL and the backend error message."""
    bucket = StorageBucket(
        "https://s3.amazonaws.com/some-bucket?access=public")
    parsed_response = {"Error": {"Code": "403", "Message": "Forbidden"}}
    backend_error = ClientError(parsed_response, "HeadBucket")
    error = StorageError(bucket, backend_error)
    expected = (
        "s3 backend (https://s3.amazonaws.com/some-bucket?access=public)"
        " raised ClientError: An error occurred (403) when calling the HeadBucket"
        " operation: Forbidden")
    assert str(error) == expected
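
One way StorageError could be built to satisfy the assertion above; a sketch only. The backend_msg attribute matches its use in Example #13, but keeping the original URL on the bucket is an assumption.

class StorageErrorSketch(Exception):
    def __init__(self, bucket, backend_error):
        self.url = bucket.url  # assumes the bucket retains its original URL
        self.backend = bucket.backend
        self.backend_msg = f"{backend_error.__class__.__name__}: {backend_error}"

    def __str__(self):
        return f"{self.backend} backend ({self.url}) raised {self.backend_msg}"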
Example #13
 def check_url(url, setting_key):
     if url in checked:
         return
     bucket = StorageBucket(url)
     if not bucket.private:
         return
     try:
         if not bucket.exists():
             errors.append(
                 checks.Error(
                     f"Unable to connect to {url} (bucket={bucket.name!r}), "
                     f"because bucket not found",
                     id="tecken.health.E001",
                 ))
     except StorageError as error:
         errors.append(
             checks.Error(
                 f"Unable to connect to {url} (bucket={bucket.name!r}), "
                 f"due to {error.backend_msg}",
                 id="tecken.health.E002",
             ))
     else:
         checked.append(url)
Example #14
def test_StorageBucket_client():

    mock_session = mock.Mock()

    client_kwargs_calls = []
    client_args_calls = []

    def get_client(*args, **kwargs):
        client_args_calls.append(args)
        client_kwargs_calls.append(kwargs)
        return mock.Mock()

    mock_session.client.side_effect = get_client

    def new_session():
        return mock_session

    with mock.patch("tecken.storage.boto3.session.Session", new=new_session):
        bucket = StorageBucket("https://s3.amazonaws.com/some-bucket")
        client = bucket.client
        client_again = bucket.client
        assert client_again is client
        # Only 1 session should have been created
        assert len(mock_session.mock_calls) == 1
        assert "endpoint_url" not in client_kwargs_calls[-1]

        # make a client that requires an endpoint_url
        bucket = StorageBucket("http://s3.example.com/buck/prefix")
        bucket.client
        assert client_kwargs_calls[-1]["endpoint_url"] == (
            "http://s3.example.com")

        # make a client that requires a different region
        bucket = StorageBucket(
            "https://s3-eu-west-2.amazonaws.com/some-bucket")
        bucket.client
        assert client_kwargs_calls[-1]["region_name"] == ("eu-west-2")
Example #15
 def check_url(url, setting_key):
     if url in checked:
         return
     bucket = StorageBucket(url)
     if not bucket.private:
         return
     if bucket.is_google_cloud_storage:
         try:
             bucket.client.get_bucket(bucket.name)
             checked.append(url)
         except google_BadRequest as exception:
             errors.append(
                 checks.Error(
                     f"Unable to connect to {url} (bucket={bucket.name!r}, "
                     f"because bucket not found due to {exception}",
                     id="tecken.health.E003",
                 ))
     else:
         try:
             bucket.client.head_bucket(Bucket=bucket.name)
         except ClientError as exception:
             if exception.response["Error"]["Code"] in ("403", "404"):
                 errors.append(
                     checks.Error(
                         f"Unable to connect to {url} (bucket={bucket.name!r}, "
                         f"found in settings.{setting_key}) due to "
                         f"ClientError ({exception.response!r})",
                         id="tecken.health.E002",
                     ))
             else:
                 raise
         except EndpointConnectionError:
             errors.append(
                 checks.Error(
                     f"Unable to connect to {url} (bucket={bucket.name!r}, "
                     f"found in settings.{setting_key}) due to "
                     f"EndpointConnectionError",
                     id="tecken.health.E001",
                 ))
         else:
             checked.append(url)
Example #16
 def _get_sources(self):
     for url in self.urls:
         # The URL is expected to have the bucket name as the first
         # part of the pathname.
         # In the future we might expand to a more elaborate scheme.
         yield StorageBucket(url, file_prefix=self.file_prefix)
Example #17
def caching_vs_boto(request):
    """
    Measure the time it takes to do lots of reads with our caches,
    and contrast that with how long the equivalent boto queries take.
    """
    if not settings.BENCHMARKING_ENABLED and not request.user.is_superuser:
        raise PermissionDenied("benchmarking disabled")

    form = forms.CachingVsBotoForm(request.GET,
                                   all_measure=["boto", "default", "store"])
    if not form.is_valid():
        return http.JsonResponse({"errors": form.errors}, status=400)

    # Benchmarking parameters.
    iterations = form.cleaned_data["iterations"]
    symbol_path = form.cleaned_data["symbol_path"]
    measure = form.cleaned_data["measure"]

    # Setting up for boto lookup.
    s3_key = f"{settings.SYMBOL_FILE_PREFIX}/{symbol_path}"
    s3_info = StorageBucket(settings.SYMBOL_URLS[0])
    s3_client = s3_info.client
    bucket_name = s3_info.name

    def lookup_boto(key):
        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
        for obj in response.get("Contents", []):
            if obj["Key"] == key:
                # It exists!
                return True
        return False

    context = {}

    # Run it once.
    found = lookup_boto(s3_key)
    # Prime the caches with this finding.
    cache_key = hashlib.md5(
        force_bytes(f"benchmarking:caching_vs_boto:{s3_key}")).hexdigest()

    cache_configs = ("default", "store")

    for cache_config in cache_configs:
        if cache_config in measure:
            caches[cache_config].set(cache_key, found, 60)

    # Now run it 'iterations' times and measure.
    times = {key: [] for key in measure}
    for _ in range(iterations):
        if "boto" in measure:
            with metrics.timer("benchmarking_cachingvsboto_boto"):
                t0 = time.time()
                lookup_boto(s3_key)
                t1 = time.time()
                times["boto"].append(t1 - t0)
        for cache_config in cache_configs:
            if cache_config not in measure:
                continue
            with metrics.timer(f"benchmarking_cachingvsboto_{cache_config}"):
                t0 = time.time()
                caches[cache_config].get(cache_key)
                t1 = time.time()
                times[cache_config].append(t1 - t0)

    def summarize(numbers):
        return {
            "calls": len(numbers),
            "sum": sum(numbers),
            "mean": statistics.mean(numbers),
            "median": statistics.median(numbers),
        }

    context["found_in_s3"] = found
    context["measure"] = measure
    context["results"] = {key: summorize(times[key]) for key in measure}

    return http.JsonResponse(context)
Example #18
def test_google_cloud_storage_client(gcsmock):
    bucket = StorageBucket("https://storage.googleapis.com/foo-bar-bucket")
    assert bucket.name == "foo-bar-bucket"
    client = bucket.get_storage_client()
    assert isinstance(client, google_Client)
Example #19
def test_init_file_prefix(url, file_prefix, prefix):
    """A file_prefix is optionally combined with the URL prefix."""
    bucket = StorageBucket(url, file_prefix=file_prefix)
    assert bucket.prefix == prefix
Example #20
def test_init_unknown_backend_raises():
    """An exception is raised if the backend can't be determined from the URL."""
    with pytest.raises(ValueError):
        StorageBucket("https://unknown-backend.example.com/some-bucket")
Example #21
def test_init_unknown_region_raises():
    """An exception is raised by a S3 URL with an unknown region."""
    with pytest.raises(ValueError):
        StorageBucket("https://s3-unheardof.amazonaws.com/some-bucket")
Example #22
def test_google_cloud_storage_client_with_prefix():
    bucket = StorageBucket(
        "https://storage.googleapis.com/foo-bar-bucket/myprefix")
    assert bucket.name == "foo-bar-bucket"
    assert bucket.prefix == "myprefix"