Example 1
def test_s3bucket_client():

    mock_session = mock.Mock()

    client_kwargs_calls = []
    client_args_calls = []

    def get_client(*args, **kwargs):
        client_args_calls.append(args)
        client_kwargs_calls.append(kwargs)
        return mock.Mock()

    mock_session.client.side_effect = get_client

    def new_session():
        return mock_session

    with mock.patch("tecken.s3.boto3.session.Session", new=new_session):
        bucket = S3Bucket("https://s3.amazonaws.com/some-bucket")
        client = bucket.s3_client
        client_again = bucket.s3_client
        assert client_again is client
        # Only one client should have been created from the session
        assert len(mock_session.mock_calls) == 1
        assert "endpoint_url" not in client_kwargs_calls[-1]

        # make a client that requires an endpoint_url
        bucket = S3Bucket("http://s3.example.com/buck/prefix")
        bucket.s3_client
        assert client_kwargs_calls[-1]["endpoint_url"] == "http://s3.example.com"

        # make a client that requires a different region
        bucket = S3Bucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
        bucket.s3_client
        assert client_kwargs_calls[-1]["region_name"] == "eu-west-2"
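
The test above only exercises the behaviour of S3Bucket.s3_client; the property itself is not shown. Below is a minimal sketch of a lazily cached boto3 client that would satisfy those assertions. The class and attribute names are illustrative assumptions, not the tecken implementation.

import boto3


class LazyS3Client:
    """Illustrative sketch: build the boto3 client once and cache it."""

    def __init__(self, endpoint_url=None, region=None):
        self.endpoint_url = endpoint_url
        self.region = region
        self._s3_client = None

    @property
    def s3_client(self):
        if self._s3_client is None:
            options = {}
            # Only pass endpoint_url / region_name when they are set, which
            # matches the kwargs assertions in the test above.
            if self.endpoint_url:
                options["endpoint_url"] = self.endpoint_url
            if self.region:
                options["region_name"] = self.region
            session = boto3.session.Session()
            self._s3_client = session.client("s3", **options)
        return self._s3_client
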
Example 2
def test_use_s3bucket():
    bucket = S3Bucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert bucket.private  # because it's the default
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = S3Bucket("https://s3.amazonaws.com/some-bucket?access=public")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region is None
    assert not bucket.private
    assert bucket.base_url == "https://s3.amazonaws.com/some-bucket"

    bucket = S3Bucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
    assert bucket.name == "some-bucket"
    assert bucket.endpoint_url is None
    assert bucket.region == "eu-west-2"
    assert bucket.base_url == "https://s3-eu-west-2.amazonaws.com/some-bucket"

    bucket = S3Bucket("http://s3.example.com/buck/prfx")
    assert bucket.name == "buck"
    assert bucket.endpoint_url == "http://s3.example.com"
    assert bucket.region is None
    assert bucket.prefix == "prfx"
    assert bucket.base_url == "http://s3.example.com/buck"

    # Just check that __repr__ works at all
    assert repr(bucket)
Example 3
def test_region_checking():
    bucket = S3Bucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.region is None

    # a known and classic one
    bucket = S3Bucket("https://s3-us-west-2.amazonaws.com/some-bucket")
    assert bucket.region == "us-west-2"

    with pytest.raises(ValueError):
        S3Bucket("https://s3-unheardof.amazonaws.com/some-bucket")
Example 4
def _check_s3_urls():
    """If you use minio to functionally test S3, the buckets you create
    disappear after a restart because minio is ephemeral.
    Make sure they exist, since that's what we expect of the real
    production S3 buckets.
    """
    _all_possible_urls = set(
        list(settings.SYMBOL_URLS) +
        [settings.UPLOAD_DEFAULT_URL, settings.UPLOAD_TRY_SYMBOLS_URL] +
        list(settings.UPLOAD_URL_EXCEPTIONS.values()))
    for url in _all_possible_urls:
        if not url or "minio" not in urlparse(url).netloc:
            continue
        bucket = S3Bucket(url)
        try:
            bucket.s3_client.head_bucket(Bucket=bucket.name)
        except ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                bucket.s3_client.create_bucket(Bucket=bucket.name)
                logger.info(f"Created minio bucket {bucket.name!r}")
            else:
                # The most common problem is that the S3 endpoint doesn't
                # match the AWS credentials configured.
                # If that's the case, this will raise a 403 Forbidden
                # ClientError.
                raise
Example 5
def upload_microsoft_symbol(symbol, debugid, file_path, download_obj):
    filename = os.path.splitext(symbol)[0]
    uri = f"{symbol}/{debugid}/{filename}.sym"
    key_name = os.path.join(settings.SYMBOL_FILE_PREFIX, uri)

    bucket_info = S3Bucket(settings.UPLOAD_DEFAULT_URL)
    s3_client = bucket_info.s3_client
    bucket_name = bucket_info.name

    # The upload_file_upload creates an instance but doesn't save it
    file_upload = upload_file_upload(s3_client,
                                     bucket_name,
                                     key_name,
                                     file_path,
                                     microsoft_download=True)

    # The upload_file_upload() function might return None
    # which means it decided there is no need to make an upload
    # of this specific file.
    if file_upload:
        download_obj.skipped = False
        download_obj.file_upload = file_upload
        metrics.incr("download_microsoft_download_file_upload_upload", 1)
    else:
        download_obj.skipped = True
        logger.info(f"Skipped key {key_name}")
        metrics.incr("download_microsoft_download_file_upload_skip", 1)
    download_obj.completed_at = timezone.now()
    download_obj.save()

    # We need to inform the symbolicate app, that some new symbols
    # were uploaded.
    symbol_key = (symbol, debugid)
    invalidate_symbolicate_cache([symbol_key])
Example 6
def test_exists_in_source(botomock, settings):

    mock_api_calls = []

    def mock_api_call(self, operation_name, api_params):
        mock_api_calls.append(api_params)
        assert operation_name == 'ListObjectsV2'
        if api_params['Prefix'].endswith('xxx.sym'):
            return {}
        return {
            'Contents': [{
                'Key': api_params['Prefix'],
            }],
        }

    bucket = S3Bucket('https://s3.example.com/private')
    with botomock(mock_api_call):
        assert not exists_in_source(bucket, 'xxx.sym')
        assert exists_in_source(bucket, 'xul.sym')
        assert len(mock_api_calls) == 2

        # again
        assert not exists_in_source(bucket, 'xxx.sym')
        assert exists_in_source(bucket, 'xul.sym')
        assert len(mock_api_calls) == 2
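
The second pair of calls above does not increase the API call count, which implies that exists_in_source memoizes its lookups. A minimal sketch under that assumption follows; the helper name and the use of functools.lru_cache are guesses, and the real function may cache through Django's cache framework instead.

from functools import lru_cache


@lru_cache(maxsize=1024)
def _cached_exists(bucket, key):
    # One ListObjectsV2 call per (bucket, key); repeated lookups are served
    # from the cache, which is what the call-count assertions check.
    prefix = f"{bucket.prefix}/{key}" if bucket.prefix else key
    response = bucket.s3_client.list_objects_v2(Bucket=bucket.name, Prefix=prefix)
    return any(obj["Key"] == prefix for obj in response.get("Contents", ()))
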
Example 7
def check_url(url, setting_key):
    if url in checked:
        return
    bucket = S3Bucket(url)
    if not bucket.private:
        return
    try:
        bucket.s3_client.head_bucket(Bucket=bucket.name)
    except ClientError as exception:
        if exception.response["Error"]["Code"] in ("403", "404"):
            errors.append(
                checks.Error(
                    f"Unable to connect to {url} (bucket={bucket.name!r}, "
                    f"found in settings.{setting_key}) due to "
                    f"ClientError ({exception.response!r})",
                    id="tecken.health.E002",
                ))
        else:
            raise
    except EndpointConnectionError:
        errors.append(
            checks.Error(
                f"Unable to connect to {url} (bucket={bucket.name!r}, "
                f"found in settings.{setting_key}) due to "
                f"EndpointConnectionError",
                id="tecken.health.E001",
            ))
    else:
        checked.append(url)
Example 8
def get_bucket_info(user, try_symbols=None):
    """return an object that has 'bucket', 'endpoint_url',
    'region'.
    Only 'bucket' is mandatory in the response object.
    """

    if try_symbols is None:
        # If it wasn't explicitly passed, we need to figure this out by
        # looking at the user who uploads.
        # Namely, we're going to see if the user has the permission
        # 'upload.upload_symbols'. If the user does, it means the user intends
        # to *not* upload Try build symbols.
        # This is based on the axiom that, if the upload is made with an
        # API token, that API token can't have *both* the
        # 'upload.upload_symbols' permission *and* the
        # 'upload.upload_try_symbols' permission.
        # If the user uploads via the web, they can check a checkbox that
        # is off by default. In that case the user isn't using an API
        # token, so they might have BOTH permissions.
        # Then the default falls on this NOT being a Try upload.
        try_symbols = not user.has_perm("upload.upload_symbols")

    if try_symbols:
        url = settings.UPLOAD_TRY_SYMBOLS_URL
    else:
        url = settings.UPLOAD_DEFAULT_URL

    exceptions = settings.UPLOAD_URL_EXCEPTIONS
    if user.email.lower() in exceptions:
        # easy
        exception = exceptions[user.email.lower()]
    else:
        # match against every possible wildcard
        exception = None  # assume no match
        for email_or_wildcard in settings.UPLOAD_URL_EXCEPTIONS:
            if fnmatch.fnmatch(user.email.lower(), email_or_wildcard.lower()):
                # a match!
                exception = settings.UPLOAD_URL_EXCEPTIONS[email_or_wildcard]
                break

    if exception:
        url = exception

    return S3Bucket(url, try_symbols=try_symbols)
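
The wildcard branch above relies on fnmatch.fnmatch, which does shell-style matching against the lowercased email. A tiny illustration with a made-up exceptions entry (not a real setting value):

import fnmatch

# Hypothetical exceptions mapping, for illustration only.
exceptions = {"*@example.com": "https://s3.amazonaws.com/special-bucket"}

assert fnmatch.fnmatch("someone@example.com", "*@example.com")
assert not fnmatch.fnmatch("someone@mozilla.com", "*@example.com")
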
Example 9
def caching_vs_boto(request):
    """
    Measure the time it takes to do lots of reads with our caches.
    And contrast that with how long it takes to do boto queries.
    """
    if (
        not settings.BENCHMARKING_ENABLED and
        not request.user.is_superuser
    ):
        raise PermissionDenied('benchmarking disabled')

    form = forms.CachingVsBotoForm(
        request.GET,
        all_measure=['boto', 'default', 'store'],
    )
    if not form.is_valid():
        return http.JsonResponse({'errors': form.errors}, status=400)

    # Benchmarking parameters.
    iterations = form.cleaned_data['iterations']
    symbol_path = form.cleaned_data['symbol_path']
    measure = form.cleaned_data['measure']

    # Setting up for boto lookup.
    s3_key = f'{settings.SYMBOL_FILE_PREFIX}/{symbol_path}'
    s3_info = S3Bucket(settings.SYMBOL_URLS[0])
    s3_client = s3_info.s3_client
    bucket_name = s3_info.name

    def lookup_boto(key):
        response = s3_client.list_objects_v2(
            Bucket=bucket_name,
            Prefix=key,
        )
        for obj in response.get('Contents', []):
            if obj['Key'] == key:
                # It exists!
                return True
        return False

    context = {}

    # Run it once.
    found = lookup_boto(s3_key)
    # Prime the caches with this finding.
    cache_key = hashlib.md5(force_bytes(
        f'benchmarking:caching_vs_boto:{s3_key}'
    )).hexdigest()

    cache_configs = ('default', 'store')

    for cache_config in cache_configs:
        if cache_config in measure:
            caches[cache_config].set(cache_key, found, 60)

    # Now run it 'iterations' times and measure.
    times = {
        key: [] for key in measure
    }
    for _ in range(iterations):
        if 'boto' in measure:
            with metrics.timer('benchmarking_cachingvsboto_boto'):
                t0 = time.time()
                lookup_boto(s3_key)
                t1 = time.time()
                times['boto'].append(t1 - t0)
        for cache_config in cache_configs:
            if cache_config not in measure:
                continue
            with metrics.timer(f'benchmarking_cachingvsboto_{cache_config}'):
                t0 = time.time()
                caches[cache_config].get(cache_key)
                t1 = time.time()
                times[cache_config].append(t1 - t0)

    def summarize(numbers):
        return {
            'calls': len(numbers),
            'sum': sum(numbers),
            'mean': statistics.mean(numbers),
            'median': statistics.median(numbers),
        }
    context['found_in_s3'] = found
    context['measure'] = measure
    context['results'] = {
        key: summarize(times[key]) for key in measure
    }

    return http.JsonResponse(context)
Example 10
def _get_sources(self):
    for url in self.urls:
        # The URL is expected to have the bucket name as the first
        # part of the pathname.
        # In the future we might expand to a more elaborate scheme.
        yield S3Bucket(url, file_prefix=self.file_prefix)