def test_s3bucket_client():
    mock_session = mock.Mock()
    client_kwargs_calls = []
    client_args_calls = []

    def get_client(*args, **kwargs):
        client_args_calls.append(args)
        client_kwargs_calls.append(kwargs)
        return mock.Mock()

    mock_session.client.side_effect = get_client

    def new_session():
        return mock_session

    with mock.patch("tecken.s3.boto3.session.Session", new=new_session):
        bucket = S3Bucket("https://s3.amazonaws.com/some-bucket")
        client = bucket.s3_client
        client_again = bucket.s3_client
        assert client_again is client
        # Only one client should have been created; the second access is cached
        assert len(mock_session.mock_calls) == 1
        assert "endpoint_url" not in client_kwargs_calls[-1]

        # make a client that requires an endpoint_url
        bucket = S3Bucket("http://s3.example.com/buck/prefix")
        bucket.s3_client
        assert client_kwargs_calls[-1]["endpoint_url"] == "http://s3.example.com"

        # make a client that requires a different region
        bucket = S3Bucket("https://s3-eu-west-2.amazonaws.com/some-bucket")
        bucket.s3_client
        assert client_kwargs_calls[-1]["region_name"] == "eu-west-2"
def test_use_s3bucket(): bucket = S3Bucket("https://s3.amazonaws.com/some-bucket") assert bucket.name == "some-bucket" assert bucket.endpoint_url is None assert bucket.region is None assert bucket.private # because it's the default assert bucket.base_url == "https://s3.amazonaws.com/some-bucket" bucket = S3Bucket("https://s3.amazonaws.com/some-bucket?access=public") assert bucket.name == "some-bucket" assert bucket.endpoint_url is None assert bucket.region is None assert not bucket.private assert bucket.base_url == "https://s3.amazonaws.com/some-bucket" bucket = S3Bucket("https://s3-eu-west-2.amazonaws.com/some-bucket") assert bucket.name == "some-bucket" assert bucket.endpoint_url is None assert bucket.region == "eu-west-2" assert bucket.base_url == "https://s3-eu-west-2.amazonaws.com/some-bucket" bucket = S3Bucket("http://s3.example.com/buck/prfx") assert bucket.name == "buck" assert bucket.endpoint_url == "http://s3.example.com" assert bucket.region is None assert bucket.prefix == "prfx" assert bucket.base_url == "http://s3.example.com/buck" # Just check that __repr__ it works at all assert repr(bucket)
def test_region_checking():
    bucket = S3Bucket("https://s3.amazonaws.com/some-bucket")
    assert bucket.region is None

    # a known and classic one
    bucket = S3Bucket("https://s3-us-west-2.amazonaws.com/some-bucket")
    assert bucket.region == "us-west-2"

    with pytest.raises(ValueError):
        S3Bucket("https://s3-unheardof.amazonaws.com/some-bucket")
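# Illustrative sketch only (not the actual S3Bucket internals): one way a
# region could be extracted from an "s3-<region>.amazonaws.com" hostname and
# validated, raising ValueError for unknown regions as the test above expects.
# The helper name _guess_region and the use of get_available_regions() are
# assumptions made for this sketch.
import re

import boto3


def _guess_region(netloc):
    match = re.match(r"^s3-(.+)\.amazonaws\.com$", netloc)
    if not match:
        # e.g. plain "s3.amazonaws.com" has no region component
        return None
    region = match.group(1)
    if region not in boto3.session.Session().get_available_regions("s3"):
        raise ValueError(f"Not a known S3 region: {region}")
    return region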
def _check_s3_urls():
    """If you use minio to functionally test S3, the buckets you create are
    ephemeral and disappear after a restart, so make sure they exist.
    The real production S3 buckets are expected to already exist.
    """
    _all_possible_urls = set(
        list(settings.SYMBOL_URLS)
        + [settings.UPLOAD_DEFAULT_URL, settings.UPLOAD_TRY_SYMBOLS_URL]
        + list(settings.UPLOAD_URL_EXCEPTIONS.values())
    )
    for url in _all_possible_urls:
        if not url or "minio" not in urlparse(url).netloc:
            continue
        bucket = S3Bucket(url)
        try:
            bucket.s3_client.head_bucket(Bucket=bucket.name)
        except ClientError as exception:
            if exception.response["Error"]["Code"] == "404":
                bucket.s3_client.create_bucket(Bucket=bucket.name)
                logger.info(f"Created minio bucket {bucket.name!r}")
            else:
                # The most common problem is that the S3 configuration
                # doesn't match the AWS credentials configured.
                # If that's the case, this will raise a 403 Forbidden
                # ClientError.
                raise
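# For illustration, with hypothetical settings values: _check_s3_urls() only
# touches URLs whose host contains "minio"; everything else is skipped.
from urllib.parse import urlparse

example_urls = [
    "https://s3.amazonaws.com/prod-bucket",  # skipped: not a minio host
    "http://minio:9000/testbucket",          # checked, created if head_bucket 404s
]
for example_url in example_urls:
    print(example_url, "->", "minio" in urlparse(example_url).netloc)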
def upload_microsoft_symbol(symbol, debugid, file_path, download_obj):
    filename = os.path.splitext(symbol)[0]
    uri = f"{symbol}/{debugid}/{filename}.sym"
    key_name = os.path.join(settings.SYMBOL_FILE_PREFIX, uri)

    bucket_info = S3Bucket(settings.UPLOAD_DEFAULT_URL)
    s3_client = bucket_info.s3_client
    bucket_name = bucket_info.name

    # upload_file_upload() creates an instance but doesn't save it
    file_upload = upload_file_upload(
        s3_client, bucket_name, key_name, file_path, microsoft_download=True
    )

    # upload_file_upload() might return None, which means it decided
    # there is no need to make an upload of this specific file.
    if file_upload:
        download_obj.skipped = False
        download_obj.file_upload = file_upload
        metrics.incr("download_microsoft_download_file_upload_upload", 1)
    else:
        download_obj.skipped = True
        logger.info(f"Skipped key {key_name}")
        metrics.incr("download_microsoft_download_file_upload_skip", 1)

    download_obj.completed_at = timezone.now()
    download_obj.save()

    # We need to inform the symbolicate app that some new symbols
    # were uploaded.
    symbol_key = (symbol, debugid)
    invalidate_symbolicate_cache([symbol_key])
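# Hypothetical values, to illustrate how the S3 key is built in
# upload_microsoft_symbol() above. The debug id and the "v1" prefix are made
# up for this sketch; the real prefix comes from settings.SYMBOL_FILE_PREFIX.
import os

symbol = "xul.pdb"
debugid = "44E4EC8C2F41492B9369D6B9A059577C2"  # made-up debug id
filename = os.path.splitext(symbol)[0]         # "xul"
uri = f"{symbol}/{debugid}/{filename}.sym"     # "xul.pdb/<debugid>/xul.sym"
key_name = os.path.join("v1", uri)             # e.g. "v1/xul.pdb/<debugid>/xul.sym"
print(key_name)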
def check_url(url, setting_key): if url in checked: return bucket = S3Bucket(url) if not bucket.private: return try: bucket.s3_client.head_bucket(Bucket=bucket.name) except ClientError as exception: if exception.response["Error"]["Code"] in ("403", "404"): errors.append( checks.Error( f"Unable to connect to {url} (bucket={bucket.name!r}, " f"found in settings.{setting_key}) due to " f"ClientError ({exception.response!r})", id="tecken.health.E002", )) else: raise except EndpointConnectionError: errors.append( checks.Error( f"Unable to connect to {url} (bucket={bucket.name!r}, " f"found in settings.{setting_key}) due to " f"EndpointConnectionError", id="tecken.health.E001", )) else: checked.append(url)
def get_bucket_info(user, try_symbols=None):
    """Return an S3Bucket instance for this user's uploads. The returned
    object has 'name', 'endpoint_url' and 'region' attributes; only the
    bucket name is guaranteed to be set.
    """
    if try_symbols is None:
        # If it wasn't explicitly passed, we need to figure this out by
        # looking at the user who uploads.
        # Namely, we're going to see if the user has the permission
        # 'upload.upload_symbols'. If the user does, it means the user
        # intends to *not* upload Try build symbols.
        # This is based on the axiom that, if the upload is made with an
        # API token, that API token can't have *both* the
        # 'upload.upload_symbols' permission *and* the
        # 'upload.upload_try_symbols' permission.
        # If the user uploads via the web, the user has a choice to check
        # a checkbox that is off by default. In that case, the user isn't
        # using an API token, so the user might have BOTH permissions.
        # Then the default falls on this NOT being a Try upload.
        try_symbols = not user.has_perm("upload.upload_symbols")

    if try_symbols:
        url = settings.UPLOAD_TRY_SYMBOLS_URL
    else:
        url = settings.UPLOAD_DEFAULT_URL

    exceptions = settings.UPLOAD_URL_EXCEPTIONS
    if user.email.lower() in exceptions:
        # easy
        exception = exceptions[user.email.lower()]
    else:
        # match against every possible wildcard
        exception = None  # assume no match
        for email_or_wildcard in settings.UPLOAD_URL_EXCEPTIONS:
            if fnmatch.fnmatch(user.email.lower(), email_or_wildcard.lower()):
                # a match!
                exception = settings.UPLOAD_URL_EXCEPTIONS[email_or_wildcard]
                break

    if exception:
        url = exception

    return S3Bucket(url, try_symbols=try_symbols)
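# A hedged illustration of the wildcard matching used in get_bucket_info().
# The UPLOAD_URL_EXCEPTIONS value below is hypothetical; only the
# fnmatch.fnmatch() behaviour is real (case-insensitive here because both
# sides are lowercased first).
import fnmatch

UPLOAD_URL_EXCEPTIONS = {
    "*@example.com": "https://s3.amazonaws.com/special-bucket",  # hypothetical
}

email = "Uploader@Example.COM"
for pattern, exception_url in UPLOAD_URL_EXCEPTIONS.items():
    if fnmatch.fnmatch(email.lower(), pattern.lower()):
        print("exception applies:", exception_url)  # the special bucket URL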
def test_exists_in_source(botomock, settings): mock_api_calls = [] def mock_api_call(self, operation_name, api_params): mock_api_calls.append(api_params) assert operation_name == "ListObjectsV2" if api_params["Prefix"].endswith("xxx.sym"): return {} return {"Contents": [{"Key": api_params["Prefix"]}]} bucket = S3Bucket("https://s3.example.com/private") with botomock(mock_api_call): assert not exists_in_source(bucket, "xxx.sym") assert exists_in_source(bucket, "xul.sym") assert len(mock_api_calls) == 2 # again assert not exists_in_source(bucket, "xxx.sym") assert exists_in_source(bucket, "xul.sym") assert len(mock_api_calls) == 2
def caching_vs_boto(request):
    """
    Measure the time it takes to do lots of reads with our caches,
    and contrast that with how long it takes to do boto queries.
    """
    if not settings.BENCHMARKING_ENABLED and not request.user.is_superuser:
        raise PermissionDenied("benchmarking disabled")

    form = forms.CachingVsBotoForm(
        request.GET, all_measure=["boto", "default", "store"]
    )
    if not form.is_valid():
        return http.JsonResponse({"errors": form.errors}, status=400)

    # Benchmarking parameters.
    iterations = form.cleaned_data["iterations"]
    symbol_path = form.cleaned_data["symbol_path"]
    measure = form.cleaned_data["measure"]

    # Setting up for boto lookup.
    s3_key = f"{settings.SYMBOL_FILE_PREFIX}/{symbol_path}"
    s3_info = S3Bucket(settings.SYMBOL_URLS[0])
    s3_client = s3_info.s3_client
    bucket_name = s3_info.name

    def lookup_boto(key):
        response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix=key)
        for obj in response.get("Contents", []):
            if obj["Key"] == key:
                # It exists!
                return True
        return False

    context = {}

    # Run it once.
    found = lookup_boto(s3_key)

    # Prime the caches with this finding.
    cache_key = hashlib.md5(
        force_bytes(f"benchmarking:caching_vs_boto:{s3_key}")
    ).hexdigest()
    cache_configs = ("default", "store")
    for cache_config in cache_configs:
        if cache_config in measure:
            caches[cache_config].set(cache_key, found, 60)

    # Now run it 'iterations' times and measure.
    times = {key: [] for key in measure}
    for _ in range(iterations):
        if "boto" in measure:
            with metrics.timer("benchmarking_cachingvsboto_boto"):
                t0 = time.time()
                lookup_boto(s3_key)
                t1 = time.time()
                times["boto"].append(t1 - t0)
        for cache_config in cache_configs:
            if cache_config not in measure:
                continue
            with metrics.timer(f"benchmarking_cachingvsboto_{cache_config}"):
                t0 = time.time()
                caches[cache_config].get(cache_key)
                t1 = time.time()
                times[cache_config].append(t1 - t0)

    def summarize(numbers):
        return {
            "calls": len(numbers),
            "sum": sum(numbers),
            "mean": statistics.mean(numbers),
            "median": statistics.median(numbers),
        }

    context["found_in_s3"] = found
    context["measure"] = measure
    context["results"] = {key: summarize(times[key]) for key in measure}
    return http.JsonResponse(context)
def _get_sources(self):
    for url in self.urls:
        # The URL is expected to have the bucket name as the first
        # part of the pathname.
        # In the future we might expand to a more elaborate scheme.
        yield S3Bucket(url, file_prefix=self.file_prefix)