def test_cached_health(self):
    storage_service = StorageService()
    storage_service.create_bucket()
    # No health object is available in the S3 bucket, yielding an error
    with ResponsesHelper() as helper:
        helper.add_passthru(self.base_url)
        response = requests.get(self.base_url + '/health/cached')
        self.assertEqual(500, response.status_code)
        self.assertEqual('ChaliceViewError: Cached health object does not exist',
                         response.json()['Message'])
    # A successful response is obtained when all the systems are functional
    self._create_mock_queues()
    endpoint_states = self._endpoint_states()
    app = load_app_module(self.lambda_name())
    with ResponsesHelper() as helper:
        helper.add_passthru(self.base_url)
        with self._mock_service_endpoints(helper, endpoint_states):
            app.update_health_cache(MagicMock(), MagicMock())
        response = requests.get(self.base_url + '/health/cached')
        self.assertEqual(200, response.status_code)
    # Another failure is observed when the cached health object is older than two minutes
    future_time = time.time() + 3 * 60
    with ResponsesHelper() as helper:
        helper.add_passthru(self.base_url)
        with patch('time.time', new=lambda: future_time):
            response = requests.get(self.base_url + '/health/cached')
            self.assertEqual(500, response.status_code)
            self.assertEqual('ChaliceViewError: Cached health object is stale',
                             response.json()['Message'])
def test_multipart_upload_error_inside_thread_with_nothing_pushed(self):
    sample_key = 'foo-multipart-upload-error'
    sample_content_parts = [b'a' * 5242880, b'b' * 5242880]
    storage_service = StorageService()
    storage_service.create_bucket()
    with patch.object(MultipartUploadHandler, '_upload_part', side_effect=RuntimeError('test')):
        with self.assertRaises(MultipartUploadError):
            with MultipartUploadHandler(sample_key) as upload:
                for part in sample_content_parts:
                    upload.push(part)
def test_multipart_upload_ok_with_one_part(self):
    sample_key = 'foo-multipart-upload'
    sample_content_parts = [
        b'a' * 1024  # The last part can be smaller than the limit.
    ]
    expected_content = b''.join(sample_content_parts)
    storage_service = StorageService()
    storage_service.create_bucket()
    with MultipartUploadHandler(sample_key) as upload:
        for part in sample_content_parts:
            upload.push(part)
    self.assertEqual(expected_content, storage_service.get(sample_key))
def test_multipart_upload_error_inside_context_with_nothing_pushed(self):
    sample_key = 'foo-multipart-upload-error'
    sample_content_parts = [
        b'a' * 5242880,
        b'b' * 5242880,
        1234567,  # This should cause an error.
        b'c' * 1024
    ]
    storage_service = StorageService()
    storage_service.create_bucket()
    with self.assertRaises(MultipartUploadError):
        with MultipartUploadHandler(sample_key) as upload:
            for part in sample_content_parts:
                upload.push(part)
def test_multipart_upload_error_with_out_of_bound_part(self):
    sample_key = 'foo-multipart-upload'
    sample_content_parts = [
        b'a' * 1024,  # Below S3's 5 MB minimum for non-final parts; this will
                      # cause an error raised by the multipart upload.
        b'b' * 5242880,
        b'c' * 1024
    ]
    storage_service = StorageService()
    storage_service.create_bucket()
    with self.assertRaises(MultipartUploadError):
        with MultipartUploadHandler(sample_key) as upload:
            for part in sample_content_parts:
                upload.push(part)
def test_shortened_url_collision(self):
    """
    URL shortener should increase the key length by one each time there is a
    key collision on non-matching URLs, raising an exception if an entire
    key matches another.
    """
    with mock.patch.object(self.app_module, 'hash_url') as hash_url:
        hash_url.return_value = 'abcde'
        StorageService().create_bucket(config.url_redirect_full_domain_name)
        self.assertEqual(self._shorten_query_url('https://singlecell.gi.ucsc.edu')['url'],
                         f'http://{config.url_redirect_full_domain_name}/abc')
        self.assertEqual(self._shorten_query_url('https://singlecell.gi.ucsc.edu/2')['url'],
                         f'http://{config.url_redirect_full_domain_name}/abcd')
        self.assertEqual(self._shorten_query_url('https://singlecell.gi.ucsc.edu/3')['url'],
                         f'http://{config.url_redirect_full_domain_name}/abcde')
        self._shorten_query_url('https://singlecell.gi.ucsc.edu/4', expect_status=500)
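# For context, the collision behavior exercised above can be implemented with a
# simple prefix-lengthening loop. The sketch below is illustrative only, not the
# project's implementation; `hash_url`, `lookup` and `store` are hypothetical
# helpers for hashing a URL and reading/writing mappings in the redirect bucket.

def shorten(url: str, min_length: int = 3) -> str:
    key = hash_url(url)  # hypothetical: deterministic short hash, e.g. 'abcde'
    for length in range(min_length, len(key) + 1):
        candidate = key[:length]
        existing = lookup(candidate)  # hypothetical: stored URL for key, or None
        if existing is None or existing == url:
            store(candidate, url)  # hypothetical: persist the mapping
            return candidate
    # Even the full-length key maps to a different URL: give up, which is the
    # 500 response the last assertion in the test above expects
    raise RuntimeError('Failed to shorten URL')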
def test_multipart_upload_ok_with_n_parts(self):
    sample_key = 'foo-multipart-upload'
    sample_content_parts = [
        b'a' * 5242880,  # The minimum part size for a multipart upload is 5 MB.
        b'b' * 5242880,
        b'c' * 1024  # The last part can be smaller than the limit.
    ]
    expected_content = b''.join(sample_content_parts)
    storage_service = StorageService()
    storage_service.create_bucket()
    with MultipartUploadHandler(sample_key) as upload:
        for part in sample_content_parts:
            upload.push(part)
    self.assertEqual(expected_content, storage_service.get(sample_key))
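# The tests above rely on S3's multipart upload API. Below is a minimal,
# hypothetical context manager in the same push style, sketched with boto3's
# create_multipart_upload / upload_part / complete_multipart_upload /
# abort_multipart_upload calls; it is not the project's MultipartUploadHandler.

import boto3

class SimpleMultipartUpload:

    def __init__(self, bucket: str, key: str):
        self.bucket, self.key = bucket, key
        self.s3 = boto3.client('s3')
        self.upload_id = None
        self.parts = []

    def __enter__(self):
        self.upload_id = self.s3.create_multipart_upload(Bucket=self.bucket,
                                                         Key=self.key)['UploadId']
        return self

    def push(self, part: bytes) -> None:
        # Part numbers start at 1; every part except the last must be ≥ 5 MB
        part_number = len(self.parts) + 1
        response = self.s3.upload_part(Bucket=self.bucket,
                                       Key=self.key,
                                       PartNumber=part_number,
                                       UploadId=self.upload_id,
                                       Body=part)
        self.parts.append({'PartNumber': part_number, 'ETag': response['ETag']})

    def __exit__(self, exc_type, exc_val, exc_tb):
        if exc_type is None:
            self.s3.complete_multipart_upload(Bucket=self.bucket,
                                              Key=self.key,
                                              UploadId=self.upload_id,
                                              MultipartUpload={'Parts': self.parts})
        else:
            # Abort on any failure so S3 discards the already-uploaded parts
            self.s3.abort_multipart_upload(Bucket=self.bucket,
                                           Key=self.key,
                                           UploadId=self.upload_id)
        return False  # propagate the original exception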
def test_presigned_url(self):
    sample_key = 'foo-presigned-url'
    sample_content = json.dumps({'a': 1})
    storage_service = StorageService()
    storage_service.create_bucket()
    storage_service.put(sample_key, sample_content.encode())
    for file_name in None, 'foo.json':
        with self.subTest(file_name=file_name):
            presigned_url = storage_service.get_presigned_url(sample_key, file_name=file_name)
            response = requests.get(presigned_url)
            if file_name is None:
                self.assertNotIn('Content-Disposition', response.headers)
            else:
                # noinspection PyUnreachableCode
                if False:  # no coverage
                    # Unfortunately, moto does not support emulating S3's
                    # mechanism of specifying response headers via request
                    # parameters (see the `Request Parameters` section of
                    # https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html).
                    self.assertEqual(response.headers['Content-Disposition'],
                                     f'attachment;filename="{file_name}"')
            self.assertEqual(sample_content, response.text)
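# The response-header mechanism that moto cannot emulate is S3's support for
# `ResponseContentDisposition` as a pre-signed request parameter. A sketch of
# how `get_presigned_url` could use it, assuming a boto3 client and a
# `bucket_name` attribute (both assumptions, not the project's exact code):

def get_presigned_url(self, key: str, file_name: str = None) -> str:
    params = {'Bucket': self.bucket_name, 'Key': key}
    if file_name is not None:
        # Asks S3 to attach a Content-Disposition header to the GET response
        params['ResponseContentDisposition'] = f'attachment;filename="{file_name}"'
    return self.client.generate_presigned_url(ClientMethod='get_object', Params=params)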
def test_simple_get_put(self):
    sample_key = 'foo-simple'
    sample_content = b'bar'
    storage_service = StorageService()
    storage_service.create_bucket()
    # NOTE: Ensure that the key does not exist before writing.
    with self.assertRaises(storage_service.client.exceptions.NoSuchKey):
        storage_service.get(sample_key)
    storage_service.put(sample_key, sample_content)
    self.assertEqual(sample_content, storage_service.get(sample_key))
def test_shortened_url_matching(self):
    """
    URL shortener should return the same response URL for identical input URLs
    """
    url = 'https://singlecell.gi.ucsc.edu'
    StorageService().create_bucket(config.url_redirect_full_domain_name)
    shortened_url1 = self._shorten_query_url(url)
    shortened_url2 = self._shorten_query_url(url)
    self.assertEqual(shortened_url1, shortened_url2)
def test_invalid_url(self):
    """
    URL shortener should reject any non-URL argument and any non-HCA URL
    """
    urls = [
        'https://asinglecell.gi.ucsc.edu',
        'https://singlecll.gi.ucsc.edu',
        'https://singlecell.gi.ucsc.edut',
        'http://singlecell.gi.xyz.edu',
        'singlecell.gi.ucsc.edu'
    ]
    StorageService().create_bucket(config.url_redirect_full_domain_name)
    for url in urls:
        with self.subTest(url=url):
            self._shorten_query_url(url, expect_status=400)
def test_simple_get_unknown_item(self):
    sample_key = 'foo-simple'
    storage_service = StorageService()
    storage_service.create_bucket()
    with self.assertRaises(storage_service.client.exceptions.NoSuchKey):
        storage_service.get(sample_key)
def test_whitelisting(self):
    """
    URL shortener should accept any singlecell.gi.ucsc.edu URL, including
    subdomains, paths and both HTTP schemes
    """
    urls = [
        'https://singlecell.gi.ucsc.edu',
        'http://singlecell.gi.ucsc.edu',
        'https://singlecell.gi.ucsc.edu/',
        'https://singlecell.gi.ucsc.edu/abc',
        'https://subdomain.singlecell.gi.ucsc.edu/',
        'https://sub.subdomain.singlecell.gi.ucsc.edu/abc/def'
    ]
    StorageService().create_bucket(config.url_redirect_full_domain_name)
    for url in urls:
        with self.subTest(url=url):
            self._shorten_query_url(url)
def test_valid_url(self, storage_service_get, storage_service_put):
    """
    Passing in a valid URL should create an object in S3 and return a link
    that redirects to the given URL
    """
    storage_service_get.side_effect = StorageService().client.exceptions.NoSuchKey({}, '')
    response = self._shorten_query_url(
        'https://dev.singlecell.gi.ucsc.edu/explore/specimens'
        '?filter=%5B%7B%22facetName%22%3A%22organ%22%2C%22terms%22%3A%5B%22bone%22%5D%7D%5D')
    self.assertEqual({'url': f'http://{config.url_redirect_full_domain_name}/pv9'}, response)
    storage_service_put.assert_called_once()
def storage_service(self) -> StorageService:
    return StorageService()
def service(self) -> ManifestService:
    return ManifestService(StorageService(), self.file_url_func)
class HealthController:
    """
    Encapsulates information about the health status of an Azul deployment.
    All aspects of health are exposed as lazily loaded properties.
    Instantiating the class does not examine any resources; only accessing
    the individual properties does, as does the `as_json` method.
    """

    def __init__(self, lambda_name: str):
        self.lambda_name = lambda_name
        self.storage_service = StorageService()

    def as_json(self, keys: Iterable[str]) -> JSON:
        keys = set(keys)
        if keys:
            require(keys.issubset(self.all_keys()))
        else:
            keys = self.all_keys()
        json = {k: getattr(self, k) for k in keys}
        json['up'] = all(v['up'] for v in json.values())
        return json

    @health_property
    def other_lambdas(self):
        """
        Indicates whether the companion REST API responds to HTTP requests.
        """
        response = {
            lambda_name: self._lambda(lambda_name)
            for lambda_name in config.lambda_names()
            if lambda_name != self.lambda_name
        }
        response['up'] = all(v['up'] for v in response.values())
        return response

    @health_property
    def queues(self):
        """
        Returns information about the SQS queues used by the indexer.
        """
        sqs = aws.resource('sqs')
        response = {'up': True}
        for queue in config.all_queue_names:
            try:
                queue_instance = sqs.get_queue_by_name(QueueName=queue).attributes
            except ClientError as ex:
                response[queue] = {
                    'up': False,
                    'error': ex.response['Error']['Message']
                }
                response['up'] = False
            else:
                response[queue] = {
                    'up': True,
                    'messages': {
                        'delayed': int(queue_instance['ApproximateNumberOfMessagesDelayed']),
                        'invisible': int(queue_instance['ApproximateNumberOfMessagesNotVisible']),
                        'queued': int(queue_instance['ApproximateNumberOfMessages'])
                    }
                }
        return response

    @health_property
    def progress(self) -> JSON:
        """
        The number of Data Store bundles pending to be indexed and the number
        of index documents in need of updating.
        """
        return {
            'up': True,
            'unindexed_bundles': sum(self.queues[config.notifications_queue_name()].get('messages', {}).values()),
            'unindexed_documents': sum(chain.from_iterable(
                self.queues[config.tallies_queue_name(retry=retry)].get('messages', {}).values()
                for retry in (False, True)
            ))
        }

    def _api_endpoint(self, path: str) -> Tuple[str, JSON]:
        url = config.service_endpoint() + path
        response = requests.head(url)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            return url, {'up': False, 'error': repr(e)}
        else:
            return url, {'up': True}

    @health_property
    def api_endpoints(self):
        """
        Indicates whether important service API endpoints are operational.
        """
        endpoints = [
            f'/index/{entity_type}?size=1'
            for entity_type in ('projects', 'samples', 'files', 'bundles')
        ]
        with ThreadPoolExecutor(len(endpoints)) as tpe:
            status = dict(tpe.map(self._api_endpoint, endpoints))
        status['up'] = all(v['up'] for v in status.values())
        return status

    @health_property
    def elasticsearch(self):
        """
        Indicates whether the Elasticsearch cluster is responsive.
        """
        return {
            'up': ESClientFactory.get().ping(),
        }

    @lru_cache
    def _lambda(self, lambda_name) -> JSON:
        try:
            response = requests.get(config.lambda_endpoint(lambda_name) + '/health/basic')
            response.raise_for_status()
            up = response.json()['up']
        except Exception as e:
            return {
                'up': False,
                'error': repr(e)
            }
        else:
            return {
                'up': up,
            }

    def _make_response(self, body: JSON) -> Response:
        try:
            up = body['up']
        except KeyError:
            status = 400
        else:
            status = 200 if up else 503
        return Response(body=json.dumps(body), status_code=status)

    def basic_health(self) -> Response:
        return self._make_response({'up': True})

    def health(self) -> Response:
        return self._make_response(self.as_json(self.all_keys()))

    def custom_health(self, keys) -> Response:
        if keys is None:
            body = self.as_json(self.all_keys())
        elif isinstance(keys, str):
            assert keys  # Chalice maps the empty string to None
            keys = keys.split(',')
            try:
                body = self.as_json(keys)
            except RequirementError:
                body = {'Message': 'Invalid health keys'}
        else:
            body = {'Message': 'Invalid health keys'}
        return self._make_response(body)

    def fast_health(self) -> Response:
        return self._make_response(self._as_json_fast())

    def cached_health(self) -> JSON:
        try:
            cache = json.loads(self.storage_service.get(f'health/{self.lambda_name}'))
        except self.storage_service.client.exceptions.NoSuchKey:
            raise ChaliceViewError('Cached health object does not exist')
        else:
            max_age = 2 * 60
            if time.time() - cache['time'] > max_age:
                raise ChaliceViewError('Cached health object is stale')
            else:
                body = cache['health']
        return body

    fast_properties: Mapping[str, Iterable[health_property]] = {
        'indexer': (
            elasticsearch,
            queues,
            progress
        ),
        'service': (
            elasticsearch,
            api_endpoints,
        )
    }

    def _as_json_fast(self) -> JSON:
        return self.as_json(p.key for p in self.fast_properties[self.lambda_name])

    def update_cache(self) -> None:
        health_object = dict(time=time.time(), health=self._as_json_fast())
        self.storage_service.put(object_key=f'health/{self.lambda_name}',
                                 data=json.dumps(health_object).encode())

    all_properties: Iterable[health_property] = tuple(
        p for p in locals().values() if isinstance(p, health_property)
    )

    @classmethod
    def all_keys(cls) -> AbstractSet[str]:
        return frozenset(p.key for p in cls.all_properties)