Example 1
    def test_cached_health(self):
        storage_service = StorageService()
        storage_service.create_bucket()
        # No health object is available in the S3 bucket, yielding an error
        with ResponsesHelper() as helper:
            helper.add_passthru(self.base_url)
            response = requests.get(self.base_url + '/health/cached')
            self.assertEqual(500, response.status_code)
            self.assertEqual(
                'ChaliceViewError: Cached health object does not exist',
                response.json()['Message'])

        # A successful response is obtained when all the systems are functional
        self._create_mock_queues()
        endpoint_states = self._endpoint_states()
        app = load_app_module(self.lambda_name())
        with ResponsesHelper() as helper:
            helper.add_passthru(self.base_url)
            with self._mock_service_endpoints(helper, endpoint_states):
                app.update_health_cache(MagicMock(), MagicMock())
                response = requests.get(self.base_url + '/health/cached')
                self.assertEqual(200, response.status_code)

        # Another failure is observed when the cached health object is older than 2 minutes
        future_time = time.time() + 3 * 60
        with ResponsesHelper() as helper:
            helper.add_passthru(self.base_url)
            with patch('time.time', new=lambda: future_time):
                response = requests.get(self.base_url + '/health/cached')
                self.assertEqual(500, response.status_code)
                self.assertEqual(
                    'ChaliceViewError: Cached health object is stale',
                    response.json()['Message'])
Example 2
    def test_multipart_upload_error_inside_thread_with_nothing_pushed(self):
        sample_key = 'foo-multipart-upload-error'
        sample_content_parts = [b'a' * 5242880, b'b' * 5242880]

        storage_service = StorageService()
        storage_service.create_bucket()
        with patch.object(MultipartUploadHandler,
                          '_upload_part',
                          side_effect=RuntimeError('test')):
            with self.assertRaises(MultipartUploadError):
                with MultipartUploadHandler(sample_key) as upload:
                    for part in sample_content_parts:
                        upload.push(part)
Example 3
    def test_multipart_upload_ok_with_one_part(self):
        sample_key = 'foo-multipart-upload'
        sample_content_parts = [
            b'a' * 1024  # The last part can be smaller than the limit.
        ]
        expected_content = b''.join(sample_content_parts)

        storage_service = StorageService()
        storage_service.create_bucket()
        with MultipartUploadHandler(sample_key) as upload:
            for part in sample_content_parts:
                upload.push(part)

        self.assertEqual(expected_content, storage_service.get(sample_key))
Example 4
    def test_multipart_upload_error_inside_context_with_nothing_pushed(self):
        sample_key = 'foo-multipart-upload-error'
        sample_content_parts = [
            b'a' * 5242880,
            b'b' * 5242880,
            1234567,  # Not a bytes object; this should cause an error.
            b'c' * 1024
        ]

        storage_service = StorageService()
        storage_service.create_bucket()
        with self.assertRaises(MultipartUploadError):
            with MultipartUploadHandler(sample_key) as upload:
                for part in sample_content_parts:
                    upload.push(part)
Example 5
    def test_multipart_upload_error_with_out_of_bound_part(self):
        sample_key = 'foo-multipart-upload'
        sample_content_parts = [
            b'a' * 1024,  # Below the 5 MiB minimum for a non-final part, so MPU will raise an error.
            b'b' * 5242880,
            b'c' * 1024
        ]

        storage_service = StorageService()
        storage_service.create_bucket()

        with self.assertRaises(MultipartUploadError):
            with MultipartUploadHandler(sample_key) as upload:
                for part in sample_content_parts:
                    upload.push(part)
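
All of the multipart upload tests above rely on the same context-manager contract: parts are submitted with push(), the upload is completed on a clean exit of the with block, and any failure inside the block aborts the upload and is re-raised as MultipartUploadError. Below is a minimal sketch of that contract, with hypothetical internals that only mimic the observable behavior (the real handler drives S3's multipart upload APIs):

class MultipartUploadError(Exception):
    pass


class MultipartUploadHandler:
    # Sketch only, not the project's actual implementation. Real code would
    # call create_multipart_upload, upload_part, complete_multipart_upload
    # and abort_multipart_upload, and enforce the 5 MiB part minimum.

    def __init__(self, key):
        self.key = key
        self.parts = []

    def __enter__(self):
        # Real code would start the multipart upload here.
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            pass  # Real code would complete the upload here.
        else:
            # Real code would abort the in-progress upload, then wrap the
            # cause so callers see a single, uniform error type.
            raise MultipartUploadError(self.key) from exc_value

    def push(self, part):
        if not isinstance(part, bytes):
            raise TypeError('parts must be bytes')
        self.parts.append(part)  # Real code would upload the part here.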
Example 6
    def test_shortened_url_collision(self):
        """
        URL shortener should increase the key length by one for each time there is a key collision on
        non-matching URLs, raising an exception if an entire key matches another
        """
        with mock.patch.object(self.app_module, 'hash_url') as hash_url:
            hash_url.return_value = 'abcde'
            StorageService().create_bucket(
                config.url_redirect_full_domain_name)

            self.assertEqual(
                self._shorten_query_url('https://singlecell.gi.ucsc.edu')
                ['url'], f'http://{config.url_redirect_full_domain_name}/abc')

            self.assertEqual(
                self._shorten_query_url('https://singlecell.gi.ucsc.edu/2')
                ['url'], f'http://{config.url_redirect_full_domain_name}/abcd')

            self.assertEqual(
                self._shorten_query_url(
                    'https://singlecell.gi.ucsc.edu/3')['url'],
                f'http://{config.url_redirect_full_domain_name}/abcde')

            self._shorten_query_url('https://singlecell.gi.ucsc.edu/4',
                                    expect_status=500)
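
The collision strategy this test exercises can be sketched as follows, with hypothetical names (the real logic lives in the service's app module and persists keys in the redirect S3 bucket): start with a three-character prefix of the URL hash, grow it by one character whenever the prefix is taken by a different URL, and fail once the full hash itself is taken.

def choose_key(url_hash: str, url: str, stored_urls: dict) -> str:
    # stored_urls maps shortened keys to the full URLs they redirect to
    # (an in-memory stand-in for the S3-backed store).
    for length in range(3, len(url_hash) + 1):
        key = url_hash[:length]
        existing = stored_urls.get(key)
        if existing is None or existing == url:
            stored_urls[key] = url
            return key
    raise RuntimeError('entire key collides with a different URL')

With hash_url pinned to 'abcde' as in the test, three distinct URLs consume the keys 'abc', 'abcd' and 'abcde', and shortening a fourth one fails, which the endpoint reports as a 500.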
Example 7
    def test_multipart_upload_ok_with_n_parts(self):
        sample_key = 'foo-multipart-upload'
        sample_content_parts = [
            b'a' * 5242880,  # The minimum part size for a multipart upload is 5 MiB.
            b'b' * 5242880,
            b'c' * 1024  # The last part can be smaller than the limit.
        ]
        expected_content = b''.join(sample_content_parts)

        storage_service = StorageService()
        storage_service.create_bucket()
        with MultipartUploadHandler(sample_key) as upload:
            for part in sample_content_parts:
                upload.push(part)

        self.assertEqual(expected_content, storage_service.get(sample_key))
Example 8
    def test_presigned_url(self):
        sample_key = 'foo-presigned-url'
        sample_content = json.dumps({"a": 1})

        storage_service = StorageService()
        storage_service.create_bucket()
        storage_service.put(sample_key, sample_content.encode())

        for file_name in None, 'foo.json':
            with self.subTest(file_name=file_name):
                presigned_url = storage_service.get_presigned_url(
                    sample_key, file_name=file_name)
                response = requests.get(presigned_url)
                if file_name is None:
                    self.assertNotIn('Content-Disposition', response.headers)
                else:
                    # noinspection PyUnreachableCode
                    if False:  # no coverage
                        # Unfortunately, moto does not support emulating S3's mechanism of specifying response headers
                        # via request parameters (https://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectGET.html,
                        # section Request Parameters).
                        self.assertEqual(
                            response.headers['Content-Disposition'],
                            f'attachment;filename="{file_name}"')
                self.assertEqual(sample_content, response.text)
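
The assertion skipped under `if False` documents behavior that moto cannot emulate: S3 derives the Content-Disposition response header from a request parameter baked into the presigned URL. Here is a hedged sketch of how a get_presigned_url implementation might pass file_name through, using boto3's ResponseContentDisposition parameter (the standalone function and its bucket argument are assumptions, not the project's actual code):

import boto3


def get_presigned_url(bucket: str, key: str, file_name: str = None) -> str:
    s3 = boto3.client('s3')
    params = {'Bucket': bucket, 'Key': key}
    if file_name is not None:
        # S3 echoes this parameter back as the Content-Disposition header
        # of the response to a GET of the presigned URL.
        params['ResponseContentDisposition'] = f'attachment;filename="{file_name}"'
    return s3.generate_presigned_url(ClientMethod='get_object', Params=params)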
Example 9
    def test_simple_get_put(self):
        sample_key = 'foo-simple'
        sample_content = b'bar'

        storage_service = StorageService()
        storage_service.create_bucket()

        # NOTE: Ensure that the key does not exist before writing.
        with self.assertRaises(storage_service.client.exceptions.NoSuchKey):
            storage_service.get(sample_key)

        storage_service.put(sample_key, sample_content)

        self.assertEqual(sample_content, storage_service.get(sample_key))
Example 10
    def test_shortened_url_matching(self):
        """
        URL shortener should return the same response URL for identical input URLs
        """
        url = 'https://singlecell.gi.ucsc.edu'
        StorageService().create_bucket(config.url_redirect_full_domain_name)
        shortened_url1 = self._shorten_query_url(url)
        shortened_url2 = self._shorten_query_url(url)
        self.assertEqual(shortened_url1, shortened_url2)
Example 11
    def test_invalid_url(self):
        """
        URL shortener should reject any non-URL argument and any URL that is
        not on the whitelist
        """
        urls = [
            'https://asinglecell.gi.ucsc.edu', 'https://singlecll.gi.ucsc.edu',
            'https://singlecell.gi.ucsc.edut', 'http://singlecell.gi.xyz.edu',
            'singlecell.gi.ucsc.edu'
        ]
        StorageService().create_bucket(config.url_redirect_full_domain_name)
        for url in urls:
            with self.subTest(url=url):
                self._shorten_query_url(url, expect_status=400)
Example 12
    def test_simple_get_unknown_item(self):
        sample_key = 'foo-simple'

        storage_service = StorageService()
        storage_service.create_bucket()

        with self.assertRaises(storage_service.client.exceptions.NoSuchKey):
            storage_service.get(sample_key)
Example 13
    def test_whitelisting(self):
        """
        URL shortener should accept any whitelisted domain
        """
        urls = [
            'https://singlecell.gi.ucsc.edu', 'http://singlecell.gi.ucsc.edu',
            'https://singlecell.gi.ucsc.edu/',
            'https://singlecell.gi.ucsc.edu/abc',
            'https://subdomain.singlecell.gi.ucsc.edu/',
            'https://sub.subdomain.singlecell.gi.ucsc.edu/abc/def'
        ]
        StorageService().create_bucket(config.url_redirect_full_domain_name)
        for url in urls:
            with self.subTest(url=url):
                self._shorten_query_url(url)
Example 14
    def test_valid_url(self, storage_service_get, storage_service_put):
        """
        Passing in a valid URL should create an object in S3 and return a
        link that redirects to the given URL
        """
        exceptions = StorageService().client.exceptions
        storage_service_get.side_effect = exceptions.NoSuchKey({}, "")
        response = self._shorten_query_url(
            'https://dev.singlecell.gi.ucsc.edu/explore/specimens'
            '?filter=%5B%7B%22facetName%22%3A%22organ%22%2C%22terms%22%3A%5B%22bone%22%5D%7D%5D'
        )
        self.assertEqual(
            {'url': f'http://{config.url_redirect_full_domain_name}/pv9'},
            response)
        storage_service_put.assert_called_once()
Example 15
    def storage_service(self) -> StorageService:
        return StorageService()
Example 16
    def service(self) -> ManifestService:
        return ManifestService(StorageService(), self.file_url_func)
Example 17
    def __init__(self, lambda_name: str):
        self.lambda_name = lambda_name
        self.storage_service = StorageService()
Example 18
class HealthController:
    """
    Encapsulates information about the health status of an Azul deployment. All
    aspects of health are exposed as lazily loaded properties. Instantiating the
    class does not examine any resources, only accessing the individual
    properties does, or using the `to_json` method.
    """

    def __init__(self, lambda_name: str):
        self.lambda_name = lambda_name
        self.storage_service = StorageService()

    def as_json(self, keys: Iterable[str]) -> JSON:
        keys = set(keys)
        if keys:
            require(keys.issubset(self.all_keys()))
        else:
            keys = self.all_keys()
        json = {k: getattr(self, k) for k in keys}
        json['up'] = all(v['up'] for v in json.values())
        return json

    @health_property
    def other_lambdas(self):
        """
        Indicates whether the companion REST API responds to HTTP requests.
        """
        response = {
            lambda_name: self._lambda(lambda_name)
            for lambda_name in config.lambda_names()
            if lambda_name != self.lambda_name
        }
        response['up'] = all(v['up'] for v in response.values())
        return response

    @health_property
    def queues(self):
        """
        Returns information about the SQS queues used by the indexer.
        """
        sqs = aws.resource('sqs')
        response = {'up': True}
        for queue in config.all_queue_names:
            try:
                queue_instance = sqs.get_queue_by_name(QueueName=queue).attributes
            except ClientError as ex:
                response[queue] = {
                    'up': False,
                    'error': ex.response['Error']['Message']
                }
                response['up'] = False
            else:
                response[queue] = {
                    'up': True,
                    'messages': {
                        'delayed': int(queue_instance['ApproximateNumberOfMessagesDelayed']),
                        'invisible': int(queue_instance['ApproximateNumberOfMessagesNotVisible']),
                        'queued': int(queue_instance['ApproximateNumberOfMessages'])
                    }
                }
        return response

    @health_property
    def progress(self) -> JSON:
        """
        The number of Data Store bundles pending to be indexed and the number
        of index documents in need of updating.
        """
        return {
            'up': True,
            'unindexed_bundles': sum(self.queues[config.notifications_queue_name()].get('messages', {}).values()),
            'unindexed_documents': sum(chain.from_iterable(
                self.queues[config.tallies_queue_name(retry=retry)].get('messages', {}).values()
                for retry in (False, True)
            ))
        }

    def _api_endpoint(self, path: str) -> Tuple[str, JSON]:
        url = config.service_endpoint() + path
        response = requests.head(url)
        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            return url, {'up': False, 'error': repr(e)}
        else:
            return url, {'up': True}

    @health_property
    def api_endpoints(self):
        """
        Indicates whether important service API endpoints are operational.
        """
        endpoints = [
            f'/index/{entity_type}?size=1'
            for entity_type in ('projects', 'samples', 'files', 'bundles')
        ]
        with ThreadPoolExecutor(len(endpoints)) as tpe:
            status = dict(tpe.map(self._api_endpoint, endpoints))
        status['up'] = all(v['up'] for v in status.values())
        return status

    @health_property
    def elasticsearch(self):
        """
        Indicates whether the Elasticsearch cluster is responsive.
        """
        return {
            'up': ESClientFactory.get().ping(),
        }

    @lru_cache
    def _lambda(self, lambda_name) -> JSON:
        try:
            response = requests.get(config.lambda_endpoint(lambda_name) + '/health/basic')
            response.raise_for_status()
            up = response.json()['up']
        except Exception as e:
            return {
                'up': False,
                'error': repr(e)
            }
        else:
            return {
                'up': up,
            }

    def _make_response(self, body: JSON) -> Response:
        try:
            up = body['up']
        except KeyError:
            status = 400
        else:
            status = 200 if up else 503
        return Response(body=json.dumps(body), status_code=status)

    def basic_health(self) -> Response:
        return self._make_response({'up': True})

    def health(self) -> Response:
        return self._make_response(self.as_json(self.all_keys()))

    def custom_health(self, keys) -> Response:
        if keys is None:
            body = self.as_json(self.all_keys())
        elif isinstance(keys, str):
            assert keys  # Chalice maps empty string to None
            keys = keys.split(',')
            try:
                body = self.as_json(keys)
            except RequirementError:
                body = {'Message': 'Invalid health keys'}
        else:
            body = {'Message': 'Invalid health keys'}
        return self._make_response(body)

    def fast_health(self) -> Response:
        return self._make_response(self._as_json_fast())

    def cached_health(self) -> JSON:
        try:
            cache = json.loads(self.storage_service.get(f'health/{self.lambda_name}'))
        except self.storage_service.client.exceptions.NoSuchKey:
            raise ChaliceViewError('Cached health object does not exist')
        else:
            max_age = 2 * 60
            if time.time() - cache['time'] > max_age:
                raise ChaliceViewError('Cached health object is stale')
            else:
                body = cache['health']
        return body

    fast_properties: Mapping[str, Iterable[health_property]] = {
        'indexer': (
            elasticsearch,
            queues,
            progress
        ),
        'service': (
            elasticsearch,
            api_endpoints,
        )
    }

    def _as_json_fast(self) -> JSON:
        return self.as_json(p.key for p in self.fast_properties[self.lambda_name])

    def update_cache(self) -> None:
        health_object = dict(time=time.time(), health=self._as_json_fast())
        self.storage_service.put(object_key=f'health/{self.lambda_name}',
                                 data=json.dumps(health_object).encode())

    all_properties: Iterable[health_property] = tuple(
        p for p in locals().values() if isinstance(p, health_property)
    )

    @classmethod
    def all_keys(cls) -> AbstractSet[str]:
        return frozenset(p.key for p in cls.all_properties)
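
The health_property decorator used throughout HealthController is not shown above. The following is a minimal sketch that is consistent with how the class uses it, assuming an ordinary property subclass whose name doubles as the key in the health JSON (the real implementation may also cache the computed value per instance):

class health_property(property):
    # Sketch only, not the project's actual code: the wrapped function's
    # name is the key under which its value appears in the health JSON,
    # which is what `all_keys` and `fast_properties` rely on above.

    @property
    def key(self) -> str:
        return self.fget.__name__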