Beispiel #1
0
    def _test_file_get_not_found(self, replica: Replica):
        """
        Check that GET /v1/files/<uuid> is answered with 404 and a "not_found"
        error body, first without a version and then with an explicit version.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
        files_path = "/v1/files/" + file_uuid

        def expect_not_found(target: str) -> None:
            # Authenticated GET against the test-fixture bucket; the error payload
            # must carry the not_found code/status and a stacktrace.
            with override_bucket_config(BucketConfig.TEST_FIXTURE):
                self.assertGetResponse(
                    target,
                    requests.codes.not_found,
                    headers=get_auth_header(),
                    expected_error=ExpectedErrorFields(
                        code="not_found",
                        status=requests.codes.not_found,
                        expect_stacktrace=True,
                    ),
                )

        # Request without a version.
        unversioned = UrlBuilder().set(path=files_path)
        unversioned = unversioned.add_query("replica", replica.name)
        expect_not_found(str(unversioned))

        # Request pinned to a specific version.
        version = "2017-06-16T193604.240704Z"
        versioned = UrlBuilder().set(path=files_path)
        versioned = versioned.add_query("replica", replica.name)
        versioned = versioned.add_query("version", version)
        expect_not_found(str(versioned))
    def _test_bundle_get_not_found(self, replica: Replica):
        """
        Check that GET /v1/bundles/<uuid> for a freshly generated random UUID is
        answered with 404 "not_found", both without and with a version.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        bundle_path = "/v1/bundles/" + str(uuid.uuid4())

        # None means "no version query parameter"; the second pass pins a version.
        for version in (None, "2017-06-16T193604.240704Z"):
            builder = UrlBuilder().set(path=bundle_path)
            builder = builder.add_query("replica", replica.name)
            if version is not None:
                builder = builder.add_query("version", version)
            url = str(builder)

            with override_bucket_config(BucketConfig.TEST_FIXTURE):
                self.assertGetResponse(
                    url,
                    requests.codes.not_found,
                    expected_error=ExpectedErrorFields(
                        code="not_found",
                        status=requests.codes.not_found,
                    ),
                )
Beispiel #3
0
 def _test_file_head(self, replica: Replica):
     """
     Issue an authenticated HEAD for a fixed file UUID/version and check that
     the response is 200 and carries the expected X-DSS-* headers.

     :param replica: replica whose name is sent as the "replica" query parameter.
     """
     file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
     version = "2017-06-16T193604.240704Z"

     # Metadata headers the HEAD response is expected to contain.
     expected_headers = {
         'X-DSS-CREATOR-UID': '4321',
         'X-DSS-VERSION': version,
         'X-DSS-CONTENT-TYPE': 'text/plain',
         'X-DSS-SIZE': '11358',
         'X-DSS-CRC32C': 'e16e07b9',
         'X-DSS-S3-ETAG': '3b83ef96387f14655fc854ddc3c6bd57',
         'X-DSS-SHA1': '2b8b815229aa8a61e483fb4ba0588b8b6c491890',
         'X-DSS-SHA256': 'cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30',
     }

     builder = UrlBuilder().set(path="/v1/files/" + file_uuid)
     builder = builder.add_query("replica", replica.name)
     builder = builder.add_query("version", version)
     url = str(builder)

     with override_bucket_config(BucketConfig.TEST_FIXTURE):
         head_result = self.assertHeadResponse(
             url,
             [requests.codes.ok],
             headers=get_auth_header(),
         )
         self.assertHeaders(head_result.response, expected_headers)
Beispiel #4
0
    def _test_file_get_latest(self, replica: Replica):
        """
        Verify we can successfully fetch the latest version of a file UUID.

        The GET is expected to end in a 302 (found) whose Location is then
        downloaded; the payload size and SHA-1 must agree with the X-DSS-*
        headers of the redirect response.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"

        builder = UrlBuilder().set(path="/v1/files/" + file_uuid)
        url = str(builder.add_query("replica", replica.name))

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                url,
                requests.codes.found,
                headers=get_auth_header(),
                redirect_follow_retries=FILE_GET_RETRY_COUNT,
                min_retry_interval_header=RETRY_AFTER_INTERVAL,
                override_retry_interval=1,
            )
            redirect_headers = resp_obj.response.headers

            # TODO: (ttung) verify more of the headers
            download_url = redirect_headers['Location']
            stated_sha1 = redirect_headers['X-DSS-SHA1']
            download = requests.get(download_url)
            self.assertEqual(len(download.content), 8685)
            self.assertEqual(redirect_headers['X-DSS-SIZE'], '8685')

            # verify that the downloaded data matches the stated checksum
            actual_sha1 = hashlib.sha1(download.content).hexdigest()
            self.assertEqual(actual_sha1, stated_sha1)
    def _test_bundle_get(self, replica: Replica):
        """
        Fetch a fixed bundle UUID/version and check the bundle-level fields and
        the metadata of the first file listed in the JSON response.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
        version = "2017-06-20T214506.766634Z"

        builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        builder = builder.add_query("replica", replica.name)
        builder = builder.add_query("version", version)
        url = str(builder)

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(url, requests.codes.ok)

        # Bundle-level attributes.
        bundle = resp_obj.json['bundle']
        expected_bundle_fields = (
            ('uuid', bundle_uuid),
            ('version', version),
            ('creator_uid', 12345),
        )
        for field, expected in expected_bundle_fields:
            self.assertEqual(bundle[field], expected)

        # Attributes of the first file entry, checked in a fixed order.
        first_file = bundle['files'][0]
        expected_file_fields = (
            ('content-type', "text/plain"),
            ('size', 11358),
            ('crc32c', "e16e07b9"),
            ('name', "LICENSE"),
            ('s3_etag', "3b83ef96387f14655fc854ddc3c6bd57"),
            ('sha1', "2b8b815229aa8a61e483fb4ba0588b8b6c491890"),
            ('sha256', "cfc7749b96f63bd31c3c42b5c471bf756814053e847c10f3eb003417bc523d30"),
            ('uuid', "ce55fd51-7833-469b-be0b-5da88ebebfcd"),
            ('version', "2017-06-16T193604.240704Z"),
        )
        for field, expected in expected_file_fields:
            self.assertEqual(first_file[field], expected)
    def _test_file_get_latest(self, replica: Replica):
        """
        Verify we can successfully fetch the latest version of a file UUID.

        The request is repeated up to FILE_GET_RETRY_COUNT times. A 301 (moved)
        answer must carry the expected Retry-After value and a Location that
        contains the requested URL; a 302 (found) answer ends the test after the
        download size and SHA-1 have been checked. The test fails if no 302 is
        seen within the retry budget.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"

        builder = UrlBuilder().set(path="/v1/files/" + file_uuid)
        url = str(builder.add_query("replica", replica.name))

        for _ in range(FILE_GET_RETRY_COUNT):
            with override_bucket_config(BucketConfig.TEST_FIXTURE):
                resp_obj = self.assertGetResponse(
                    url,
                    [requests.codes.found, requests.codes.moved]
                )
                response = resp_obj.response
                status = response.status_code

                if status == requests.codes.moved:
                    # Not ready yet: validate the retry hints and try again.
                    retry_after = int(response.headers['Retry-After'])
                    self.assertEqual(retry_after, RETRY_AFTER_INTERVAL)
                    self.assertIn(url, response.headers['Location'])
                elif status == requests.codes.found:
                    download_url = response.headers['Location']
                    stated_sha1 = response.headers['X-DSS-SHA1']
                    download = requests.get(download_url)
                    self.assertEqual(len(download.content), 8685)
                    self.assertEqual(response.headers['X-DSS-SIZE'], '8685')

                    # verify that the downloaded data matches the stated checksum
                    actual_sha1 = hashlib.sha1(download.content).hexdigest()
                    self.assertEqual(actual_sha1, stated_sha1)

                    # TODO: (ttung) verify more of the headers
                    return
        self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")
    def _test_bundle_get_directaccess(self, replica: Replica):
        """
        Fetch a fixed bundle with directurls=true, then read the first file
        through the blobstore handle using the bucket/key taken from the returned
        URL, and check the SHA-1 of its contents.

        :param replica: replica under test; supplies the query value, the expected
            URL scheme (``storage_schema``) and the blobstore handle.
        """
        expected_scheme = replica.storage_schema

        bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
        version = "2017-06-20T214506.766634Z"

        builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        builder = builder.add_query("replica", replica.name)
        builder = builder.add_query("version", version)
        builder = builder.add_query("directurls", "true")
        request_url = str(builder)

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(request_url, requests.codes.ok)

        direct_url = resp_obj.json['bundle']['files'][0]['url']
        parsed = urllib.parse.urlparse(direct_url)
        self.assertEqual(parsed.scheme, expected_scheme)

        # netloc is the bucket; the key is the path without its leading "/".
        bucket = parsed.netloc
        key = parsed.path[1:]

        blobstore = Config.get_blobstore_handle(replica)
        contents = blobstore.get(bucket, key)

        digest = hashlib.sha1(contents).hexdigest()
        self.assertEqual(digest, "2b8b815229aa8a61e483fb4ba0588b8b6c491890")
 def test_manifest_files(self):
     """
     Check that iterating get_manifest_files() for a fixed bundle UUID/version
     on the AWS replica yields exactly one item.
     """
     bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
     version = "2017-06-20T214506.766634Z"
     replica = Replica.aws

     with override_bucket_config(BucketConfig.TEST_FIXTURE):
         # Consume the iterable inside the override and count what it yields.
         manifest = get_manifest_files(bundle_uuid, version, replica)
         file_count = sum(1 for _ in manifest)

     self.assertEqual(file_count, 1)
 def test_status_fail(self):
     """
     For every replica, launch a checkout and check that its status endpoint
     reports either RUNNING or FAILED.

     The destination passed to launch_checkout is a UUID-like string rather
     than ``replica.checkout_bucket`` -- presumably a bucket that does not
     exist (TODO confirm).
     """
     acceptable_states = ['RUNNING', 'FAILED']

     for replica in Replica:
         exec_arn = self.launch_checkout('e47114c9-bb96-480f-b6f5-c3e07aae399f', replica)
         status_url = str(UrlBuilder().set(path="/v1/bundles/checkout/" + exec_arn))

         with override_bucket_config(BucketConfig.TEST_FIXTURE):
             resp_obj = self.assertGetResponse(status_url, requests.codes.ok)

         status = resp_obj.json.get('status')
         self.assertIsNotNone(status)
         self.assertIn(status, acceptable_states)
 def test_status_success(self):
     """
     For every replica, launch a checkout into the replica's checkout bucket
     and check that its status endpoint reports either RUNNING or SUCCEEDED.
     """
     acceptable_states = ['RUNNING', 'SUCCEEDED']

     for replica in Replica:
         exec_arn = self.launch_checkout(replica.checkout_bucket, replica)
         status_url = str(UrlBuilder().set(path="/v1/bundles/checkout/" + exec_arn))

         with override_bucket_config(BucketConfig.TEST_FIXTURE):
             resp_obj = self.assertGetResponse(status_url, requests.codes.ok)

         status = resp_obj.json.get('status')
         self.assertIsNotNone(status)
         self.assertIn(status, acceptable_states)
    def _test_file_head(self, replica: Replica):
        """
        Issue a HEAD for a fixed file UUID/version and accept either a 200 (ok)
        or a 301 (moved) response.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
        version = "2017-06-16T193604.240704Z"

        builder = UrlBuilder().set(path="/v1/files/" + file_uuid)
        builder = builder.add_query("replica", replica.name)
        builder = builder.add_query("version", version)
        url = str(builder)

        accepted_codes = [requests.codes.ok, requests.codes.moved]
        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertHeadResponse(url, accepted_codes)
 def setUpClass(cls):
     """
     Start a local server and seed three bundles per replica.

     For each replica a unique flashflood prefix is written to the
     DSS_<REPLICA>_FLASHFLOOD_PREFIX_READ/_WRITE environment variables, then
     _upload_bundle() is called three times; each returned (uuid, version)
     pair is stored in ``cls.bundles[replica.name]`` and an event is recorded
     for it.

     NOTE(review): unittest invokes setUpClass on the class, so this needs a
     @classmethod decorator; none is visible in this excerpt -- confirm it is
     present in the full file.
     """
     cls.app = ThreadedLocalServer()
     cls.app.start()
     cls.bundles = {replica.name: [] for replica in Replica}

     with override_bucket_config(BucketConfig.TEST):
         for replica in Replica:
             # One random prefix per replica, shared by the read and write settings.
             pfx = f"flashflood-{replica.name}-{uuid4()}"
             os.environ[f'DSS_{replica.name.upper()}_FLASHFLOOD_PREFIX_READ'] = pfx
             os.environ[f'DSS_{replica.name.upper()}_FLASHFLOOD_PREFIX_WRITE'] = pfx

             for _ in range(3):
                 uuid, version = _upload_bundle(cls.app, replica)
                 cls.bundles[replica.name].append((uuid, version))
                 events.record_event_for_bundle(
                     replica,
                     f"bundles/{uuid}.{version}",
                     use_version_for_timestamp=True,
                 )
Beispiel #13
0
 def _test_bundle_get_deleted(self, replica: Replica, bundle_uuid: str,
                              version: typing.Optional[str],
                              expected_version: typing.Optional[str]):
     """
     Look a bundle up with get_bundle_from_bucket() and check which version
     (if any) is returned.

     :param replica: replica to query.
     :param bundle_uuid: UUID of the bundle to look up.
     :param version: version to request; passed through unchanged (may be None).
     :param expected_version: version expected in the response, or None when
         no bundle is expected to be returned.
     """
     with override_bucket_config(BucketConfig.TEST_FIXTURE):
         try:
             response = get_bundle_from_bucket(
                 uuid=bundle_uuid,
                 replica=replica,
                 version=version,
                 bucket=None,
             )
         except DSSException:
             # A DSS error is treated the same as "no bundle returned".
             response = {}

     actual_version = response['bundle']['version'] if 'bundle' in response else None
     # assertEquals is a deprecated alias that was removed in Python 3.12.
     self.assertEqual(actual_version, expected_version)
Beispiel #14
0
    def test_file_get_no_replica(self):
        """
        Verify we raise the correct error code when we provide no replica.

        The GET carries no "replica" query parameter and must be rejected with
        400 and an "illegal_arguments" error body.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"
        url = str(UrlBuilder().set(path="/v1/files/" + file_uuid))

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertGetResponse(
                url,
                requests.codes.bad_request,
                headers=get_auth_header(),
                expected_error=ExpectedErrorFields(
                    code="illegal_arguments",
                    status=requests.codes.bad_request,
                    expect_stacktrace=True,
                ),
            )
Beispiel #15
0
    def test_no_files(self):
        """
        Verify we raise the correct error code when we do not provide the list of files.

        The PUT body only contains ``creator_uid`` and must be rejected with 400
        and an "illegal_arguments" error body.
        """
        bundle_uuid = "ce55fd51-7833-469b-be0b-5da88ec0ffee"

        builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        url = str(builder.add_query("replica", "aws"))

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            self.assertPutResponse(
                url,
                requests.codes.bad_request,
                json_request_body={"creator_uid": 12345},
                expected_error=ExpectedErrorFields(
                    code="illegal_arguments",
                    status=requests.codes.bad_request,
                    expect_stacktrace=True,
                ),
            )
    def test_sanity_check_no_replica(self):
        """
        For every replica's checkout bucket, POST a checkout request whose
        "replica" query parameter is empty and expect a 400 response.
        """
        bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
        version = "2017-06-20T214506.766634Z"
        checkout_path = "/v1/bundles/" + bundle_uuid + "/checkout"

        for replica in Replica:
            request_body = {"destination": replica.checkout_bucket}

            # The replica is deliberately left blank in the query string.
            builder = UrlBuilder().set(path=checkout_path)
            builder = builder.add_query("replica", "")
            builder = builder.add_query("version", version)
            url = str(builder)

            with override_bucket_config(BucketConfig.TEST_FIXTURE):
                self.assertPostResponse(url, requests.codes.bad_request, request_body)
    def test_pre_execution_check_doesnt_exist(self):
        """
        For every replica, POST a checkout request for a bundle UUID that is
        expected not to exist and check for a 404 whose JSON ``code`` is
        "not_found".
        """
        non_existent_bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf111"
        version = "2017-06-20T214506.766634Z"
        checkout_path = "/v1/bundles/" + non_existent_bundle_uuid + "/checkout"

        for replica in Replica:
            request_body = {"destination": replica.checkout_bucket}

            builder = UrlBuilder().set(path=checkout_path)
            builder = builder.add_query("replica", replica.name)
            builder = builder.add_query("version", version)
            url = str(builder)

            with override_bucket_config(BucketConfig.TEST_FIXTURE):
                resp_obj = self.assertPostResponse(url, requests.codes.not_found, request_body)
            self.assertEqual(resp_obj.json['code'], 'not_found')
    def launch_checkout(self, dst_bucket: str, replica: Replica) -> str:
        """
        POST a checkout request for a fixed bundle UUID/version and return the
        job id from the response.

        :param dst_bucket: value sent as "destination" in the request body.
        :param replica: replica whose name is sent as the "replica" query parameter.
        :return: the "checkout_job_id" field of the JSON response (asserted non-None).
        """
        bundle_uuid = "011c7340-9b3c-4d62-bf49-090d79daf198"
        version = "2017-06-20T214506.766634Z"
        payload = {"destination": dst_bucket}

        builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid + "/checkout")
        builder = builder.add_query("replica", replica.name)
        builder = builder.add_query("version", version)
        url = str(builder)

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertPostResponse(url, requests.codes.ok, payload)

        job_id = resp_obj.json["checkout_job_id"]
        self.assertIsNotNone(job_id)
        return job_id
    def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str, uploader: Uploader):
        """
        Upload a randomly sized file, register it via upload_file(), then GET it
        and check that both the downloaded payload and the X-DSS-SIZE header
        match the original size.

        The GET is repeated up to FILE_GET_RETRY_COUNT times. A 301 (moved)
        answer must carry the expected Retry-After value and a Location that
        contains the requested URL; a 302 (found) answer ends the test. The test
        fails if no 302 is seen within the retry budget.

        :param replica: replica whose name is sent as the "replica" query parameter.
        :param scheme: URL scheme used to build the source URL.
        :param test_bucket: bucket name used to build the source URL.
        :param uploader: object whose checksum_and_upload_file() stores the data.
        """
        src_key = generate_test_key()
        # 1024 plus one random byte: the size varies between 1024 and 1279 bytes.
        src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
        src_data = os.urandom(src_size)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            # Flush so the uploader, which reopens the file by name, sees all bytes.
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        source_url = f"{scheme}://{test_bucket}/{src_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
        # formats to exactly the same string with this pattern.
        version = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S.%fZ")

        self.upload_file(source_url, file_uuid, bundle_uuid=bundle_uuid, version=version)

        url = str(UrlBuilder()
                  .set(path="/v1/files/" + file_uuid)
                  .add_query("replica", replica.name))

        for _ in range(FILE_GET_RETRY_COUNT):
            with override_bucket_config(BucketConfig.TEST):
                resp_obj = self.assertGetResponse(
                    url,
                    [requests.codes.found, requests.codes.moved]
                )
                if resp_obj.response.status_code == requests.codes.found:
                    url = resp_obj.response.headers['Location']
                    data = requests.get(url)
                    self.assertEqual(len(data.content), src_size)
                    self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'], str(src_size))
                    return
                elif resp_obj.response.status_code == requests.codes.moved:
                    # Not ready yet: validate the retry hints and try again.
                    retry_after = int(resp_obj.response.headers['Retry-After'])
                    self.assertEqual(retry_after, RETRY_AFTER_INTERVAL)
                    self.assertIn(url, resp_obj.response.headers['Location'])
        self.fail(f"Failed after {FILE_GET_RETRY_COUNT} retries.")
Beispiel #20
0
    def _test_file_size(self, replica: Replica, scheme: str, test_bucket: str,
                        uploader: Uploader):
        """
        Upload a randomly sized file, register it via upload_file(), then GET it
        and check that both the downloaded payload and the X-DSS-SIZE header
        match the original size.

        :param replica: replica whose name is sent as the "replica" query parameter.
        :param scheme: URL scheme used to build the source URL.
        :param test_bucket: bucket name used to build the source URL.
        :param uploader: object whose checksum_and_upload_file() stores the data.
        """
        src_key = generate_test_key()
        # 1024 plus one random byte: the size varies between 1024 and 1279 bytes.
        src_size = 1024 + int.from_bytes(os.urandom(1), byteorder='little')
        src_data = os.urandom(src_size)
        with tempfile.NamedTemporaryFile(delete=True) as fh:
            fh.write(src_data)
            # Flush so the uploader, which reopens the file by name, sees all bytes.
            fh.flush()

            uploader.checksum_and_upload_file(fh.name, src_key, "text/plain")

        source_url = f"{scheme}://{test_bucket}/{src_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        # datetime.utcnow() is deprecated since Python 3.12; an aware UTC "now"
        # formats to exactly the same string with this pattern.
        version = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%dT%H%M%S.%fZ")

        self.upload_file(source_url,
                         file_uuid,
                         bundle_uuid=bundle_uuid,
                         version=version)

        url = str(UrlBuilder().set(path="/v1/files/" + file_uuid).add_query(
            "replica", replica.name))

        with override_bucket_config(BucketConfig.TEST):
            # The expected final answer is a 302 (found); the retry keyword
            # arguments are handed to assertGetResponse unchanged.
            resp_obj = self.assertGetResponse(
                url,
                requests.codes.found,
                headers=get_auth_header(),
                redirect_follow_retries=FILE_GET_RETRY_COUNT,
                min_retry_interval_header=RETRY_AFTER_INTERVAL,
                override_retry_interval=1,
            )
            url = resp_obj.response.headers['Location']
            data = requests.get(url)
            self.assertEqual(len(data.content), src_size)
            self.assertEqual(resp_obj.response.headers['X-DSS-SIZE'],
                             str(src_size))
Beispiel #21
0
    def _test_file_get_disposition(self, replica: Replica):
        """
        Verify that passing in "content_disposition" returns the expected "Content-Disposition"
        header when fetching the final presigned url.

        :param replica: replica whose name is sent as the "replica" query parameter.
        """
        disposition = 'attachment; filename=test-data.json'

        builder = UrlBuilder().set(path="/v1/files/ce55fd51-7833-469b-be0b-5da88ebebfcd")
        builder = builder.add_query("replica", replica.name)
        builder = builder.add_query("content_disposition", disposition)
        url = str(builder)

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            resp_obj = self.assertGetResponse(
                url,
                requests.codes.found,
                headers=get_auth_header(),
                redirect_follow_retries=FILE_GET_RETRY_COUNT,
                min_retry_interval_header=RETRY_AFTER_INTERVAL,
                override_retry_interval=1,
            )
            # Follow the redirect target and inspect the headers it serves.
            download = requests.get(resp_obj.response.headers['Location'])
            self.assertEqual(download.headers['Content-Disposition'], disposition)
Beispiel #22
0
    def _test_file_get_direct(self, replica: Replica):
        """
        Verify that the direct URL option works for GET/ file

        The same file is requested twice, once with ``directurl=True`` and once
        without. The selected X-DSS-* headers of both responses must be equal,
        the direct response must not carry Retry-After, its Location must start
        with the replica's storage schema and checkout bucket, and the blob it
        points at must have the size announced in X-DSS-SIZE.

        :param replica: replica under test.
        """
        file_uuid = "ce55fd51-7833-469b-be0b-5da88ebebfcd"
        handle = Config.get_blobstore_handle(replica)

        def files_query():
            # Common part of both request URLs: path plus replica parameter.
            return UrlBuilder().set(path="/v1/files/" + file_uuid).add_query(
                "replica", replica.name)

        def get_expecting_found(request_url):
            # Authenticated GET that must end in a 302 (found).
            return self.assertGetResponse(
                request_url,
                requests.codes.found,
                headers=get_auth_header(),
                redirect_follow_retries=FILE_GET_RETRY_COUNT,
                min_retry_interval_header=RETRY_AFTER_INTERVAL,
                override_retry_interval=1,
            )

        verify_headers = [
            'X-DSS-VERSION', 'X-DSS-CREATOR-UID', 'X-DSS-S3-ETAG',
            'X-DSS-SHA256', 'X-DSS-SHA1', 'X-DSS-CRC32C'
        ]

        def comparable(headers):
            # Keep only the headers that both responses are expected to share.
            return {k: v for k, v in headers.items() if k in verify_headers}

        direct_url_req = str(files_query().add_query("directurl", "True"))
        presigned_url_req = str(files_query())

        with override_bucket_config(BucketConfig.TEST_FIXTURE):
            native_resp_obj = get_expecting_found(direct_url_req)
            resp_obj = get_expecting_found(presigned_url_req)

            native_headers = native_resp_obj.response.headers
            presigned_headers = resp_obj.response.headers
            self.assertDictEqual(comparable(native_headers),
                                 comparable(presigned_headers))

            with self.subTest(
                    'Retry-After headers are not included in a successful response.'
            ):
                self.assertEqual(native_headers.get('Retry-After'), None)

            # "<schema>://<bucket>/..." -> check the piece on each side of "//".
            location = native_headers['Location']
            self.assertTrue(
                location.split('//')[0].startswith(replica.storage_schema))
            self.assertTrue(
                location.split('//')[1].startswith(replica.checkout_bucket))

            blob_path = location.split('/blobs/')[1]
            native_size = handle.get_size(replica.checkout_bucket,
                                          f'blobs/{blob_path}')
            self.assertGreater(native_size, 0)
            self.assertEqual(native_size, int(presigned_headers['X-DSS-SIZE']))