Example #1
0
    def put_bundle(self,
                   replica: Replica,
                   bundle_uuid: str,
                   files: typing.Iterable[typing.Tuple[str, str, str]],
                   bundle_version: typing.Optional[str] = None,
                   expected_code: int = requests.codes.created):
        """
        PUT a bundle composed of the given files and assert the response code.

        :param replica: replica the bundle is written to.
        :param bundle_uuid: UUID of the bundle being created.
        :param files: iterable of (file_uuid, file_version, file_name) triples.
        :param bundle_version: specific version to write; omitted when falsy.
        :param expected_code: status code the PUT must return.
        :return: the DSSAssertResponse from the PUT request.
        """
        url_builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        url_builder.add_query("replica", replica.name)
        if bundle_version:
            url_builder.add_query("version", bundle_version)

        file_entries = [
            dict(uuid=file_uuid,
                 version=file_version,
                 name=file_name,
                 indexed=False)
            for file_uuid, file_version, file_name in files
        ]
        resp_obj = self.assertPutResponse(
            str(url_builder),
            expected_code,
            json_request_body=dict(files=file_entries, creator_uid=12345),
        )

        # Any 2xx reply must be JSON and carry the assigned bundle version.
        if 200 <= resp_obj.response.status_code < 300:
            self.assertHeaders(resp_obj.response, {
                'content-type': "application/json",
            })
            self.assertIn('version', resp_obj.json)
        return resp_obj
Example #2
0
    def upload_file(
        self: typing.Any,
        source_url: str,
        file_uuid: str,
        bundle_uuid: typing.Optional[str] = None,
        version: typing.Optional[str] = None,
        expected_code: int = requests.codes.created,
    ):
        """
        PUT a file into the DSS from `source_url` and assert the response code.

        A random bundle UUID and a fresh timestamp-based version are generated
        when not supplied.  The sentinel version 'missing' omits the version
        query parameter entirely (used to exercise that error path).

        :param source_url: cloud URL of the object to ingest.
        :param file_uuid: UUID for the new file.
        :param bundle_uuid: bundle UUID sent in the request body; random when None.
        :param version: file version; fresh timestamp when None, omitted when 'missing'.
        :param expected_code: status code the PUT must return.
        :return: the DSSAssertResponse from the PUT request.
        """
        bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
        if version is None:
            timestamp = datetime.datetime.utcnow()
            version = timestamp.strftime("%Y-%m-%dT%H%M%S.%fZ")

        urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
        if version != 'missing':
            urlbuilder.add_query("version", version)

        resp_obj = self.assertPutResponse(str(urlbuilder),
                                          expected_code,
                                          json_request_body=dict(
                                              bundle_uuid=bundle_uuid,
                                              creator_uid=0,
                                              source_url=source_url,
                                          ),
                                          headers=get_auth_header())
        if resp_obj.response.status_code == requests.codes.created:
            self.assertHeaders(resp_obj.response, {
                'content-type': "application/json",
            })
            self.assertIn('version', resp_obj.json)
        # Fix: the response object was previously discarded; return it so
        # callers can inspect the reply (consistent with the other helpers).
        return resp_obj
    def upload_file(self, contents):
        """
        Serialize `contents` as JSON, upload it to the S3 test bucket with HCA
        checksum metadata, then PUT the object into the DSS as a new file.

        :param contents: JSON-serializable payload for the file.
        :return: (file_uuid, version) of the newly created file.
        """
        test_bucket = get_env("DSS_S3_BUCKET_TEST")
        object_key = generate_test_key()
        payload = json.dumps(contents).encode()
        s3 = boto3.resource('s3')

        with io.BytesIO(payload) as data, ChecksummingSink() as sink:
            # Compute the four HCA checksums over the full payload.
            sink.write(data.read())
            checksums = sink.get_checksums()
            checksum_metadata = {
                'hca-dss-crc32c': checksums['crc32c'].lower(),
                'hca-dss-s3_etag': checksums['s3_etag'].lower(),
                'hca-dss-sha1': checksums['sha1'].lower(),
                'hca-dss-sha256': checksums['sha256'].lower()
            }
            # Rewind so the upload reads from the start of the payload.
            data.seek(0)
            # TODO: consider switching to unmanaged uploader (putobject w/blob)
            s3.Bucket(test_bucket).Object(object_key).upload_fileobj(
                data, ExtraArgs={"Metadata": checksum_metadata})

        source_url = f"s3://{test_bucket}/{object_key}"
        file_uuid = str(uuid4())
        version = datetime_to_version_format(datetime.utcnow())
        url = UrlBuilder().set(path='/v1/files/' + file_uuid)
        url.add_query("version", version)

        resp_obj = self.assertPutResponse(str(url),
                                          requests.codes.created,
                                          json_request_body=dict(
                                              creator_uid=0,
                                              source_url=source_url))
        return file_uuid, resp_obj.json["version"]
Example #4
0
    def upload_file_wait(
        self: typing.Any,
        source_url: str,
        replica: Replica,
        file_uuid: typing.Optional[str] = None,
        file_version: typing.Optional[str] = None,
        bundle_uuid: typing.Optional[str] = None,
        timeout_seconds: int = 120,
        expect_async: typing.Optional[bool] = None,
    ) -> DSSAssertResponse:
        """
        Upload a file.  If the request is being handled asynchronously, wait until the file has landed in the data
        store.

        :param source_url: cloud URL of the object to ingest.
        :param replica: replica polled for the file after a 202 reply.
        :param file_uuid: UUID for the new file; a random one is generated when None.
        :param file_version: file version; a fresh timestamp is used when None.
        :param bundle_uuid: bundle UUID sent in the request body; random when None.
        :param timeout_seconds: how long to poll for an asynchronously-copied file.
        :param expect_async: True -> require 202, False -> require 201,
            None -> accept either.
        :return: the DSSAssertResponse from the PUT request.
        """
        file_uuid = str(uuid.uuid4()) if file_uuid is None else file_uuid
        bundle_uuid = str(uuid.uuid4()) if bundle_uuid is None else bundle_uuid
        # Pin the acceptable status code(s) to the caller's async expectation.
        if expect_async is True:
            expected_codes = requests.codes.accepted
        elif expect_async is False:
            expected_codes = requests.codes.created
        else:
            expected_codes = requests.codes.created, requests.codes.accepted

        if file_version is None:
            timestamp = datetime.datetime.utcnow()
            file_version = datetime_to_version_format(timestamp)
        url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
        url.add_query("version", file_version)

        resp_obj = self.assertPutResponse(
            str(url),
            expected_codes,
            json_request_body=dict(
                bundle_uuid=bundle_uuid,
                creator_uid=0,
                source_url=source_url,
            ),
        )

        # 202 means the copy is asynchronous: poll until the file appears.
        if resp_obj.response.status_code == requests.codes.accepted:
            # hit the GET /files endpoint until we succeed.
            start_time = time.time()
            timeout_time = start_time + timeout_seconds

            while time.time() < timeout_time:
                try:
                    self.assertHeadResponse(
                        f"/v1/files/{file_uuid}?replica={replica.name}",
                        requests.codes.ok)
                    break  # file landed; skips the while-else failure below
                except AssertionError:
                    pass  # not there yet; retry after a short sleep

                time.sleep(1)
            else:
                self.fail("Could not find the output file")

        return resp_obj
Example #5
0
 def _tombstone_bundle(self,
                       replica: Replica,
                       bundle_uuid: str,
                       bundle_version: str = None):
     """
     Tombstone (delete) a bundle via the DSS API and assert a 200 reply.

     :param replica: replica the bundle is tombstoned in.
     :param bundle_uuid: UUID of the bundle to tombstone.
     :param bundle_version: specific version to tombstone; omitted when falsy.
     """
     url = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
     url.add_query("replica", replica.name)
     if bundle_version:
         url.add_query("version", bundle_version)
     self.assertDeleteResponse(
         str(url),
         requests.codes.ok,
         json_request_body={'reason': "notification test"},
         headers=get_auth_header())
    def delete_bundle(
            self,
            replica: Replica,
            bundle_uuid: str,
            bundle_version: typing.Optional[str] = None,
            authorized: bool = True):
        """
        DELETE a bundle and assert the expected status code.

        :param replica: replica to delete from.
        :param bundle_uuid: UUID of the bundle to delete.
        :param bundle_version: specific version to delete; omitted when falsy.
        :param authorized: when False, send unauthorized credentials and
            expect a 403 instead of a 200.
        :return: the DSSAssertResponse from the DELETE request.
        """
        # make delete request
        # (PEP 8 E252: annotated defaults take spaces around '=')
        url_builder = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid).add_query('replica', replica.name)
        if bundle_version:
            url_builder = url_builder.add_query('version', bundle_version)
        url = str(url_builder)

        json_request_body = dict(reason="reason")
        if bundle_version:
            json_request_body['version'] = bundle_version

        expected_code = requests.codes.ok if authorized else requests.codes.forbidden

        # delete and check results
        return self.assertDeleteResponse(
            url,
            expected_code,
            json_request_body=json_request_body,
            headers=get_auth_header(authorized=authorized),
        )
Example #7
0
    def _test_put_auth_errors(self, scheme, test_bucket):
        """
        Exercise authentication error handling on PUT /v1/files.

        :param scheme: storage URL scheme ('s3' or 'gs') for the source object.
        :param test_bucket: bucket name the source URL points at.
        """
        object_key = generate_test_key()
        source_url = f"{scheme}://{test_bucket}/{object_key}"

        file_uuid = str(uuid.uuid4())
        bundle_uuid = str(uuid.uuid4())
        version = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%S.%fZ")

        url = UrlBuilder().set(path='/v1/files/' + file_uuid)
        url.add_query("version", version)
        request_body = dict(bundle_uuid=bundle_uuid,
                            creator_uid=0,
                            source_url=source_url)
        self._test_auth_errors('put', str(url), json_request_body=request_body)
Example #8
0
 def build_url(self, url_params=None):
     """
     Build the /v1/search URL for this test's replica.

     :param url_params: optional mapping of extra query parameters to append.
     :return: the fully-built URL as a string.
     """
     url = UrlBuilder().set(path="/v1/search").add_query(
         "replica", self.replica.name)
     if url_params:
         # Iterate key/value pairs directly instead of re-indexing the dict.
         for name, value in url_params.items():
             url = url.add_query(name, value)
     return str(url)
Example #9
0
    def upload_file(app, contents, replica):
        """
        Serialize `contents` as JSON, upload it (with HCA checksum metadata) to
        the test bucket of the given replica, then PUT it into the DSS.

        :param app: HTTP client used to issue the PUT request.
        :param contents: JSON-serializable payload for the file.
        :param replica: 'aws' or 'gcp'.
        :return: (file_uuid, version) of the newly created file.
        :raises ValueError: for an unrecognized replica (previously this
            surfaced as a confusing NameError on the unbound `source_url`).
        """
        src_key = generate_test_key()
        encoded = json.dumps(contents).encode()
        chunk_size = get_s3_multipart_chunk_size(len(encoded))
        with io.BytesIO(encoded) as fh, ChecksummingSink(
                write_chunk_size=chunk_size) as sink:
            sink.write(fh.read())
            sums = sink.get_checksums()
            metadata = {
                'hca-dss-crc32c': sums['crc32c'].lower(),
                'hca-dss-s3_etag': sums['s3_etag'].lower(),
                'hca-dss-sha1': sums['sha1'].lower(),
                'hca-dss-sha256': sums['sha256'].lower()
            }
            # Rewind so the upload reads from the start of the payload.
            fh.seek(0)

            if replica == 'gcp':
                gs_test_bucket = get_env("DSS_GS_BUCKET_TEST")
                gcp_client = gs_storage.Client.from_service_account_json(
                    os.getenv("GOOGLE_APPLICATION_CREDENTIALS"))
                gs_bucket = gcp_client.bucket(gs_test_bucket)
                blob = gs_bucket.blob(src_key)
                blob.upload_from_file(fh, content_type="application/json")
                # Metadata must be patched onto the blob after the upload.
                blob.metadata = metadata
                blob.patch()
                source_url = f"gs://{gs_test_bucket}/{src_key}"
            elif replica == 'aws':
                # TODO: consider switching to unmanaged uploader (putobject w/blob)
                s3_test_bucket = get_env("DSS_S3_BUCKET_TEST")
                s3 = boto3.resource('s3')
                s3.Bucket(s3_test_bucket).Object(src_key).upload_fileobj(
                    fh, ExtraArgs={"Metadata": metadata})
                source_url = f"s3://{s3_test_bucket}/{src_key}"
            else:
                # Fail fast instead of hitting a NameError on source_url below.
                raise ValueError(f"Unknown replica: {replica!r}")

        file_uuid = str(uuid4())
        version = datetime_to_version_format(datetime.utcnow())
        urlbuilder = UrlBuilder().set(path='/v1/files/' + file_uuid)
        urlbuilder.add_query("version", version)

        resp_obj = app.put(str(urlbuilder),
                           json=dict(creator_uid=0, source_url=source_url),
                           headers=get_auth_header())
        resp_obj.raise_for_status()
        return file_uuid, resp_obj.json()["version"]
Example #10
0
    def fetch_collection_paging_response(self, codes, replica: str,
                                         per_page: int):
        """
        GET /collections and iterate through the paging responses containing all of a user's collections.

        Follows the RFC 5988 `Link` headers until the final (200) page.

        :param codes: expected status code(s) for each GET request; when this
            is `requests.codes.bad_request`, return immediately after the
            first request.
        :param replica: replica name to query.
        :param per_page: page size to request.
        :return: True if at least one paged (206) reply was seen, else False.
        """
        url = UrlBuilder().set(path="/v1/collections/")
        url.add_query("replica", replica)
        url.add_query("per_page", str(per_page))
        resp_obj = self.assertGetResponse(
            str(url), codes, headers=get_auth_header(authorized=True))

        if codes == requests.codes.bad_request:
            return True

        link_header = resp_obj.response.headers.get('Link')
        paging_response = False

        while link_header:
            # Make sure we're getting the expected response status code
            self.assertEqual(resp_obj.response.status_code,
                             requests.codes.partial)
            paging_response = True
            link = parse_header_links(link_header)[0]
            # Fix: assertEquals is a deprecated alias of assertEqual.
            self.assertEqual(link['rel'], 'next')
            parsed = urlsplit(link['url'])
            url = UrlBuilder().set(path=parsed.path,
                                   query=parse_qsl(parsed.query),
                                   fragment=parsed.fragment)
            self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'],
                             'true')
            self.assertEqual(
                resp_obj.response.headers['X-OpenAPI-Paginated-Content-Key'],
                'collections')
            resp_obj = self.assertGetResponse(
                str(url),
                expected_code=codes,
                headers=get_auth_header(authorized=True))
            link_header = resp_obj.response.headers.get('Link')

        # The final page must not advertise pagination and must be a 200.
        self.assertEqual(resp_obj.response.headers['X-OpenAPI-Pagination'],
                         'false')
        self.assertEqual(resp_obj.response.status_code, requests.codes.ok)
        return paging_response
Example #11
0
    def test_file_put_large_incorrect_s3_etag(self) -> None:
        """
        A large file uploaded with an incompatible S3 part size (hence a
        mismatched s3_etag) must end up unprocessable (422) on both replicas
        after the asynchronous copy fails.
        """
        bucket = self.s3_test_bucket
        src_key = generate_test_key()
        # One byte over the threshold forces the asynchronous copy path.
        src_data = os.urandom(ASYNC_COPY_THRESHOLD + 1)

        # upload file with incompatible s3 part size
        self._upload_file_to_mock_ingest(S3Uploader,
                                         bucket,
                                         src_key,
                                         src_data,
                                         s3_part_size=6 * 1024 * 1024)

        file_uuid = str(uuid.uuid4())
        timestamp = datetime.datetime.utcnow()
        file_version = datetime_to_version_format(timestamp)
        url = UrlBuilder().set(path=f"/v1/files/{file_uuid}")
        url.add_query("version", file_version)
        source_url = f"s3://{bucket}/{src_key}"

        # put file into DSS, starting an async copy which will fail
        expected_codes = requests.codes.accepted,  # one-tuple: only 202 is OK
        self.assertPutResponse(str(url),
                               expected_codes,
                               json_request_body=dict(
                                   file_uuid=file_uuid,
                                   creator_uid=0,
                                   source_url=source_url,
                               ),
                               headers=get_auth_header())

        # should eventually get unprocessable after async copy fails
        @eventually(120, 1)
        def tryHead():
            # NOTE(review): @eventually(120, 1) presumably retries this until
            # it stops raising, for up to ~120s at ~1s intervals — confirm.
            self.assertHeadResponse(
                f"/v1/files/{file_uuid}?replica=aws&version={file_version}",
                requests.codes.unprocessable)

        tryHead()

        # should get unprocessable on GCP too
        self.assertHeadResponse(
            f"/v1/files/{file_uuid}?replica=gcp&version={file_version}",
            requests.codes.unprocessable)
Example #12
0
    def put_bundles_reponse(self, path, replica, expected_code):
        """
        Uploads a file from fixtures to the dss, and then adds it to a bundle with the 'path' name.
        Asserts expected codes were received at each point.
        """
        # source a file to upload
        fixtures_bucket = self.get_test_fixture_bucket(replica.name)
        file_version = datetime_to_version_format(datetime.datetime.utcnow())
        bundle_version = datetime_to_version_format(datetime.datetime.utcnow())
        bundle_uuid = str(uuid.uuid4())
        file_uuid = str(uuid.uuid4())
        storage_schema = 'gs' if replica.name != 'aws' else 's3'

        # upload a file from test fixtures
        source = f"{storage_schema}://{fixtures_bucket}/test_good_source_data/0"
        self.upload_file_wait(source,
                              replica,
                              file_uuid,
                              file_version=file_version,
                              bundle_uuid=bundle_uuid)

        # add that file to a bundle
        bundle_url = UrlBuilder().set(path="/v1/bundles/" + bundle_uuid)
        bundle_url.add_query("replica", replica.name)
        bundle_url.add_query("version", bundle_version)

        bundle_files = [
            dict(uuid=file_uuid,
                 version=file_version,
                 name=path,
                 indexed=False)
        ]
        self.assertPutResponse(str(bundle_url),
                               expected_code,
                               json_request_body=dict(files=bundle_files,
                                                      creator_uid=0),
                               headers=get_auth_header())
    def test_add_query(self):
        """add_query must append a parameter without clobbering existing ones."""
        builder = UrlBuilder().set(
            scheme="https",
            netloc="humancellatlas.org",
            path="/abc",
            query=[("ghi", "1"), ("ghi", "2")],
            fragment="def",
        )

        # Before: only the seeded "ghi" parameter exists.
        self.assertTrue(builder.has_query("ghi"))
        self.assertFalse(builder.has_query("abc"))
        self.assertEqual("https://humancellatlas.org/abc?ghi=1&ghi=2#def",
                         str(builder))

        builder.add_query("abc", "3")

        # After: both parameters are present and "abc" is appended at the end.
        self.assertTrue(builder.has_query("ghi"))
        self.assertTrue(builder.has_query("abc"))
        self.assertEqual(
            "https://humancellatlas.org/abc?ghi=1&ghi=2&abc=3#def",
            str(builder))