def wait_for_role(sts: STSClient, role_arn: str) -> None:
    """
    Verify that it is possible to assume the given role.

    In practice this always seems to take less than 10 seconds, but give it
    up to 90 to reduce any chance of flakiness.
    """
    for i in range(90, 0, -1):
        try:
            sts.assume_role(RoleArn=role_arn, RoleSessionName="mzcomposevalidatecreated")
        except Exception as e:
            if i % 10 == 0:
                print(f"Unable to assume role, {i} seconds remaining: {e}")
            time.sleep(1)
            continue
        print(f"Successfully assumed role {role_arn}")
        break
    else:
        raise UIError("Never able to assume role")


def should_batch_copy_files_to_storage(
    s3_client: S3Client,
    s3_control_client: S3ControlClient,
    sts_client: STSClient,
    subtests: SubTests,
) -> None:
    # pylint: disable=too-many-locals
    # Given two metadata files with an asset each, all within a prefix
    original_prefix = any_safe_filename()

    root_asset_name = any_asset_name()
    root_asset_filename = any_safe_filename()
    root_asset_content = any_file_contents()
    root_asset_multihash = sha256_hex_digest_to_multihash(
        sha256(root_asset_content).hexdigest()
    )

    child_asset_name = any_asset_name()
    child_asset_filename = any_safe_filename()
    child_asset_content = any_file_contents()
    child_asset_multihash = sha256_hex_digest_to_multihash(
        sha256(child_asset_content).hexdigest()
    )

    root_metadata_filename = any_safe_filename()
    child_metadata_filename = any_safe_filename()

    with S3Object(
        BytesIO(initial_bytes=root_asset_content),
        ResourceName.STAGING_BUCKET_NAME.value,
        f"{original_prefix}/{root_asset_filename}",
    ) as root_asset_s3_object, S3Object(
        BytesIO(initial_bytes=child_asset_content),
        ResourceName.STAGING_BUCKET_NAME.value,
        f"{original_prefix}/{child_asset_filename}",
    ) as child_asset_s3_object, S3Object(
        BytesIO(
            initial_bytes=dumps(
                {
                    **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                    STAC_ASSETS_KEY: {
                        child_asset_name: {
                            STAC_HREF_KEY: child_asset_s3_object.url,
                            STAC_FILE_CHECKSUM_KEY: child_asset_multihash,
                        }
                    },
                }
            ).encode()
        ),
        ResourceName.STAGING_BUCKET_NAME.value,
        f"{original_prefix}/{child_metadata_filename}",
    ) as child_metadata_s3_object, S3Object(
        BytesIO(
            initial_bytes=dumps(
                {
                    **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                    STAC_ASSETS_KEY: {
                        root_asset_name: {
                            STAC_HREF_KEY: root_asset_s3_object.url,
                            STAC_FILE_CHECKSUM_KEY: root_asset_multihash,
                        },
                    },
                    STAC_LINKS_KEY: [
                        {STAC_HREF_KEY: child_metadata_s3_object.url, "rel": "child"}
                    ],
                }
            ).encode()
        ),
        ResourceName.STAGING_BUCKET_NAME.value,
        f"{original_prefix}/{root_metadata_filename}",
    ) as root_metadata_s3_object, Dataset() as dataset:
        version_id = any_dataset_version_id()
        asset_id = (
            f"{DATASET_ID_PREFIX}{dataset.dataset_id}"
            f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}"
        )

        with ProcessingAsset(
            asset_id=asset_id, url=root_metadata_s3_object.url
        ), ProcessingAsset(
            asset_id=asset_id, url=child_metadata_s3_object.url
        ), ProcessingAsset(
            asset_id=asset_id, url=root_asset_s3_object.url, multihash=root_asset_multihash
        ), ProcessingAsset(
            asset_id=asset_id, url=child_asset_s3_object.url, multihash=child_asset_multihash
        ):
            # When
            try:
                response = lambda_handler(
                    {
                        DATASET_ID_KEY: dataset.dataset_id,
                        DATASET_PREFIX_KEY: dataset.dataset_prefix,
                        VERSION_ID_KEY: version_id,
                        METADATA_URL_KEY: root_metadata_s3_object.url,
                    },
                    any_lambda_context(),
                )

                account_id = sts_client.get_caller_identity()["Account"]

                metadata_copy_job_result, asset_copy_job_result = wait_for_copy_jobs(
                    response,
                    account_id,
                    s3_control_client,
                    subtests,
                )
            finally:
                # Then
                new_prefix = (
                    f"{dataset.title}{DATASET_KEY_SEPARATOR}{dataset.dataset_id}/{version_id}"
                )
                storage_bucket_prefix = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/"

                new_root_metadata_key = f"{new_prefix}/{root_metadata_filename}"
                expected_root_metadata = dumps(
                    {
                        **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                        STAC_ASSETS_KEY: {
                            root_asset_name: {
                                STAC_HREF_KEY: root_asset_filename,
                                STAC_FILE_CHECKSUM_KEY: root_asset_multihash,
                            },
                        },
                        STAC_LINKS_KEY: [
                            {STAC_HREF_KEY: child_metadata_filename, "rel": "child"}
                        ],
                    }
                ).encode()
                with subtests.test(msg="Root metadata content"), smart_open(
                    f"{storage_bucket_prefix}{new_root_metadata_key}"
                ) as new_root_metadata_file:
                    assert expected_root_metadata == new_root_metadata_file.read()

                with subtests.test(msg="Delete root metadata object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value, new_root_metadata_key, s3_client
                    )

                new_child_metadata_key = f"{new_prefix}/{child_metadata_filename}"
                expected_child_metadata = dumps(
                    {
                        **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                        STAC_ASSETS_KEY: {
                            child_asset_name: {
                                STAC_HREF_KEY: child_asset_filename,
                                STAC_FILE_CHECKSUM_KEY: child_asset_multihash,
                            }
                        },
                    }
                ).encode()
                with subtests.test(msg="Child metadata content"), smart_open(
                    f"{storage_bucket_prefix}{new_child_metadata_key}"
                ) as new_child_metadata_file:
                    assert expected_child_metadata == new_child_metadata_file.read()

                with subtests.test(msg="Delete child metadata object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value, new_child_metadata_key, s3_client
                    )

                # Then the root asset file is in the root prefix
                with subtests.test(msg="Delete root asset object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value,
                        f"{new_prefix}/{root_asset_filename}",
                        s3_client,
                    )

                # Then the child asset file is in the root prefix
                with subtests.test(msg="Delete child asset object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value,
                        f"{new_prefix}/{child_asset_filename}",
                        s3_client,
                    )

                # Cleanup
                delete_copy_job_files(
                    metadata_copy_job_result,
                    asset_copy_job_result,
                    ResourceName.STORAGE_BUCKET_NAME.value,
                    s3_client,
                    subtests,
                )


def should_successfully_run_dataset_version_creation_process_with_single_asset(
    # pylint:disable=too-many-arguments
    self,
    step_functions_client: SFNClient,
    lambda_client: LambdaClient,
    s3_client: S3Client,
    s3_control_client: S3ControlClient,
    sts_client: STSClient,
    subtests: SubTests,
) -> None:
    # pylint: disable=too-many-locals
    key_prefix = any_safe_file_path()

    root_metadata_filename = any_safe_filename()
    child_metadata_filename = any_safe_filename()

    asset_contents = any_file_contents()
    asset_filename = any_safe_filename()

    with S3Object(
        file_object=BytesIO(initial_bytes=asset_contents),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{asset_filename}",
    ) as asset_s3_object, S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                STAC_ASSETS_KEY: {
                    any_asset_name(): {
                        STAC_HREF_KEY: asset_s3_object.url,
                        STAC_FILE_CHECKSUM_KEY: sha256_hex_digest_to_multihash(
                            sha256(asset_contents).hexdigest()
                        ),
                    },
                },
            }
        ),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{child_metadata_filename}",
    ) as child_metadata_file, S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                STAC_LINKS_KEY: [
                    {STAC_HREF_KEY: child_metadata_file.url, STAC_REL_KEY: STAC_REL_CHILD}
                ],
            }
        ),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{root_metadata_filename}",
    ) as root_metadata_file, Dataset() as dataset:
        # When
        try:
            resp = lambda_client.invoke(
                FunctionName=ResourceName.DATASET_VERSIONS_ENDPOINT_FUNCTION_NAME.value,
                Payload=json.dumps(
                    {
                        HTTP_METHOD_KEY: "POST",
                        BODY_KEY: {
                            DATASET_ID_SHORT_KEY: dataset.dataset_id,
                            METADATA_URL_KEY: root_metadata_file.url,
                        },
                    }
                ).encode(),
            )
            json_resp = json.load(resp["Payload"])

            with subtests.test(msg="Dataset Versions endpoint returns success"):
                assert json_resp.get(STATUS_CODE_KEY) == HTTPStatus.CREATED, json_resp

            with subtests.test(msg="Should complete Step Function successfully"):
                LOGGER.info("Executed State Machine: %s", json_resp)

                # Then poll for State Machine State
                while (
                    execution := step_functions_client.describe_execution(
                        executionArn=json_resp[BODY_KEY][EXECUTION_ARN_KEY]
                    )
                )["status"] == "RUNNING":
                    LOGGER.info(  # pragma: no cover
                        "Polling for State Machine state %s", "." * 6
                    )
                    time.sleep(5)  # pragma: no cover

            assert (execution_output := execution.get("output")), execution

            account_id = sts_client.get_caller_identity()["Account"]

            import_dataset_response = json.loads(execution_output)[IMPORT_DATASET_KEY]
            metadata_copy_job_result, asset_copy_job_result = wait_for_copy_jobs(
                import_dataset_response,
                account_id,
                s3_control_client,
                subtests,
            )
        finally:


def should_successfully_run_dataset_version_creation_process_with_multiple_assets(
    # pylint:disable=too-many-arguments
    self,
    step_functions_client: SFNClient,
    lambda_client: LambdaClient,
    s3_client: S3Client,
    s3_control_client: S3ControlClient,
    sts_client: STSClient,
    subtests: SubTests,
) -> None:
    # pylint: disable=too-many-locals
    key_prefix = any_safe_file_path()

    collection_metadata_filename = any_safe_filename()
    catalog_metadata_filename = any_safe_filename()
    item_metadata_filename = any_safe_filename()

    collection_metadata_url = (
        f"s3://{self.staging_bucket_name}/{key_prefix}/{collection_metadata_filename}"
    )
    catalog_metadata_url = (
        f"s3://{self.staging_bucket_name}/{key_prefix}/{catalog_metadata_filename}"
    )
    item_metadata_url = f"s3://{self.staging_bucket_name}/{key_prefix}/{item_metadata_filename}"

    first_asset_contents = any_file_contents()
    first_asset_filename = any_safe_filename()
    second_asset_contents = any_file_contents()
    second_asset_filename = any_safe_filename()

    with S3Object(
        file_object=BytesIO(initial_bytes=first_asset_contents),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{first_asset_filename}",
    ) as first_asset_s3_object, S3Object(
        file_object=BytesIO(initial_bytes=second_asset_contents),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{second_asset_filename}",
    ) as second_asset_s3_object, S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_CATALOG_OBJECT),
                "links": [
                    {"href": collection_metadata_url, "rel": "child"},
                    {"href": catalog_metadata_url, "rel": "root"},
                    {"href": catalog_metadata_url, "rel": "self"},
                ],
            }
        ),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{catalog_metadata_filename}",
    ) as catalog_metadata_file, S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                "assets": {
                    any_asset_name(): {
                        "href": second_asset_s3_object.url,
                        "file:checksum": sha256_hex_digest_to_multihash(
                            sha256(second_asset_contents).hexdigest()
                        ),
                    },
                },
                "links": [
                    {"href": item_metadata_url, "rel": "child"},
                    {"href": catalog_metadata_url, "rel": "root"},
                    {"href": collection_metadata_url, "rel": "self"},
                ],
            }
        ),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{collection_metadata_filename}",
    ), S3Object(
        file_object=json_dict_to_file_object(
            {
                **deepcopy(MINIMAL_VALID_STAC_ITEM_OBJECT),
                "assets": {
                    any_asset_name(): {
                        "href": first_asset_s3_object.url,
                        "file:checksum": sha256_hex_digest_to_multihash(
                            sha256(first_asset_contents).hexdigest()
                        ),
                    },
                },
                "links": [
                    {"href": catalog_metadata_url, "rel": "root"},
                    {"href": item_metadata_url, "rel": "self"},
                ],
            }
        ),
        bucket_name=self.staging_bucket_name,
        key=f"{key_prefix}/{item_metadata_filename}",
    ), Dataset() as dataset:
        # When
        try:
            resp = lambda_client.invoke(
                FunctionName=ResourceName.DATASET_VERSIONS_ENDPOINT_FUNCTION_NAME.value,
                Payload=json.dumps(
                    {
                        "httpMethod": "POST",
                        "body": {
                            "id": dataset.dataset_id,
                            "metadata-url": catalog_metadata_file.url,
                        },
                    }
                ).encode(),
            )
            json_resp = json.load(resp["Payload"])

            with subtests.test(msg="Dataset Versions endpoint returns success"):
                assert json_resp.get("statusCode") == HTTPStatus.CREATED, json_resp

            with subtests.test(msg="Should complete Step Function successfully"):
                LOGGER.info("Executed State Machine: %s", json_resp)

                # Then poll for State Machine State
                while (
                    execution := step_functions_client.describe_execution(
                        executionArn=json_resp["body"]["execution_arn"]
                    )
                )["status"] == "RUNNING":
                    LOGGER.info("Polling for State Machine state %s", "." * 6)
                    time.sleep(5)

                assert execution["status"] == "SUCCEEDED", execution

            assert (execution_output := execution.get("output")), execution

            account_id = sts_client.get_caller_identity()["Account"]

            import_dataset_response = json.loads(execution_output)["import_dataset"]
            metadata_copy_job_result, asset_copy_job_result = wait_for_copy_jobs(
                import_dataset_response, account_id, s3_control_client, subtests
            )
        finally: