Code Example #1
    def should_log_s3_batch_response(self, head_object_mock: MagicMock,
                                     create_job_mock: MagicMock) -> None:
        # Given

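        # Stub the S3 Batch create_job response; the handler is expected to
        # log it verbatim as JSON.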
        create_job_mock.return_value = response = {"JobId": "Some Response"}
        expected_response_log = json.dumps({S3_BATCH_RESPONSE_KEY: response})
        head_object_mock.return_value = {"ETag": any_etag()}

        with Dataset() as dataset, patch.object(
                self.logger, "debug") as logger_mock, patch(
                    "backend.import_dataset.task.smart_open"):

            # When
            lambda_handler(
                {
                    DATASET_ID_KEY: dataset.dataset_id,
                    DATASET_PREFIX_KEY: dataset.dataset_prefix,
                    METADATA_URL_KEY: any_s3_url(),
                    VERSION_ID_KEY: any_dataset_version_id(),
                },
                any_lambda_context(),
            )

            # Then
            logger_mock.assert_any_call(expected_response_log)
Code Example #2
    def should_log_assets_added_to_manifest(
        self,
        head_object_mock: MagicMock,
        subtests: SubTests,
    ) -> None:
        # Given
        with Dataset() as dataset:
            version_id = any_dataset_version_id()
            asset_id = (f"{DATASET_ID_PREFIX}{dataset.dataset_id}"
                        f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}")
            head_object_mock.return_value = {"ETag": any_etag()}

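            # Register one metadata entry (no multihash) and one asset entry
            # (with multihash) against the same dataset version.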
            with ProcessingAsset(
                    asset_id=asset_id, multihash=None, url=any_s3_url()
            ) as metadata_processing_asset, ProcessingAsset(
                    asset_id=asset_id,
                    multihash=any_hex_multihash(),
                    url=any_s3_url(),
            ) as processing_asset, patch.object(
                    self.logger, "debug"
            ) as logger_mock, patch(
                    "backend.import_dataset.task.smart_open"
            ), patch(
                    "backend.import_dataset.task.S3CONTROL_CLIENT.create_job"):

                expected_asset_log = dumps(
                    {"Adding file to manifest": processing_asset.url})
                expected_metadata_log = dumps(
                    {"Adding file to manifest": metadata_processing_asset.url})

                # When
                lambda_handler(
                    {
                        DATASET_ID_KEY: dataset.dataset_id,
                        DATASET_PREFIX_KEY: dataset.dataset_prefix,
                        METADATA_URL_KEY: any_s3_url(),
                        VERSION_ID_KEY: version_id,
                    },
                    any_lambda_context(),
                )

                # Then
                with subtests.test():
                    logger_mock.assert_any_call(expected_asset_log)
                with subtests.test():
                    logger_mock.assert_any_call(expected_metadata_log)
Code Example #3
    def should_log_assets_added_to_manifest(
        self,
        head_object_mock: MagicMock,
        create_job_mock: MagicMock,  # pylint:disable=unused-argument
        subtests: SubTests,
    ) -> None:
        # Given
        with Dataset() as dataset:
            version_id = any_dataset_version_id()
            asset_id = f"DATASET#{dataset.dataset_id}#VERSION#{version_id}"
            head_object_mock.return_value = {"ETag": any_etag()}

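            # As above: one metadata entry without a multihash and one asset
            # entry with a multihash, both under the same asset ID.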
            with ProcessingAsset(
                    asset_id=asset_id, multihash=None, url=any_s3_url()
            ) as metadata_processing_asset, ProcessingAsset(
                    asset_id=asset_id,
                    multihash=any_hex_multihash(),
                    url=any_s3_url(),
            ) as processing_asset, patch.object(
                    self.logger, "debug"
            ) as logger_mock, patch(
                    "backend.import_dataset.task.smart_open"):

                expected_asset_log = dumps(
                    {"Adding file to manifest": processing_asset.url})
                expected_metadata_log = dumps(
                    {"Adding file to manifest": metadata_processing_asset.url})

                # When
                lambda_handler(
                    {
                        DATASET_ID_KEY: dataset.dataset_id,
                        METADATA_URL_KEY: any_s3_url(),
                        VERSION_ID_KEY: version_id,
                    },
                    any_lambda_context(),
                )

                # Then
                with subtests.test():
                    logger_mock.assert_any_call(expected_asset_log)
                with subtests.test():
                    logger_mock.assert_any_call(expected_metadata_log)
Code Example #4
    def should_log_schema_validation_warning(
            self, validate_schema_mock: MagicMock) -> None:
        # Given

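        # Force schema validation to fail so the handler logs the error as a warning.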
        error_message = "Some error message"
        validate_schema_mock.side_effect = ValidationError(error_message)
        expected_log = dumps({ERROR_KEY: error_message})

        with patch.object(self.logger, "warning") as logger_mock:
            # When
            lambda_handler(
                {
                    METADATA_URL_KEY: any_s3_url(),
                    VERSION_ID_KEY: any_dataset_version_id()
                },
                any_lambda_context(),
            )

            # Then
            logger_mock.assert_any_call(expected_log)
Code Example #5
def should_return_required_property_error_when_missing_version_id() -> None:
    # When

    response = lambda_handler(
        {
            DATASET_ID_KEY: any_dataset_id(),
            METADATA_URL_KEY: any_s3_url()
        }, any_lambda_context())

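    # Then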
    assert response == {
        ERROR_MESSAGE_KEY: f"'{VERSION_ID_KEY}' is a required property"
    }
Code Example #6
    def should_log_payload(self, head_object_mock: MagicMock) -> None:
        # Given
        head_object_mock.return_value = {"ETag": any_etag()}

        with patch(
                "backend.import_dataset.task.S3CONTROL_CLIENT.create_job"
        ), Dataset() as dataset, patch.object(
                self.logger, "debug"
        ) as logger_mock, patch(
                "backend.import_dataset.task.validate"
        ), patch("backend.import_dataset.task.smart_open"):
            event = {
                DATASET_ID_KEY: dataset.dataset_id,
                DATASET_PREFIX_KEY: dataset.dataset_prefix,
                METADATA_URL_KEY: any_s3_url(),
                VERSION_ID_KEY: any_dataset_version_id(),
            }
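            # The handler should log the entire incoming event payload as JSON.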
            expected_payload_log = dumps({EVENT_KEY: event})

            # When
            lambda_handler(event, any_lambda_context())

            # Then
            logger_mock.assert_any_call(expected_payload_log)
Code Example #7
def should_batch_copy_files_to_storage(
    s3_client: S3Client,
    s3_control_client: S3ControlClient,
    sts_client: STSClient,
    subtests: SubTests,
) -> None:
    # pylint: disable=too-many-locals
    # Given two metadata files with an asset each, all within a prefix
    original_prefix = any_safe_filename()

    root_asset_name = any_asset_name()
    root_asset_filename = any_safe_filename()
    root_asset_content = any_file_contents()
    root_asset_multihash = sha256_hex_digest_to_multihash(
        sha256(root_asset_content).hexdigest())
    child_asset_name = any_asset_name()
    child_asset_filename = any_safe_filename()
    child_asset_content = any_file_contents()
    child_asset_multihash = sha256_hex_digest_to_multihash(
        sha256(child_asset_content).hexdigest())

    root_metadata_filename = any_safe_filename()
    child_metadata_filename = any_safe_filename()

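    # Upload both assets plus two STAC collection files (the root linking to
    # the child) into the staging bucket under the original prefix.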
    with S3Object(
            BytesIO(initial_bytes=root_asset_content),
            ResourceName.STAGING_BUCKET_NAME.value,
            f"{original_prefix}/{root_asset_filename}",
    ) as root_asset_s3_object, S3Object(
            BytesIO(initial_bytes=child_asset_content),
            ResourceName.STAGING_BUCKET_NAME.value,
            f"{original_prefix}/{child_asset_filename}",
    ) as child_asset_s3_object, S3Object(
            BytesIO(initial_bytes=dumps({
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                STAC_ASSETS_KEY: {
                    child_asset_name: {
                        STAC_HREF_KEY: child_asset_s3_object.url,
                        STAC_FILE_CHECKSUM_KEY: child_asset_multihash,
                    }
                },
            }).encode()),
            ResourceName.STAGING_BUCKET_NAME.value,
            f"{original_prefix}/{child_metadata_filename}",
    ) as child_metadata_s3_object, S3Object(
            BytesIO(initial_bytes=dumps({
                **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                STAC_ASSETS_KEY: {
                    root_asset_name: {
                        STAC_HREF_KEY: root_asset_s3_object.url,
                        STAC_FILE_CHECKSUM_KEY: root_asset_multihash,
                    },
                },
                STAC_LINKS_KEY: [{
                    STAC_HREF_KEY: child_metadata_s3_object.url,
                    "rel": "child"
                }],
            }).encode()),
            ResourceName.STAGING_BUCKET_NAME.value,
            f"{original_prefix}/{root_metadata_filename}",
    ) as root_metadata_s3_object, Dataset() as dataset:
        version_id = any_dataset_version_id()
        asset_id = (f"{DATASET_ID_PREFIX}{dataset.dataset_id}"
                    f"{DB_KEY_SEPARATOR}{VERSION_ID_PREFIX}{version_id}")

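        # Register every staged file as a processing asset for this dataset version.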
        with ProcessingAsset(
                asset_id=asset_id, url=root_metadata_s3_object.url
        ), ProcessingAsset(
                asset_id=asset_id, url=child_metadata_s3_object.url
        ), ProcessingAsset(
                asset_id=asset_id,
                url=root_asset_s3_object.url,
                multihash=root_asset_multihash,
        ), ProcessingAsset(
                asset_id=asset_id,
                url=child_asset_s3_object.url,
                multihash=child_asset_multihash,
        ):
            # When
            try:
                response = lambda_handler(
                    {
                        DATASET_ID_KEY: dataset.dataset_id,
                        DATASET_PREFIX_KEY: dataset.dataset_prefix,
                        VERSION_ID_KEY: version_id,
                        METADATA_URL_KEY: root_metadata_s3_object.url,
                    },
                    any_lambda_context(),
                )

                account_id = sts_client.get_caller_identity()["Account"]

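                # Wait for the metadata and asset S3 Batch copy jobs to finish.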
                metadata_copy_job_result, asset_copy_job_result = wait_for_copy_jobs(
                    response,
                    account_id,
                    s3_control_client,
                    subtests,
                )
            finally:
                # Then
                new_prefix = (
                    f"{dataset.title}{DATASET_KEY_SEPARATOR}{dataset.dataset_id}/{version_id}"
                )
                storage_bucket_prefix = f"{S3_URL_PREFIX}{ResourceName.STORAGE_BUCKET_NAME.value}/"

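                # The copied metadata should reference assets and child
                # metadata by relative filename rather than by staging URL.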
                new_root_metadata_key = f"{new_prefix}/{root_metadata_filename}"
                expected_root_metadata = dumps({
                    **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                    STAC_ASSETS_KEY: {
                        root_asset_name: {
                            STAC_HREF_KEY: root_asset_filename,
                            STAC_FILE_CHECKSUM_KEY: root_asset_multihash,
                        },
                    },
                    STAC_LINKS_KEY: [{
                        STAC_HREF_KEY: child_metadata_filename,
                        "rel": "child"
                    }],
                }).encode()
                with subtests.test(msg="Root metadata content"), smart_open(
                        f"{storage_bucket_prefix}{new_root_metadata_key}"
                ) as new_root_metadata_file:
                    assert expected_root_metadata == new_root_metadata_file.read()

                with subtests.test(msg="Delete root metadata object"):
                    delete_s3_key(ResourceName.STORAGE_BUCKET_NAME.value,
                                  new_root_metadata_key, s3_client)

                new_child_metadata_key = f"{new_prefix}/{child_metadata_filename}"
                expected_child_metadata = dumps({
                    **deepcopy(MINIMAL_VALID_STAC_COLLECTION_OBJECT),
                    STAC_ASSETS_KEY: {
                        child_asset_name: {
                            STAC_HREF_KEY: child_asset_filename,
                            STAC_FILE_CHECKSUM_KEY: child_asset_multihash,
                        }
                    },
                }).encode()
                with subtests.test(msg="Child metadata content"), smart_open(
                        f"{storage_bucket_prefix}{new_child_metadata_key}"
                ) as new_child_metadata_file:
                    assert expected_child_metadata == new_child_metadata_file.read()

                with subtests.test(msg="Delete child metadata object"):
                    delete_s3_key(ResourceName.STORAGE_BUCKET_NAME.value,
                                  new_child_metadata_key, s3_client)

                # Then the root asset file is in the root prefix
                with subtests.test(msg="Delete root asset object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value,
                        f"{new_prefix}/{root_asset_filename}",
                        s3_client,
                    )

                # Then the child asset file is in the root prefix
                with subtests.test(msg="Delete child asset object"):
                    delete_s3_key(
                        ResourceName.STORAGE_BUCKET_NAME.value,
                        f"{new_prefix}/{child_asset_filename}",
                        s3_client,
                    )

                # Cleanup
                delete_copy_job_files(
                    metadata_copy_job_result,
                    asset_copy_job_result,
                    ResourceName.STORAGE_BUCKET_NAME.value,
                    s3_client,
                    subtests,
                )