예제 #1
0
    def execute(self, context: 'Context') -> str:
        """
        Copy a Glacier vault inventory into a Google Cloud Storage object.

        Initiates an inventory retrieval for ``self.vault_name``, streams the
        job output into a local temporary file in pieces of
        ``self.chunk_size``, and uploads that file as ``self.object_name`` in
        ``self.bucket_name``.

        :param context: task context; not used by this method.
        :return: ``gs://`` URI of the uploaded object.
        """
        glacier_hook = GlacierHook(aws_conn_id=self.aws_conn_id)
        gcs_hook = GCSHook(
            gcp_conn_id=self.gcp_conn_id,
            delegate_to=self.delegate_to,
            impersonation_chain=self.impersonation_chain,
        )
        job_id = glacier_hook.retrieve_inventory(vault_name=self.vault_name)

        with tempfile.NamedTemporaryFile() as temp_file:
            # NOTE(review): the results are requested right after the job is
            # initiated; presumably the caller guarantees the job has already
            # completed (e.g. via an upstream sensor) -- confirm.
            glacier_data = glacier_hook.retrieve_inventory_results(
                vault_name=self.vault_name, job_id=job_id["jobId"])
            # Read the file content in chunks using StreamingBody
            # https://botocore.amazonaws.com/v1/documentation/api/latest/reference/response.html
            stream = glacier_data["body"]
            # StreamingBody exposes ``iter_chunks`` (plural); ``iter_chunk``
            # does not exist and raises AttributeError on a real response.
            for chunk in stream.iter_chunks(chunk_size=self.chunk_size):
                temp_file.write(chunk)
            # Push buffered bytes to disk so the upload, which re-opens the
            # file by name, sees the complete content.
            temp_file.flush()
            gcs_hook.upload(
                bucket_name=self.bucket_name,
                object_name=self.object_name,
                filename=temp_file.name,
                gzip=self.gzip,
            )
        return f"gs://{self.bucket_name}/{self.object_name}"
예제 #2
0
class TestAmazonGlacierHook(unittest.TestCase):
    """Unit tests for ``GlacierHook`` with ``get_conn`` replaced by a mock."""

    # Dotted path of the hook class. It is both the prefix of every patch
    # target and the first field of each expected log record.
    _HOOK_PATH = "airflow.providers.amazon.aws.hooks.glacier.GlacierHook"
    _GET_CONN = _HOOK_PATH + ".get_conn"

    def setUp(self):
        """Create the hook while its constructor is swapped for a no-op."""
        init_patcher = mock.patch(self._HOOK_PATH + ".__init__", return_value=None)
        with init_patcher:
            self.hook = GlacierHook(aws_conn_id="aws_default")

    def _assert_info_logs(self, captured, *messages):
        """Check that exactly the given INFO messages were captured, in order."""
        expected_records = [(self._HOOK_PATH, 'INFO', text) for text in messages]
        captured.check(*expected_records)

    @mock.patch(_GET_CONN)
    def test_retrieve_inventory_should_return_job_id(self, get_conn):
        """``retrieve_inventory`` returns whatever ``initiate_job`` returned."""
        expected_job = {"jobId": "1234abcd"}
        get_conn.return_value.initiate_job.return_value = expected_job

        actual_job = self.hook.retrieve_inventory(VAULT_NAME)

        get_conn.assert_called_once_with()
        assert expected_job == actual_job

    @mock.patch(_GET_CONN)
    def test_retrieve_inventory_should_log_mgs(self, get_conn):
        """``retrieve_inventory`` emits its three INFO messages."""
        expected_job = {"jobId": "1234abcd"}
        get_conn.return_value.initiate_job.return_value = expected_job

        with LogCapture() as captured:
            self.hook.retrieve_inventory(VAULT_NAME)
            self._assert_info_logs(
                captured,
                f"Retrieving inventory for vault: {VAULT_NAME}",
                f"Initiated inventory-retrieval job for: {VAULT_NAME}",
                f"Retrieval Job ID: {expected_job.get('jobId')}",
            )

    @mock.patch(_GET_CONN)
    def test_retrieve_inventory_results_should_return_response(self, get_conn):
        """``retrieve_inventory_results`` returns the ``get_job_output`` value."""
        get_conn.return_value.get_job_output.return_value = RESPONSE_BODY

        actual_response = self.hook.retrieve_inventory_results(VAULT_NAME, JOB_ID)

        get_conn.assert_called_once_with()
        assert actual_response == RESPONSE_BODY

    @mock.patch(_GET_CONN)
    def test_retrieve_inventory_results_should_log_mgs(self, get_conn):
        """``retrieve_inventory_results`` emits a single INFO message."""
        get_conn.return_value.get_job_output.return_value = REQUEST_RESULT

        with LogCapture() as captured:
            self.hook.retrieve_inventory_results(VAULT_NAME, JOB_ID)
            self._assert_info_logs(
                captured,
                f"Retrieving the job results for vault: {VAULT_NAME}...",
            )

    @mock.patch(_GET_CONN)
    def test_describe_job_should_return_status_succeeded(self, get_conn):
        """``describe_job`` returns the connection's ``describe_job`` value."""
        get_conn.return_value.describe_job.return_value = JOB_STATUS

        actual_status = self.hook.describe_job(VAULT_NAME, JOB_ID)

        get_conn.assert_called_once_with()
        assert actual_status == JOB_STATUS

    @mock.patch(_GET_CONN)
    def test_describe_job_should_log_mgs(self, get_conn):
        """``describe_job`` emits the lookup message and the status message."""
        get_conn.return_value.describe_job.return_value = JOB_STATUS

        with LogCapture() as captured:
            self.hook.describe_job(VAULT_NAME, JOB_ID)
            self._assert_info_logs(
                captured,
                f"Retrieving status for vault: {VAULT_NAME} and job {JOB_ID}",
                f"Job status: {JOB_STATUS.get('Action')}, code status: {JOB_STATUS.get('StatusCode')}",
            )