def test_s3_file_manager_read(): state = {"called": 0} bar_bytes = "bar".encode() class S3Mock(mock.MagicMock): def download_file(self, *_args, **kwargs): state["called"] += 1 assert state["called"] == 1 state["bucket"] = kwargs.get("Bucket") state["key"] = kwargs.get("Key") file_name = kwargs.get("Filename") state["file_name"] = file_name with open(file_name, "wb") as ff: ff.write(bar_bytes) s3_mock = S3Mock() file_manager = S3FileManager(s3_mock, "some-bucket", "some-key") file_handle = S3FileHandle("some-bucket", "some-key/kdjfkjdkfjkd") with file_manager.read(file_handle) as file_obj: assert file_obj.read() == bar_bytes assert state["bucket"] == file_handle.s3_bucket assert state["key"] == file_handle.s3_key # read again. cached with file_manager.read(file_handle) as file_obj: assert file_obj.read() == bar_bytes assert os.path.exists(state["file_name"]) file_manager.delete_local_temp() assert not os.path.exists(state["file_name"])
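# create_file_handle_pipeline is a local test helper not shown in the original
# snippet. A minimal sketch, assuming the file_handle_to_s3 solid and the
# s3_resource / s3_file_manager resources exported by dagster_aws.s3; the
# helper body is an assumption, not the original definition:
from dagster import ModeDefinition, pipeline, solid
from dagster_aws.s3 import file_handle_to_s3, s3_file_manager, s3_resource


def create_file_handle_pipeline(temp_file_handle):
    @solid
    def emit_temp_handle(_):
        return temp_file_handle

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={"s3": s3_resource, "file_manager": s3_file_manager})
        ]
    )
    def file_handle_pipeline():
        file_handle_to_s3(emit_temp_handle())

    return file_handle_pipeline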
def test_successful_file_handle_to_s3(mock_s3_bucket): foo_bytes = "foo".encode() remote_s3_object = mock_s3_bucket.Object("some-key/foo") remote_s3_object.put(Body=foo_bytes) file_handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo") result = execute_pipeline( create_file_handle_pipeline(file_handle), run_config={ "solids": { "file_handle_to_s3": {"config": {"Bucket": mock_s3_bucket.name, "Key": "some-key"}} }, "resources": {"file_manager": {"config": {"s3_bucket": mock_s3_bucket.name}}}, }, ) assert result.success assert mock_s3_bucket.Object(key="some-key").get()["Body"].read() == foo_bytes materializations = result.result_for_solid("file_handle_to_s3").materializations_during_compute assert len(materializations) == 1 assert len(materializations[0].metadata_entries) == 1 assert materializations[0].metadata_entries[ 0 ].entry_data.path == "s3://{bucket}/some-key".format(bucket=mock_s3_bucket.name) assert materializations[0].metadata_entries[0].label == "some-key"
def test_s3_file_manager_read_moto(mock_s3_resource, mock_s3_bucket):
    # Moto-backed variant of the mock-based read test above; renamed so it does
    # not shadow that test at collection time.
    body = "bar".encode()
    remote_s3_object = mock_s3_bucket.Object("some-key/foo")
    remote_s3_object.put(Body=body)

    file_manager = S3FileManager(mock_s3_resource.meta.client, mock_s3_bucket.name, "some-key")
    file_handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo")

    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == body

    # Delete the remote object, then read again: the second read must be
    # served from the local cache rather than S3.
    remote_s3_object.delete()

    with file_manager.read(file_handle) as file_obj:
        assert file_obj.read() == body
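# create_file_handle_job is likewise a helper not shown in the original
# snippet. A minimal sketch using the graph/op APIs; the helper body is an
# assumption, and it presumes a dagster_aws version in which file_handle_to_s3
# can be invoked inside a graph:
from dagster import graph, op


def create_file_handle_job(temp_file_handle):
    @op
    def emit_temp_handle():
        return temp_file_handle

    @graph
    def file_handle_graph():
        file_handle_to_s3(emit_temp_handle())

    return file_handle_graph.to_job(
        resource_defs={"s3": s3_resource, "file_manager": s3_file_manager}
    )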
def test_successful_file_handle_to_s3(mock_s3_bucket): foo_bytes = b"foo" remote_s3_object = mock_s3_bucket.Object("some-key/foo") remote_s3_object.put(Body=foo_bytes) file_handle = S3FileHandle(mock_s3_bucket.name, "some-key/foo") result = create_file_handle_job(file_handle).execute_in_process( run_config={ "solids": { "file_handle_to_s3": { "config": { "Bucket": mock_s3_bucket.name, "Key": "some-key" } } }, "resources": { "file_manager": { "config": { "s3_bucket": mock_s3_bucket.name } } }, }, ) assert result.success assert mock_s3_bucket.Object( key="some-key").get()["Body"].read() == foo_bytes materializations = [ event.step_materialization_data.materialization for event in result.events_for_node("file_handle_to_s3") if event.event_type == DagsterEventType.ASSET_MATERIALIZATION ] assert len(materializations) == 1 assert len(materializations[0].metadata_entries) == 1 assert (materializations[0].metadata_entries[0].entry_data.path == f"s3://{mock_s3_bucket.name}/some-key") assert materializations[0].metadata_entries[0].label == "some-key"