def test_slack_on_success():
    @solid
    def passing_solid(_):
        pass

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "slack": ResourceDefinition.hardcoded_resource(MagicMock()),
                    "base_url": ResourceDefinition.hardcoded_resource("foo"),
                }
            )
        ]
    )
    def basic_pipeline():
        passing_solid.with_hooks(hook_defs={slack_on_success})()

    result = execute_pipeline(basic_pipeline)
    assert result.success
    assert not any(
        [event.event_type == DagsterEventType.HOOK_ERRORED for event in result.event_list]
    )
    assert any(
        [event.event_type == DagsterEventType.HOOK_COMPLETED for event in result.event_list]
    )

def test_filter_out_resources():
    @solid(required_resource_keys={"a"})
    def requires_resource_a(context):
        assert context.resources.a
        assert not hasattr(context.resources, "b")

    @solid(required_resource_keys={"b"})
    def requires_resource_b(context):
        assert not hasattr(context.resources, "a")
        assert context.resources.b

    @solid
    def not_resources(context):
        assert not hasattr(context.resources, "a")
        assert not hasattr(context.resources, "b")

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "a": ResourceDefinition.hardcoded_resource("foo"),
                    "b": ResourceDefinition.hardcoded_resource("bar"),
                }
            )
        ],
    )
    def room_of_requirement():
        requires_resource_a()
        requires_resource_b()
        not_resources()

    execute_pipeline(room_of_requirement)

def test_cache_file_from_s3_overwrite():
    with get_temp_dir() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_one)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session_two)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {
                    'file_cache': {'config': {'target_folder': temp_dir, 'overwrite': True}}
                },
            },
        )

        # assert the download did not occur because the file is already there
        assert s3_session_two.download_file.call_count == 0

def test_cache_file_from_s3_overwrite():
    with tempfile.TemporaryDirectory() as temp_dir:
        s3_session_one = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_one),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download occurred
        assert s3_session_one.download_file.call_count == 1

        s3_session_two = mock.MagicMock()
        execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session_two),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {
                    "file_cache": {"config": {"target_folder": temp_dir, "overwrite": True}}
                },
            },
        )

        # assert the download did not occur because the file is already there
        assert s3_session_two.download_file.call_count == 0

def test_unzip_file_handle_on_fake_s3():
    foo_bytes = b"foo"

    @solid(required_resource_keys={"file_manager"}, output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, "an_archive_member", foo_bytes)
            with open(zip_file_name, "rb") as ff:
                s3_file_handle = context.resources.file_manager.write_data(ff.read())
            return s3_file_handle

    # Uses mock S3
    # https://github.com/spulec/moto/issues/3292
    s3 = boto3.client("s3", region_name="us-east-1")
    s3.create_bucket(Bucket="some-bucket")
    file_manager = S3FileManager(s3_session=s3, s3_bucket="some-bucket", s3_base_key="dagster")

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "s3": ResourceDefinition.hardcoded_resource(s3),
                    "file_manager": ResourceDefinition.hardcoded_resource(file_manager),
                    "io_manager": s3_pickle_io_manager,
                },
            )
        ]
    )
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    result = execute_pipeline(
        do_test_unzip_file_handle_s3,
        run_config={
            "resources": {"io_manager": {"config": {"s3_bucket": "some-bucket"}}},
            "solids": {
                "unzip_file_handle": {"inputs": {"archive_member": {"value": "an_archive_member"}}}
            },
        },
    )
    assert result.success

    zipped_s3_file = result.result_for_solid("write_zipped_file_to_s3_store").output_value()
    unzipped_s3_file = result.result_for_solid("unzip_file_handle").output_value()
    bucket_keys = [obj["Key"] for obj in s3.list_objects(Bucket="some-bucket")["Contents"]]
    assert zipped_s3_file.s3_key in bucket_keys
    assert unzipped_s3_file.s3_key in bucket_keys

def test_check_data_ingest_job_retries_on_5xx(self):
    data_repo = Mock(spec=RepositoryApi)
    api_responses = [ApiException(status=502), {'failedFiles': 0}]
    data_repo.retrieve_job_result = Mock(side_effect=api_responses)

    mode_def = ModeDefinition(
        name='test',
        resource_defs={
            "data_repo_client": ResourceDefinition.hardcoded_resource(data_repo)
        },
    )

    result: SolidExecutionResult = execute_solid(
        base_check_data_ingest_job_result,
        mode_def=mode_def,
        input_values={'job_id': JobId('fake_job_id')},
        run_config={
            'solids': {
                'base_check_data_ingest_job_result': {
                    'config': {
                        'max_wait_time_seconds': 3,
                        'poll_interval_seconds': 1
                    }
                }
            }
        },
    )

    self.assertTrue(result.success, "Poll ingest should not raise after a single 5xx")

def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[InputDefinition('num_one', Int), InputDefinition('num_two', Int)],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')

    @pipeline(
        mode_defs=[
            ModeDefinition(
                system_storage_defs=s3_plus_default_storage_defs,
                resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3)},
            )
        ]
    )
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {'inputs': {'num_one': {'value': 2}, 'num_two': {'value': 4}}}
            },
            'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
        },
    )

    keys_in_bucket = [obj['Key'] for obj in s3.list_objects(Bucket='some-bucket')['Contents']]
    assert result.success
    assert result.result_for_solid('add_numbers').output_value() == 6

    keys = set()
    for step_key, output_name in [('add_numbers.compute', 'result')]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(keys_in_bucket) == keys

def test_depends_on_s3_resource_file_manager():
    bar_bytes = 'bar'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def emit_file(context):
        return context.file_manager.write_data(bar_bytes)

    @solid(input_defs=[InputDefinition('file_handle', S3FileHandle)])
    def accept_file(context, file_handle):
        local_path = context.file_manager.copy_handle_to_local_temp(file_handle)
        assert isinstance(local_path, str)
        assert open(local_path, 'rb').read() == bar_bytes

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                system_storage_defs=s3_plus_default_storage_defs,
                resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3_fake_resource)},
            )
        ]
    )
    def s3_file_manager_test():
        accept_file(emit_file())

    result = execute_pipeline(
        s3_file_manager_test,
        environment_dict={'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}}},
    )
    assert result.success

    keys_in_bucket = set(s3_fake_resource.buckets['some-bucket'].keys())
    for step_key, output_name in [
        ('emit_file.compute', 'result'),
        ('accept_file.compute', 'result'),
    ]:
        keys_in_bucket.remove(create_s3_key(result.run_id, step_key, output_name))

    assert len(keys_in_bucket) == 1

    file_key = list(keys_in_bucket)[0]
    comps = file_key.split('/')
    assert '/'.join(comps[:-1]) == 'dagster/storage/{run_id}/files'.format(run_id=result.run_id)
    assert uuid.UUID(comps[-1])

def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}},
                        "config": {"file_key": "specified-file-key"},
                    }
                },
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "specified-file-key" in solid_result.output_value().path_desc

def test_runtime_metadata_fn():
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as f:
        manifest_json = json.load(f)

    def runtime_metadata_fn(context, node_info):
        return {"op_name": context.solid_def.name, "dbt_model": node_info["name"]}

    assets = load_assets_from_dbt_manifest(
        manifest_json=manifest_json, runtime_metadata_fn=runtime_metadata_fn
    )
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)},
    )
    result = assets_job.execute_in_process()
    assert result.success

    for asset in assets:
        materializations = [
            event.event_specific_data.materialization
            for event in result.events_for_node(asset.op.name)
            if event.event_type_value == "ASSET_MATERIALIZATION"
        ]
        assert len(materializations) == 1
        assert materializations[0].metadata_entries == [
            EventMetadataEntry.text(asset.op.name, label="op_name"),
            EventMetadataEntry.text(asset.op.name, label="dbt_model"),
        ]

def test_hardcoded_resource():
    called = {}

    mock_obj = seven.mock.MagicMock()

    @solid(required_resource_keys={"hardcoded"})
    def solid_hardcoded(context):
        assert context.resources.hardcoded("called")
        called["yup"] = True

    pipeline = PipelineDefinition(
        name="hardcoded_resource",
        solid_defs=[solid_hardcoded],
        mode_defs=[
            ModeDefinition(
                resource_defs={"hardcoded": ResourceDefinition.hardcoded_resource(mock_obj)}
            )
        ],
    )

    result = execute_pipeline(pipeline)

    assert result.success
    assert called["yup"]
    mock_obj.assert_called_with("called")

def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[InputDefinition("num_one", Int), InputDefinition("num_two", Int)],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    # Uses mock S3
    s3 = boto3.client("s3")
    s3.create_bucket(Bucket="some-bucket")

    @pipeline(
        mode_defs=[
            ModeDefinition(
                system_storage_defs=s3_plus_default_storage_defs,
                resource_defs={"s3": ResourceDefinition.hardcoded_resource(s3)},
            )
        ]
    )
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        run_config={
            "solids": {
                "add_numbers": {"inputs": {"num_one": {"value": 2}, "num_two": {"value": 4}}}
            },
            "storage": {"s3": {"config": {"s3_bucket": "some-bucket"}}},
        },
    )

    keys_in_bucket = [obj["Key"] for obj in s3.list_objects(Bucket="some-bucket")["Contents"]]
    assert result.success
    assert result.result_for_solid("add_numbers").output_value() == 6

    keys = set()
    for step_key, output_name in [("add_numbers.compute", "result")]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(keys_in_bucket) == keys

def test_cache_file_from_s3_specify_target_key():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    'file_cache': fs_file_cache,
                    's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
                }
            ),
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'s3_coordinate': {'bucket': 'some-bucket', 'key': 'some-key'}},
                        'config': {'file_key': 'specified-file-key'},
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success
        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert 'specified-file-key' in solid_result.output_value().path_desc

def test_asset_io_manager(gcs_bucket):
    @asset
    def upstream():
        return 2

    @asset
    def downstream(upstream):
        return 1 + upstream

    @asset(partitions_def=StaticPartitionsDefinition(["apple", "orange"]))
    def partitioned():
        return 8

    fake_gcs_client = FakeGCSClient()
    asset_group = AssetGroup(
        [upstream, downstream, partitioned],
        resource_defs={
            "io_manager": gcs_pickle_asset_io_manager.configured(
                {"gcs_bucket": gcs_bucket, "gcs_prefix": "assets"}
            ),
            "gcs": ResourceDefinition.hardcoded_resource(fake_gcs_client),
        },
    )
    asset_job = asset_group.build_job(name="my_asset_job")

    result = asset_job.execute_in_process(partition_key="apple")
    assert result.success
    assert fake_gcs_client.get_all_blob_paths() == {
        f"{gcs_bucket}/assets/upstream",
        f"{gcs_bucket}/assets/downstream",
        f"{gcs_bucket}/assets/partitioned/apple",
    }

def test_check_has_data_false():
    # need a bucket with a blob that has size 0, aka no content/empty content
    this_test_bucket = FakeGoogleBucket(
        {
            "gs://my-fake-bucket/fake-prefix": HexBlobInfo(
                hex_md5="b2d6ec45472467c836f253bd170182c7", content=""
            )
        }
    )
    this_test_mode = ModeDefinition(
        "test_check_has_data_mode", resource_defs={**load_table_test_mode.resource_defs}
    )
    this_test_mode.resource_defs["gcs"] = ResourceDefinition.hardcoded_resource(
        FakeGCSClient(buckets={test_bucket_name: this_test_bucket})
    )

    result: SolidExecutionResult = execute_solid(
        check_has_data,
        mode_def=this_test_mode,
        input_values={"metadata_fanout_result": metadata_fanout_result},
        run_config=run_config,
    )

    assert result.success
    assert not result.output_value("no_data")

def test_execute_byfeature_parquet_lakehouse():
    with get_temp_dir() as temp_dir:
        lakehouse = ByFeatureParquetLakehouse(temp_dir)
        pipeline_def = construct_lakehouse_pipeline(
            name='test',
            lakehouse_tables=[TableOne, TableTwo, TableThree],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        'spark': spark_session_resource,
                        'lakehouse': ResourceDefinition.hardcoded_resource(lakehouse),
                    }
                )
            ],
        )

        pipeline_result = execute_pipeline(pipeline_def)
        assert pipeline_result.success

        def get_table(table_def):
            spark = spark_session_from_config()
            return spark.read.parquet(
                os.path.join(temp_dir, table_def.metadata[FEATURE_AREA], table_def.name)
            ).collect()

        assert get_table(TableOne) == [Row(num=1)]
        assert get_table(TableTwo) == [Row(num=2)]
        assert set(get_table(TableThree)) == set([Row(num=1), Row(num=2)])

def test_source_asset():
    @asset
    def asset1(source1):
        assert source1 == 5
        return 1

    class MyIOManager(IOManager):
        def handle_output(self, context, obj):
            pass

        def load_input(self, context):
            assert context.resource_config["a"] == 7
            assert context.resources.subresource == 9
            assert context.upstream_output.resources.subresource == 9
            return 5

    @io_manager(config_schema={"a": int}, required_resource_keys={"subresource"})
    def my_io_manager(_):
        return MyIOManager()

    job = build_assets_job(
        "a",
        [asset1],
        source_assets=[SourceAsset(AssetKey("source1"), io_manager_key="special_io_manager")],
        resource_defs={
            "special_io_manager": my_io_manager.configured({"a": 7}),
            "subresource": ResourceDefinition.hardcoded_resource(9),
        },
    )
    assert job.graph.node_defs == [asset1.op]
    assert job.execute_in_process().success

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url='url',
        jdbc_url='url',
        dialect='dialect',
        load_table=mock.MagicMock(),
        host='host',
        db_name='db_name',
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    'db_info': ResourceDefinition.hardcoded_resource(db_info_mock),
                    'spark': ResourceDefinition.hardcoded_resource(mock.MagicMock()),
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        environment_dict={
            'solids': {'load_data_to_database_from_spark': {'config': {'table_name': 'foo'}}}
        },
    ).result_for_solid('load_data_to_database_from_spark')
    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries['Host'].entry_data.text == 'host'
    assert entries['Db'].entry_data.text == 'db_name'

def test_depends_on_s3_resource_intermediates():
    @solid(
        input_defs=[InputDefinition('num_one', Int), InputDefinition('num_two', Int)],
        output_defs=[OutputDefinition(Int)],
    )
    def add_numbers(_, num_one, num_two):
        return num_one + num_two

    s3_fake_resource = create_s3_fake_resource()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                system_storage_defs=s3_plus_default_storage_defs,
                resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3_fake_resource)},
            )
        ]
    )
    def s3_internal_pipeline():
        return add_numbers()

    result = execute_pipeline(
        s3_internal_pipeline,
        environment_dict={
            'solids': {
                'add_numbers': {'inputs': {'num_one': {'value': 2}, 'num_two': {'value': 4}}}
            },
            'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
        },
    )

    assert result.success
    assert result.result_for_solid('add_numbers').output_value() == 6
    assert 'some-bucket' in s3_fake_resource.session.buckets

    keys = set()
    for step_key, output_name in [('add_numbers.compute', 'result')]:
        keys.add(create_s3_key(result.run_id, step_key, output_name))

    assert set(s3_fake_resource.session.buckets['some-bucket'].keys()) == keys

def test_unzip_file_handle_on_fake_s3():
    foo_bytes = 'foo'.encode()

    @solid(output_defs=[OutputDefinition(S3FileHandle)])
    def write_zipped_file_to_s3_store(context):
        with get_temp_file_name() as zip_file_name:
            write_zip_file_to_disk(zip_file_name, 'an_archive_member', foo_bytes)
            with open(zip_file_name, 'rb') as ff:
                s3_file_handle = context.file_manager.write_data(ff.read())
            return s3_file_handle

    # Uses mock S3
    s3 = boto3.client('s3')
    s3.create_bucket(Bucket='some-bucket')

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3)},
                system_storage_defs=[s3_system_storage],
            )
        ]
    )
    def do_test_unzip_file_handle_s3():
        return unzip_file_handle(write_zipped_file_to_s3_store())

    result = execute_pipeline(
        do_test_unzip_file_handle_s3,
        environment_dict={
            'storage': {'s3': {'config': {'s3_bucket': 'some-bucket'}}},
            'solids': {
                'unzip_file_handle': {'inputs': {'archive_member': {'value': 'an_archive_member'}}}
            },
        },
    )
    assert result.success

    zipped_s3_file = result.result_for_solid('write_zipped_file_to_s3_store').output_value()
    unzipped_s3_file = result.result_for_solid('unzip_file_handle').output_value()
    bucket_keys = [obj['Key'] for obj in s3.list_objects(Bucket='some-bucket')['Contents']]
    assert zipped_s3_file.s3_key in bucket_keys
    assert unzipped_s3_file.s3_key in bucket_keys

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid(
        required_resource_keys={"pyspark"},
        output_defs=[OutputDefinition(io_manager_key="pyspark_io_manager")],
    )
    def emit_mock(context):
        return context.resources.pyspark.spark_session.read.csv(
            file_relative_path(__file__, "../data/test.csv")
        )

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                    "pyspark": pyspark_resource,
                    "pyspark_step_launcher": no_step_launcher,
                    "pyspark_io_manager": local_parquet_io_manager,
                    "io_manager": fs_io_manager,
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    with tempfile.TemporaryDirectory() as temp_dir:
        solid_result = execute_pipeline(
            load_df_test,
            run_config={
                "solids": {"load_data_to_database_from_spark": {"config": {"table_name": "foo"}}},
                "resources": {
                    "io_manager": {"config": {"base_dir": temp_dir}},
                    "pyspark_io_manager": {"config": {"base_dir": temp_dir}},
                },
            },
        ).result_for_solid("load_data_to_database_from_spark")
        assert solid_result.success

        mats = solid_result.materializations_during_compute
        assert len(mats) == 1
        mat = mats[0]
        assert len(mat.metadata_entries) == 2
        entries = {me.label: me for me in mat.metadata_entries}
        assert entries["Host"].entry_data.text == "host"
        assert entries["Db"].entry_data.text == "db_name"

def test_hook_resource():
    slack_mock = mock.MagicMock()

    @job(
        resource_defs={"slack": ResourceDefinition.hardcoded_resource(slack_mock)},
    )
    def foo():
        a.with_hooks({slack_message_on_success, slack_message_on_failure})()

    foo.execute_in_process()
    assert slack_mock.chat.post_message.call_count == 1

def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        pipeline_result = execute_solid_with_resources(
            cache_file_from_s3,
            resources={
                'file_cache': fs_file_cache,
                's3': ResourceDefinition.hardcoded_resource(S3Resource(s3_session)),
            },
            environment_dict={
                'solids': {
                    'cache_file_from_s3': {
                        'inputs': {'bucket_data': {'bucket': 'some-bucket', 'key': 'some-key'}}
                    }
                },
                'resources': {'file_cache': {'config': {'target_folder': temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert pipeline_result.success

        solid_result = pipeline_result.result_for_solid('cache_file_from_s3')
        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == 'file_handle_exists'
        path_in_metadata = expectation_result.metadata_entries[0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.result_value(), LocalFileHandle)
        assert 'some-key' in solid_result.result_value().path_desc

def test_load_from_manifest_json():
    manifest_path = file_relative_path(__file__, "sample_manifest.json")
    with open(manifest_path, "r") as f:
        manifest_json = json.load(f)

    assets = load_assets_from_dbt_manifest(manifest_json=manifest_json)
    assert_assets_match_project(assets)

    dbt = MagicMock()
    assets_job = build_assets_job(
        "assets_job",
        assets,
        resource_defs={"dbt": ResourceDefinition.hardcoded_resource(dbt)},
    )
    assert assets_job.execute_in_process().success

def create_file_handle_pipeline(temp_file_handle, s3_resource):
    @solid
    def emit_temp_handle(_):
        return temp_file_handle

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={"s3": ResourceDefinition.hardcoded_resource(s3_resource)})
        ]
    )
    def test():
        return file_handle_to_s3(emit_temp_handle())

    return test

def test_cache_file_from_s3_basic():
    s3_session = mock.MagicMock()
    with get_temp_dir() as temp_dir:
        solid_result = execute_solid(
            cache_file_from_s3,
            ModeDefinition(
                resource_defs={
                    "file_cache": fs_file_cache,
                    "s3": ResourceDefinition.hardcoded_resource(s3_session),
                }
            ),
            run_config={
                "solids": {
                    "cache_file_from_s3": {
                        "inputs": {"s3_coordinate": {"bucket": "some-bucket", "key": "some-key"}}
                    }
                },
                "resources": {"file_cache": {"config": {"target_folder": temp_dir}}},
            },
        )

        # assert the download occurred
        assert s3_session.download_file.call_count == 1
        assert solid_result.success

        expectation_results = solid_result.expectation_results_during_compute
        assert len(expectation_results) == 1
        expectation_result = expectation_results[0]
        assert expectation_result.success
        assert expectation_result.label == "file_handle_exists"
        path_in_metadata = expectation_result.metadata_entries[0].entry_data.path
        assert isinstance(path_in_metadata, str)
        assert os.path.exists(path_in_metadata)

        assert isinstance(solid_result.output_value(), LocalFileHandle)
        assert "some-key" in solid_result.output_value().path_desc

def test_hook_resource():
    slack_mock = mock.MagicMock()

    @pipeline(
        mode_defs=[
            ModeDefinition(
                "unittest",
                resource_defs={"slack": ResourceDefinition.hardcoded_resource(slack_mock)},
            ),
        ]
    )
    def foo():
        a.with_hooks({slack_on_success, slack_on_failure})()

    execute_pipeline(foo)
    assert slack_mock.chat.post_message.call_count == 1

def create_file_handle_pipeline(temp_file_handle, s3_resource):
    # pylint: disable=no-value-for-parameter
    @solid
    def emit_temp_handle(_):
        return temp_file_handle

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={'s3': ResourceDefinition.hardcoded_resource(s3_resource)})
        ]
    )
    def test():
        return file_handle_to_s3(emit_temp_handle())

    return test

def test_airline_demo_load_df():
    db_info_mock = DbInfo(
        engine=mock.MagicMock(),
        url="url",
        jdbc_url="url",
        dialect="dialect",
        load_table=mock.MagicMock(),
        host="host",
        db_name="db_name",
    )

    @solid
    def emit_mock(_):
        return mock.MagicMock(spec=DataFrame)

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "db_info": ResourceDefinition.hardcoded_resource(db_info_mock),
                    "pyspark": pyspark_resource,
                    "pyspark_step_launcher": no_step_launcher,
                }
            )
        ]
    )
    def load_df_test():
        load_data_to_database_from_spark(emit_mock())

    solid_result = execute_pipeline(
        load_df_test,
        run_config={
            "solids": {"load_data_to_database_from_spark": {"config": {"table_name": "foo"}}}
        },
    ).result_for_solid("load_data_to_database_from_spark")
    assert solid_result.success

    mats = solid_result.materializations_during_compute
    assert len(mats) == 1
    mat = mats[0]
    assert len(mat.metadata_entries) == 2
    entries = {me.label: me for me in mat.metadata_entries}
    assert entries["Host"].entry_data.text == "host"
    assert entries["Db"].entry_data.text == "db_name"