    yield Output(df, output_name="treated_data")
    yield Output(error, output_name="error")


@discord_message_on_failure
@discord_message_on_success
@redis_keepalive_on_failure
@redis_keepalive_on_success
@pipeline(
    mode_defs=[
        ModeDefinition(
            "dev",
            resource_defs={
                "basedosdados_config": basedosdados_config,
                "timezone_config": timezone_config,
                "discord_webhook": discord_webhook,
                "keepalive_key": keepalive_key,
            },
        ),
    ],
    tags={
        "pipeline": "br_rj_riodejaneiro_stpl_gps_registros",
        "dagster-k8s/config": {
            "container_config": {
                "resources": {
                    "requests": {"cpu": "250m", "memory": "250Mi"},
                    "limits": {
"weight", ] quantities = [cereal["cups"] for cereal in cereals] reweights = [1.0 / float(quantity) for quantity in quantities] normalized_cereals = deepcopy(cereals) for idx in range(len(normalized_cereals)): cereal = normalized_cereals[idx] for column in columns_to_normalize: cereal[column] = float(cereal[column]) * reweights[idx] context.resources.warehouse.update_normalized_cereals(normalized_cereals) @pipeline(mode_defs=[ ModeDefinition( resource_defs={"warehouse": local_sqlite_warehouse_resource}) ]) def resources_pipeline(): normalize_calories(read_csv()) if __name__ == "__main__": run_config = { "solids": { "read_csv": { "inputs": { "csv_path": { "value": "cereal.csv" } } }
import tempfile

from dagster import ModeDefinition, execute_pipeline, pipeline, reconstructable, solid
from dagster.core.definitions.executor import default_executors
from dagster.core.definitions.reconstructable import ReconstructablePipeline
from dagster.core.events import DagsterEventType
from dagster.core.test_utils import instance_for_test, nesting_composite_pipeline
from dagster.utils import send_interrupt
from dagster_dask import DataFrame, dask_executor
from dask.distributed import Scheduler, Worker


@solid
def simple(_):
    return 1


@pipeline(mode_defs=[ModeDefinition(executor_defs=default_executors + [dask_executor])])
def dask_engine_pipeline():
    simple()


def test_execute_on_dask_local():
    with tempfile.TemporaryDirectory() as tempdir:
        with instance_for_test(temp_dir=tempdir) as instance:
            result = execute_pipeline(
                reconstructable(dask_engine_pipeline),
                run_config={
                    "intermediate_storage": {
                        "filesystem": {
                            "config": {
                                "base_dir": tempdir
def get_context(self, solid_config=None, mode_def=None, run_config=None):
    """Get a dagstermill execution context for interactive exploration and development.

    Args:
        solid_config (Optional[Any]): If specified, this value will be made available on the
            context as its ``solid_config`` property.
        mode_def (Optional[:class:`dagster.ModeDefinition`]): If specified, defines the mode to
            use to construct the context. Specify this if you would like a context constructed
            with specific ``resource_defs`` or ``logger_defs``. By default, an ephemeral mode
            with a console logger will be constructed.
        run_config (Optional[dict]): The environment config dict with which to construct the
            context.

    Returns:
        :py:class:`~dagstermill.DagstermillExecutionContext`
    """
    check.opt_inst_param(mode_def, "mode_def", ModeDefinition)
    run_config = check.opt_dict_param(run_config, "run_config", key_type=str)

    # If we are running non-interactively, and there is already a context reconstituted,
    # return that context rather than overwriting it.
    if self.context is not None and isinstance(self.context, DagstermillRuntimeExecutionContext):
        return self.context

    if not mode_def:
        mode_def = ModeDefinition(logger_defs={"dagstermill": colored_console_logger})
        run_config["loggers"] = {"dagstermill": {}}

    solid_def = SolidDefinition(
        name="this_solid",
        input_defs=[],
        compute_fn=lambda *args, **kwargs: None,
        output_defs=[],
        description="Ephemeral solid constructed by dagstermill.get_context()",
        required_resource_keys=mode_def.resource_key_set,
    )

    pipeline_def = PipelineDefinition(
        [solid_def], mode_defs=[mode_def], name="ephemeral_dagstermill_pipeline"
    )

    run_id = make_new_run_id()

    # Construct a stubbed PipelineRun for notebook exploration. The actual pipeline run
    # during pipeline execution will be serialized and reconstituted in the
    # `reconstitute_pipeline_context` call.
    pipeline_run = PipelineRun(
        pipeline_name=pipeline_def.name,
        run_id=run_id,
        run_config=run_config,
        mode=mode_def.name,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        tags=None,
    )

    self.in_pipeline = False
    self.solid_def = solid_def
    self.pipeline = pipeline_def

    environment_config = EnvironmentConfig.build(pipeline_def, run_config, mode=mode_def.name)

    pipeline = InMemoryPipeline(pipeline_def)
    execution_plan = ExecutionPlan.build(pipeline, environment_config)

    with scoped_pipeline_context(
        execution_plan,
        pipeline,
        run_config,
        pipeline_run,
        DagsterInstance.ephemeral(),
        scoped_resources_builder_cm=self._setup_resources,
    ) as pipeline_context:
        self.context = DagstermillExecutionContext(
            pipeline_context=pipeline_context,
            pipeline_def=pipeline_def,
            solid_config=solid_config,
            resource_keys_to_init=get_required_resource_keys_to_init(
                execution_plan,
                pipeline_def,
                environment_config,
                pipeline_context.intermediate_storage_def,
            ),
            solid_name=solid_def.name,
        )

    return self.context
import os
from contextlib import contextmanager

import pytest
from click.testing import CliRunner

from dagster_celery import celery_executor
from dagster_celery.cli import main

from dagster import ModeDefinition, default_executors, execute_pipeline, pipeline, seven, solid
from dagster.core.definitions.pointer import FileCodePointer
from dagster.core.definitions.reconstructable import ReconstructablePipeline
from dagster.core.instance import DagsterInstance

BUILDKITE = os.getenv('BUILDKITE')

skip_ci = pytest.mark.skipif(
    bool(BUILDKITE),
    reason='Tests hang forever on buildkite for reasons we don\'t currently understand',
)

celery_mode_defs = [ModeDefinition(executor_defs=default_executors + [celery_executor])]


@contextmanager
def execute_pipeline_on_celery(pipeline_name):
    with seven.TemporaryDirectory() as tempdir:
        pipeline_def = ReconstructablePipeline(FileCodePointer(__file__, pipeline_name))
        instance = DagsterInstance.local_temp(tempdir=tempdir)
        result = execute_pipeline(
            pipeline_def,
            environment_dict={
                'storage': {'filesystem': {'config': {'base_dir': tempdir}}},
                'execution': {'celery': {}},
            },
            instance=instance,
        )
def test_nb_solid(name, **kwargs):
    output_defs = kwargs.pop("output_defs", [OutputDefinition(is_required=False)])

    return dagstermill.define_dagstermill_solid(
        name=name,
        notebook_path=nb_test_path(name),
        output_notebook="notebook",
        output_defs=output_defs,
        **kwargs,
    )


default_mode_defs = [ModeDefinition(resource_defs={"file_manager": local_file_manager})]


hello_world = test_nb_solid("hello_world", output_defs=[])


@pipeline(mode_defs=default_mode_defs)
def hello_world_pipeline():
    hello_world()


hello_world_with_custom_tags_and_description = test_nb_solid(
    "hello_world",
    output_defs=[],
    tags={"foo": "bar"},
    description="custom description",
)
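# A minimal sketch of executing the notebook-backed pipeline above; this is an
# illustration, not part of the original module. It assumes the "hello_world" notebook
# exists at the path nb_test_path resolves to and that local_file_manager can write the
# executed notebook made available through the "notebook" output.
from dagster import execute_pipeline

result = execute_pipeline(hello_world_pipeline)
assert result.success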
    load_data_to_database_from_spark,
    process_q2_data,
    process_sfo_weather_data,
    q2_sfo_outbound_flights,
    sfo_delays_by_destination,
    subsample_spark_dataset,
    tickets_with_destination,
    unzip_file,
    westbound_delays,
)

test_mode = ModeDefinition(
    name='test',
    resource_defs={
        'spark': spark_session_local,
        'db_info': redshift_db_info_resource,
        'tempfile': tempfile_resource,
        's3': s3_resource,
    },
)

local_mode = ModeDefinition(
    name='local',
    resource_defs={
        'spark': spark_session_local,
        's3': s3_resource,
        'db_info': postgres_db_info_resource,
        'tempfile': tempfile_resource,
    },
)
CONFIG_FILE = """
solids:
  spark_solid:
    config:
      spark_home: /your/spark_home
      application_jar: "{path}"
      deploy_mode: "client"
      application_arguments: "--local-path /tmp/dagster/events/data --date 2019-01-01"
      master_url: "local[*]"
      spark_conf:
        spark:
          app:
            name: "test_app"
"""

MODE_DEF = ModeDefinition(resource_defs={"spark": spark_resource})


def test_jar_not_found():
    spark_solid = create_spark_solid("spark_solid", main_class="something")
    # guid guaranteed to not exist
    run_config = yaml.safe_load(CONFIG_FILE.format(path=str(uuid.uuid4())))
    result = execute_solid(
        spark_solid, run_config=run_config, raise_on_error=False, mode_def=MODE_DEF
    )

    assert result.failure_data
    assert (
        "does not exist. A valid jar must be built before running this op."
        in result.failure_data.error.cause.message
    )
from dagster import ModeDefinition, default_executors, fs_io_manager, pipeline, solid
from dagster_dask import dask_executor


@solid
def hello_world(_):
    return "Hello, World!"


@pipeline(
    mode_defs=[
        ModeDefinition(
            resource_defs={"io_manager": fs_io_manager},
            executor_defs=default_executors + [dask_executor],
        )
    ]
)
def dask_pipeline():
    return hello_world()
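# A minimal sketch of running dask_pipeline on an ephemeral local Dask cluster (an
# illustration, not part of the original module). The pipeline must be reconstructable,
# i.e. importable from this file, so the Dask workers can load it.
from dagster import execute_pipeline, reconstructable
from dagster.core.test_utils import instance_for_test

if __name__ == "__main__":
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(dask_pipeline),
            run_config={"execution": {"dask": {"config": {"cluster": {"local": {}}}}}},
            instance=instance,
        )
        assert result.success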
from dagster_pandas import DataFrame

from test2_queries import COPY_INTO, CREATE_STAGE, CREATE_TABLE, TRANSFORMS, parse_s3_config

# Typically we would get these from some other configuration source that we could create
# on a per-env basis
access_key_id, secret_key = parse_s3_config('test_creds/boto.cfg')

table_name = 'BMESICK.test_data_ingest_dagster'
stage_name = table_name + '_stg'
bucket_name = 'bmez-astronomer'

# Modes allow you to configure substantial behavior based on environment
# (use local disk instead of S3 for local runs, sqlite instead of Snowflake, etc.)
prod_mode = ModeDefinition(
    name='prod',
    resource_defs={'s3': s3_resource, 'snowflake': snowflake_resource},
    system_storage_defs=s3_plus_default_storage_defs,
)

# Presets are named, layered configuration: values from later files override earlier
# ones, which is where per-environment settings (and potentially secrets) live.
preset_defs = PresetDefinition.from_files(
    name='prod',
    mode='prod',
    environment_files=[
        file_relative_path(__file__, 'environments/shared.yaml'),
    ],
)
@resource(config=Field(Int))
def multer_resource(init_context):
    return lambda x: x * init_context.resource_config


@resource(config={'num_one': Field(Int), 'num_two': Field(Int)})
def double_adder_resource(init_context):
    return (
        lambda x: x
        + init_context.resource_config['num_one']
        + init_context.resource_config['num_two']
    )


@pipeline(
    mode_defs=[
        ModeDefinition(
            name='add_mode',
            resource_defs={'op': adder_resource},
            description='Mode that adds things',
        ),
        ModeDefinition(
            name='mult_mode',
            resource_defs={'op': multer_resource},
            description='Mode that multiplies things',
        ),
        ModeDefinition(
            name='double_adder',
            resource_defs={'op': double_adder_resource},
            description='Mode that adds two numbers to things',
        ),
    ],
    preset_defs=[PresetDefinition.from_files("add", mode="add_mode")],
)
def test_get_out_of_pipeline_context():
    context = dagstermill.get_context(
        mode_def=ModeDefinition(resource_defs={'list': ResourceDefinition(lambda _: [])})
    )

    assert context.pipeline_def.name == 'ephemeral_dagstermill_pipeline'
    assert context.resources.list == []
"root": { "config": { "sleep_secs": [-10, 30] } } }, }, ), PresetDefinition( "sleep", { "execution": { "multiprocess": {} }, "solids": { "root": { "config": { "sleep_secs": [0, 10] } } }, }, ), ], mode_defs=[ModeDefinition(resource_defs={"io_manager": fs_io_manager})], ) def branch_pipeline(): out_1, out_2 = root() branch("branch_1", out_1, 3) branch("branch_2", out_2, 5)
def test_input_manager_with_retries():
    _called = False
    _count = {"total": 0}

    @input_manager
    def should_succeed(_, _resource_config):
        if _count["total"] < 2:
            _count["total"] += 1
            raise RetryRequested(max_retries=3)
        return "foo"

    @input_manager
    def should_retry(_, _resource_config):
        raise RetryRequested(max_retries=3)

    @input_manager
    def should_not_execute(_, _resource_config):
        _called = True  # never runs; the upstream step exhausts its retries first

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "should_succeed": should_succeed,
                    "should_not_execute": should_not_execute,
                    "should_retry": should_retry,
                }
            )
        ]
    )
    def simple():
        @solid
        def source_solid(_):
            return "foo"

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_succeed")])
        def take_input_1(_, solid_input):
            return solid_input

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_retry")])
        def take_input_2(_, solid_input):
            return solid_input

        @solid(input_defs=[InputDefinition("solid_input", manager_key="should_not_execute")])
        def take_input_3(_, solid_input):
            return solid_input

        take_input_3(take_input_2(take_input_1(source_solid())))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        step_stats = instance.get_run_step_stats(result.run_id)
        assert len(step_stats) == 3

        step_stats_1 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_1"])
        assert len(step_stats_1) == 1
        step_stat_1 = step_stats_1[0]
        assert step_stat_1.status.value == "SUCCESS"
        assert step_stat_1.attempts == 3

        step_stats_2 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_2"])
        assert len(step_stats_2) == 1
        step_stat_2 = step_stats_2[0]
        assert step_stat_2.status.value == "FAILURE"
        assert step_stat_2.attempts == 4

        step_stats_3 = instance.get_run_step_stats(result.run_id, step_keys=["take_input_3"])
        assert len(step_stats_3) == 0
        assert _called == False
@resource
def add_two_resource(_):
    def add_two(num):
        return num + 2

    return add_two


@solid(required_resource_keys={"adder"})
def solid_that_uses_adder_resource(context, number):
    return context.resources.adder(number)


@pipeline(
    mode_defs=[
        ModeDefinition(name="add_one", resource_defs={"adder": add_one_resource}),
        ModeDefinition(name="add_two", resource_defs={"adder": add_two_resource}),
    ]
)
def pipeline_with_mode():
    solid_that_uses_adder_resource()


_explode_pid = {"pid": None}


# Will throw if the run worker pid tries to access the definition, but subprocesses (the step
# workers) can access the definition
class ExplodingTestPipeline(ReconstructablePipeline):
    def __new__(
        cls,
        repository,
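# Hedged sketch of selecting one of the modes above by name at execution time; this is
# not in the original source. The unconnected "number" input is assumed to be loadable
# from config via the {"value": ...} shape used elsewhere in these examples.
from dagster import execute_pipeline

result = execute_pipeline(
    pipeline_with_mode,
    mode="add_two",
    run_config={
        "solids": {"solid_that_uses_adder_resource": {"inputs": {"number": {"value": 4}}}}
    },
)
assert result.success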
"bar": int }, input_defs=[InputDefinition("people", DataFrame)], output_defs=[OutputDefinition(DataFrame)], required_resource_keys={"pyspark_step_launcher"}, ) def filter_df_solid(_, people): return people.filter(people["age"] < 30) MODE_DEFS = [ ModeDefinition( "prod", resource_defs={ "pyspark_step_launcher": emr_pyspark_step_launcher, "pyspark": pyspark_resource, "s3": s3_resource, }, intermediate_storage_defs=s3_plus_default_intermediate_storage_defs, ), ModeDefinition( "local", resource_defs={ "pyspark_step_launcher": no_step_launcher, "pyspark": pyspark_resource }, ), ] @pipeline(mode_defs=MODE_DEFS)
        if context.last_completion_time:
            run_config["since"] = context.last_completion_time
        return RunRequest(run_key=None, run_config=run_config)

    return {"foo_sensor": foo_sensor}


@solid(version="foo")
def my_solid():
    return 5


@pipeline(
    name="memoizable",
    mode_defs=[ModeDefinition(resource_defs={"io_manager": versioned_filesystem_io_manager})],
    tags={MEMOIZED_RUN_TAG: "true"},
)
def memoizable_pipeline():
    my_solid()


@repository
def bar():
    return {
        "pipelines": {
            "foo": foo_pipeline,
            "baz": baz_pipeline,
            "partitioned_scheduled_pipeline": partitioned_scheduled_pipeline,
            "memoizable": memoizable_pipeline,
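# Hedged sketch of exercising the memoizable pipeline above (illustrative only; exact
# memoization behavior depends on the dagster version). With MEMOIZED_RUN_TAG set and a
# versioned io manager, a re-execution against the same instance should skip steps whose
# versions and inputs are unchanged.
from dagster import execute_pipeline
from dagster.core.test_utils import instance_for_test

with instance_for_test() as instance:
    first = execute_pipeline(memoizable_pipeline, instance=instance)
    second = execute_pipeline(memoizable_pipeline, instance=instance)
    assert first.success and second.success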
@resource(config_field=Field(Int, is_optional=True))
def a_resource(context):
    raise Exception("Bad Resource")


resources = {'BadResource': a_resource}


@solid(required_resource_keys={'BadResource'})
def one(_):
    return 1


@pipeline(mode_defs=[ModeDefinition(resource_defs=resources)])
def resource_error_pipeline():
    one()


if __name__ == '__main__':
    result = execute_pipeline(
        resource_error_pipeline,
        environment_dict={
            'storage': {'filesystem': {}},
            'execution': {
                'in_process': {
                    'config': {
                        'raise_on_error': False
def test_io_manager_config_inside_composite():
    stored_dict = {}

    @io_manager(output_config_schema={"output_suffix": str})
    def inner_manager(_):
        class MyHardcodedIOManager(IOManager):
            def handle_output(self, context, obj):
                keys = tuple(
                    context.get_run_scoped_output_identifier() + [context.config["output_suffix"]]
                )
                stored_dict[keys] = obj

            def load_input(self, context):
                keys = tuple(
                    context.upstream_output.get_run_scoped_output_identifier()
                    + [context.upstream_output.config["output_suffix"]]
                )
                return stored_dict[keys]

        return MyHardcodedIOManager()

    @solid(output_defs=[OutputDefinition(io_manager_key="inner_manager")])
    def my_solid(_):
        return "hello"

    @solid
    def my_solid_takes_input(_, x):
        assert x == "hello"
        return x

    @composite_solid
    def my_composite_solid():
        return my_solid_takes_input(my_solid())

    @pipeline(
        mode_defs=[ModeDefinition(name="default", resource_defs={"inner_manager": inner_manager})]
    )
    def my_pipeline():
        my_composite_solid()

    result = execute_pipeline(
        my_pipeline,
        run_config={
            "solids": {
                "my_composite_solid": {
                    "solids": {
                        "my_solid": {"outputs": {"result": {"output_suffix": "my_suffix"}}}
                    },
                }
            }
        },
    )

    assert result.success
    assert result.output_for_solid("my_composite_solid.my_solid") == "hello"
    assert (
        stored_dict.get((result.run_id, "my_composite_solid.my_solid", "result", "my_suffix"))
        == "hello"
    )
    return number_df.join(string_df, number_df.id == string_df.id, 'inner').drop(string_df.id)


def test_execute_typed_in_mem_lakehouse():
    lakehouse = TypedPySparkMemLakehouse()
    pipeline_result = execute_spark_lakehouse_build(
        tables=[NumberTable, StringTable, JoinTable], lakehouse=lakehouse
    )

    assert pipeline_result.success
    # Row field ordering varies on Python 3.5; compare as dicts
    assert (
        lakehouse.collected_tables['JoinTable'][0].asDict()
        == Row(id=1, number=2, string='23').asDict()
    )


# for dagit
typed_lakehouse_pipeline = construct_lakehouse_pipeline(
    name='typed_lakehouse_pipeline',
    lakehouse_tables=[NumberTable, StringTable, JoinTable],
    mode_defs=[
        ModeDefinition(
            resource_defs={
                'lakehouse': typed_pyspark_mem_lakehouse,
                'spark': spark_session_resource,
            }
        )
    ],
)
    ]
    quantities = [cereal['cups'] for cereal in cereals]
    reweights = [1.0 / float(quantity) for quantity in quantities]

    normalized_cereals = deepcopy(cereals)
    for idx in range(len(normalized_cereals)):
        cereal = normalized_cereals[idx]
        for column in columns_to_normalize:
            cereal[column] = float(cereal[column]) * reweights[idx]

    context.resources.warehouse.update_normalized_cereals(normalized_cereals)


@pipeline(
    mode_defs=[
        ModeDefinition(
            name='unittest',
            resource_defs={'warehouse': local_sqlite_warehouse_resource},
        ),
        ModeDefinition(
            name='dev',
            resource_defs={'warehouse': sqlalchemy_postgres_warehouse_resource},
        ),
    ]
)
def modes_pipeline():
    normalize_calories(read_csv())


if __name__ == '__main__':
    environment_dict = {
        'solids': {
            'read_csv': {
                'inputs': {
# pylint: disable=no-value-for-parameter
from dagster import execute_pipeline, ModeDefinition, solid, pipeline
from dagster_slack import slack_resource


@solid(required_resource_keys={'slack'})
def post_hello_message(context):
    context.resources.slack.chat.post_message(
        channel='#dagster', text='"Hello, World" from Dagster!'
    )


@pipeline(mode_defs=[ModeDefinition(resource_defs={'slack': slack_resource})])
def resources_pipeline():
    post_hello_message()


if __name__ == '__main__':
    execute_pipeline(
        resources_pipeline,
        environment_dict={
            'resources': {'slack': {'config': {'token': 'xoxp-1234123412341234-12341234-1234'}}}
        },
    )
    output_defs = kwargs.pop("output_defs", [OutputDefinition(is_required=False)])

    return dagstermill.define_dagstermill_op(
        name=name,
        notebook_path=path,
        output_notebook_name="notebook",
        output_defs=output_defs,
        **kwargs,
    )


default_mode_defs = [
    ModeDefinition(
        resource_defs={
            "output_notebook_io_manager": local_output_notebook_io_manager,
            "io_manager": fs_io_manager,
        }
    )
]


hello_world = test_nb_solid("hello_world", output_defs=[])


@pipeline(mode_defs=default_mode_defs)
def hello_world_pipeline():
    hello_world()


hello_world_op = test_nb_op(
    "hello_world_op",
    nb_test_path("hello_world"),
    load_data_to_database_from_spark,
    process_sfo_weather_data,
    q2_sfo_outbound_flights,
    s3_to_df,
    s3_to_dw_table,
    sfo_delays_by_destination,
    tickets_with_destination,
    westbound_delays,
)

test_mode = ModeDefinition(
    name='test',
    resource_defs={
        'spark': pyspark_resource,
        'db_info': redshift_db_info_resource,
        'tempfile': tempfile_resource,
        's3': s3_resource,
        'file_cache': fs_file_cache,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)

local_mode = ModeDefinition(
    name='local',
    resource_defs={
        'spark': pyspark_resource,
        's3': s3_resource,
        'db_info': postgres_db_info_resource,
        'tempfile': tempfile_resource,
        'file_cache': fs_file_cache,
    if context.solid_config["return_wrong_type"]:
        return string + string

    return int(string)


@pipeline(
    description=(
        "Demo pipeline that enables configurable types of errors thrown during pipeline "
        "execution, including solid execution errors, type errors, and resource "
        "initialization errors."
    ),
    mode_defs=[
        ModeDefinition(
            name="errorable_mode",
            resource_defs={
                "errorable_resource": define_errorable_resource(),
                "io_manager": errorable_io_manager,
            },
        ),
    ],
    preset_defs=[
        PresetDefinition.from_pkg_resources(
            "passing",
            pkg_resource_defs=[("dagster_test.toys.environments", "error.yaml")],
            mode="errorable_mode",
        )
    ],
    tags={"monster": "error"},
)
def error_monster():
@lambda_solid(input_defs=[InputDefinition('word')])
def count_letters(word):
    counts = defaultdict(int)
    for letter in word:
        counts[letter] += 1
    return dict(counts)


@lambda_solid()
def error_solid():
    raise Exception('Unusual error')


@pipeline(
    mode_defs=[
        ModeDefinition(
            system_storage_defs=s3_plus_default_storage_defs,
            resource_defs={'s3': s3_resource},
        )
    ]
)
def demo_pipeline():
    count_letters(multiply_the_word())


@pipeline(
    mode_defs=[
        ModeDefinition(
            system_storage_defs=gcs_plus_default_storage_defs,
            resource_defs={'gcs': gcs_resource},
        )
    ]
)
def demo_pipeline_gcs():
    count_letters(multiply_the_word())
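# Sketch of executing demo_pipeline with S3-backed system storage (not from the original
# file). The bucket name is a placeholder, and the config for multiply_the_word (a
# 'factor' field plus a 'word' input) is an assumption about the solid not shown above.
from dagster import execute_pipeline

result = execute_pipeline(
    demo_pipeline,
    environment_dict={
        'solids': {'multiply_the_word': {'config': {'factor': 2}, 'inputs': {'word': 'bar'}}},
        'storage': {'s3': {'config': {'s3_bucket': 'my-test-bucket'}}},
    },
)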
    process_sfo_weather_data,
    q2_sfo_outbound_flights,
    s3_to_df,
    s3_to_dw_table,
    sfo_delays_by_destination,
    tickets_with_destination,
    westbound_delays,
)

test_mode = ModeDefinition(
    name="test",
    resource_defs={
        "pyspark_step_launcher": no_step_launcher,
        "pyspark": pyspark_resource,
        "db_info": redshift_db_info_resource,
        "tempfile": tempfile_resource,
        "s3": s3_resource,
        "file_cache": fs_file_cache,
        "file_manager": local_file_manager,
    },
    intermediate_storage_defs=s3_plus_default_intermediate_storage_defs,
)

local_mode = ModeDefinition(
    name="local",
    resource_defs={
        "pyspark_step_launcher": no_step_launcher,
        "pyspark": pyspark_resource,
        "s3": s3_resource,
        "db_info": postgres_db_info_resource,
        "tempfile": tempfile_resource,
"inner_shape_string": String }), "permissive_complex_shape": Permissive(fields={ "inner_shape_array": Array(str), "inner_shape_string": String }), "noneable_complex_shape": Noneable( Shape( fields={ "inner_noneable_shape_array": Array(str), "inner_noneable_shape_string": String, })), }, ) def test_solid(_): return 1 @pipeline( mode_defs=[ModeDefinition(resource_defs={"my_resource": my_resource})]) def test_pipeline(): test_solid() @repository def experimental_repository(): return [test_pipeline, metrics_pipeline, rollup_pipeline ] + define_schedules()
def test_output_manager_with_failure():
    _called_input_manager = False
    _called_solid = False

    @output_manager
    def should_fail(_, _resource_config, _obj):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @input_manager
    def should_not_enter(_):
        _called_input_manager = True  # never runs; the upstream output manager fails first

    @solid(output_defs=[OutputDefinition(manager_key="should_fail")])
    def emit_str(_):
        return "emit"

    @solid(
        input_defs=[
            InputDefinition(name="_input_str", dagster_type=str, manager_key="should_not_enter")
        ]
    )
    def should_not_call(_, _input_str):
        _called_solid = True  # never runs either

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={"should_fail": should_fail, "should_not_enter": should_not_enter}
            )
        ]
    )
    def simple():
        should_not_call(emit_str())

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("emit_str").failure_data
        assert failure_data.error.cls_name == "Failure"
        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"

        assert not _called_input_manager and not _called_solid
def test_depends_on_adls2_resource_file_manager(storage_account, file_system):
    bar_bytes = b"bar"

    @solid(
        output_defs=[OutputDefinition(ADLS2FileHandle)],
        required_resource_keys={"file_manager"},
    )
    def emit_file(context):
        return context.resources.file_manager.write_data(bar_bytes)

    @solid(
        input_defs=[InputDefinition("file_handle", ADLS2FileHandle)],
        required_resource_keys={"file_manager"},
    )
    def accept_file(context, file_handle):
        local_path = context.resources.file_manager.copy_handle_to_local_temp(file_handle)
        assert isinstance(local_path, str)
        assert open(local_path, "rb").read() == bar_bytes

    adls2_fake_resource = FakeADLS2Resource(storage_account)
    adls2_fake_file_manager = ADLS2FileManager(
        adls2_client=adls2_fake_resource.adls2_client,
        file_system=file_system,
        prefix="some-prefix",
    )

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={
                    "adls2": ResourceDefinition.hardcoded_resource(adls2_fake_resource),
                    "file_manager": ResourceDefinition.hardcoded_resource(
                        adls2_fake_file_manager
                    ),
                }
            )
        ]
    )
    def adls2_file_manager_test():
        accept_file(emit_file())

    result = execute_pipeline(
        adls2_file_manager_test,
        run_config={
            "resources": {"file_manager": {"config": {"adls2_file_system": file_system}}}
        },
    )

    assert result.success

    keys_in_bucket = set(adls2_fake_resource.adls2_client.file_systems[file_system].keys())
    assert len(keys_in_bucket) == 1

    file_key = list(keys_in_bucket)[0]
    comps = file_key.split("/")
    assert "/".join(comps[:-1]) == "some-prefix"
    assert uuid.UUID(comps[-1])