def define_airline_demo_ingest_pipeline():
    solids = [
        canonicalize_column_names,
        download_from_s3_to_bytes,
        ingest_csv_to_spark,
        load_data_to_database_from_spark,
        process_q2_data,
        process_sfo_weather_data,
        subsample_spark_dataset,
        unzip_file,
    ]
    dependencies = {
        SolidInstance('download_from_s3_to_bytes', alias='download_april_on_time_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_may_on_time_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_june_on_time_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_master_cord_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_q2_coupon_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_q2_market_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_q2_ticket_data'): {},
        SolidInstance('download_from_s3_to_bytes', alias='download_q2_sfo_weather'): {},
        SolidInstance('unzip_file', alias='unzip_april_on_time_data'): {
            'archive_file': DependencyDefinition('download_april_on_time_data')
        },
        SolidInstance('unzip_file', alias='unzip_may_on_time_data'): {
            'archive_file': DependencyDefinition('download_may_on_time_data')
        },
        SolidInstance('unzip_file', alias='unzip_june_on_time_data'): {
            'archive_file': DependencyDefinition('download_june_on_time_data')
        },
        SolidInstance('unzip_file', alias='unzip_master_cord_data'): {
            'archive_file': DependencyDefinition('download_master_cord_data')
        },
        SolidInstance('unzip_file', alias='unzip_q2_coupon_data'): {
            'archive_file': DependencyDefinition('download_q2_coupon_data')
        },
        SolidInstance('unzip_file', alias='unzip_q2_market_data'): {
            'archive_file': DependencyDefinition('download_q2_market_data')
        },
        SolidInstance('unzip_file', alias='unzip_q2_ticket_data'): {
            'archive_file': DependencyDefinition('download_q2_ticket_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_april_on_time_data'): {
            'input_csv_file': DependencyDefinition('unzip_april_on_time_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_may_on_time_data'): {
            'input_csv_file': DependencyDefinition('unzip_may_on_time_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_june_on_time_data'): {
            'input_csv_file': DependencyDefinition('unzip_june_on_time_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_q2_sfo_weather'): {
            'input_csv_file': DependencyDefinition('download_q2_sfo_weather')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_q2_coupon_data'): {
            'input_csv_file': DependencyDefinition('unzip_q2_coupon_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_q2_market_data'): {
            'input_csv_file': DependencyDefinition('unzip_q2_market_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_q2_ticket_data'): {
            'input_csv_file': DependencyDefinition('unzip_q2_ticket_data')
        },
        SolidInstance('ingest_csv_to_spark', alias='ingest_master_cord_data'): {
            'input_csv_file': DependencyDefinition('unzip_master_cord_data')
        },
        'process_q2_data': {
            'april_data': DependencyDefinition('ingest_april_on_time_data'),
            'may_data': DependencyDefinition('ingest_may_on_time_data'),
            'june_data': DependencyDefinition('ingest_june_on_time_data'),
            'master_cord_data': DependencyDefinition('ingest_master_cord_data'),
        },
        SolidInstance('subsample_spark_dataset', alias='subsample_q2_ticket_data'): {
            'data_frame': DependencyDefinition('ingest_q2_ticket_data')
        },
        SolidInstance('subsample_spark_dataset', alias='subsample_q2_market_data'): {
            'data_frame': DependencyDefinition('ingest_q2_market_data')
        },
        SolidInstance('subsample_spark_dataset', alias='subsample_q2_coupon_data'): {
            'data_frame': DependencyDefinition('ingest_q2_coupon_data')
        },
        'process_sfo_weather_data': {
            'sfo_weather_data': DependencyDefinition('ingest_q2_sfo_weather')
        },
        SolidInstance('canonicalize_column_names', alias='canonicalize_q2_coupon_data'): {
            'data_frame': DependencyDefinition('subsample_q2_coupon_data')
        },
        SolidInstance('canonicalize_column_names', alias='canonicalize_q2_market_data'): {
            'data_frame': DependencyDefinition('subsample_q2_market_data')
        },
        SolidInstance('canonicalize_column_names', alias='canonicalize_q2_ticket_data'): {
            'data_frame': DependencyDefinition('subsample_q2_ticket_data')
        },
        SolidInstance('load_data_to_database_from_spark', alias='load_q2_on_time_data'): {
            'data_frame': DependencyDefinition('process_q2_data')
        },
        SolidInstance('load_data_to_database_from_spark', alias='load_q2_coupon_data'): {
            'data_frame': DependencyDefinition('canonicalize_q2_coupon_data')
        },
        SolidInstance('load_data_to_database_from_spark', alias='load_q2_market_data'): {
            'data_frame': DependencyDefinition('canonicalize_q2_market_data')
        },
        SolidInstance('load_data_to_database_from_spark', alias='load_q2_ticket_data'): {
            'data_frame': DependencyDefinition('canonicalize_q2_ticket_data')
        },
        SolidInstance('load_data_to_database_from_spark', alias='load_q2_sfo_weather'): {
            'data_frame': DependencyDefinition('process_sfo_weather_data')
        },
    }
    return PipelineDefinition(
        name="airline_demo_ingest_pipeline",
        solids=solids,
        dependencies=dependencies,
        mode_definitions=[test_mode, local_mode, prod_mode],
        preset_definitions=[
            PresetDefinition(
                name='local_fast',
                mode='local',
                environment_files=[
                    file_relative_path(__file__, 'environments/local_base.yaml'),
                    file_relative_path(__file__, 'environments/local_fast_ingest.yaml'),
                ],
            ),
            PresetDefinition(
                name='local_full',
                mode='local',
                environment_files=[
                    file_relative_path(__file__, 'environments/local_base.yaml'),
                    file_relative_path(__file__, 'environments/local_full_ingest.yaml'),
                ],
            ),
        ],
    )
    return string + string
    return int(string)


@pipeline(
    mode_defs=[
        ModeDefinition(
            name='errorable_mode',
            resource_defs={'errorable_resource': define_errorable_resource()},
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'passing',
            environment_files=[file_relative_path(__file__, 'environments/error.yaml')],
            mode='errorable_mode',
        )
    ],
)
def error_monster():
    start = emit_num.alias('start')()
    middle = num_to_str.alias('middle')(num=start)
    str_to_num.alias('end')(string=middle)


if __name__ == '__main__':
    result = execute_pipeline(
        error_monster,
        {
    return people.count()


emr_mode = ModeDefinition(
    name='emr',
    resource_defs={
        'pyspark_step_launcher': emr_pyspark_step_launcher,
        'pyspark': pyspark_resource,
        's3': s3_resource,
    },
    intermediate_storage_defs=s3_plus_default_intermediate_storage_defs,
)

emr_preset = PresetDefinition.from_pkg_resources(
    name='emr',
    mode='emr',
    pkg_resource_defs=[
        ('emr_pyspark', 'prod_resources.yaml'),
        ('emr_pyspark', 's3_storage.yaml'),
    ],
)

local_mode = ModeDefinition(
    name='local',
    resource_defs={'pyspark_step_launcher': no_step_launcher, 'pyspark': pyspark_resource},
)


@pipeline(
    mode_defs=[emr_mode, local_mode],
    preset_defs=[emr_preset],
@pipeline(
    mode_defs=[
        ModeDefinition(name="prod", resource_defs={"db": postgres, "slack": slack_resource}),
        ModeDefinition(name="dev", resource_defs={"db": postgres, "slack": mock_slack_resource}),
    ],
    preset_defs=[
        PresetDefinition(
            name="dev",
            run_config={
                "solids": {
                    "download_file": {
                        "config": {"url": CEREALS_DATASET_URL, "target_path": "cereals.csv"}
                    },
                    "post_plot_to_slack": {"config": {"channels": ["foo_channel"]}},
                },
                "resources": {
                    "db": {
                        "config": {
                            "db_url": "postgresql://*****:*****@localhost:5432/dbt_example"
                        }
                    },
                    "slack": {"config": {"token": "nonce"}},
                },
            },
            mode="dev",
        ),
        PresetDefinition(
            name="prod",
            run_config={
                "solids": {
                    "download_file": {
                        "config": {"url": CEREALS_DATASET_URL, "target_path": "cereals.csv"}
    sum_df['sum'] = sum_df['num1'] + sum_df['num2']
    return sum_df


@solid
def sum_sq_solid(_, sum_df: DataFrame) -> DataFrame:
    sum_sq_df = sum_df.copy()
    sum_sq_df['sum_sq'] = sum_df['sum'] ** 2
    return sum_sq_df


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            'test',
            environment_files=[
                file_relative_path(__file__, 'environments/pandas_hello_world_test.yaml')
            ],
        ),
        PresetDefinition.from_files(
            'prod',
            environment_files=[
                file_relative_path(__file__, 'environments/pandas_hello_world_prod.yaml')
            ],
        ),
    ]
)
def pandas_hello_world_pipeline():
    return sum_sq_solid(sum_solid())
    input_defs=[InputDefinition("sum_sq_solid", dagster_pd.DataFrame)],
    output_def=OutputDefinition(dagster_pd.DataFrame),
)
def always_fails_solid(**_kwargs):
    raise Exception("I am a programmer and I make error")


@pipeline
def pandas_hello_world_fails():
    always_fails_solid(sum_sq_solid=sum_sq_solid(sum_df=sum_solid()))


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            "test",
            config_files=[
                file_relative_path(__file__, "environments/pandas_hello_world_test.yaml")
            ],
        ),
        PresetDefinition.from_files(
            "prod",
            config_files=[
                file_relative_path(__file__, "environments/pandas_hello_world_prod.yaml")
            ],
        ),
    ]
)
def pandas_hello_world():
    sum_sq_solid(sum_solid())
                Field(Int),
                'field_six_nullable_int_list': Field(List[Optional[Int]], is_optional=True),
            })),
        })),
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            name='prod',
            environment_files=[
                script_relative_path('../environments/csv_hello_world_prod.yaml')
            ],
        ),
        PresetDefinition.from_files(
            name='test',
            environment_files=[
                script_relative_path('../environments/csv_hello_world_test.yaml')
            ],
        ),
        PresetDefinition(
            name='test_inline',
            environment_dict={
                'solids': {
                    'sum_solid': {
                        'inputs': {
                            'num': script_relative_path("../data/num.csv")
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='default',
            resource_defs={
                's3': s3_resource,
                'snowflake': snowflake_resource,
                'spark': spark_resource,
            },
        )
    ],
    preset_defs=[
        PresetDefinition.from_pkg_resources(
            'default',
            pkg_resource_defs=[
                ('dagster_examples.event_pipeline_demo.environments', 'default.yaml'),
            ],
        )
    ],
)
def event_ingest_pipeline():
    event_ingest = create_spark_solid(
        name='event_ingest',
        main_class='io.dagster.events.EventPipeline',
        description='Ingest events from JSON to Parquet',
    )

    @solid(input_defs=[InputDefinition('start', Nothing)], required_resource_keys={'snowflake'})
    def snowflake_load(context):
        # TODO: express dependency of this solid on event_ingest
        context.resources.snowflake.load_table_from_local_parquet(
@pipeline(
    mode_defs=[
        ModeDefinition(
            name='local',
            resource_defs={'transporter': local_transporter, 'volume': temporary_directory_mount},
        ),
        ModeDefinition(
            name='production',
            resource_defs={'transporter': production_transporter, 'volume': mount},
        ),
    ],
    preset_defs=[
        PresetDefinition.from_files(
            'dev',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/bay_bike_pipeline_base.yaml'),
                file_relative_path(__file__, 'environments/bay_bike_pipeline_dev.yaml'),
            ],
        ),
        PresetDefinition.from_files(
            'production',
            mode='production',
            environment_files=[
                file_relative_path(__file__, 'environments/bay_bike_pipeline_base.yaml'),
                file_relative_path(__file__, 'environments/bay_bike_pipeline_production.yaml'),
            ],
        ),
    ],
)
def extract_monthly_bay_bike_pipeline():
    upload_consolidated_csv = upload_file_to_bucket.alias('upload_consolidated_csv')
resource_defs={"ge_data_context": ge_data_context}) ], preset_defs=[ PresetDefinition( "sample_preset_success", mode="basic", run_config={ "resources": { "ge_data_context": { "config": { "ge_root_dir": file_relative_path(__file__, "./great_expectations") } } }, "solids": { "read_in_datafile": { "inputs": { "csv_path": { "value": file_relative_path(__file__, "./succeed.csv") } } } }, }, ), PresetDefinition( "sample_preset_fail", mode="basic",
    with open(file_relative_path(__file__, 'sql/explore_visits_by_hour.sql'), 'r') as f:
        query = f.read()

    return bq_solid_for_queries([query]).alias('explore_visits_by_hour_internal')(start=start)


@pipeline(
    mode_defs=[
        ModeDefinition(
            name='default',
            resource_defs={'bigquery': bigquery_resource, 'dataproc': dataproc_resource},
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            name='default',
            mode='default',
            environment_files=[file_relative_path(__file__, 'environments/default.yaml')],
        )
    ],
)
def gcp_pipeline():
    return explore_visits_by_hour(bq_load_events(events_dataproc()))
    if context.solid_config['return_wrong_type']:
        return string + string

    return int(string)


@pipeline(
    mode_defs=[
        ModeDefinition(
            name='errorable_mode',
            resource_defs={'errorable_resource': define_errorable_resource()},
        )
    ],
    preset_defs=[
        PresetDefinition.from_pkg_resources(
            'passing',
            pkg_resource_defs=[('dagster_examples.toys.environments', 'error.yaml')],
            mode='errorable_mode',
        )
    ],
)
def error_monster():
    start = emit_num.alias('start')()
    middle = num_to_str.alias('middle')(num=start)
    str_to_num.alias('end')(string=middle)


if __name__ == '__main__':
    result = execute_pipeline(
        error_monster,
        {
            'solids': {
        ),
        ModeDefinition(
            name="dev",
            resource_defs={"warehouse": sqlalchemy_postgres_warehouse_resource},
        ),
    ],
    preset_defs=[
        PresetDefinition(
            "unittest",
            run_config={"resources": {"warehouse": {"config": {"conn_str": ":memory:"}}}},
            mode="unittest",
        ),
        PresetDefinition.from_files(
            "dev",
            config_files=[
                file_relative_path(__file__, "presets_dev_warehouse.yaml"),
                file_relative_path(__file__, "presets_csv.yaml"),
            ],
            mode="dev",
        ),
    ],
"tempfile": tempfile_resource, "file_cache": s3_file_cache, "file_manager": s3_file_manager, }, intermediate_storage_defs=s3_plus_default_intermediate_storage_defs, ) @pipeline( # ordered so the local is first and therefore the default mode_defs=[local_mode, test_mode, prod_mode], preset_defs=[ PresetDefinition.from_pkg_resources( name="local_fast", mode="local", pkg_resource_defs=[ ("airline_demo.environments", "local_base.yaml"), ("airline_demo.environments", "local_fast_ingest.yaml"), ], ), PresetDefinition.from_pkg_resources( name="local_full", mode="local", pkg_resource_defs=[ ("airline_demo.environments", "local_base.yaml"), ("airline_demo.environments", "local_full_ingest.yaml"), ], ), PresetDefinition.from_pkg_resources( name="prod_fast", mode="prod", pkg_resource_defs=[
        'tempfile': tempfile_resource,
        'file_cache': s3_file_cache,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)


@pipeline(
    # ordered so the local is first and therefore the default
    mode_defs=[local_mode, test_mode, prod_mode],
    preset_defs=[
        PresetDefinition.from_pkg_resources(
            name='local_fast',
            mode='local',
            pkg_resource_defs=[
                ('dagster_examples.airline_demo.environments', 'local_base.yaml'),
                ('dagster_examples.airline_demo.environments', 'local_fast_ingest.yaml'),
            ],
        ),
        PresetDefinition.from_pkg_resources(
            name='local_full',
            mode='local',
            pkg_resource_defs=[
                ('dagster_examples.airline_demo.environments', 'local_base.yaml'),
                ('dagster_examples.airline_demo.environments', 'local_full_ingest.yaml'),
            ],
        ),
        PresetDefinition.from_pkg_resources(
def test_presets():
    @solid(config={'error': Bool})
    def can_fail(context):
        if context.solid_config['error']:
            raise Exception('I did an error')
        return 'cool'

    @lambda_solid
    def always_fail():
        raise Exception('I always do this')

    pipeline = PipelineDefinition(
        name='simple',
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                'passing',
                environment_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_subset=['can_fail'],
            ),
            PresetDefinition.from_files(
                'passing_overide_to_fail',
                environment_files=[file_relative_path(__file__, 'pass_env.yaml')],
                solid_subset=['can_fail'],
            ).with_additional_config({'solids': {'can_fail': {'config': {'error': True}}}}),
            PresetDefinition(
                'passing_direct_dict',
                environment_dict={'solids': {'can_fail': {'config': {'error': False}}}},
                solid_subset=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_1',
                environment_files=[file_relative_path(__file__, 'fail_env.yaml')],
                solid_subset=['can_fail'],
            ),
            PresetDefinition.from_files(
                'failing_2',
                environment_files=[file_relative_path(__file__, 'pass_env.yaml')],
            ),
            PresetDefinition(
                'subset',
                solid_subset=['can_fail'],
            ),
        ],
    )

    with pytest.raises(DagsterInvalidDefinitionError):
        PresetDefinition.from_files(
            'invalid_1',
            environment_files=[file_relative_path(__file__, 'not_a_file.yaml')],
        )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            'invalid_2',
            environment_files=[file_relative_path(__file__, 'test_repository_definition.py')],
        )

    assert execute_pipeline(pipeline, preset='passing').success
    assert execute_pipeline(pipeline, preset='passing_direct_dict').success
    assert execute_pipeline(pipeline, preset='failing_1', raise_on_error=False).success == False
    assert execute_pipeline(pipeline, preset='failing_2', raise_on_error=False).success == False

    with pytest.raises(DagsterInvariantViolationError, match='Could not find preset'):
        execute_pipeline(pipeline, preset='not_failing', raise_on_error=False)

    assert (
        execute_pipeline(pipeline, preset='passing_overide_to_fail', raise_on_error=False).success
        == False
    )

    assert execute_pipeline(
        pipeline,
        preset='passing',
        environment_dict={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success

    with pytest.raises(
        check.CheckError,
        match=re.escape(
            'The environment set in preset \'passing\' does not agree with the environment passed '
            'in the `environment_dict` argument.'
        ),
    ):
        execute_pipeline(
            pipeline,
            preset='passing',
            environment_dict={'solids': {'can_fail': {'config': {'error': True}}}},
        )

    assert execute_pipeline(
        pipeline,
        preset='subset',
        environment_dict={'solids': {'can_fail': {'config': {'error': False}}}},
    ).success
def test_empty_preset():
    empty_preset = PresetDefinition("empty")
    assert empty_preset.run_config == None
    assert empty_preset.get_environment_yaml() == "{}\n"
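# A minimal sketch (not part of the original tests) extending test_empty_preset: it assumes the
# same PresetDefinition API shown above, where `run_config` is stored as passed and
# `get_environment_yaml()` serializes it to YAML. The test name and config values are
# illustrative only.
def test_preset_with_inline_run_config():
    preset = PresetDefinition(
        "configured",
        run_config={"solids": {"can_fail": {"config": {"error": False}}}},
    )
    assert preset.run_config == {"solids": {"can_fail": {"config": {"error": False}}}}
    # the YAML rendering should contain the top-level 'solids' key from the run config
    assert "solids:" in preset.get_environment_yaml()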
@solid
def save_metrics(context, data_path):
    context.log.info("Saving metrics to path {data_path}".format(data_path=data_path))


@pipeline(
    preset_defs=[
        PresetDefinition(
            name="test",
            environment_dict={
                "solids": {
                    "save_metrics": {
                        "inputs": {"data_path": {"value": "s3://bucket-name/test_data"}}
                    }
                }
            },
        ),
    ],
)
def metrics_pipeline():
    save_metrics()


@solid
def rollup_data(context, data_path):
    context.log.info("Rolling up data from path {data_path}".format(data_path=data_path))
@pipeline(
    description=(
        "Demo fork-shaped pipeline that has two-path parallel structure of solids."
    ),
    preset_defs=[
        PresetDefinition(
            "sleep_failed",
            {
                "intermediate_storage": {"filesystem": {}},
                "execution": {"multiprocess": {}},
                "solids": {"root": {"config": {"sleep_secs": [-10, 30]}}},
            },
        ),
        PresetDefinition(
            "sleep",
            {
                "intermediate_storage": {"filesystem": {}},
                "execution": {
        'tempfile': tempfile_resource,
        'file_cache': s3_file_cache,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)


@pipeline(
    # ordered so the local is first and therefore the default
    mode_defs=[local_mode, test_mode, prod_mode],
    preset_defs=[
        PresetDefinition(
            name='local_fast',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/local_base.yaml'),
                file_relative_path(__file__, 'environments/local_fast_ingest.yaml'),
            ],
        ),
        PresetDefinition(
            name='local_full',
            mode='local',
            environment_files=[
                file_relative_path(__file__, 'environments/local_base.yaml'),
                file_relative_path(__file__, 'environments/local_full_ingest.yaml'),
            ],
        ),
    ],
)
    time.sleep(0.1)
    if (context.retry_number + 1) >= context.solid_config["work_on_attempt"]:
        return "success"
    else:
        raise RetryRequested(
            max_retries=context.solid_config["max_retries"],
            seconds_to_wait=context.solid_config["delay"],
        )


@pipeline(
    preset_defs=[
        PresetDefinition(
            name="pass_after_retry",
            run_config={
                "solids": {
                    "retry_solid": {
                        "config": {
                            "delay": 0.2,
                            "work_on_attempt": 2,
                            "max_retries": 1,
                        }
                    }
                }
            },
        )
    ]
)
def retry_pipeline():
    echo(retry_solid())
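# Hypothetical usage sketch (not part of the snippet above): executing retry_pipeline with its
# "pass_after_retry" preset via execute_pipeline, the same entry point used with presets in the
# tests elsewhere in this collection. With work_on_attempt=2 and max_retries=1, the solid should
# succeed on its first retry.
if __name__ == "__main__":
    from dagster import execute_pipeline

    result = execute_pipeline(retry_pipeline, preset="pass_after_retry")
    assert result.success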
@pipeline(
    mode_defs=[
        ModeDefinition(
            intermediate_storage_defs=s3_plus_default_intermediate_storage_defs,
            resource_defs={"s3": s3_resource},
            executor_defs=default_executors + [celery_k8s_job_executor],
        )
    ],
    preset_defs=[
        PresetDefinition.from_files(
            "example",
            config_files=[
                file_relative_path(__file__, os.path.join("..", "run_config", "celery_k8s.yaml")),
                file_relative_path(__file__, os.path.join("..", "run_config", "pipeline.yaml")),
            ],
            mode="default",
        ),
    ],
)
def example_pipe():
    count_letters(multiply_the_word())


@repository
def example_repo():
    return [example_pipe]
def test_presets():
    @solid(config_schema={"error": Bool})
    def can_fail(context):
        if context.solid_config["error"]:
            raise Exception("I did an error")
        return "cool"

    @lambda_solid
    def always_fail():
        raise Exception("I always do this")

    pipe = PipelineDefinition(
        name="simple",
        solid_defs=[can_fail, always_fail],
        preset_defs=[
            PresetDefinition.from_files(
                "passing",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
                solid_selection=["can_fail"],
            ),
            PresetDefinition.from_files(
                "passing_overide_to_fail",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
                solid_selection=["can_fail"],
            ).with_additional_config({"solids": {"can_fail": {"config": {"error": True}}}}),
            PresetDefinition(
                "passing_direct_dict",
                run_config={"solids": {"can_fail": {"config": {"error": False}}}},
                solid_selection=["can_fail"],
            ),
            PresetDefinition.from_files(
                "failing_1",
                config_files=[file_relative_path(__file__, "fail_env.yaml")],
                solid_selection=["can_fail"],
            ),
            PresetDefinition.from_files(
                "failing_2",
                config_files=[file_relative_path(__file__, "pass_env.yaml")],
            ),
            PresetDefinition(
                "subset",
                solid_selection=["can_fail"],
            ),
        ],
    )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            "invalid_1",
            config_files=[file_relative_path(__file__, "not_a_file.yaml")],
        )

    with pytest.raises(DagsterInvariantViolationError):
        PresetDefinition.from_files(
            "invalid_2",
            config_files=[file_relative_path(__file__, "test_repository_definition.py")],
        )

    assert execute_pipeline(pipe, preset="passing").success
    assert execute_pipeline(pipe, preset="passing_direct_dict").success
    assert execute_pipeline(pipe, preset="failing_1", raise_on_error=False).success == False
    assert execute_pipeline(pipe, preset="failing_2", raise_on_error=False).success == False

    with pytest.raises(DagsterInvariantViolationError, match="Could not find preset"):
        execute_pipeline(pipe, preset="not_failing", raise_on_error=False)

    assert (
        execute_pipeline(pipe, preset="passing_overide_to_fail", raise_on_error=False).success
        == False
    )

    assert execute_pipeline(
        pipe,
        preset="passing",
        run_config={"solids": {"can_fail": {"config": {"error": False}}}},
    ).success

    with pytest.raises(
        check.CheckError,
        match=re.escape(
            "The environment set in preset 'passing' does not agree with the environment passed "
            "in the `run_config` argument."
        ),
    ):
        execute_pipeline(
            pipe,
            preset="passing",
            run_config={"solids": {"can_fail": {"config": {"error": True}}}},
        )

    assert execute_pipeline(
        pipe,
        preset="subset",
        run_config={"solids": {"can_fail": {"config": {"error": False}}}},
    ).success
@pipeline(
    description=(
        "Demo pipeline that enables configurable types of errors thrown during pipeline execution, "
        "including solid execution errors, type errors, and resource initialization errors."
    ),
    mode_defs=[
        ModeDefinition(
            name="errorable_mode",
            resource_defs={"errorable_resource": define_errorable_resource()},
        )
    ],
    preset_defs=[
        PresetDefinition.from_pkg_resources(
            "passing",
            pkg_resource_defs=[("dagster_test.toys.environments", "error.yaml")],
            mode="errorable_mode",
        )
    ],
)
def error_monster():
    start = emit_num.alias("start")()
    middle = num_to_str.alias("middle")(num=start)
    str_to_num.alias("end")(string=middle)


if __name__ == "__main__":
    result = execute_pipeline(
        error_monster,
        {
            "solids": {
                'field_five_int': Int,
                'field_six_nullable_int_list': Field([Noneable(int)], is_required=False),
            },
        },
    )
    def a_solid_with_multilayered_config(_):
        return None

    return a_solid_with_multilayered_config()


@pipeline(
    preset_defs=[
        PresetDefinition.from_files(
            name='prod',
            environment_files=[
                file_relative_path(__file__, '../environments/csv_hello_world_prod.yaml')
            ],
        ),
        PresetDefinition.from_files(
            name='test',
            environment_files=[
                file_relative_path(__file__, '../environments/csv_hello_world_test.yaml')
            ],
        ),
        PresetDefinition(
            name='test_inline',
            run_config={
                'solids': {
                    'sum_solid': {
                        'inputs': {'num': file_relative_path(__file__, '../data/num.csv')}
                    }
from dagster import ModeDefinition, PresetDefinition, RepositoryDefinition, pipeline, solid

mode = ModeDefinition(
    name='prod',
    resource_defs={
        'pyspark_step_launcher': emr_pyspark_step_launcher,
        'pyspark': pyspark_resource,
        's3': s3_resource,
    },
    system_storage_defs=s3_plus_default_storage_defs,
)

preset = PresetDefinition.from_files(
    name='prod',
    mode='prod',
    environment_files=['prod_resources.yaml', 's3_storage.yaml'],
)


@solid(required_resource_keys={'pyspark_step_launcher'})
def hello(_):
    return 1


@pipeline(
    mode_defs=[mode],
    preset_defs=[preset],
)
def my_pipeline():
    hello()
def define_multi_mode_with_resources_pipeline():
    # API red alert. One has to wrap a type in Field because it is callable
    @resource(config=Int)
    def adder_resource(init_context):
        return lambda x: x + init_context.resource_config

    @resource(config=Int)
    def multer_resource(init_context):
        return lambda x: x * init_context.resource_config

    @resource(config={'num_one': Int, 'num_two': Int})
    def double_adder_resource(init_context):
        return (
            lambda x: x
            + init_context.resource_config['num_one']
            + init_context.resource_config['num_two']
        )

    @solid(required_resource_keys={'op'})
    def apply_to_three(context):
        return context.resources.op(3)

    return PipelineDefinition(
        name='multi_mode_with_resources',
        solid_defs=[apply_to_three],
        mode_defs=[
            ModeDefinition(name='add_mode', resource_defs={'op': adder_resource}),
            ModeDefinition(name='mult_mode', resource_defs={'op': multer_resource}),
            ModeDefinition(
                name='double_adder_mode',
                resource_defs={'op': double_adder_resource},
                description='Mode that adds two numbers to thing',
            ),
        ],
        preset_defs=[
            PresetDefinition.from_files(
                'add',
                mode='add_mode',
                environment_files=[
                    file_relative_path(
                        __file__, './environments/multi_mode_with_resources/add_mode.yaml'
                    )
                ],
            ),
            PresetDefinition(
                'multiproc',
                mode='add_mode',
                environment_dict={
                    'resources': {'op': {'config': 2}},
                    'execution': {'multiprocess': {}},
                    'storage': {'filesystem': {}},
                },
            ),
        ],
    )
@lambda_solid
def do_something():
    return 1


@lambda_solid
def do_input(x):
    return x


@pipeline(
    name="foo",
    preset_defs=[
        PresetDefinition(name="test", tags={"foo": "bar"}),
    ],
)
def foo_pipeline():
    do_input(do_something())


def define_foo_pipeline():
    return foo_pipeline


@pipeline(name="baz", description="Not much tbh")
def baz_pipeline():
    do_input()
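# Hypothetical usage sketch (not part of the snippet above): the "test" preset on foo_pipeline
# carries only tags and no run config, so the pipeline can be executed with it directly.
# execute_pipeline with a preset name is the same call used in the preset tests in this
# collection.
if __name__ == "__main__":
    from dagster import execute_pipeline

    result = execute_pipeline(foo_pipeline, preset="test")
    assert result.success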
"field_six_nullable_int_list": Field([Noneable(int)], is_required=False), }, }, ) def a_solid_with_multilayered_config(_): return None a_solid_with_multilayered_config() @pipeline(preset_defs=[ PresetDefinition.from_files( name="prod", config_files=[ file_relative_path(__file__, "../environments/csv_hello_world_prod.yaml") ], ), PresetDefinition.from_files( name="test", config_files=[ file_relative_path(__file__, "../environments/csv_hello_world_test.yaml") ], ), PresetDefinition( name="test_inline", run_config={ "solids": { "sum_solid": {
def define_airline_demo_warehouse_pipeline():
    return PipelineDefinition(
        name="airline_demo_warehouse_pipeline",
        solids=[
            average_sfo_outbound_avg_delays_by_destination,
            delays_by_geography,
            delays_vs_fares,
            delays_vs_fares_nb,
            eastbound_delays,
            q2_sfo_outbound_flights,
            sfo_delays_by_destination,
            tickets_with_destination,
            put_object_to_s3_bytes,
            westbound_delays,
        ],
        dependencies={
            'q2_sfo_outbound_flights': {},
            'tickets_with_destination': {},
            'westbound_delays': {},
            'eastbound_delays': {},
            'average_sfo_outbound_avg_delays_by_destination': {
                'q2_sfo_outbound_flights': DependencyDefinition('q2_sfo_outbound_flights')
            },
            'delays_vs_fares': {
                'tickets_with_destination': DependencyDefinition('tickets_with_destination'),
                'average_sfo_outbound_avg_delays_by_destination': DependencyDefinition(
                    'average_sfo_outbound_avg_delays_by_destination'
                ),
            },
            'fares_vs_delays': {'table_name': DependencyDefinition('delays_vs_fares')},
            'sfo_delays_by_destination': {
                'table_name': DependencyDefinition(
                    'average_sfo_outbound_avg_delays_by_destination'
                )
            },
            'delays_by_geography': {
                'eastbound_delays': DependencyDefinition('eastbound_delays'),
                'westbound_delays': DependencyDefinition('westbound_delays'),
            },
            SolidInstance('put_object_to_s3_bytes', alias='upload_outbound_avg_delay_pdf_plots'): {
                'file_obj': DependencyDefinition('sfo_delays_by_destination')
            },
            SolidInstance('put_object_to_s3_bytes', alias='upload_delays_vs_fares_pdf_plots'): {
                'file_obj': DependencyDefinition('fares_vs_delays')
            },
            SolidInstance('put_object_to_s3_bytes', alias='upload_delays_by_geography_pdf_plots'): {
                'file_obj': DependencyDefinition('delays_by_geography')
            },
        },
        mode_definitions=[test_mode, local_mode, prod_mode],
        preset_definitions=[
            PresetDefinition(
                name='local',
                mode='local',
                environment_files=[
                    file_relative_path(__file__, 'environments/local_base.yaml'),
                    file_relative_path(__file__, 'environments/local_warehouse.yaml'),
                ],
            )
        ],
    )