def define_example_repository():
    """Build the 'notebook_repo' repository from its pipeline-definition callables."""
    # Pipelines are registered as callables (pipeline_dict), presumably so they
    # are constructed on demand rather than at import time — see RepositoryDefinition.
    pipelines = {
        'bad_kernel': define_bad_kernel_pipeline,
        'error_pipeline': define_error_pipeline,
        'hello_world_pipeline': define_hello_world_pipeline,
        'hello_world_config_pipeline': define_hello_world_config_pipeline,
        'hello_world_with_output_pipeline': define_hello_world_with_output_pipeline,
        'hello_logging_pipeline': define_hello_logging_pipeline,
        'resource_pipeline': define_resource_pipeline,
        'resource_with_exception_pipeline': define_resource_with_exception_pipeline,
        'test_add_pipeline': define_add_pipeline,
        'test_notebook_dag': define_test_notebook_dag_pipeline,
        'tutorial_pipeline': define_tutorial_pipeline,
    }
    return RepositoryDefinition(name='notebook_repo', pipeline_dict=pipelines)
def define_demo_repo():
    """Assemble the 'demo_repository' from example pipelines across several packages."""
    # Lazy import here to prevent deps issues
    from dagster import RepositoryDefinition
    from dagster_examples.toys.error_monster import error_monster
    from dagster_examples.toys.sleepy import sleepy_pipeline
    from dagster_examples.toys.log_spew import log_spew
    from dagster_examples.toys.stdout_spew import stdout_spew_pipeline
    from dagster_examples.toys.many_events import many_events
    from dagster_examples.toys.composition import composition
    from dagster_examples.toys.pandas_hello_world import (
        pandas_hello_world_pipeline,
        pandas_hello_world_pipeline_no_config,
    )
    from dagster_examples.airline_demo.pipelines import (
        airline_demo_ingest_pipeline,
        airline_demo_warehouse_pipeline,
    )
    from dagster_examples.event_pipeline_demo.pipelines import event_ingest_pipeline
    from dagster_examples.pyspark_pagerank.pyspark_pagerank_pipeline import pyspark_pagerank
    from dagster_pandas.examples import papermill_pandas_hello_world_pipeline
    from dagster_examples.jaffle_dbt.jaffle import jaffle_pipeline

    demo_pipelines = [
        pandas_hello_world_pipeline_no_config,
        pandas_hello_world_pipeline,
        sleepy_pipeline,
        error_monster,
        log_spew,
        many_events,
        composition,
        airline_demo_ingest_pipeline,
        airline_demo_warehouse_pipeline,
        event_ingest_pipeline,
        pyspark_pagerank,
        papermill_pandas_hello_world_pipeline,
        jaffle_pipeline,
        stdout_spew_pipeline,
    ]
    return RepositoryDefinition(name='demo_repository', pipeline_defs=demo_pipelines)
def test_get_schedule_stats(self, storage):
    """Tick counts per status are aggregated by get_schedule_tick_stats_by_schedule."""
    assert storage
    repository = RepositoryDefinition("repository_name")
    current_time = time.time()
    error = SerializableErrorInfo(message="Error", stack=[], cls_name="TestError")

    # Create ticks: 2 started (default status), 3 succeeded, 4 skipped, 5 failed.
    # Loop variables that are unused are named `_` per PEP 8 convention.
    for _ in range(2):
        storage.create_schedule_tick(repository, self.build_tick(current_time))

    for x in range(3):
        # x is used here to give each successful tick a distinct run_id.
        storage.create_schedule_tick(
            repository,
            self.build_tick(current_time, ScheduleTickStatus.SUCCESS, run_id=str(x)),
        )

    for _ in range(4):
        storage.create_schedule_tick(
            repository,
            self.build_tick(current_time, ScheduleTickStatus.SKIPPED),
        )

    for _ in range(5):
        storage.create_schedule_tick(
            repository,
            self.build_tick(current_time, ScheduleTickStatus.FAILURE, error=error),
        )

    stats = storage.get_schedule_tick_stats_by_schedule(repository, "my_schedule")

    assert stats.ticks_started == 2
    assert stats.ticks_succeeded == 3
    assert stats.ticks_skipped == 4
    assert stats.ticks_failed == 5
def test_create_tick(self, storage):
    """A newly created tick is retrievable with its initial STARTED state."""
    assert storage
    repository = RepositoryDefinition("repository_name")
    current_time = time.time()
    tick = storage.create_schedule_tick(repository, self.build_tick(current_time))
    assert tick.tick_id == 1

    ticks = storage.get_schedule_ticks_by_schedule(repository, "my_schedule")
    assert len(ticks) == 1
    tick = ticks[0]
    assert tick.tick_id == 1
    assert tick.schedule_name == "my_schedule"
    assert tick.cron_schedule == "* * * * *"
    assert tick.timestamp == current_time
    assert tick.status == ScheduleTickStatus.STARTED
    # PEP 8: compare against None with `is`, not `==` (was `== None`).
    assert tick.run_id is None
    assert tick.error is None
def get_repo_at_time_1():
    """Snapshot of 'evolving_repo' at time 1: evolving_pipeline and foo_pipeline."""

    @lambda_solid
    def solid_A():
        pass

    @lambda_solid
    def solid_B():
        pass

    @pipeline
    def evolving_pipeline():
        solid_A()
        solid_B()

    @pipeline
    def foo_pipeline():
        solid_A()

    defs = [evolving_pipeline, foo_pipeline]
    return RepositoryDefinition('evolving_repo', pipeline_defs=defs)
def test_add_multiple_schedules(self, storage):
    """Three schedules added to one repository are all returned by all_schedules."""
    assert storage
    repository = RepositoryDefinition(
        name="repository_name", repository_data=RepositoryData.from_list([])
    )

    # Build and register three schedules with identical cron strings.
    schedule_names = ["my_schedule", "my_schedule_2", "my_schedule_3"]
    for schedule_name in schedule_names:
        storage.add_schedule(
            repository.name, self.build_schedule(schedule_name, "* * * * *")
        )

    schedules = storage.all_schedules(repository.name)
    assert len(schedules) == 3
    stored_names = {s.name for s in schedules}
    for schedule_name in schedule_names:
        assert schedule_name in stored_names
def get_repo_at_time_2():
    """Snapshot of 'evolving_repo' at time 2: solid_B replaced by solid_B_prime,
    foo_pipeline replaced by bar_pipeline."""

    @lambda_solid
    def solid_A():
        pass

    @lambda_solid
    def solid_B_prime():
        pass

    @pipeline
    def evolving_pipeline():
        solid_A()
        solid_B_prime()

    @pipeline
    def bar_pipeline():
        solid_A()

    defs = [evolving_pipeline, bar_pipeline]
    return RepositoryDefinition(name='evolving_repo', pipeline_defs=defs)
def test_update_tick_to_skip(self, storage):
    """Updating a STARTED tick to SKIPPED persists the new status."""
    assert storage
    repository = RepositoryDefinition("repository_name")
    current_time = time.time()
    tick = storage.create_schedule_tick(repository.name, self.build_tick(current_time))

    updated_tick = tick.with_status(ScheduleTickStatus.SKIPPED)
    assert updated_tick.status == ScheduleTickStatus.SKIPPED

    storage.update_schedule_tick(repository.name, updated_tick)

    ticks = storage.get_schedule_ticks_by_schedule(repository.name, "my_schedule")
    assert len(ticks) == 1
    tick = ticks[0]
    assert tick.tick_id == 1
    assert tick.schedule_name == "my_schedule"
    assert tick.cron_schedule == "* * * * *"
    assert tick.timestamp == current_time
    assert tick.status == ScheduleTickStatus.SKIPPED
    # PEP 8: compare against None with `is`, not `==` (was `== None`).
    assert tick.run_id is None
    assert tick.error is None
def define_example_repository():
    """Build 'notebook_repo'; the tutorial pipeline is included only when its
    optional dependencies (dagster_pandas, sklearn, matplotlib) are importable."""
    pipelines = {
        'bad_kernel_pipeline': define_bad_kernel_pipeline,
        'error_pipeline': define_error_pipeline,
        'hello_world_pipeline': define_hello_world_pipeline,
        'hello_world_config_pipeline': define_hello_world_config_pipeline,
        'hello_world_explicit_yield_pipeline': define_hello_world_explicit_yield_pipeline,
        'hello_world_with_output_pipeline': define_hello_world_with_output_pipeline,
        'hello_logging_pipeline': define_hello_logging_pipeline,
        'resource_pipeline': define_resource_pipeline,
        'resource_with_exception_pipeline': define_resource_with_exception_pipeline,
        'test_add_pipeline': define_add_pipeline,
        'test_notebook_dag': define_test_notebook_dag_pipeline,
    }
    # Conditionally register the tutorial pipeline behind its dependency flags.
    if DAGSTER_PANDAS_PRESENT and SKLEARN_PRESENT and MATPLOTLIB_PRESENT:
        pipelines['tutorial_pipeline'] = define_tutorial_pipeline
    return RepositoryDefinition(name='notebook_repo', pipeline_dict=pipelines)
def test_repo_definition():
    """Pipelines declared via pipeline_dict are constructed lazily and exactly once."""
    construction_counts = defaultdict(int)
    repo = RepositoryDefinition(
        name='some_repo',
        pipeline_dict={
            'foo': lambda: create_single_node_pipeline('foo', construction_counts),
            'bar': lambda: create_single_node_pipeline('bar', construction_counts),
        },
    )

    # First access constructs 'foo' once; 'bar' has not been touched yet.
    foo_pipeline = repo.get_pipeline('foo')
    assert isinstance(foo_pipeline, PipelineDefinition)
    assert foo_pipeline.name == 'foo'
    assert 'foo' in construction_counts
    assert construction_counts['foo'] == 1
    assert 'bar' not in construction_counts

    # Accessing 'bar' constructs it; 'foo' is not rebuilt.
    bar_pipeline = repo.get_pipeline('bar')
    assert isinstance(bar_pipeline, PipelineDefinition)
    assert bar_pipeline.name == 'bar'
    assert 'foo' in construction_counts
    assert construction_counts['foo'] == 1
    assert 'bar' in construction_counts
    assert construction_counts['bar'] == 1

    # Re-fetching 'foo' hits the cached definition — still constructed once.
    foo_pipeline = repo.get_pipeline('foo')
    assert isinstance(foo_pipeline, PipelineDefinition)
    assert foo_pipeline.name == 'foo'
    assert 'foo' in construction_counts
    assert construction_counts['foo'] == 1

    pipelines = repo.get_all_pipelines()
    assert {'foo', 'bar'} == {p.name for p in pipelines}

    assert repo.get_solid_def('foo_solid').name == 'foo_solid'
    assert repo.get_solid_def('bar_solid').name == 'bar_solid'
def define_repository():
    """Build the 'test' repository from named pipeline-definition callables."""
    pipelines = {
        'more_complicated_config': define_more_complicated_config,
        'more_complicated_nested_config': define_more_complicated_nested_config,
        'csv_hello_world': define_csv_hello_world,
        'csv_hello_world_two': define_pipeline_two,
        'csv_hello_world_with_expectations': define_csv_hello_world_with_expectations,
        'pipeline_with_list': define_pipeline_with_list,
        'csv_hello_world_df_input': define_pipeline_with_csv_df_input,
        'no_config_pipeline': define_no_config_pipeline,
        'scalar_output_pipeline': define_scalar_output_pipeline,
        'pipeline_with_enum_config': define_pipeline_with_enum_config,
        'naughty_programmer_pipeline': define_naughty_programmer_pipeline,
        'secret_pipeline': define_pipeline_with_secret,
        'pipeline_with_step_metadata': define_pipeline_with_step_metadata,
        'pipeline_with_expectations': define_pipeline_with_expectation,
        'multi_mode_with_resources': define_multi_mode_with_resources_pipeline,
        'multi_mode_with_loggers': define_multi_mode_with_loggers_pipeline,
        'composites_pipeline': define_composites_pipeline,
    }
    return RepositoryDefinition(name='test', pipeline_dict=pipelines)
def define_repository():
    """Build the 'test' repository from already-constructed pipeline definitions."""
    # Definitions are listed alphabetically.
    defs = [
        composites_pipeline,
        csv_hello_world,
        csv_hello_world_df_input,
        csv_hello_world_two,
        csv_hello_world_with_expectations,
        materialization_pipeline,
        more_complicated_config,
        more_complicated_nested_config,
        multi_mode_with_loggers,
        multi_mode_with_resources,
        naughty_programmer_pipeline,
        no_config_pipeline,
        pipeline_with_enum_config,
        pipeline_with_expectations,
        pipeline_with_list,
        pipeline_with_step_metadata,
        scalar_output_pipeline,
        secret_pipeline,
    ]
    return RepositoryDefinition(name='test', pipeline_defs=defs)
def asset_repo():
    """Repository with a single-asset pipeline and a two-solid multi-asset pipeline."""

    @solid
    def solid_a(_):
        yield Materialization(asset_key='a', label='a')
        yield Output(1)

    @solid
    def solid_b(_, num):
        yield Materialization(asset_key='b', label='b')
        # NOTE(review): short pause between the 'b' and 'c' materializations —
        # presumably so they get distinguishable timestamps; confirm with callers.
        time.sleep(0.1)
        yield Materialization(asset_key='c', label='c')
        yield Output(num)

    @pipeline
    def single_asset_pipeline():
        solid_a()

    @pipeline
    def multi_asset_pipeline():
        solid_b(solid_a())

    defs = [single_asset_pipeline, multi_asset_pipeline]
    return RepositoryDefinition(name='asset_repo', pipeline_defs=defs)
def test_repository_snap_all_props():
    """External repository data carries name, pipeline data, and pipeline snapshot."""

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    repo = RepositoryDefinition(name='noop_repo', pipeline_defs=[noop_pipeline])
    external_repo_data = external_repository_data_from_def(repo)

    assert external_repo_data.name == 'noop_repo'
    pipeline_datas = external_repo_data.external_pipeline_datas
    assert len(pipeline_datas) == 1
    assert isinstance(pipeline_datas[0], ExternalPipelineData)

    snapshot = pipeline_datas[0].pipeline_snapshot
    assert isinstance(snapshot, PipelineSnapshot)
    assert snapshot.name == 'noop_pipeline'
    assert snapshot.description is None
    assert snapshot.tags == {}
def define_repository():
    """Build the 'test' repository including schedules and partition sets."""
    defs = [
        composites_pipeline,
        csv_hello_world,
        csv_hello_world_df_input,
        csv_hello_world_two,
        csv_hello_world_with_expectations,
        hello_world_with_tags,
        eventually_successful,
        infinite_loop_pipeline,
        materialization_pipeline,
        more_complicated_config,
        more_complicated_nested_config,
        multi_mode_with_loggers,
        multi_mode_with_resources,
        naughty_programmer_pipeline,
        noop_pipeline,
        pipeline_with_invalid_definition_error,
        no_config_pipeline,
        no_config_chain_pipeline,
        pipeline_with_enum_config,
        pipeline_with_expectations,
        pipeline_with_list,
        required_resource_pipeline,
        retry_resource_pipeline,
        retry_multi_output_pipeline,
        scalar_output_pipeline,
        spew_pipeline,
        tagged_pipeline,
        retry_multi_input_early_terminate_pipeline,
    ]
    return RepositoryDefinition(
        name='test',
        pipeline_defs=defs,
        schedule_defs=define_schedules(),
        partition_set_defs=define_partitions(),
    )
def test_active_repository_data(snapshot):
    """Serialized active repository data matches the recorded snapshot."""
    repo_def = RepositoryDefinition(name='repo', pipeline_defs=[a_pipeline])
    serialized = serialize_pp(active_repository_data_from_def(repo_def))
    snapshot.assert_match(serialized)
def define_repo():
    """Return the 'partitioning-tutorial' repository with its single pipeline."""
    defs = [compute_total_stock_volume]
    return RepositoryDefinition(name='partitioning-tutorial', pipeline_defs=defs)
def cereal_repository():
    """Return 'hello_cereal_repository' with its pipeline and schedules."""
    return RepositoryDefinition(
        'hello_cereal_repository',
        pipeline_defs=[hello_cereal_pipeline],
        schedule_defs=cereal_schedules(),
    )
def define_repository():
    """Return the 'emr_pyspark' repository wrapping my_pipeline."""
    defs = [my_pipeline]
    return RepositoryDefinition('emr_pyspark', pipeline_defs=defs)
def define_repo():
    """Return the 'experimental_repository' wrapping the dash_stats pipeline."""
    defs = [dash_stats]
    return RepositoryDefinition(name='experimental_repository', pipeline_defs=defs)
def define_error_pipeline_repo():
    """Return the 'error_pipeline' repository mapping 'bad' to its define callable."""
    pipelines = {'bad': define_bad_pipeline}
    return RepositoryDefinition(name='error_pipeline', pipeline_dict=pipelines)
def test_empty_repo():
    """Return an 'empty_repository' with no pipelines.

    NOTE(review): despite the test_ prefix this returns a value instead of
    asserting — presumably used as a repository factory/fixture; confirm callers.
    """
    return RepositoryDefinition(name='empty_repository', pipeline_defs=[])
def define_repository():
    """Return the 'test' repository mapping 'math' to its define callable."""
    pipelines = {'math': define_csv_hello_world}
    return RepositoryDefinition(name='test', pipeline_dict=pipelines)
def define_repository():
    """Return the 'test' repository wrapping the math pipeline."""
    defs = [math]
    return RepositoryDefinition(name='test', pipeline_defs=defs)
def define_demo_execution_repo():
    """Return the 'demo_execution_repo' wrapping demo_execution_pipeline."""
    defs = [demo_execution_pipeline]
    return RepositoryDefinition(name='demo_execution_repo', pipeline_defs=defs)
def lakehouse_test_repo():
    """Return the 'lakehouse_test_repo' wrapping typed_lakehouse_pipeline."""
    defs = [typed_lakehouse_pipeline]
    return RepositoryDefinition(name='lakehouse_test_repo', pipeline_defs=defs)
def define_repo():
    """Return the 'gcp_data_platform' repository mapping 'gcp_pipeline'."""
    pipelines = {'gcp_pipeline': gcp_pipeline}
    return RepositoryDefinition(name='gcp_data_platform', pipeline_dict=pipelines)
def define_test_repository():
    """Return the 'test' repository wrapping the circular-dependency pipeline."""
    pipelines = {'pipeline': define_circular_dependency_pipeline}
    return RepositoryDefinition(name='test', pipeline_dict=pipelines)
def cereal_repository():
    """Return 'hello_cereal_repository' wrapping hello_cereal_pipeline."""
    defs = [hello_cereal_pipeline]
    return RepositoryDefinition('hello_cereal_repository', pipeline_defs=defs)
def define_bar_repo():
    """Return the 'bar' repository with 'foo' and 'baz' pipeline callables."""
    # 'baz' wraps an existing pipeline object in a lambda so both entries
    # have the callable form that pipeline_dict expects.
    pipelines = {'foo': define_foo_pipeline, 'baz': lambda: baz_pipeline}
    return RepositoryDefinition('bar', pipelines)