Example #1
def define_example_repository():
    return RepositoryDefinition(
        name='notebook_repo',
        pipeline_dict={
            'bad_kernel': define_bad_kernel_pipeline,
            'error_pipeline': define_error_pipeline,
            'hello_world_pipeline': define_hello_world_pipeline,
            'hello_world_config_pipeline': define_hello_world_config_pipeline,
            'hello_world_with_output_pipeline': define_hello_world_with_output_pipeline,
            'hello_logging_pipeline': define_hello_logging_pipeline,
            'resource_pipeline': define_resource_pipeline,
            'resource_with_exception_pipeline': define_resource_with_exception_pipeline,
            'test_add_pipeline': define_add_pipeline,
            'test_notebook_dag': define_test_notebook_dag_pipeline,
            'tutorial_pipeline': define_tutorial_pipeline,
        },
    )
Example #2
def define_demo_repo():
    # Lazy import here to prevent deps issues

    from dagster import RepositoryDefinition
    from dagster_examples.toys.error_monster import error_monster
    from dagster_examples.toys.sleepy import sleepy_pipeline
    from dagster_examples.toys.log_spew import log_spew
    from dagster_examples.toys.stdout_spew import stdout_spew_pipeline
    from dagster_examples.toys.many_events import many_events
    from dagster_examples.toys.composition import composition
    from dagster_examples.toys.pandas_hello_world import (
        pandas_hello_world_pipeline,
        pandas_hello_world_pipeline_no_config,
    )
    from dagster_examples.airline_demo.pipelines import (
        airline_demo_ingest_pipeline,
        airline_demo_warehouse_pipeline,
    )
    from dagster_examples.event_pipeline_demo.pipelines import event_ingest_pipeline
    from dagster_examples.pyspark_pagerank.pyspark_pagerank_pipeline import pyspark_pagerank
    from dagster_pandas.examples import papermill_pandas_hello_world_pipeline
    from dagster_examples.jaffle_dbt.jaffle import jaffle_pipeline

    return RepositoryDefinition(
        name='demo_repository',
        pipeline_defs=[
            pandas_hello_world_pipeline_no_config,
            pandas_hello_world_pipeline,
            sleepy_pipeline,
            error_monster,
            log_spew,
            many_events,
            composition,
            airline_demo_ingest_pipeline,
            airline_demo_warehouse_pipeline,
            event_ingest_pipeline,
            pyspark_pagerank,
            papermill_pandas_hello_world_pipeline,
            jaffle_pipeline,
            stdout_spew_pipeline,
        ],
    )
Example #3
    def test_get_schedule_stats(self, storage):
        assert storage

        repository = RepositoryDefinition("repository_name")
        current_time = time.time()

        error = SerializableErrorInfo(message="Error",
                                      stack=[],
                                      cls_name="TestError")

        # Create ticks
        for x in range(2):
            storage.create_schedule_tick(repository,
                                         self.build_tick(current_time))

        for x in range(3):
            storage.create_schedule_tick(
                repository,
                self.build_tick(current_time,
                                ScheduleTickStatus.SUCCESS,
                                run_id=str(x)))

        for x in range(4):
            storage.create_schedule_tick(
                repository,
                self.build_tick(current_time, ScheduleTickStatus.SKIPPED),
            )

        for x in range(5):
            storage.create_schedule_tick(
                repository,
                self.build_tick(current_time,
                                ScheduleTickStatus.FAILURE,
                                error=error),
            )

        stats = storage.get_schedule_tick_stats_by_schedule(
            repository, "my_schedule")
        assert stats.ticks_started == 2
        assert stats.ticks_succeeded == 3
        assert stats.ticks_skipped == 4
        assert stats.ticks_failed == 5
Example #4
    def test_create_tick(self, storage):
        assert storage

        repository = RepositoryDefinition("repository_name")
        current_time = time.time()
        tick = storage.create_schedule_tick(repository,
                                            self.build_tick(current_time))
        assert tick.tick_id == 1

        ticks = storage.get_schedule_ticks_by_schedule(repository,
                                                       "my_schedule")
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.tick_id == 1
        assert tick.schedule_name == "my_schedule"
        assert tick.cron_schedule == "* * * * *"
        assert tick.timestamp == current_time
        assert tick.status == ScheduleTickStatus.STARTED
        assert tick.run_id is None
        assert tick.error is None
Example #5
def get_repo_at_time_1():
    @lambda_solid
    def solid_A():
        pass

    @lambda_solid
    def solid_B():
        pass

    @pipeline
    def evolving_pipeline():
        solid_A()
        solid_B()

    @pipeline
    def foo_pipeline():
        solid_A()

    return RepositoryDefinition(
        'evolving_repo', pipeline_defs=[evolving_pipeline, foo_pipeline])
Example #6
    def test_add_multiple_schedules(self, storage):
        assert storage

        repository = RepositoryDefinition(
            name="repository_name",
            repository_data=RepositoryData.from_list([]))
        schedule = self.build_schedule("my_schedule", "* * * * *")
        schedule_2 = self.build_schedule("my_schedule_2", "* * * * *")
        schedule_3 = self.build_schedule("my_schedule_3", "* * * * *")

        storage.add_schedule(repository.name, schedule)
        storage.add_schedule(repository.name, schedule_2)
        storage.add_schedule(repository.name, schedule_3)

        schedules = storage.all_schedules(repository.name)
        assert len(schedules) == 3

        assert any(s.name == "my_schedule" for s in schedules)
        assert any(s.name == "my_schedule_2" for s in schedules)
        assert any(s.name == "my_schedule_3" for s in schedules)
Example #7
def get_repo_at_time_2():
    @lambda_solid
    def solid_A():
        pass

    @lambda_solid
    def solid_B_prime():
        pass

    @pipeline
    def evolving_pipeline():
        solid_A()
        solid_B_prime()

    @pipeline
    def bar_pipeline():
        solid_A()

    return RepositoryDefinition(
        name='evolving_repo', pipeline_defs=[evolving_pipeline, bar_pipeline])
Example #8
    def test_update_tick_to_skip(self, storage):
        assert storage

        repository = RepositoryDefinition("repository_name")
        current_time = time.time()
        tick = storage.create_schedule_tick(repository.name, self.build_tick(current_time))

        updated_tick = tick.with_status(ScheduleTickStatus.SKIPPED)
        assert updated_tick.status == ScheduleTickStatus.SKIPPED

        storage.update_schedule_tick(repository.name, updated_tick)

        ticks = storage.get_schedule_ticks_by_schedule(repository.name, "my_schedule")
        assert len(ticks) == 1
        tick = ticks[0]
        assert tick.tick_id == 1
        assert tick.schedule_name == "my_schedule"
        assert tick.cron_schedule == "* * * * *"
        assert tick.timestamp == current_time
        assert tick.status == ScheduleTickStatus.SKIPPED
        assert tick.run_id is None
        assert tick.error is None
Example #9
def define_example_repository():
    pipeline_dict = {
        'bad_kernel_pipeline': define_bad_kernel_pipeline,
        'error_pipeline': define_error_pipeline,
        'hello_world_pipeline': define_hello_world_pipeline,
        'hello_world_config_pipeline': define_hello_world_config_pipeline,
        'hello_world_explicit_yield_pipeline': define_hello_world_explicit_yield_pipeline,
        'hello_world_with_output_pipeline': define_hello_world_with_output_pipeline,
        'hello_logging_pipeline': define_hello_logging_pipeline,
        'resource_pipeline': define_resource_pipeline,
        'resource_with_exception_pipeline': define_resource_with_exception_pipeline,
        'test_add_pipeline': define_add_pipeline,
        'test_notebook_dag': define_test_notebook_dag_pipeline,
    }
    if DAGSTER_PANDAS_PRESENT and SKLEARN_PRESENT and MATPLOTLIB_PRESENT:
        pipeline_dict['tutorial_pipeline'] = define_tutorial_pipeline

    return RepositoryDefinition(name='notebook_repo',
                                pipeline_dict=pipeline_dict)
Example #10
def test_repo_definition():
    called = defaultdict(int)
    repo = RepositoryDefinition(
        name='some_repo',
        pipeline_dict={
            'foo': lambda: create_single_node_pipeline('foo', called),
            'bar': lambda: create_single_node_pipeline('bar', called),
        },
    )

    foo_pipeline = repo.get_pipeline('foo')
    assert isinstance(foo_pipeline, PipelineDefinition)
    assert foo_pipeline.name == 'foo'

    assert 'foo' in called
    assert called['foo'] == 1
    assert 'bar' not in called

    bar_pipeline = repo.get_pipeline('bar')
    assert isinstance(bar_pipeline, PipelineDefinition)
    assert bar_pipeline.name == 'bar'

    assert 'foo' in called
    assert called['foo'] == 1
    assert 'bar' in called
    assert called['bar'] == 1

    foo_pipeline = repo.get_pipeline('foo')
    assert isinstance(foo_pipeline, PipelineDefinition)
    assert foo_pipeline.name == 'foo'

    assert 'foo' in called
    assert called['foo'] == 1

    pipelines = repo.get_all_pipelines()

    assert {'foo', 'bar'} == {pipeline.name for pipeline in pipelines}

    assert repo.get_solid_def('foo_solid').name == 'foo_solid'
    assert repo.get_solid_def('bar_solid').name == 'bar_solid'
Example #11
def define_repository():
    return RepositoryDefinition(
        name='test',
        pipeline_dict={
            'more_complicated_config': define_more_complicated_config,
            'more_complicated_nested_config': define_more_complicated_nested_config,
            'csv_hello_world': define_csv_hello_world,
            'csv_hello_world_two': define_pipeline_two,
            'csv_hello_world_with_expectations': define_csv_hello_world_with_expectations,
            'pipeline_with_list': define_pipeline_with_list,
            'csv_hello_world_df_input': define_pipeline_with_csv_df_input,
            'no_config_pipeline': define_no_config_pipeline,
            'scalar_output_pipeline': define_scalar_output_pipeline,
            'pipeline_with_enum_config': define_pipeline_with_enum_config,
            'naughty_programmer_pipeline': define_naughty_programmer_pipeline,
            'secret_pipeline': define_pipeline_with_secret,
            'pipeline_with_step_metadata': define_pipeline_with_step_metadata,
            'pipeline_with_expectations': define_pipeline_with_expectation,
            'multi_mode_with_resources': define_multi_mode_with_resources_pipeline,
            'multi_mode_with_loggers': define_multi_mode_with_loggers_pipeline,
            'composites_pipeline': define_composites_pipeline,
        },
    )
Example #12
def define_repository():
    return RepositoryDefinition(
        name='test',
        pipeline_defs=[
            composites_pipeline,
            csv_hello_world,
            csv_hello_world_df_input,
            csv_hello_world_two,
            csv_hello_world_with_expectations,
            materialization_pipeline,
            more_complicated_config,
            more_complicated_nested_config,
            multi_mode_with_loggers,
            multi_mode_with_resources,
            naughty_programmer_pipeline,
            no_config_pipeline,
            pipeline_with_enum_config,
            pipeline_with_expectations,
            pipeline_with_list,
            pipeline_with_step_metadata,
            scalar_output_pipeline,
            secret_pipeline,
        ],
    )
Example #13
def asset_repo():
    @solid
    def solid_a(_):
        yield Materialization(asset_key='a', label='a')
        yield Output(1)

    @solid
    def solid_b(_, num):
        yield Materialization(asset_key='b', label='b')
        time.sleep(0.1)
        yield Materialization(asset_key='c', label='c')
        yield Output(num)

    @pipeline
    def single_asset_pipeline():
        solid_a()

    @pipeline
    def multi_asset_pipeline():
        solid_b(solid_a())

    return RepositoryDefinition(
        name='asset_repo',
        pipeline_defs=[single_asset_pipeline, multi_asset_pipeline])
Example #14
def test_repository_snap_all_props():
    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    repo = RepositoryDefinition(name='noop_repo',
                                pipeline_defs=[noop_pipeline])
    external_repo_data = external_repository_data_from_def(repo)

    assert external_repo_data.name == 'noop_repo'
    assert len(external_repo_data.external_pipeline_datas) == 1
    assert isinstance(external_repo_data.external_pipeline_datas[0],
                      ExternalPipelineData)

    pipeline_snapshot = external_repo_data.external_pipeline_datas[0].pipeline_snapshot
    assert isinstance(pipeline_snapshot, PipelineSnapshot)
    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description is None
    assert pipeline_snapshot.tags == {}
Example #15
def define_repository():
    return RepositoryDefinition(
        name='test',
        pipeline_defs=[
            composites_pipeline,
            csv_hello_world,
            csv_hello_world_df_input,
            csv_hello_world_two,
            csv_hello_world_with_expectations,
            hello_world_with_tags,
            eventually_successful,
            infinite_loop_pipeline,
            materialization_pipeline,
            more_complicated_config,
            more_complicated_nested_config,
            multi_mode_with_loggers,
            multi_mode_with_resources,
            naughty_programmer_pipeline,
            noop_pipeline,
            pipeline_with_invalid_definition_error,
            no_config_pipeline,
            no_config_chain_pipeline,
            pipeline_with_enum_config,
            pipeline_with_expectations,
            pipeline_with_list,
            required_resource_pipeline,
            retry_resource_pipeline,
            retry_multi_output_pipeline,
            scalar_output_pipeline,
            spew_pipeline,
            tagged_pipeline,
            retry_multi_input_early_terminate_pipeline,
        ],
        schedule_defs=define_schedules(),
        partition_set_defs=define_partitions(),
    )
Example #16
def test_active_repository_data(snapshot):
    rep_def = RepositoryDefinition(name='repo', pipeline_defs=[a_pipeline])
    snapshot.assert_match(serialize_pp(active_repository_data_from_def(rep_def)))
Example #17
def define_repo():
    return RepositoryDefinition(name='partitioning-tutorial',
                                pipeline_defs=[compute_total_stock_volume])
Example #18
def cereal_repository():
    return RepositoryDefinition(
        'hello_cereal_repository',
        pipeline_defs=[hello_cereal_pipeline],
        schedule_defs=cereal_schedules(),
    )
Example #19
def define_repository():
    return RepositoryDefinition('emr_pyspark', pipeline_defs=[my_pipeline])
Example #20
def define_repo():
    return RepositoryDefinition(name='experimental_repository',
                                pipeline_defs=[dash_stats])
Example #21
def define_error_pipeline_repo():
    return RepositoryDefinition(name='error_pipeline',
                                pipeline_dict={'bad': define_bad_pipeline})
Example #22
def test_empty_repo():
    return RepositoryDefinition(name='empty_repository', pipeline_defs=[])
Example #23
def define_repository():
    return RepositoryDefinition(name='test',
                                pipeline_dict={'math': define_csv_hello_world})
Example #24
def define_repository():
    return RepositoryDefinition(name='test', pipeline_defs=[math])
Example #25
def define_demo_execution_repo():
    return RepositoryDefinition(name='demo_execution_repo',
                                pipeline_defs=[demo_execution_pipeline])
Example #26
def lakehouse_test_repo():
    return RepositoryDefinition(name='lakehouse_test_repo',
                                pipeline_defs=[typed_lakehouse_pipeline])
Example #27
def define_repo():
    return RepositoryDefinition(name='gcp_data_platform',
                                pipeline_dict={'gcp_pipeline': gcp_pipeline})
Example #28
def define_test_repository():
    return RepositoryDefinition(
        name='test',
        pipeline_dict={'pipeline': define_circular_dependency_pipeline})
Example #29
def cereal_repository():
    return RepositoryDefinition('hello_cereal_repository',
                                pipeline_defs=[hello_cereal_pipeline])
Example #30
def define_bar_repo():
    return RepositoryDefinition('bar', {'foo': define_foo_pipeline, 'baz': lambda: baz_pipeline})
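
Taken together, the examples register pipelines either eagerly via pipeline_defs (a list of already-constructed PipelineDefinitions) or lazily via pipeline_dict (a mapping from pipeline name to a zero-argument constructor, the behavior exercised in the test_repo_definition example). The sketch below is a minimal, hypothetical combination of both styles: the names say_hello, hello_pipeline, define_sketch_repo, and define_lazy_sketch_repo are made up for illustration, and which of the two parameters is accepted depends on the dagster version in use.

from dagster import RepositoryDefinition, lambda_solid, pipeline


@lambda_solid
def say_hello():
    return 'hello'


@pipeline
def hello_pipeline():
    say_hello()


def define_sketch_repo():
    # Eager registration: pipeline_defs takes already-constructed definitions.
    # (Assumes a dagster version where pipeline_defs is available.)
    return RepositoryDefinition(name='sketch_repo', pipeline_defs=[hello_pipeline])


def define_lazy_sketch_repo():
    # Lazy registration: pipeline_dict maps a pipeline name to a zero-argument
    # callable, so the definition is only built (and then memoized) when first
    # requested, as asserted in the test_repo_definition example above.
    return RepositoryDefinition(
        name='lazy_sketch_repo',
        pipeline_dict={'hello_pipeline': lambda: hello_pipeline},
    )


# Usage: both repositories expose the accessors used in the tests above.
repo = define_sketch_repo()
assert repo.get_pipeline('hello_pipeline').name == 'hello_pipeline'
assert [p.name for p in repo.get_all_pipelines()] == ['hello_pipeline']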