Example #1
0
def _traverse_airflow_dag(task, seen_tasks, pipeline_dependencies, solid_defs):
    """Walk the DAG depth-first from ``task``, collecting solids and dependencies.

    Args:
        task: ``BaseOperator`` instance to convert and descend from.
        seen_tasks: List of operators already visited; appended to in place.
        pipeline_dependencies: Dict keyed by solid name. For every task with
            upstream tasks it receives an ``airflow_task_ready`` fan-in entry.
        solid_defs: List of ``SolidDefinition`` objects; appended to in place.
    """
    check.inst_param(task, 'task', BaseOperator)
    check.list_param(seen_tasks, 'seen_tasks', BaseOperator)
    check.list_param(solid_defs, 'solid_defs', SolidDefinition)

    seen_tasks.append(task)
    current_solid = make_dagster_solid_from_airflow_task(task)
    solid_defs.append(current_solid)

    # Sorting by task_id keeps the iteration order predictable.
    upstream_tasks = sorted(task.upstream_list, key=lambda upstream: upstream.task_id)
    if upstream_tasks:
        fan_in = [
            DependencyDefinition(
                solid='airflow_' + upstream.task_id, output='airflow_task_complete'
            )
            for upstream in upstream_tasks
        ]
        pipeline_dependencies[current_solid.name] = {
            'airflow_task_ready': MultiDependencyDefinition(fan_in)
        }

    # Same ordering guarantee for the children we recurse into.
    downstream_tasks = sorted(task.downstream_list, key=lambda downstream: downstream.task_id)
    for child_task in downstream_tasks:
        if child_task in seen_tasks:
            continue
        _traverse_airflow_dag(child_task, seen_tasks, pipeline_dependencies, solid_defs)
def test_interleaved_values():
    """Fan an int, a None and a str output into one list-typed input."""

    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        # Compared as sets, so only membership is checked, not ordering.
        assert {1, None, 'one'} == set(stuff)
        return stuff

    @lambda_solid
    def emit_num():
        return 1

    @lambda_solid
    def emit_none():
        return None

    @lambda_solid
    def emit_str():
        return 'one'

    fan_in = MultiDependencyDefinition(
        [DependencyDefinition(name) for name in ('emit_num', 'emit_none', 'emit_str')]
    )
    pipeline_def = PipelineDefinition(
        name='input_test',
        solids=[emit_num, emit_none, emit_str, collect],
        dependencies={'collect': {'stuff': fan_in}},
    )

    assert execute_pipeline(pipeline_def).success
Example #3
0
def define_many_events_pipeline():
    """Build the ``many_events`` pipeline.

    The four fixed solids are combined with whatever ``create_raw_file_solids()``
    returns. ``many_table_materializations`` fans in from every entry of
    ``raw_files``; the two ``check_*`` solids both start after
    ``many_materializations_and_passing_expectations``.
    """
    all_solids = [
        many_table_materializations,
        many_materializations_and_passing_expectations,
        check_users_and_groups_one_fails_one_succeeds,
        check_admins_both_succeed,
    ] + create_raw_file_solids()

    raw_file_fan_in = MultiDependencyDefinition(
        [DependencyDefinition(raw_file) for raw_file in raw_files]
    )
    wiring = {
        'many_table_materializations': {'start': raw_file_fan_in},
        'many_materializations_and_passing_expectations': {
            'start': DependencyDefinition('many_table_materializations')
        },
    }
    # Both check solids hang off the same upstream solid.
    for check_solid_name in (
        'check_users_and_groups_one_fails_one_succeeds',
        'check_admins_both_succeed',
    ):
        wiring[check_solid_name] = {
            'start': DependencyDefinition('many_materializations_and_passing_expectations')
        }

    return PipelineDefinition(
        name='many_events',
        solids=all_solids,
        dependencies=merge_dicts({'many_table_materializations': {}}, wiring),
        mode_definitions=[ModeDefinition()],
    )
def test_fanin_deps():
    """Three aliased ``emit_nothing`` solids must all run before ``adder`` runs once."""
    call_counts = defaultdict(int)

    @lambda_solid
    def emit_two():
        return 2

    @lambda_solid(output=OutputDefinition(Nothing))
    def emit_nothing():
        call_counts['emit_nothing'] += 1

    @solid(
        inputs=[
            InputDefinition('ready', Nothing),
            InputDefinition('num_1', Int),
            InputDefinition('num_2', Int),
        ]
    )
    def adder(_context, num_1, num_2):
        # Every Nothing dependency has to have executed by now.
        assert call_counts['emit_nothing'] == 3
        call_counts['adder'] += 1
        return num_1 + num_2

    # Aliased instances: (solid definition name, alias), none with inputs.
    aliased_instances = [
        ('emit_two', 'emit_1'),
        ('emit_two', 'emit_2'),
        ('emit_nothing', '_one'),
        ('emit_nothing', '_two'),
        ('emit_nothing', '_three'),
    ]
    dependencies = {
        SolidInstance(definition_name, alias): {} for definition_name, alias in aliased_instances
    }
    dependencies['adder'] = {
        'ready': MultiDependencyDefinition(
            [DependencyDefinition(alias) for alias in ('_one', '_two', '_three')]
        ),
        'num_1': DependencyDefinition('emit_1'),
        'num_2': DependencyDefinition('emit_2'),
    }

    pipeline = PipelineDefinition(
        name='input_test',
        solids=[emit_two, emit_nothing, adder],
        dependencies=dependencies,
    )
    result = execute_pipeline(pipeline)

    assert result.success
    assert call_counts['adder'] == 1
    assert call_counts['emit_nothing'] == 3
Example #5
0
def test_fanin_deps():
    """Three aliased ``emit_nothing`` solids must all run before ``adder`` runs once."""
    call_counts = defaultdict(int)

    @lambda_solid
    def emit_two():
        return 2

    @lambda_solid(output_def=OutputDefinition(Nothing))
    def emit_nothing():
        call_counts["emit_nothing"] += 1

    @solid(
        input_defs=[
            InputDefinition("ready", Nothing),
            InputDefinition("num_1", Int),
            InputDefinition("num_2", Int),
        ]
    )
    def adder(_context, num_1, num_2):
        # Every Nothing dependency has to have executed by now.
        assert call_counts["emit_nothing"] == 3
        call_counts["adder"] += 1
        return num_1 + num_2

    # Aliased invocations: (solid definition name, alias), none with inputs.
    aliased_invocations = [
        ("emit_two", "emit_1"),
        ("emit_two", "emit_2"),
        ("emit_nothing", "_one"),
        ("emit_nothing", "_two"),
        ("emit_nothing", "_three"),
    ]
    dependencies = {
        SolidInvocation(definition_name, alias): {}
        for definition_name, alias in aliased_invocations
    }
    dependencies["adder"] = {
        "ready": MultiDependencyDefinition(
            [DependencyDefinition(alias) for alias in ("_one", "_two", "_three")]
        ),
        "num_1": DependencyDefinition("emit_1"),
        "num_2": DependencyDefinition("emit_2"),
    }

    pipeline = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_two, emit_nothing, adder],
        dependencies=dependencies,
    )
    result = execute_pipeline(pipeline)

    assert result.success
    assert call_counts["adder"] == 1
    assert call_counts["emit_nothing"] == 3
Example #6
0
def test_pipeline_subset_with_multi_dependency():
    """Run a pipeline with a Nothing fan-in in full and as solid subsets.

    ``noop`` depends on ``return_one`` and ``return_two`` through a single
    ``Nothing`` input. The pipeline is executed whole, then as a ``noop``-only
    subset, then as a subset naming all three solids. Each subset run is
    checked against its own result object.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(inputs=[InputDefinition('dep', Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        dependencies={
            'noop': {
                'dep':
                MultiDependencyDefinition([
                    DependencyDefinition('return_one'),
                    DependencyDefinition('return_two')
                ])
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('noop').result_value() == 3

    subset_result = execute_pipeline(pipeline_def.build_sub_pipeline(['noop']))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Inspect the subset run itself, not the earlier full-pipeline run.
    assert subset_result.result_for_solid('noop').result_value() == 3

    events = execute_pipeline_iterator(
        pipeline_def.build_sub_pipeline(['noop']))

    for step_event in step_output_event_filter(events):
        assert step_event.is_step_success

    subset_result = execute_pipeline(
        pipeline_def.build_sub_pipeline(['return_one', 'return_two', 'noop']))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Again assert on the subset run that was just executed.
    assert subset_result.result_for_solid('noop').result_value() == 3
Example #7
0
def test_interleaved_values():
    """Fan ``emit_num``, ``emit_none`` and ``emit_str`` into ``collect``'s ``stuff`` input.

    The four solids are not defined in this function; they are looked up
    from the enclosing scope.
    """
    upstream_names = ("emit_num", "emit_none", "emit_str")
    fan_in = MultiDependencyDefinition(
        [DependencyDefinition(solid_name) for solid_name in upstream_names]
    )
    pipeline_def = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_num, emit_none, emit_str, collect],
        dependencies={"collect": {"stuff": fan_in}},
    )

    assert execute_pipeline(pipeline_def).success
Example #8
0
def test_interleaved_values():
    """Fan ``emit_num``, ``emit_none`` and ``emit_str`` into ``collect``'s ``stuff`` input.

    The four solids are not defined in this function; they are looked up
    from the enclosing scope.
    """
    upstream_names = ('emit_num', 'emit_none', 'emit_str')
    fan_in = MultiDependencyDefinition(
        [DependencyDefinition(solid_name) for solid_name in upstream_names]
    )
    pipeline_def = PipelineDefinition(
        name='input_test',
        solid_defs=[emit_num, emit_none, emit_str, collect],
        dependencies={'collect': {'stuff': fan_in}},
    )

    assert execute_pipeline(pipeline_def).success
Example #9
0
def test_pipeline_subset_with_multi_dependency():
    """Run a pipeline with a Nothing fan-in in full and as solid subsets.

    ``noop`` depends on ``return_one`` and ``return_two`` through a single
    ``Nothing`` input. The pipeline is executed whole, then as a ``noop``-only
    subset, then as a subset naming all three solids. Each subset run is
    checked against its own result object.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(input_defs=[InputDefinition("dep", Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        name="test",
        dependencies={
            "noop": {
                "dep":
                MultiDependencyDefinition([
                    DependencyDefinition("return_one"),
                    DependencyDefinition("return_two")
                ])
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid("noop").output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.get_pipeline_subset_def({"noop"}))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Inspect the subset run itself, not the earlier full-pipeline run.
    assert subset_result.result_for_solid("noop").output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.get_pipeline_subset_def(
            {"return_one", "return_two", "noop"}))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Again assert on the subset run that was just executed.
    assert subset_result.result_for_solid("noop").output_value() == 3
def test_pipeline_subset_with_multi_dependency():
    """Run a pipeline with a Nothing fan-in in full and as solid subsets.

    ``noop`` depends on ``return_one`` and ``return_two`` through a single
    ``Nothing`` input. The pipeline is executed whole, then as a ``noop``-only
    subset, then as a subset naming all three solids. Each subset run is
    checked against its own result object.
    """

    @lambda_solid
    def return_one():
        return 1

    @lambda_solid
    def return_two():
        return 2

    @lambda_solid(input_defs=[InputDefinition('dep', Nothing)])
    def noop():
        return 3

    pipeline_def = PipelineDefinition(
        solid_defs=[return_one, return_two, noop],
        dependencies={
            'noop': {
                'dep':
                MultiDependencyDefinition([
                    DependencyDefinition('return_one'),
                    DependencyDefinition('return_two')
                ])
            }
        },
    )

    pipeline_result = execute_pipeline(pipeline_def)
    assert pipeline_result.success
    assert pipeline_result.result_for_solid('noop').output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.subset_for_execution(['noop']))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 1
    # Inspect the subset run itself, not the earlier full-pipeline run.
    assert subset_result.result_for_solid('noop').output_value() == 3

    subset_result = execute_pipeline(
        pipeline_def.subset_for_execution(['return_one', 'return_two',
                                           'noop']))

    assert subset_result.success
    assert len(subset_result.solid_result_list) == 3
    # Again assert on the subset run that was just executed.
    assert subset_result.result_for_solid('noop').output_value() == 3
Example #11
0
def _traverse_airflow_dag(
    task, seen_tasks, pipeline_dependencies, solid_defs, use_airflow_template_context, unique_id
):
    """Walk the DAG depth-first from ``task``, collecting solids and dependencies.

    Args:
        task: ``BaseOperator`` instance to convert and descend from.
        seen_tasks: List of operators already visited; appended to in place.
        pipeline_dependencies: Dict keyed by solid name. For every task with
            upstream tasks it receives an ``airflow_task_ready`` fan-in entry.
        solid_defs: List of ``SolidDefinition`` objects; appended to in place.
        use_airflow_template_context: Bool forwarded to
            ``make_dagster_solid_from_airflow_task``.
        unique_id: Optional int forwarded to solid creation and to
            ``normalized_name`` when naming upstream solids.
    """
    check.inst_param(task, "task", BaseOperator)
    check.list_param(seen_tasks, "seen_tasks", BaseOperator)
    check.list_param(solid_defs, "solid_defs", SolidDefinition)
    check.bool_param(use_airflow_template_context, "use_airflow_template_context")
    unique_id = check.opt_int_param(unique_id, "unique_id")

    seen_tasks.append(task)
    current_solid = make_dagster_solid_from_airflow_task(
        task, use_airflow_template_context, unique_id
    )
    solid_defs.append(current_solid)

    # Sorting by task_id keeps the iteration order predictable.
    upstream_tasks = sorted(task.upstream_list, key=lambda upstream: upstream.task_id)
    if upstream_tasks:
        fan_in = [
            DependencyDefinition(
                solid=normalized_name(upstream.task_id, unique_id),
                output="airflow_task_complete",
            )
            for upstream in upstream_tasks
        ]
        pipeline_dependencies[current_solid.name] = {
            "airflow_task_ready": MultiDependencyDefinition(fan_in)
        }

    # Same ordering guarantee for the children we recurse into.
    downstream_tasks = sorted(task.downstream_list, key=lambda downstream: downstream.task_id)
    for child_task in downstream_tasks:
        if child_task in seen_tasks:
            continue
        _traverse_airflow_dag(
            child_task,
            seen_tasks,
            pipeline_dependencies,
            solid_defs,
            use_airflow_template_context,
            unique_id,
        )
Example #12
0
def test_nothing_deps():
    """Defining a pipeline that fans ``emit_nothing`` into ``stuff`` must be rejected.

    The four solids are not defined in this function; they are looked up
    from the enclosing scope.
    """
    expected_message = (
        r'Input "stuff" expects a value of type \[Any\] and output '
        '"result" returns type Nothing'
    )

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        # Everything is built inside the context manager so any definition
        # error surfaces here.
        fan_in = MultiDependencyDefinition(
            [
                DependencyDefinition(solid_name)
                for solid_name in ("emit_num", "emit_nothing", "emit_str")
            ]
        )
        PipelineDefinition(
            name="input_test",
            solid_defs=[emit_num, emit_nothing, emit_str, collect],
            dependencies={"collect": {"stuff": fan_in}},
        )
Example #13
0
def test_simple_values():
    """Fan three int-emitting solids into one list input and check the sum is 6."""

    @solid(input_defs=[InputDefinition("numbers", List[Int])])
    def sum_num(_context, numbers):
        # cant guarantee order, so compare as sets
        assert {1, 2, 3} == set(numbers)
        return sum(numbers)

    @lambda_solid
    def emit_1():
        return 1

    @lambda_solid
    def emit_2():
        return 2

    @lambda_solid
    def emit_3():
        return 3

    fan_in = MultiDependencyDefinition(
        [DependencyDefinition(solid_name) for solid_name in ("emit_1", "emit_2", "emit_3")]
    )
    pipeline_def = PipelineDefinition(
        name="input_test",
        solid_defs=[emit_1, emit_2, emit_3, sum_num],
        dependencies={"sum_num": {"numbers": fan_in}},
    )

    result = execute_pipeline(pipeline_def)
    assert result.success
    assert result.result_for_solid("sum_num").output_value() == 6
def test_nothing_deps():
    """Defining a pipeline that fans a ``Nothing`` output into ``stuff`` must be rejected."""

    @solid(inputs=[InputDefinition('stuff', List(Any))])
    def collect(_context, stuff):
        return stuff

    @lambda_solid(output=OutputDefinition(Int))
    def emit_num():
        return 1

    @lambda_solid(output=OutputDefinition(Nothing))
    def emit_nothing():
        return None

    @lambda_solid(output=OutputDefinition(String))
    def emit_str():
        return 'one'

    expected_message = (
        r'Input "stuff" expects a value of type \[Any\] and output '
        '"result" returns type Nothing'
    )

    with pytest.raises(DagsterInvalidDefinitionError, match=expected_message):
        # Everything is built inside the context manager so any definition
        # error surfaces here.
        fan_in = MultiDependencyDefinition(
            [
                DependencyDefinition(solid_name)
                for solid_name in ('emit_num', 'emit_nothing', 'emit_str')
            ]
        )
        PipelineDefinition(
            name='input_test',
            solids=[emit_num, emit_nothing, emit_str, collect],
            dependencies={'collect': {'stuff': fan_in}},
        )
def test_simple_values():
    """Fan three int-emitting solids into one list input and check the sum is 6."""

    @solid(inputs=[InputDefinition('numbers', List(Int))])
    def sum_num(_context, numbers):
        # cant guarantee order, so compare as sets
        assert {1, 2, 3} == set(numbers)
        return sum(numbers)

    @lambda_solid
    def emit_1():
        return 1

    @lambda_solid
    def emit_2():
        return 2

    @lambda_solid
    def emit_3():
        return 3

    fan_in = MultiDependencyDefinition(
        [DependencyDefinition(solid_name) for solid_name in ('emit_1', 'emit_2', 'emit_3')]
    )
    pipeline_def = PipelineDefinition(
        name='input_test',
        solids=[emit_1, emit_2, emit_3, sum_num],
        dependencies={'sum_num': {'numbers': fan_in}},
    )

    result = execute_pipeline(pipeline_def)
    assert result.success
    assert result.result_for_solid('sum_num').transformed_value() == 6
Example #16
0
def test_fan_in_manual():
    """Hand-build a composite with mapped fan-in inputs and check the validation errors.

    One valid definition is constructed, followed by three invalid variants
    that differ only in the dependency given for ``collect``'s ``stuff``
    input. Each invalid variant must raise ``DagsterInvalidDefinitionError``
    with the expected message.
    """

    # manually building up this guy
    @composite_solid
    def _target_composite_dsl(str_in, none_in):
        return collect([emit_num(), str_in, none_in])

    def _manual_composite(stuff_dependency):
        # Every variant maps str_in -> index 1 and none_in -> index 2 of "stuff".
        return CompositeSolidDefinition(
            name="manual_composite",
            solid_defs=[emit_num, collect],
            input_mappings=[
                InputDefinition("str_in").mapping_to("collect", "stuff", 1),
                InputDefinition("none_in").mapping_to("collect", "stuff", 2),
            ],
            output_mappings=[OutputDefinition().mapping_from("collect")],
            dependencies={"collect": {"stuff": stuff_dependency}},
        )

    def _fan_in(placeholder_count):
        # emit_num at index 0, then the requested number of placeholders.
        return MultiDependencyDefinition(
            [DependencyDefinition("emit_num")] + [MappedInputPlaceholder] * placeholder_count
        )

    # base case works
    _target_composite_manual = _manual_composite(_fan_in(2))

    # Only one placeholder, but a mapping targets index 2.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="index 2 in the MultiDependencyDefinition is not a MappedInputPlaceholder",
    ):
        _missing_placeholder = _manual_composite(_fan_in(1))

    # The mapped input is wired with a plain DependencyDefinition.
    with pytest.raises(
        DagsterInvalidDefinitionError, match="is not a MultiDependencyDefinition"
    ):
        _bad_target = _manual_composite(DependencyDefinition("emit_num"))

    # Three placeholders, but nothing maps to index 3.
    with pytest.raises(
        DagsterInvalidDefinitionError,
        match="Unsatisfied MappedInputPlaceholder at index 3",
    ):
        _extra_placeholder = _manual_composite(_fan_in(3))