Ejemplo n.º 1
0
    def get_pipeline_index(self):
        """Return the PipelineIndex for this object, building it on first use.

        The index is created from ``PipelineSnapshot.from_pipeline_def(self)``
        and stored on ``_cached_pipeline_index``; while that attribute is not
        None, it is returned as-is.
        """
        cached_index = self._cached_pipeline_index
        if cached_index is not None:
            return cached_index

        # Local import; only reached when nothing is cached yet.
        from dagster.core.snap.pipeline_snapshot import PipelineIndex, PipelineSnapshot

        snapshot = PipelineSnapshot.from_pipeline_def(self)
        self._cached_pipeline_index = PipelineIndex(snapshot)
        return self._cached_pipeline_index
Ejemplo n.º 2
0
def test_deserialize_solid_def_snaps_default_field():
    """Check the config type rebuilt from a solid def snap for a two-field dict config.

    'foo' is optional with default 'hello'; 'bar' is a plain string field.
    """

    @solid(
        config={
            'foo': Field(str, is_required=False, default_value='hello'),
            'bar': Field(str),
        }
    )
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    noop_solid_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        noop_solid_snap
    )
    assert isinstance(received_config_type, Shape)

    foo_field = received_config_type.fields['foo']
    assert isinstance(foo_field.config_type, String)
    assert isinstance(received_config_type.fields['bar'].config_type, String)
    assert not foo_field.is_required
    assert foo_field.default_value == 'hello'

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 3
0
def test_diamond_task_dag(snapshot):
    """Snapshot the dep structure of a four-task, diamond-shaped Airflow DAG."""
    dag = DAG(dag_id='diamond_task_dag', default_args=default_args, schedule_interval=None)

    top = DummyOperator(task_id='dummy_operator_1', dag=dag)
    left = DummyOperator(task_id='dummy_operator_2', dag=dag)
    right = DummyOperator(task_id='dummy_operator_3', dag=dag)
    bottom = DummyOperator(task_id='dummy_operator_4', dag=dag)

    # Fan out from the first task, then join both branches on the last one.
    top >> left
    top >> right
    left >> bottom
    right >> bottom

    dagster_pipeline = make_dagster_pipeline_from_airflow_dag(dag)
    dep_structure = PipelineSnapshot.from_pipeline_def(dagster_pipeline).dep_structure_snapshot
    snapshot.assert_match(serialize_pp(dep_structure))
Ejemplo n.º 4
0
    def get_pipeline_snapshot(self):
        """Return the PipelineSnapshot for this object, building it on first use.

        The snapshot comes from ``PipelineSnapshot.from_pipeline_def(self)`` and
        is stored on ``_cached_pipeline_snapshot``; while that attribute is not
        None, it is returned as-is.
        """
        cached_snapshot = self._cached_pipeline_snapshot
        if cached_snapshot is not None:
            return cached_snapshot

        # Local import; only reached when nothing is cached yet.
        from dagster.core.snap.pipeline_snapshot import PipelineSnapshot

        self._cached_pipeline_snapshot = PipelineSnapshot.from_pipeline_def(self)
        return self._cached_pipeline_snapshot
Ejemplo n.º 5
0
 def from_repository_definition(repository_definition):
     """Build a RepositorySnapshot from a RepositoryDefinition.

     The result carries the repository's name and one PipelineSnapshot for
     each pipeline returned by ``get_all_pipelines()``.
     """
     check.inst_param(
         repository_definition, 'repository_definition', RepositoryDefinition
     )
     repository_name = repository_definition.name
     pipeline_snapshots = [
         PipelineSnapshot.from_pipeline_def(pipeline_def)
         for pipeline_def in repository_definition.get_all_pipelines()
     ]
     return RepositorySnapshot(
         name=repository_name, pipeline_snapshots=pipeline_snapshots
     )
Ejemplo n.º 6
0
def test_empty_pipeline_snap_props(snapshot):
    """Check name, description, tags and serialization of the no-op pipeline's snapshot."""
    noop_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    assert noop_snapshot.name == 'noop_pipeline'
    assert noop_snapshot.description is None
    assert noop_snapshot.tags == {}

    # The snapshot must compare equal to what serialize_rt returns for it.
    assert noop_snapshot == serialize_rt(noop_snapshot)

    serialized = serialize_pp(noop_snapshot)
    snapshot.assert_match(serialized)
    snapshot_id = create_pipeline_snapshot_id(noop_snapshot)
    snapshot.assert_match(snapshot_id)
Ejemplo n.º 7
0
def get_pipeline_snapshot_from_cli_args(cli_args):
    """Resolve a pipeline snapshot from CLI arguments.

    Requires ``cli_args['pipeline_name']`` to be set. When ``image`` is given,
    the snapshot is looked up (by the first pipeline name) in the repository
    snapshot returned by ``get_container_snapshot``; this path requires the
    ``docker`` module. Otherwise the pipeline definition is built from the
    CLI args and snapshotted directly.
    """
    _cli_load_invariant(cli_args.get('pipeline_name') is not None)

    image = cli_args.get('image')
    if not image:
        handle = handle_for_pipeline_cli_args(cli_args)
        return PipelineSnapshot.from_pipeline_def(handle.build_pipeline_definition())

    _cli_load_invariant(
        is_module_available('docker'),
        msg='--image is not supported without dagster[docker] or the Python package docker installed.',
    )
    repository_snapshot = get_container_snapshot(image)
    first_pipeline_name = cli_args.get('pipeline_name')[0]
    return repository_snapshot.get_pipeline_snapshot(first_pipeline_name)
Ejemplo n.º 8
0
def test_deserialize_solid_def_snaps_noneable():
    """Check that a Noneable(str) config comes back as Noneable wrapping String."""

    @solid(config=Field(Noneable(str)))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    noop_solid_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        noop_solid_snap
    )

    assert isinstance(received_config_type, Noneable)
    assert isinstance(received_config_type.inner_type, String)
Ejemplo n.º 9
0
def test_one_task_dag(snapshot):
    """Snapshot the dep structure of an Airflow DAG holding a single DummyOperator."""
    dag = DAG(dag_id='one_task_dag', default_args=default_args, schedule_interval=None)

    # Constructed with dag=dag; the local is not used again below.
    dummy_operator = DummyOperator(task_id='dummy_operator', dag=dag)

    dagster_pipeline = make_dagster_pipeline_from_airflow_dag(dag)
    dep_structure = PipelineSnapshot.from_pipeline_def(dagster_pipeline).dep_structure_snapshot
    snapshot.assert_match(serialize_pp(dep_structure))
Ejemplo n.º 10
0
def test_deserialize_solid_def_snaps_permissive():
    """Check that a Permissive config with a string 'foo' field comes back as Permissive."""

    @solid(config=Field(Permissive({'foo': Field(str)})))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    noop_solid_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        noop_solid_snap
    )

    assert isinstance(received_config_type, Permissive)
    foo_field = received_config_type.fields['foo']
    assert isinstance(foo_field.config_type, String)

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 11
0
def test_deserialize_solid_def_snaps_array():
    """Check that a [str] config comes back as an Array whose inner type is String."""

    @solid(config=Field([str]))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    noop_solid_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        noop_solid_snap
    )

    assert isinstance(received_config_type, Array)
    assert isinstance(received_config_type.inner_type, String)

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _array_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 12
0
def test_pipeline_snap_all_props(snapshot):
    """Check that a pipeline snapshot carries the pipeline's name, description and tags."""

    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    snap = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert snap.name == 'noop_pipeline'
    assert snap.description == 'desc'
    assert snap.tags == {'key': 'value'}

    # The snapshot must compare equal to what serialize_rt returns for it.
    assert snap == serialize_rt(snap)

    serialized = serialize_pp(snap)
    snapshot.assert_match(serialized)
    snapshot_id = create_pipeline_snapshot_id(snap)
    snapshot.assert_match(snapshot_id)
Ejemplo n.º 13
0
def test_basic_dep_fan_out(snapshot):
    """Check the dep structure when one output feeds two aliased downstream solids."""

    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition('value', int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        return_one_result = return_one()
        passthrough.alias('passone')(return_one_result)
        passthrough.alias('passtwo')(return_one_result)

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        single_dep_pipeline
    )
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation('return_one')
    assert index.get_invocation('passone')
    assert index.get_invocation('passtwo')

    # Both aliases read their 'value' input from return_one's 'result' output.
    assert index.get_upstream_output('passone', 'value') == OutputHandleSnap(
        'return_one', 'result'
    )
    assert index.get_upstream_output('passtwo', 'value') == OutputHandleSnap(
        'return_one', 'result'
    )

    downstream_inputs = set(index.get_downstream_inputs('return_one', 'result'))
    assert downstream_inputs == {
        InputHandle('passthrough', 'passone', 'value'),
        InputHandle('passthrough', 'passtwo', 'value'),
    }

    reloaded_dep_structure = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert reloaded_dep_structure == dep_structure_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    serialized = serialize_pp(pipeline_snapshot)
    snapshot.assert_match(serialized)
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    snapshot.assert_match(snapshot_id)
Ejemplo n.º 14
0
def test_mode_snap(snapshot):
    """Snapshot the single mode def snap of a pipeline with two resources and two loggers."""

    @resource(config={'foo': str})
    def a_resource(_):
        pass

    @resource(description='resource_description')
    def no_config_resource(_):
        pass

    @logger(config={'bar': str})
    def a_logger(_):
        pass

    @logger(description='logger_description')
    def no_config_logger(_):
        pass

    a_mode = ModeDefinition(
        name='a_mode',
        description='a_desc',
        resource_defs={
            'some_resource': a_resource,
            'no_config_resource': no_config_resource,
        },
        logger_defs={
            'some_logger': a_logger,
            'no_config_logger': no_config_logger,
        },
    )

    @pipeline(mode_defs=[a_mode])
    def a_pipeline():
        pass

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(a_pipeline)
    mode_def_snaps = pipeline_snapshot.mode_def_snaps
    assert len(mode_def_snaps) == 1
    mode_def_snap = pipeline_snapshot.mode_def_snaps[0]

    snapshot.assert_match(serialize_dagster_namedtuple(mode_def_snap))

    # Serializing and deserializing must give back an equal mode def snap.
    reloaded_mode_def_snap = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(mode_def_snap)
    )
    assert mode_def_snap == reloaded_mode_def_snap
Ejemplo n.º 15
0
def test_multi_type_config_nested_dicts(nested_dict_types, snapshot):
    """Snapshot a three-level nested dict config built from the supplied dict types."""
    outer_type, middle_type, inner_type = nested_dict_types

    @solid(
        config=outer_type(
            {'foo': middle_type({'bar': inner_type({'baz': Field(int)})})}
        )
    )
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    fancy_solid_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        fancy_solid_snap
    )

    config_type_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_type_snap))

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 16
0
def test_two_invocations_deps_snap(snapshot):
    """Check that two aliased invocations of one solid both appear in the dep structure."""

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        two_solid_pipeline
    )
    index = DependencyStructureIndex(dep_structure_snapshot)
    assert index.get_invocation('one')
    assert index.get_invocation('two')

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    serialized = serialize_pp(pipeline_snapshot)
    snapshot.assert_match(serialized)
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    snapshot.assert_match(snapshot_id)
Ejemplo n.º 17
0
def test_deserialize_solid_def_snaps_enum():
    """Check that an Enum config comes back with its given name and only known values."""
    cowboy_type = Enum(
        'CowboyType',
        [EnumValue('good'), EnumValue('bad'), EnumValue('ugly')],
    )

    @solid(config=Field(cowboy_type))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    noop_solid_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        noop_solid_snap
    )

    assert isinstance(received_config_type, Enum)
    assert received_config_type.given_name == 'CowboyType'

    allowed_values = ('good', 'bad', 'ugly')
    assert all(
        enum_value.config_value in allowed_values
        for enum_value in received_config_type.enum_values
    )
Ejemplo n.º 18
0
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    """Snapshot a config mixing Permissive, Array, Selector, Noneable, dict and Enum types."""

    @solid(
        config=Field(
            Permissive(
                {
                    'foo': Field(Array(float)),
                    'bar': Selector(
                        {
                            'baz': Field(Noneable(int)),
                            'qux': {
                                'quux': Field(str),
                                'corge': Field(
                                    Enum(
                                        'RGB',
                                        [
                                            EnumValue('red'),
                                            EnumValue('green'),
                                            EnumValue('blue'),
                                        ],
                                    )
                                ),
                            },
                        }
                    ),
                }
            )
        )
    )
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    fancy_solid_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        fancy_solid_snap
    )

    config_type_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_type_snap))

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 19
0
def test_multi_type_config_array_dict_fields(dict_config_type, snapshot):
    """Snapshot an Array config whose element is the supplied dict type with two fields."""

    @solid(
        config=Array(
            dict_config_type({'foo': Field(int), 'bar': Field(str)})
        )
    )
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    fancy_solid_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        fancy_solid_snap
    )

    config_type_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_type_snap))

    config_snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _array_has_stable_hashes(received_config_type, config_snaps_by_key)
Ejemplo n.º 20
0
def test_basic_fan_in(snapshot):
    """Check the dep structure when two Nothing outputs fan in to one input."""

    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        take_nothings(
            [
                return_nothing.alias('nothing_one')(),
                return_nothing.alias('nothing_two')(),
            ]
        )

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        fan_in_test
    )
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation('nothing_one')
    assert index.get_invocation('take_nothings')

    # Upstream outputs are compared as a list, so their order is part of the check.
    expected_upstream_outputs = [
        OutputHandleSnap('nothing_one', 'result'),
        OutputHandleSnap('nothing_two', 'result'),
    ]
    assert index.get_upstream_outputs('take_nothings', 'nothing') == expected_upstream_outputs

    reloaded_dep_structure = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert reloaded_dep_structure == dep_structure_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    serialized = serialize_pp(pipeline_snapshot)
    snapshot.assert_match(serialized)
    snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
    snapshot.assert_match(snapshot_id)
Ejemplo n.º 21
0
def test_complex_dag(snapshot):
    """Snapshot the dep structure of a large Airflow DAG made of DummyOperators.

    The operators are grouped by verb (create, delete, get, list, lookup,
    rename, search, update) and wired together with ``>>`` and ``chain``.
    The DAG is then converted with ``make_dagster_pipeline_from_airflow_dag``
    and the ``dep_structure_snapshot`` of its PipelineSnapshot is passed to
    ``snapshot.assert_match``.
    """
    dag = DAG(dag_id="complex_dag",
              default_args=default_args,
              schedule_interval=None)

    # Create
    create_entry_group = DummyOperator(
        task_id="create_entry_group",
        dag=dag,
    )
    create_entry_group_result = DummyOperator(
        task_id="create_entry_group_result",
        dag=dag,
    )
    create_entry_group_result2 = DummyOperator(
        task_id="create_entry_group_result2",
        dag=dag,
    )
    create_entry_gcs = DummyOperator(
        task_id="create_entry_gcs",
        dag=dag,
    )
    create_entry_gcs_result = DummyOperator(
        task_id="create_entry_gcs_result",
        dag=dag,
    )
    create_entry_gcs_result2 = DummyOperator(
        task_id="create_entry_gcs_result2",
        dag=dag,
    )
    create_tag = DummyOperator(
        task_id="create_tag",
        dag=dag,
    )
    create_tag_result = DummyOperator(
        task_id="create_tag_result",
        dag=dag,
    )
    create_tag_result2 = DummyOperator(
        task_id="create_tag_result2",
        dag=dag,
    )
    create_tag_template = DummyOperator(
        task_id="create_tag_template",
        dag=dag,
    )
    create_tag_template_result = DummyOperator(
        task_id="create_tag_template_result",
        dag=dag,
    )
    create_tag_template_result2 = DummyOperator(
        task_id="create_tag_template_result2",
        dag=dag,
    )
    create_tag_template_field = DummyOperator(
        task_id="create_tag_template_field",
        dag=dag,
    )
    create_tag_template_field_result = DummyOperator(
        task_id="create_tag_template_field_result",
        dag=dag,
    )
    # NOTE(review): this task_id repeats "create_tag_template_field_result" from
    # the operator just above instead of ending in "result2" like the other
    # *_result2 operators. Looks like a copy-paste slip; confirm before changing
    # it, since the stored snapshot may depend on the current value.
    create_tag_template_field_result2 = DummyOperator(
        task_id="create_tag_template_field_result",
        dag=dag,
    )

    # Delete
    delete_entry = DummyOperator(
        task_id="delete_entry",
        dag=dag,
    )
    create_entry_gcs >> delete_entry
    delete_entry_group = DummyOperator(
        task_id="delete_entry_group",
        dag=dag,
    )
    create_entry_group >> delete_entry_group
    delete_tag = DummyOperator(
        task_id="delete_tag",
        dag=dag,
    )
    create_tag >> delete_tag
    delete_tag_template_field = DummyOperator(
        task_id="delete_tag_template_field",
        dag=dag,
    )
    delete_tag_template = DummyOperator(
        task_id="delete_tag_template",
        dag=dag,
    )

    # Get
    get_entry_group = DummyOperator(
        task_id="get_entry_group",
        dag=dag,
    )
    get_entry_group_result = DummyOperator(
        task_id="get_entry_group_result",
        dag=dag,
    )
    get_entry = DummyOperator(
        task_id="get_entry",
        dag=dag,
    )
    get_entry_result = DummyOperator(
        task_id="get_entry_result",
        dag=dag,
    )
    get_tag_template = DummyOperator(
        task_id="get_tag_template",
        dag=dag,
    )
    get_tag_template_result = DummyOperator(
        task_id="get_tag_template_result",
        dag=dag,
    )

    # List
    list_tags = DummyOperator(
        task_id="list_tags",
        dag=dag,
    )
    list_tags_result = DummyOperator(
        task_id="list_tags_result",
        dag=dag,
    )

    # Lookup
    lookup_entry = DummyOperator(
        task_id="lookup_entry",
        dag=dag,
    )
    lookup_entry_result = DummyOperator(
        task_id="lookup_entry_result",
        dag=dag,
    )

    # Rename
    rename_tag_template_field = DummyOperator(
        task_id="rename_tag_template_field",
        dag=dag,
    )

    # Search
    search_catalog = DummyOperator(
        task_id="search_catalog",
        dag=dag,
    )
    search_catalog_result = DummyOperator(
        task_id="search_catalog_result",
        dag=dag,
    )

    # Update
    update_entry = DummyOperator(
        task_id="update_entry",
        dag=dag,
    )
    update_tag = DummyOperator(
        task_id="update_tag",
        dag=dag,
    )
    update_tag_template = DummyOperator(
        task_id="update_tag_template",
        dag=dag,
    )
    update_tag_template_field = DummyOperator(
        task_id="update_tag_template_field",
        dag=dag,
    )

    # Create
    create_tasks = [
        create_entry_group,
        create_entry_gcs,
        create_tag_template,
        create_tag_template_field,
        create_tag,
    ]
    chain(*create_tasks)

    create_entry_group >> delete_entry_group
    create_entry_group >> create_entry_group_result
    create_entry_group >> create_entry_group_result2

    create_entry_gcs >> delete_entry
    create_entry_gcs >> create_entry_gcs_result
    create_entry_gcs >> create_entry_gcs_result2

    create_tag_template >> delete_tag_template_field
    create_tag_template >> create_tag_template_result
    create_tag_template >> create_tag_template_result2

    create_tag_template_field >> delete_tag_template_field
    create_tag_template_field >> create_tag_template_field_result
    create_tag_template_field >> create_tag_template_field_result2

    create_tag >> delete_tag
    create_tag >> create_tag_result
    create_tag >> create_tag_result2

    # Delete
    delete_tasks = [
        delete_tag,
        delete_tag_template_field,
        delete_tag_template,
        delete_entry_group,
        delete_entry,
    ]
    chain(*delete_tasks)

    # Get
    create_tag_template >> get_tag_template >> delete_tag_template
    get_tag_template >> get_tag_template_result

    create_entry_gcs >> get_entry >> delete_entry
    get_entry >> get_entry_result

    create_entry_group >> get_entry_group >> delete_entry_group
    get_entry_group >> get_entry_group_result

    # List
    create_tag >> list_tags >> delete_tag
    list_tags >> list_tags_result

    # Lookup
    create_entry_gcs >> lookup_entry >> delete_entry
    lookup_entry >> lookup_entry_result

    # Rename
    create_tag_template_field >> rename_tag_template_field >> delete_tag_template_field

    # Search
    # Unlike the chain(*...) calls above, the two task lists are passed here as
    # list arguments on either side of search_catalog.
    chain(create_tasks, search_catalog, delete_tasks)
    search_catalog >> search_catalog_result

    # Update
    create_entry_gcs >> update_entry >> delete_entry
    create_tag >> update_tag >> delete_tag
    create_tag_template >> update_tag_template >> delete_tag_template
    create_tag_template_field >> update_tag_template_field >> rename_tag_template_field

    # Convert the Airflow DAG and snapshot only its dependency structure.
    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(
                    dag)).dep_structure_snapshot))
Ejemplo n.º 22
0
def test_empty_pipeline_snap_snapshot(snapshot):
    """Pass the serialize_pp output of the no-op pipeline's snapshot to snapshot.assert_match."""
    noop_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())
    serialized = serialize_pp(noop_snapshot)
    snapshot.assert_match(serialized)