def get_pipeline_index(self):
    """Return the PipelineIndex for this object, building it on first use.

    The index is created from ``PipelineSnapshot.from_pipeline_def(self)`` and
    stored on ``self._cached_pipeline_index`` so that later calls reuse it.
    """
    cached_index = self._cached_pipeline_index
    if cached_index is not None:
        return cached_index

    # Imported at call time rather than at module level, as in the original.
    from dagster.core.snap.pipeline_snapshot import PipelineIndex, PipelineSnapshot

    snapshot_of_self = PipelineSnapshot.from_pipeline_def(self)
    self._cached_pipeline_index = PipelineIndex(snapshot_of_self)
    return self._cached_pipeline_index
def test_deserialize_solid_def_snaps_default_field():
    """Check the config type recovered from a snapshot of a solid with a defaulted field.

    The solid declares an optional ``foo`` string field with default ``'hello'``
    and a ``bar`` string field; the type rebuilt from the pipeline snapshot
    must be a Shape exposing the same field types, requiredness and default.
    """

    @solid(
        config={
            'foo': Field(str, is_required=False, default_value='hello'),
            'bar': Field(str),
        }
    )
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('noop_solid')
    )

    assert isinstance(received_config_type, Shape)

    foo_field = received_config_type.fields['foo']
    assert isinstance(foo_field.config_type, String)
    assert isinstance(received_config_type.fields['bar'].config_type, String)
    assert not foo_field.is_required
    assert foo_field.default_value == 'hello'

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, snaps_by_key)
def test_diamond_task_dag(snapshot):
    """Snapshot the dependency structure of a four-task diamond-shaped Airflow DAG.

    Task 1 fans out to tasks 2 and 3, which both feed task 4. The DAG is
    converted with ``make_dagster_pipeline_from_airflow_dag`` and the resulting
    pipeline snapshot's ``dep_structure_snapshot`` is matched against the
    stored snapshot.
    """
    dag = DAG(dag_id='diamond_task_dag', default_args=default_args, schedule_interval=None)

    top = DummyOperator(task_id='dummy_operator_1', dag=dag)
    left = DummyOperator(task_id='dummy_operator_2', dag=dag)
    right = DummyOperator(task_id='dummy_operator_3', dag=dag)
    bottom = DummyOperator(task_id='dummy_operator_4', dag=dag)

    # Edges are declared in the same order as before: fan-out, then fan-in.
    top >> left
    top >> right
    left >> bottom
    right >> bottom

    dagster_pipeline = make_dagster_pipeline_from_airflow_dag(dag)
    dep_structure = PipelineSnapshot.from_pipeline_def(dagster_pipeline).dep_structure_snapshot
    snapshot.assert_match(serialize_pp(dep_structure))
def get_pipeline_snapshot(self):
    """Return the PipelineSnapshot for this object, creating it on first use.

    The snapshot comes from ``PipelineSnapshot.from_pipeline_def(self)`` and is
    kept on ``self._cached_pipeline_snapshot`` for subsequent calls.
    """
    cached_snapshot = self._cached_pipeline_snapshot
    if cached_snapshot is not None:
        return cached_snapshot

    # Imported at call time rather than at module level, as in the original.
    from dagster.core.snap.pipeline_snapshot import PipelineSnapshot

    self._cached_pipeline_snapshot = PipelineSnapshot.from_pipeline_def(self)
    return self._cached_pipeline_snapshot
def from_repository_definition(repository_definition):
    """Build a RepositorySnapshot from a RepositoryDefinition.

    Args:
        repository_definition: must be a ``RepositoryDefinition``; this is
            enforced with ``check.inst_param``.

    Returns:
        A ``RepositorySnapshot`` carrying the repository name and one
        ``PipelineSnapshot`` per pipeline from ``get_all_pipelines()``.
    """
    check.inst_param(repository_definition, 'repository_definition', RepositoryDefinition)

    repository_name = repository_definition.name

    snapshots = []
    for pipeline_definition in repository_definition.get_all_pipelines():
        snapshots.append(PipelineSnapshot.from_pipeline_def(pipeline_definition))

    return RepositorySnapshot(name=repository_name, pipeline_snapshots=snapshots)
def test_empty_pipeline_snap_props(snapshot):
    """Check the basic properties of the snapshot of the pipeline from ``get_noop_pipeline``.

    Verifies name, missing description and empty tags, that the snapshot equals
    the result of ``serialize_rt``, and matches both the pretty-printed
    serialization and the snapshot id against stored snapshots.
    """
    noop_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    assert noop_snapshot.name == 'noop_pipeline'
    assert noop_snapshot.description is None
    assert noop_snapshot.tags == {}
    assert noop_snapshot == serialize_rt(noop_snapshot)

    serialized = serialize_pp(noop_snapshot)
    snapshot.assert_match(serialized)

    snapshot_id = create_pipeline_snapshot_id(noop_snapshot)
    snapshot.assert_match(snapshot_id)
def get_pipeline_snapshot_from_cli_args(cli_args):
    """Resolve a PipelineSnapshot from CLI arguments.

    ``cli_args['pipeline_name']`` must not be None. When an ``image`` argument
    is given, the snapshot is looked up by the first pipeline name in the
    repository snapshot returned by ``get_container_snapshot``; this path
    requires the ``docker`` module to be available. Otherwise the pipeline
    definition is built from the CLI args and snapshotted directly.
    """
    _cli_load_invariant(cli_args.get('pipeline_name') is not None)

    image = cli_args.get('image')
    if not image:
        # No image given: build the definition from the CLI args and snapshot it.
        handle = handle_for_pipeline_cli_args(cli_args)
        return PipelineSnapshot.from_pipeline_def(handle.build_pipeline_definition())

    _cli_load_invariant(
        is_module_available('docker'),
        msg='--image is not supported without dagster[docker] or the Python package docker installed.',
    )
    container_repository_snapshot = get_container_snapshot(image)
    first_pipeline_name = cli_args.get('pipeline_name')[0]
    return container_repository_snapshot.get_pipeline_snapshot(first_pipeline_name)
def test_deserialize_solid_def_snaps_noneable():
    """Check that a ``Noneable(str)`` solid config is recovered from the pipeline snapshot.

    The recovered type must be a Noneable whose inner type is String.
    """

    @solid(config=Field(Noneable(str)))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('noop_solid')
    )

    assert isinstance(received_config_type, Noneable)
    assert isinstance(received_config_type.inner_type, String)
def test_one_task_dag(snapshot):
    """Snapshot the dependency structure of an Airflow DAG with a single task.

    The DAG is converted with ``make_dagster_pipeline_from_airflow_dag`` and the
    pipeline snapshot's ``dep_structure_snapshot`` is matched against the
    stored snapshot.
    """
    dag = DAG(dag_id='one_task_dag', default_args=default_args, schedule_interval=None)

    # The operator is tied to the DAG only through ``dag=dag``; the object
    # itself is never referenced again, so no local name is kept for it.
    DummyOperator(task_id='dummy_operator', dag=dag)

    dagster_pipeline = make_dagster_pipeline_from_airflow_dag(dag)
    dep_structure = PipelineSnapshot.from_pipeline_def(dagster_pipeline).dep_structure_snapshot
    snapshot.assert_match(serialize_pp(dep_structure))
def test_deserialize_solid_def_snaps_permissive():
    """Check that a Permissive solid config is recovered from the pipeline snapshot.

    The recovered type must be a Permissive whose ``foo`` field is a String;
    the type and the snapshot's config-snap map are then handed to
    ``_map_has_stable_hashes``.
    """

    @solid(config=Field(Permissive({'foo': Field(str)})))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('noop_solid')
    )

    assert isinstance(received_config_type, Permissive)
    assert isinstance(received_config_type.fields['foo'].config_type, String)

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, snaps_by_key)
def test_deserialize_solid_def_snaps_array():
    """Check that a ``[str]`` solid config is recovered from the pipeline snapshot.

    The recovered type must be an Array with a String inner type; the type and
    the snapshot's config-snap map are then handed to
    ``_array_has_stable_hashes``.
    """

    @solid(config=Field([str]))
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('noop_solid')
    )

    assert isinstance(received_config_type, Array)
    assert isinstance(received_config_type.inner_type, String)

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _array_has_stable_hashes(received_config_type, snaps_by_key)
def test_pipeline_snap_all_props(snapshot):
    """Check that name, description and tags of a pipeline appear on its snapshot.

    Also verifies that the snapshot equals the result of ``serialize_rt`` and
    matches the pretty-printed serialization and the snapshot id against
    stored snapshots.
    """

    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    described_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert described_snapshot.name == 'noop_pipeline'
    assert described_snapshot.description == 'desc'
    assert described_snapshot.tags == {'key': 'value'}
    assert described_snapshot == serialize_rt(described_snapshot)

    serialized = serialize_pp(described_snapshot)
    snapshot.assert_match(serialized)

    snapshot_id = create_pipeline_snapshot_id(described_snapshot)
    snapshot.assert_match(snapshot_id)
def test_basic_dep_fan_out(snapshot):
    """Check the dependency structure when one output feeds two aliased solids.

    ``return_one`` feeds ``passone`` and ``passtwo`` (both aliases of
    ``passthrough``). The dependency index must report the invocations, the
    upstream output of each alias, and both downstream inputs of
    ``return_one``; the structure and the full pipeline snapshot must also
    survive serialization.
    """

    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition('value', int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        return_one_result = return_one()
        passthrough.alias('passone')(return_one_result)
        passthrough.alias('passtwo')(return_one_result)

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(single_dep_pipeline)
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation('return_one')
    assert index.get_invocation('passone')
    assert index.get_invocation('passtwo')

    expected_upstream = OutputHandleSnap('return_one', 'result')
    assert index.get_upstream_output('passone', 'value') == expected_upstream
    assert index.get_upstream_output('passtwo', 'value') == OutputHandleSnap('return_one', 'result')

    # Compared as sets, so the order of the downstream inputs is not asserted.
    expected_downstream = {
        InputHandle('passthrough', 'passone', 'value'),
        InputHandle('passthrough', 'passtwo', 'value'),
    }
    assert set(index.get_downstream_inputs('return_one', 'result')) == expected_downstream

    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert round_tripped == dep_structure_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_mode_snap(snapshot):
    """Check the mode definition snapshot of a pipeline with one configured mode.

    The mode carries two resources and two loggers (one of each with config,
    one of each with only a description). The pipeline snapshot must contain
    exactly one mode snap, which is matched against the stored snapshot and
    must survive a serialize/deserialize round trip.
    """

    @resource(config={'foo': str})
    def a_resource(_):
        pass

    @resource(description='resource_description')
    def no_config_resource(_):
        pass

    @logger(config={'bar': str})
    def a_logger(_):
        pass

    @logger(description='logger_description')
    def no_config_logger(_):
        pass

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name='a_mode',
                description='a_desc',
                resource_defs={
                    'some_resource': a_resource,
                    'no_config_resource': no_config_resource,
                },
                logger_defs={
                    'some_logger': a_logger,
                    'no_config_logger': no_config_logger,
                },
            )
        ]
    )
    def a_pipeline():
        pass

    mode_def_snaps = PipelineSnapshot.from_pipeline_def(a_pipeline).mode_def_snaps
    assert len(mode_def_snaps) == 1

    only_mode_snap = mode_def_snaps[0]
    snapshot.assert_match(serialize_dagster_namedtuple(only_mode_snap))

    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(only_mode_snap)
    )
    assert only_mode_snap == round_tripped
def test_multi_type_config_nested_dicts(nested_dict_types, snapshot):
    """Snapshot the recovered config type for three levels of nested dict types.

    ``nested_dict_types`` supplies the three dict-like config types, nested
    outermost to innermost. The config type recovered from the pipeline
    snapshot is matched against the stored snapshot and then handed, with the
    snapshot's config-snap map, to ``_map_has_stable_hashes``.
    """
    outer_type, middle_type, inner_type = nested_dict_types

    @solid(config=outer_type({'foo': middle_type({'bar': inner_type({'baz': Field(int)})})}))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('fancy_solid')
    )

    config_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_snap))

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, snaps_by_key)
def test_two_invocations_deps_snap(snapshot):
    """Check a pipeline that invokes the same solid twice under different aliases.

    Both aliases must be found in the dependency index, and the pipeline
    snapshot must equal the result of ``serialize_rt`` and match the stored
    snapshots.
    """

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    dep_structure = build_dep_structure_snapshot_from_icontains_solids(two_solid_pipeline)
    index = DependencyStructureIndex(dep_structure)
    assert index.get_invocation('one')
    assert index.get_invocation('two')

    two_solid_snapshot = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    assert two_solid_snapshot == serialize_rt(two_solid_snapshot)

    serialized = serialize_pp(two_solid_snapshot)
    snapshot.assert_match(serialized)

    snapshot_id = create_pipeline_snapshot_id(two_solid_snapshot)
    snapshot.assert_match(snapshot_id)
def test_deserialize_solid_def_snaps_enum():
    """Check that an Enum solid config is recovered from the pipeline snapshot.

    The recovered type must be an Enum named ``CowboyType`` whose values are
    exactly ``good``, ``bad`` and ``ugly``.
    """

    @solid(
        config=Field(
            Enum('CowboyType', [EnumValue('good'), EnumValue('bad'), EnumValue('ugly')])
        )
    )
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    solid_def_snap = pipeline_snapshot.get_solid_def_snap('noop_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(solid_def_snap)

    assert isinstance(received_config_type, Enum)
    assert received_config_type.given_name == 'CowboyType'

    # The previous ``all(value in (...))`` check passed vacuously when
    # ``enum_values`` was empty or missing members. Comparing the sorted list
    # requires every declared value to be present exactly once and nothing else.
    recovered_values = sorted(
        enum_value.config_value for enum_value in received_config_type.enum_values
    )
    assert recovered_values == ['bad', 'good', 'ugly']
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    """Snapshot the recovered config type for a config mixing several type kinds.

    The solid config is a Permissive containing an Array field and a Selector
    with a Noneable field and a nested dict holding a string and an Enum. The
    config type recovered from the pipeline snapshot is matched against the
    stored snapshot and then handed, with the snapshot's config-snap map, to
    ``_map_has_stable_hashes``.
    """
    rgb_enum = Enum(
        'RGB',
        [
            EnumValue('red'),
            EnumValue('green'),
            EnumValue('blue'),
        ],
    )

    @solid(
        config=Field(
            Permissive(
                {
                    'foo': Field(Array(float)),
                    'bar': Selector(
                        {
                            'baz': Field(Noneable(int)),
                            'qux': {
                                'quux': Field(str),
                                'corge': Field(rgb_enum),
                            },
                        }
                    ),
                }
            )
        )
    )
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('fancy_solid')
    )

    config_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_snap))

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _map_has_stable_hashes(received_config_type, snaps_by_key)
def test_multi_type_config_array_dict_fields(dict_config_type, snapshot):
    """Snapshot the recovered config type for an Array of a dict-like config type.

    ``dict_config_type`` supplies the dict-like type wrapped by the Array. The
    config type recovered from the pipeline snapshot is matched against the
    stored snapshot and then handed, with the snapshot's config-snap map, to
    ``_array_has_stable_hashes``.
    """
    element_type = dict_config_type({'foo': Field(int), 'bar': Field(str)})

    @solid(config=Array(element_type))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(
        pipeline_snapshot.get_solid_def_snap('fancy_solid')
    )

    config_snap = snap_from_config_type(received_config_type)
    snapshot.assert_match(serialize_pp(config_snap))

    snaps_by_key = pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key
    _array_has_stable_hashes(received_config_type, snaps_by_key)
def test_basic_fan_in(snapshot):
    """Check the dependency structure when two outputs fan in to one input.

    Two aliases of ``return_nothing`` feed the single ``nothing`` input of
    ``take_nothings``. The dependency index must list both upstream outputs in
    order; the structure and the full pipeline snapshot must also survive
    serialization.
    """

    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        take_nothings(
            [return_nothing.alias('nothing_one')(), return_nothing.alias('nothing_two')()]
        )

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(fan_in_test)
    index = DependencyStructureIndex(dep_structure_snapshot)

    assert index.get_invocation('nothing_one')
    assert index.get_invocation('take_nothings')

    # List comparison: the order of the upstream outputs is part of the check.
    expected_upstream = [
        OutputHandleSnap('nothing_one', 'result'),
        OutputHandleSnap('nothing_two', 'result'),
    ]
    assert index.get_upstream_outputs('take_nothings', 'nothing') == expected_upstream

    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert round_tripped == dep_structure_snapshot

    fan_in_snapshot = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert fan_in_snapshot == serialize_rt(fan_in_snapshot)
    snapshot.assert_match(serialize_pp(fan_in_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(fan_in_snapshot))
def test_complex_dag(snapshot):
    """Snapshot the dependency structure of a large, densely connected Airflow DAG.

    The DAG is built from DummyOperators grouped into create / delete / get /
    list / lookup / rename / search / update tasks, wired together with ``>>``
    and ``chain``. It is converted with
    ``make_dagster_pipeline_from_airflow_dag`` and the pipeline snapshot's
    ``dep_structure_snapshot`` is matched against the stored snapshot.

    NOTE: ``create_tag_template_field_result2`` previously reused the task_id
    ``"create_tag_template_field_result"``, so two operators shared one id.
    It now has its own id; the stored snapshot for this test has to be
    regenerated to include the additional task.
    """
    dag = DAG(dag_id="complex_dag", default_args=default_args, schedule_interval=None)

    # Create
    create_entry_group = DummyOperator(task_id="create_entry_group", dag=dag,)
    create_entry_group_result = DummyOperator(task_id="create_entry_group_result", dag=dag,)
    create_entry_group_result2 = DummyOperator(task_id="create_entry_group_result2", dag=dag,)
    create_entry_gcs = DummyOperator(task_id="create_entry_gcs", dag=dag,)
    create_entry_gcs_result = DummyOperator(task_id="create_entry_gcs_result", dag=dag,)
    create_entry_gcs_result2 = DummyOperator(task_id="create_entry_gcs_result2", dag=dag,)
    create_tag = DummyOperator(task_id="create_tag", dag=dag,)
    create_tag_result = DummyOperator(task_id="create_tag_result", dag=dag,)
    create_tag_result2 = DummyOperator(task_id="create_tag_result2", dag=dag,)
    create_tag_template = DummyOperator(task_id="create_tag_template", dag=dag,)
    create_tag_template_result = DummyOperator(task_id="create_tag_template_result", dag=dag,)
    create_tag_template_result2 = DummyOperator(task_id="create_tag_template_result2", dag=dag,)
    create_tag_template_field = DummyOperator(task_id="create_tag_template_field", dag=dag,)
    create_tag_template_field_result = DummyOperator(
        task_id="create_tag_template_field_result", dag=dag,
    )
    # Fixed: this operator used to duplicate the task_id of the one above.
    create_tag_template_field_result2 = DummyOperator(
        task_id="create_tag_template_field_result2", dag=dag,
    )

    # Delete
    delete_entry = DummyOperator(task_id="delete_entry", dag=dag,)
    create_entry_gcs >> delete_entry
    delete_entry_group = DummyOperator(task_id="delete_entry_group", dag=dag,)
    create_entry_group >> delete_entry_group
    delete_tag = DummyOperator(task_id="delete_tag", dag=dag,)
    create_tag >> delete_tag
    delete_tag_template_field = DummyOperator(task_id="delete_tag_template_field", dag=dag,)
    delete_tag_template = DummyOperator(task_id="delete_tag_template", dag=dag,)

    # Get
    get_entry_group = DummyOperator(task_id="get_entry_group", dag=dag,)
    get_entry_group_result = DummyOperator(task_id="get_entry_group_result", dag=dag,)
    get_entry = DummyOperator(task_id="get_entry", dag=dag,)
    get_entry_result = DummyOperator(task_id="get_entry_result", dag=dag,)
    get_tag_template = DummyOperator(task_id="get_tag_template", dag=dag,)
    get_tag_template_result = DummyOperator(task_id="get_tag_template_result", dag=dag,)

    # List
    list_tags = DummyOperator(task_id="list_tags", dag=dag,)
    list_tags_result = DummyOperator(task_id="list_tags_result", dag=dag,)

    # Lookup
    lookup_entry = DummyOperator(task_id="lookup_entry", dag=dag,)
    lookup_entry_result = DummyOperator(task_id="lookup_entry_result", dag=dag,)

    # Rename
    rename_tag_template_field = DummyOperator(task_id="rename_tag_template_field", dag=dag,)

    # Search
    search_catalog = DummyOperator(task_id="search_catalog", dag=dag,)
    search_catalog_result = DummyOperator(task_id="search_catalog_result", dag=dag,)

    # Update
    update_entry = DummyOperator(task_id="update_entry", dag=dag,)
    update_tag = DummyOperator(task_id="update_tag", dag=dag,)
    update_tag_template = DummyOperator(task_id="update_tag_template", dag=dag,)
    update_tag_template_field = DummyOperator(task_id="update_tag_template_field", dag=dag,)

    # Create
    create_tasks = [
        create_entry_group,
        create_entry_gcs,
        create_tag_template,
        create_tag_template_field,
        create_tag,
    ]
    chain(*create_tasks)

    create_entry_group >> delete_entry_group
    create_entry_group >> create_entry_group_result
    create_entry_group >> create_entry_group_result2

    create_entry_gcs >> delete_entry
    create_entry_gcs >> create_entry_gcs_result
    create_entry_gcs >> create_entry_gcs_result2

    create_tag_template >> delete_tag_template_field
    create_tag_template >> create_tag_template_result
    create_tag_template >> create_tag_template_result2

    create_tag_template_field >> delete_tag_template_field
    create_tag_template_field >> create_tag_template_field_result
    create_tag_template_field >> create_tag_template_field_result2

    create_tag >> delete_tag
    create_tag >> create_tag_result
    create_tag >> create_tag_result2

    # Delete
    delete_tasks = [
        delete_tag,
        delete_tag_template_field,
        delete_tag_template,
        delete_entry_group,
        delete_entry,
    ]
    chain(*delete_tasks)

    # Get
    create_tag_template >> get_tag_template >> delete_tag_template
    get_tag_template >> get_tag_template_result

    create_entry_gcs >> get_entry >> delete_entry
    get_entry >> get_entry_result

    create_entry_group >> get_entry_group >> delete_entry_group
    get_entry_group >> get_entry_group_result

    # List
    create_tag >> list_tags >> delete_tag
    list_tags >> list_tags_result

    # Lookup
    create_entry_gcs >> lookup_entry >> delete_entry
    lookup_entry >> lookup_entry_result

    # Rename
    create_tag_template_field >> rename_tag_template_field >> delete_tag_template_field

    # Search
    chain(create_tasks, search_catalog, delete_tasks)
    search_catalog >> search_catalog_result

    # Update
    create_entry_gcs >> update_entry >> delete_entry
    create_tag >> update_tag >> delete_tag
    create_tag_template >> update_tag_template >> delete_tag_template
    create_tag_template_field >> update_tag_template_field >> rename_tag_template_field

    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(dag)
            ).dep_structure_snapshot
        )
    )
def test_empty_pipeline_snap_snapshot(snapshot):
    """Match the serialized snapshot of the pipeline from ``get_noop_pipeline`` against the stored one."""
    noop_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())
    serialized = serialize_pp(noop_snapshot)
    snapshot.assert_match(serialized)