def test_single_write_read_with_snapshot(self, storage):
    """A run that references a pipeline snapshot round-trips through storage,
    and wipe() removes both the snapshot and the run."""
    run_id = "lkasjdflkjasdf"
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    snap = pipeline_def.get_pipeline_snapshot()
    snap_id = create_pipeline_snapshot_id(snap)
    run = PipelineRun(
        run_id=run_id,
        pipeline_name=pipeline_def.name,
        pipeline_snapshot_id=snap_id,
    )

    # The snapshot is absent until explicitly added; adding returns its id.
    assert not storage.has_pipeline_snapshot(snap_id)
    assert storage.add_pipeline_snapshot(snap) == snap_id
    assert serialize_pp(storage.get_pipeline_snapshot(snap_id)) == serialize_pp(snap)

    storage.add_run(run)
    assert storage.get_run_by_id(run_id) == run

    # wipe() clears both snapshots and runs.
    storage.wipe()
    assert not storage.has_pipeline_snapshot(snap_id)
    assert not storage.has_run(run_id)
def test_add_get_snapshot(self, storage):
    """Pipeline snapshots can be added, fetched, queried, and wiped."""
    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    snap = pipeline_def.get_pipeline_snapshot()
    snap_id = create_pipeline_snapshot_id(snap)

    # add returns the content-addressed snapshot id.
    assert storage.add_pipeline_snapshot(snap) == snap_id

    fetched = storage.get_pipeline_snapshot(snap_id)
    assert fetched
    assert serialize_pp(fetched) == serialize_pp(snap)

    assert storage.has_pipeline_snapshot(snap_id)
    assert not storage.has_pipeline_snapshot("nope")

    storage.wipe()
    assert not storage.has_pipeline_snapshot(snap_id)
def test_diamond_task_dag(snapshot):
    """Snapshot the dep structure of a diamond-shaped Airflow DAG: 1 -> (2, 3) -> 4."""
    dag = DAG(
        dag_id='diamond_task_dag',
        default_args=default_args,
        schedule_interval=None,
    )

    # Four dummy tasks wired into a diamond.
    ops = {
        i: DummyOperator(task_id='dummy_operator_%d' % i, dag=dag)
        for i in range(1, 5)
    }
    ops[1] >> ops[2]
    ops[1] >> ops[3]
    ops[2] >> ops[4]
    ops[3] >> ops[4]

    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(dag)
            ).dep_structure_snapshot
        )
    )
def test_multi_leaf_dag(snapshot):
    """Snapshot the dep structure of a fan-out DAG: 1 -> each of (2, 3, 4)."""
    dag = DAG(
        dag_id="multi_leaf_dag",
        default_args=default_args,
        schedule_interval=None,
    )

    # One root plus three leaves.
    ops = {
        i: DummyOperator(task_id="dummy_operator_%d" % i, dag=dag)
        for i in range(1, 5)
    }
    for leaf in (2, 3, 4):
        ops[1] >> ops[leaf]

    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(dag=dag)
            ).dep_structure_snapshot
        )
    )
def test_all_snapshot_ids(snapshot):
    """Pin the snapshot and computed snapshot id of every pipeline in the repo.

    This ensures that pipeline snapshots remain stable. If you 1) change any
    pipelines in dagster_graphql_test or 2) change the schema of
    PipelineSnapshots, you are free to rerecord.
    """
    with instance_for_test():
        with get_main_external_repo() as repo:
            pipelines = sorted(repo.get_all_external_pipelines(), key=lambda p: p.name)
            for external_pipeline in pipelines:
                snapshot.assert_match(serialize_pp(external_pipeline.pipeline_snapshot))
                snapshot.assert_match(external_pipeline.computed_pipeline_snapshot_id)
def test_add_get_execution_snapshot(self, storage):
    """Execution-plan snapshots round-trip through storage and are wiped."""
    from dagster.core.execution.api import create_execution_plan
    from dagster.core.snap import snapshot_from_execution_plan

    pipeline_def = PipelineDefinition(name="some_pipeline", solid_defs=[])
    plan = create_execution_plan(pipeline_def)
    ep_snapshot = snapshot_from_execution_plan(
        plan, pipeline_def.get_pipeline_snapshot_id()
    )

    snapshot_id = storage.add_execution_plan_snapshot(ep_snapshot)
    fetched = storage.get_execution_plan_snapshot(snapshot_id)
    assert fetched
    assert serialize_pp(fetched) == serialize_pp(ep_snapshot)

    assert storage.has_execution_plan_snapshot(snapshot_id)
    assert not storage.has_execution_plan_snapshot("nope")

    storage.wipe()
    assert not storage.has_execution_plan_snapshot(snapshot_id)
def test_external_repository_data(snapshot):
    """external_repository_data_from_def exposes the pipeline, the schedule,
    and the schedule's partition set, and serializes stably."""

    @repository
    def repo():
        return [a_pipeline, a_schedule]

    external_repo_data = external_repository_data_from_def(repo)

    assert external_repo_data.get_external_pipeline_data('a_pipeline')
    assert external_repo_data.get_external_schedule_data('a_schedule')
    assert external_repo_data.get_external_partition_set_data('a_schedule_partitions')

    snapshot.assert_match(serialize_pp(external_repo_data))
def test_empty_pipeline_snap_props(snapshot):
    """An empty pipeline snapshot has default props and round-trips serialization."""
    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description is None
    assert pipeline_snapshot.tags == {}

    # Round-trip serialization must be lossless.
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_multi_type_config_array_dict_fields(dict_config_type, snapshot):
    """Array-of-dict solid config snapshots serialize and hash stably."""

    @solid(config=Array(dict_config_type({'foo': Field(int), 'bar': Field(str)})))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    def_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(def_snap)

    snapshot.assert_match(serialize_pp(snap_from_config_type(received_config_type)))
    _array_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key,
    )
def test_one_task_dag(snapshot):
    """Snapshot the dep structure of the simplest possible DAG: one task."""
    dag = DAG(
        dag_id='one_task_dag',
        default_args=default_args,
        schedule_interval=None,
    )
    DummyOperator(task_id='dummy_operator', dag=dag)

    pipeline_def = make_dagster_pipeline_from_airflow_dag(dag)
    dep_snapshot = PipelineSnapshot.from_pipeline_def(pipeline_def).dep_structure_snapshot
    snapshot.assert_match(serialize_pp(dep_snapshot))
def test_multi_type_config_array_map(snapshot):
    """Array-of-Map solid config snapshots serialize and hash stably."""

    @solid(config_schema=Array(Map(str, int)))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    def_snap = pipeline_snapshot.get_node_def_snap("fancy_solid")
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(def_snap)

    snapshot.assert_match(serialize_pp(snap_from_config_type(received_config_type)))
    _array_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key,
    )
def test_create_noop_execution_plan_with_tags(snapshot):
    """Solid tags are captured in the execution-plan snapshot."""

    @solid(tags={'foo': 'bar', 'bar': 'baaz'})
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    snapshot.assert_match(
        serialize_pp(snapshot_from_execution_plan(plan, pipeline_snapshot_id))
    )
def test_create_noop_execution_plan(snapshot):
    """Snapshot the execution plan of a single-solid no-op pipeline."""

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        noop_solid()

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    snapshot.assert_match(
        serialize_pp(snapshot_from_execution_plan(plan, pipeline_snapshot_id))
    )
def test_multi_type_config_nested_dicts(nested_dict_types, snapshot):
    """Triple-nested dict config snapshots serialize and hash stably."""
    D1, D2, D3 = nested_dict_types

    @solid(config=D1({'foo': D2({'bar': D3({'baz': Field(int)})})}))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    def_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(def_snap)

    snapshot.assert_match(serialize_pp(snap_from_config_type(received_config_type)))
    _map_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key,
    )
def test_basic_dep_fan_out(snapshot):
    """One producer fanning out to two aliased consumers is indexed correctly
    and the snapshot round-trips serialization."""

    @solid
    def return_one(_):
        return 1

    @solid(input_defs=[InputDefinition("value", int)])
    def passthrough(_, value):
        return value

    @pipeline
    def single_dep_pipeline():
        result = return_one()
        passthrough.alias("passone")(result)
        passthrough.alias("passtwo")(result)

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(
        single_dep_pipeline.graph
    )
    dep_index = DependencyStructureIndex(dep_structure_snapshot)

    # All three invocations are present, and both aliases read return_one's output.
    assert dep_index.get_invocation("return_one")
    for alias in ("passone", "passtwo"):
        assert dep_index.get_invocation(alias)
        assert dep_index.get_upstream_output(alias, "value") == OutputHandleSnap(
            "return_one", "result"
        )

    assert set(dep_index.get_downstream_inputs("return_one", "result")) == {
        InputHandle("passthrough", "passone", "value"),
        InputHandle("passthrough", "passtwo", "value"),
    }

    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert round_tripped == dep_structure_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(single_dep_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_pipeline_snap_all_props(snapshot):
    """Name, description, and tags are all captured in the pipeline snapshot."""

    @solid
    def noop_solid(_):
        pass

    @pipeline(description='desc', tags={'key': 'value'})
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert pipeline_snapshot.name == 'noop_pipeline'
    assert pipeline_snapshot.description == 'desc'
    assert pipeline_snapshot.tags == {'key': 'value'}
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_pipeline_snap_all_props(snapshot):
    """Name, description, and tags are all captured in the pipeline snapshot."""

    @solid
    def noop_solid(_):
        pass

    @pipeline(description="desc", tags={"key": "value"})
    def noop_pipeline():
        noop_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)

    assert pipeline_snapshot.name == "noop_pipeline"
    assert pipeline_snapshot.description == "desc"
    assert pipeline_snapshot.tags == {"key": "value"}
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)

    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_two_invocations_deps_snap(snapshot):
    """Two aliased invocations of the same solid are indexed independently."""

    @solid
    def noop_solid(_):
        pass

    @pipeline
    def two_solid_pipeline():
        noop_solid.alias('one')()
        noop_solid.alias('two')()

    dep_index = DependencyStructureIndex(
        build_dep_structure_snapshot_from_icontains_solids(two_solid_pipeline)
    )
    for alias in ('one', 'two'):
        assert dep_index.get_invocation(alias)

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(two_solid_pipeline)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_create_execution_plan_with_dep(snapshot):
    """Snapshot the execution plan of a two-solid linear pipeline."""

    @solid
    def solid_one(_):
        return 1

    @solid
    def solid_two(_, num):
        return num + 1

    @pipeline
    def noop_pipeline():
        solid_two(solid_one())

    plan = create_execution_plan(noop_pipeline)
    pipeline_snapshot_id = create_pipeline_snapshot_id(noop_pipeline.get_pipeline_snapshot())
    snapshot.assert_match(
        serialize_pp(snapshot_from_execution_plan(plan, pipeline_snapshot_id))
    )
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    """A deeply composed config schema (Permissive/Selector/Enum/...) snapshots
    and hashes stably."""

    @solid(config_schema=Field(
        Permissive({
            "foo": Field(Array(float)),
            "bar": Selector({
                "baz": Field(Noneable(int)),
                "qux": {
                    "quux": Field(str),
                    "corge": Field(
                        Enum(
                            "RGB",
                            [EnumValue("red"), EnumValue("green"), EnumValue("blue")],
                        )
                    ),
                },
            }),
        })
    ))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    def_snap = pipeline_snapshot.get_solid_def_snap("fancy_solid")
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(def_snap)

    snapshot.assert_match(serialize_pp(snap_from_config_type(received_config_type)))
    _map_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key,
    )
def test_deserialize_solid_def_snaps_multi_type_config(snapshot):
    """A deeply composed config schema (Permissive/Selector/Enum/...) snapshots
    and hashes stably."""

    @solid(config=Field(
        Permissive({
            'foo': Field(Array(float)),
            'bar': Selector({
                'baz': Field(Noneable(int)),
                'qux': {
                    'quux': Field(str),
                    'corge': Field(
                        Enum(
                            'RGB',
                            [EnumValue('red'), EnumValue('green'), EnumValue('blue')],
                        )
                    ),
                },
            }),
        })
    ))
    def fancy_solid(_):
        pass

    @pipeline
    def noop_pipeline():
        fancy_solid()

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(noop_pipeline)
    def_snap = pipeline_snapshot.get_solid_def_snap('fancy_solid')
    received_config_type = pipeline_snapshot.get_config_type_from_solid_def_snap(def_snap)

    snapshot.assert_match(serialize_pp(snap_from_config_type(received_config_type)))
    _map_has_stable_hashes(
        received_config_type,
        pipeline_snapshot.config_schema_snapshot.all_config_snaps_by_key,
    )
def test_create_with_composite(snapshot):
    """Snapshot the execution plan of a pipeline built from two composite solids."""

    @solid(output_defs=[OutputDefinition(name='out_num', dagster_type=int)])
    def return_one(_):
        return 1

    @solid(
        input_defs=[InputDefinition(name='num', dagster_type=int)],
        output_defs=[OutputDefinition(int)],
    )
    def add_one(_, num):
        return num + 1

    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_1():
        return add_one(return_one())

    @composite_solid(output_defs=[OutputDefinition(name='named_output', dagster_type=int)])
    def comp_2():
        return add_one(return_one())

    @solid
    def add(_, num_one, num_two):
        return num_one + num_two

    @pipeline
    def do_comps():
        add(num_one=comp_1(), num_two=comp_2())

    plan = create_execution_plan(do_comps)
    pipeline_snapshot_id = create_pipeline_snapshot_id(do_comps.get_pipeline_snapshot())
    snapshot.assert_match(
        serialize_pp(snapshot_from_execution_plan(plan, pipeline_snapshot_id))
    )
def test_basic_fan_in(snapshot):
    """Fan-in of two Nothing dependencies is captured in the dep structure
    snapshot and round-trips serialization."""

    @solid(output_defs=[OutputDefinition(Nothing)])
    def return_nothing(_):
        return None

    @solid(input_defs=[InputDefinition('nothing', Nothing)])
    def take_nothings(_):
        return None

    @pipeline
    def fan_in_test():
        take_nothings([
            return_nothing.alias('nothing_one')(),
            return_nothing.alias('nothing_two')(),
        ])

    dep_structure_snapshot = build_dep_structure_snapshot_from_icontains_solids(fan_in_test)
    dep_index = DependencyStructureIndex(dep_structure_snapshot)

    assert dep_index.get_invocation('nothing_one')
    assert dep_index.get_invocation('take_nothings')
    # Fan-in preserves upstream ordering.
    assert dep_index.get_upstream_outputs('take_nothings', 'nothing') == [
        OutputHandleSnap('nothing_one', 'result'),
        OutputHandleSnap('nothing_two', 'result'),
    ]

    round_tripped = deserialize_json_to_dagster_namedtuple(
        serialize_dagster_namedtuple(dep_structure_snapshot)
    )
    assert round_tripped == dep_structure_snapshot

    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(fan_in_test)
    assert pipeline_snapshot == serialize_rt(pipeline_snapshot)
    snapshot.assert_match(serialize_pp(pipeline_snapshot))
    snapshot.assert_match(create_pipeline_snapshot_id(pipeline_snapshot))
def test_external_pipeline_data(snapshot):
    """Pin the serialized external pipeline data for a_pipeline."""
    external_data = external_pipeline_data_from_def(a_pipeline)
    snapshot.assert_match(serialize_pp(external_data))
def test_active_pipeline_data(snapshot):
    """Pin the serialized active pipeline data for a_pipeline."""
    active_data = active_pipeline_data_from_def(a_pipeline)
    snapshot.assert_match(serialize_pp(active_data))
def test_active_repository_data(snapshot):
    """Pin the serialized active repository data for a single-pipeline repo."""
    rep_def = RepositoryDefinition(name='repo', pipeline_defs=[a_pipeline])
    active_data = active_repository_data_from_def(rep_def)
    snapshot.assert_match(serialize_pp(active_data))
def test_historical_config_type_snap(snapshot):
    """A ConfigTypeSnap serialized by an older version still deserializes and
    re-serializes stably (backcompat guard)."""
    # Historical payload — must stay byte-identical to what older versions wrote.
    old_snap_json = """{"__class__": "ConfigTypeSnap", "description": "", "enum_values": [], "fields": [], "given_name": "kjdkfjdkfjdkj", "key": "ksjdkfjdkfjd", "kind": {"__enum__": "ConfigTypeKind.STRICT_SHAPE"}, "type_param_keys": []}"""
    deserialized = deserialize_json_to_dagster_namedtuple(old_snap_json)
    snapshot.assert_match(serialize_pp(deserialized))
def test_complex_dag(snapshot):
    """Snapshot the dep structure of a large Data Catalog-style Airflow DAG.

    Exercises create/delete/get/list/lookup/rename/search/update task groups
    plus chain() wiring, then pins the resulting dagster dep structure.
    """
    dag = DAG(dag_id="complex_dag", default_args=default_args, schedule_interval=None)

    # Create
    create_entry_group = DummyOperator(task_id="create_entry_group", dag=dag)
    create_entry_group_result = DummyOperator(task_id="create_entry_group_result", dag=dag)
    create_entry_group_result2 = DummyOperator(task_id="create_entry_group_result2", dag=dag)
    create_entry_gcs = DummyOperator(task_id="create_entry_gcs", dag=dag)
    create_entry_gcs_result = DummyOperator(task_id="create_entry_gcs_result", dag=dag)
    create_entry_gcs_result2 = DummyOperator(task_id="create_entry_gcs_result2", dag=dag)
    create_tag = DummyOperator(task_id="create_tag", dag=dag)
    create_tag_result = DummyOperator(task_id="create_tag_result", dag=dag)
    create_tag_result2 = DummyOperator(task_id="create_tag_result2", dag=dag)
    create_tag_template = DummyOperator(task_id="create_tag_template", dag=dag)
    create_tag_template_result = DummyOperator(task_id="create_tag_template_result", dag=dag)
    create_tag_template_result2 = DummyOperator(task_id="create_tag_template_result2", dag=dag)
    create_tag_template_field = DummyOperator(task_id="create_tag_template_field", dag=dag)
    create_tag_template_field_result = DummyOperator(
        task_id="create_tag_template_field_result", dag=dag
    )
    # BUGFIX: this operator previously reused task_id
    # "create_tag_template_field_result", colliding with the operator above
    # (duplicate task_ids raise DuplicateTaskIdFound on Airflow 2.x).
    # Rerecord the snapshot after this change.
    create_tag_template_field_result2 = DummyOperator(
        task_id="create_tag_template_field_result2", dag=dag
    )

    # Delete
    delete_entry = DummyOperator(task_id="delete_entry", dag=dag)
    create_entry_gcs >> delete_entry
    delete_entry_group = DummyOperator(task_id="delete_entry_group", dag=dag)
    create_entry_group >> delete_entry_group
    delete_tag = DummyOperator(task_id="delete_tag", dag=dag)
    create_tag >> delete_tag
    delete_tag_template_field = DummyOperator(task_id="delete_tag_template_field", dag=dag)
    delete_tag_template = DummyOperator(task_id="delete_tag_template", dag=dag)

    # Get
    get_entry_group = DummyOperator(task_id="get_entry_group", dag=dag)
    get_entry_group_result = DummyOperator(task_id="get_entry_group_result", dag=dag)
    get_entry = DummyOperator(task_id="get_entry", dag=dag)
    get_entry_result = DummyOperator(task_id="get_entry_result", dag=dag)
    get_tag_template = DummyOperator(task_id="get_tag_template", dag=dag)
    get_tag_template_result = DummyOperator(task_id="get_tag_template_result", dag=dag)

    # List
    list_tags = DummyOperator(task_id="list_tags", dag=dag)
    list_tags_result = DummyOperator(task_id="list_tags_result", dag=dag)

    # Lookup
    lookup_entry = DummyOperator(task_id="lookup_entry", dag=dag)
    lookup_entry_result = DummyOperator(task_id="lookup_entry_result", dag=dag)

    # Rename
    rename_tag_template_field = DummyOperator(task_id="rename_tag_template_field", dag=dag)

    # Search
    search_catalog = DummyOperator(task_id="search_catalog", dag=dag)
    search_catalog_result = DummyOperator(task_id="search_catalog_result", dag=dag)

    # Update
    update_entry = DummyOperator(task_id="update_entry", dag=dag)
    update_tag = DummyOperator(task_id="update_tag", dag=dag)
    update_tag_template = DummyOperator(task_id="update_tag_template", dag=dag)
    update_tag_template_field = DummyOperator(task_id="update_tag_template_field", dag=dag)

    # Create
    create_tasks = [
        create_entry_group,
        create_entry_gcs,
        create_tag_template,
        create_tag_template_field,
        create_tag,
    ]
    chain(*create_tasks)

    create_entry_group >> delete_entry_group
    create_entry_group >> create_entry_group_result
    create_entry_group >> create_entry_group_result2

    create_entry_gcs >> delete_entry
    create_entry_gcs >> create_entry_gcs_result
    create_entry_gcs >> create_entry_gcs_result2

    create_tag_template >> delete_tag_template_field
    create_tag_template >> create_tag_template_result
    create_tag_template >> create_tag_template_result2

    create_tag_template_field >> delete_tag_template_field
    create_tag_template_field >> create_tag_template_field_result
    create_tag_template_field >> create_tag_template_field_result2

    create_tag >> delete_tag
    create_tag >> create_tag_result
    create_tag >> create_tag_result2

    # Delete
    delete_tasks = [
        delete_tag,
        delete_tag_template_field,
        delete_tag_template,
        delete_entry_group,
        delete_entry,
    ]
    chain(*delete_tasks)

    # Get
    create_tag_template >> get_tag_template >> delete_tag_template
    get_tag_template >> get_tag_template_result
    create_entry_gcs >> get_entry >> delete_entry
    get_entry >> get_entry_result
    create_entry_group >> get_entry_group >> delete_entry_group
    get_entry_group >> get_entry_group_result

    # List
    create_tag >> list_tags >> delete_tag
    list_tags >> list_tags_result

    # Lookup
    create_entry_gcs >> lookup_entry >> delete_entry
    lookup_entry >> lookup_entry_result

    # Rename
    create_tag_template_field >> rename_tag_template_field >> delete_tag_template_field

    # Search
    chain(create_tasks, search_catalog, delete_tasks)
    search_catalog >> search_catalog_result

    # Update
    create_entry_gcs >> update_entry >> delete_entry
    create_tag >> update_tag >> delete_tag
    create_tag_template >> update_tag_template >> delete_tag_template
    create_tag_template_field >> update_tag_template_field >> rename_tag_template_field

    snapshot.assert_match(
        serialize_pp(
            PipelineSnapshot.from_pipeline_def(
                make_dagster_pipeline_from_airflow_dag(dag)
            ).dep_structure_snapshot
        )
    )
def test_empty_pipeline_snap_snapshot(snapshot):
    """Pin the serialized snapshot of the no-op pipeline."""
    pipeline_snapshot = PipelineSnapshot.from_pipeline_def(get_noop_pipeline())
    snapshot.assert_match(serialize_pp(pipeline_snapshot))