def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__, "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "b22f16781a7c"
        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)
        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "schedules" not in get_sqlite3_tables(db_path)
            assert "schedule_ticks" not in get_sqlite3_tables(db_path)
            assert "jobs" in get_sqlite3_tables(db_path)
            assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_instigator_state()) == 0

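# The helpers used throughout these tests -- copy_directory plus the sqlite
# inspectors get_current_alembic_version, get_sqlite3_tables,
# get_sqlite3_columns, and get_sqlite3_indexes -- live in shared test
# utilities. A minimal sketch of what they might look like, assuming only
# stdlib sqlite3/shutil/tempfile, for reference:

import os
import shutil
import sqlite3
import tempfile
from contextlib import contextmanager


@contextmanager
def copy_directory(src):
    # copy the snapshot directory into a scratch dir so migrations mutate a copy
    with tempfile.TemporaryDirectory() as temp_dir:
        dst = os.path.join(temp_dir, os.path.basename(src))
        shutil.copytree(src, dst)
        yield dst


def get_current_alembic_version(db_path):
    with sqlite3.connect(db_path) as conn:
        return conn.execute("SELECT version_num FROM alembic_version").fetchone()[0]


def get_sqlite3_tables(db_path):
    with sqlite3.connect(db_path) as conn:
        rows = conn.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        return [row[0] for row in rows.fetchall()]


def get_sqlite3_columns(db_path, table_name):
    with sqlite3.connect(db_path) as conn:
        # PRAGMA table_info rows are (cid, name, type, notnull, dflt_value, pk)
        return [row[1] for row in conn.execute(f"PRAGMA table_info({table_name})")]


def get_sqlite3_indexes(db_path, table_name):
    with sqlite3.connect(db_path) as conn:
        # PRAGMA index_list rows are (seq, name, unique, origin, partial)
        return [row[1] for row in conn.execute(f"PRAGMA index_list({table_name})")]
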
def test_backcompat_get_asset_records():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    def _validate_materialization(asset_key, event, expected_tags):
        assert isinstance(event, EventLogEntry)
        assert event.dagster_event
        assert event.dagster_event.is_step_materialization
        assert event.dagster_event.step_materialization_data.materialization.asset_key == asset_key
        assert event.dagster_event.step_materialization_data.materialization.tags == expected_tags

    b = AssetKey("b")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            records = storage.get_asset_records([b])
            asset_entry = records[0].asset_entry
            assert asset_entry.asset_key == b
            _validate_materialization(b, asset_entry.last_materialization, expected_tags={})

def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(__file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        created_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert created_snapshot_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == created_snapshot_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)

def test_start_time_end_time():
    src_dir = file_relative_path(__file__, "snapshot_0_13_12_pre_add_start_time_and_end_time")
    with copy_directory(src_dir) as test_dir:

        @job
        def _test():
            pass

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))

        # this migration was optional, so make sure things work before migrating
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance.upgrade()

        # Make sure the schema is migrated
        assert "start_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance._run_storage._alembic_downgrade(rev="7f2b1a4ca7a5")
        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"

        # the downgrade should drop the new columns again
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))

def test_asset_key_structure():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_9_16_asset_key_structure")
    with copy_directory(src_dir) as test_dir:
        asset_storage = ConsolidatedSqliteEventLogStorage(test_dir)
        asset_keys = asset_storage.get_all_asset_keys()
        assert len(asset_keys) == 5

        # get a structured asset key
        asset_key = AssetKey(["dashboards", "cost_dashboard"])

        # check that backcompat events are read
        assert asset_storage.has_asset_key(asset_key)
        events = asset_storage.get_asset_events(asset_key)
        assert len(events) == 1
        run_ids = asset_storage.get_asset_run_ids(asset_key)
        assert len(run_ids) == 1

        # check that backcompat events are merged with newly stored events
        run_id = "fake_run_id"
        asset_storage.store_event(_materialization_event_record(run_id, asset_key))
        assert asset_storage.has_asset_key(asset_key)
        events = asset_storage.get_asset_events(asset_key)
        assert len(events) == 2
        run_ids = asset_storage.get_asset_run_ids(asset_key)
        assert len(run_ids) == 2

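# _materialization_event_record (used above) builds a synthetic materialization
# event to store directly. A hedged sketch, assuming the 0.14-era EventLogEntry,
# DagsterEvent, and StepMaterializationData constructors; "my_pipe" and
# "materialize" are illustrative placeholders:
import time

from dagster import AssetMaterialization
from dagster.core.events import DagsterEvent, DagsterEventType, StepMaterializationData
from dagster.core.events.log import EventLogEntry


def _materialization_event_record(run_id, asset_key):
    return EventLogEntry(
        error_info=None,
        user_message="",
        level="debug",
        run_id=run_id,
        timestamp=time.time(),
        step_key="materialize",
        pipeline_name="my_pipe",
        dagster_event=DagsterEvent(
            DagsterEventType.ASSET_MATERIALIZATION.value,
            "my_pipe",
            step_key="materialize",
            event_specific_data=StepMaterializationData(
                AssetMaterialization(asset_key=asset_key)
            ),
        ),
    )
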
def test_event_log_lazy_asset_index_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_lazy_asset_index_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(
            InstanceRef.from_dir(
                test_dir,
                overrides={
                    "event_log_storage": {
                        "module": "dagster.core.storage.event_log.sqlite.consolidated_sqlite_event_log",
                        "class": "ConsolidatedSqliteEventLogStorage",
                        "config": {"base_dir": os.path.join(test_dir, "history")},
                    }
                },
            )
        )

        # ensure everything is upgraded
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        assert not instance._event_storage.has_secondary_index(SECONDARY_INDEX_ASSET_KEY)

        old_keys = instance.all_asset_keys()

        assert instance._event_storage.has_secondary_index(SECONDARY_INDEX_ASSET_KEY)

        new_keys = instance.all_asset_keys()
        assert set(old_keys) == set(new_keys)

def test_asset_lazy_migration():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("a"))
        yield AssetMaterialization(AssetKey("b"))
        yield AssetMaterialization(AssetKey("c"))
        yield AssetMaterialization(AssetKey("d"))
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage
            assert not storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # run the schema migration without reindexing the asset keys
            storage.upgrade()
            assert storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetch all asset keys
            instance.all_asset_keys()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # wipe a, b in order to populate wipe_timestamp
            storage.wipe_asset(AssetKey("a"))
            storage.wipe_asset(AssetKey("b"))

            # materialize all the assets to populate materialization_timestamp
            my_job.execute_in_process(instance=instance)

            # still should not be migrated (on write)
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetching partial results should not trigger migration
            instance.get_asset_keys(prefix=["b"])
            instance.get_asset_keys(cursor=str(AssetKey("b")))
            instance.get_latest_materialization_events(asset_keys=[AssetKey("b")])
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # on read, we should see that all the data has already been migrated and
            # we can now mark the asset key index as migrated
            instance.all_asset_keys()
            assert storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

def test_snapshot_0_7_6_pre_add_pipeline_snapshot():
    run_id = "fb0b3905-068b-4444-8f00-76fcbaef7e8b"
    src_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with copy_directory(src_dir) as test_dir:
        # invariant check to make sure migration has not been run yet
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "9fe9e746268c"
        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_run_storage_migration_regex(current_revision="9fe9e746268c"),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1
        run = instance.get_run_by_id(run_id)

        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)
        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id
        new_run = instance.get_run_by_id(new_run_id)
        assert new_run.pipeline_snapshot_id

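# Hedged sketches of the regex helpers passed to pytest.raises(match=...) in
# this file. The exact DagsterInstanceMigrationRequired message text is
# version-dependent, so these patterns only pin down what the tests rely on:
# the storage kind and the current alembic revision (run_id is accepted for
# signature parity but not matched in this sketch).
import re


def _migration_regex(storage_kind, current_revision):
    revision = re.escape(current_revision) if current_revision else "[a-z0-9]+"
    return "{}.*{}".format(re.escape(storage_kind), revision)


def _run_storage_migration_regex(current_revision):
    return _migration_regex("run storage", current_revision)


def _schedule_storage_migration_regex(current_revision):
    return _migration_regex("schedule storage", current_revision)


def _event_log_migration_regex(run_id, current_revision):  # pylint: disable=unused-argument
    return _migration_regex("event log", current_revision)
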
def test_0_10_6_add_bulk_actions_table():
    src_dir = file_relative_path(__file__, "snapshot_0_10_6_add_bulk_actions_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "bulk_actions" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "bulk_actions" in get_sqlite3_tables(db_path)

def test_backcompat_asset_read():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    def _validate_instance_assets(instance):
        assert instance.all_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys(prefix=["d"]) == [AssetKey("d")]
        assert instance.get_asset_keys(limit=3) == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
        ]
        assert instance.get_asset_keys(cursor='["b"]', limit=3) == [
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
        ]

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.upgrade()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.reindex()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)

def test_get_materialization_count_by_partition(asset_aware_context):
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    d = AssetKey("c")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            materialization_count_by_key = storage.get_materialization_count_by_partition([d])
            assert materialization_count_by_key.get(d) == {}

    a = AssetKey("no_materializations_asset")
    b = AssetKey("no_partitions_asset")
    c = AssetKey("two_partitions_asset")

    @op
    def materialize():
        yield AssetMaterialization(b)
        yield AssetMaterialization(c, partition="a")
        yield Output(None)

    @job
    def my_job():
        materialize()

    @op
    def materialize_two():
        yield AssetMaterialization(c, partition="a")
        yield AssetMaterialization(c, partition="b")
        yield Output(None)

    @job
    def job_two():
        materialize_two()

    with asset_aware_context() as ctx:
        instance, event_log_storage = ctx
        my_job.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c]
        )
        assert materialization_count_by_key.get(a) == {}
        assert materialization_count_by_key.get(b) == {}
        assert materialization_count_by_key.get(c)["a"] == 1
        assert len(materialization_count_by_key.get(c)) == 1

        job_two.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c]
        )
        assert materialization_count_by_key.get(c)["a"] == 2
        assert materialization_count_by_key.get(c)["b"] == 1

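# A hedged sketch of the asset_aware_context fixture the test above assumes: a
# callable context manager yielding an (instance, event_log_storage) pair backed
# by a scratch sqlite instance, so partition counts start from empty storage.
# The real fixture lives in the shared test utils and may wire up a different
# event log storage class.
import pytest


@pytest.fixture(name="asset_aware_context")
def asset_aware_context_fixture():
    @contextmanager
    def _ctx():
        with tempfile.TemporaryDirectory() as temp_dir:
            with DagsterInstance.from_ref(InstanceRef.from_dir(temp_dir)) as instance:
                yield instance, instance.event_log_storage

    return _ctx
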
def test_0_11_0_add_asset_columns():
    src_dir = file_relative_path(__file__, "snapshot_0_11_0_pre_asset_details/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs", "index.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "last_materialization" not in set(get_sqlite3_columns(db_path, "asset_keys"))
        assert "last_run_id" not in set(get_sqlite3_columns(db_path, "asset_keys"))
        assert "asset_details" not in set(get_sqlite3_columns(db_path, "asset_keys"))
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "last_materialization" in set(get_sqlite3_columns(db_path, "asset_keys"))
            assert "last_run_id" in set(get_sqlite3_columns(db_path, "asset_keys"))
            assert "asset_details" in set(get_sqlite3_columns(db_path, "asset_keys"))

def test_schedule_secondary_index_table_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_schedule_migration_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "secondary_indexes" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "secondary_indexes" in get_sqlite3_tables(db_path)

def test_tick_selector_index_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_post_schema_pre_data_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "c892b3fe0a9f"
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            assert "idx_tick_selector_timestamp" not in get_sqlite3_indexes(db_path, "job_ticks")
            instance.upgrade()
            assert "idx_tick_selector_timestamp" in get_sqlite3_indexes(db_path, "job_ticks")

def test_jobs_selector_id_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_post_schema_pre_data_migration/sqlite")

    import sqlalchemy as db

    from dagster.core.storage.schedules.migration import SCHEDULE_JOBS_SELECTOR_ID
    from dagster.core.storage.schedules.schema import InstigatorsTable, JobTable, JobTickTable

    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "c892b3fe0a9f"
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            # runs the required data migrations
            instance.upgrade()

            assert instance.schedule_storage.has_built_index(SCHEDULE_JOBS_SELECTOR_ID)
            legacy_count = len(instance.all_instigator_state())
            migrated_instigator_count = instance.schedule_storage.execute(
                db.select([db.func.count()]).select_from(InstigatorsTable)
            )[0][0]
            assert migrated_instigator_count == legacy_count

            migrated_job_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTable)
                .where(JobTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_job_count == legacy_count

            legacy_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()]).select_from(JobTickTable)
            )[0][0]
            assert legacy_tick_count > 0

            # tick migrations are optional
            migrated_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTickTable)
                .where(JobTickTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_tick_count == 0

            # run the optional migrations
            instance.reindex()

            migrated_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTickTable)
                .where(JobTickTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_tick_count == legacy_tick_count

def test_event_log_asset_partition_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_pre_asset_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(
            test_dir, "history", "runs", "1a1d3c4b-1284-4c74-830c-c8988bd4d779.db"
        )
        assert get_current_alembic_version(db_path) == "c34498c29964"
        assert "partition" not in set(get_sqlite3_columns(db_path, "event_logs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()
        assert "partition" in set(get_sqlite3_columns(db_path, "event_logs"))

def test_event_log_asset_key_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_7_8_pre_asset_key_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(
            test_dir, "history", "runs", "722183e4-119f-4a00-853f-e1257be82ddb.db"
        )
        assert get_current_alembic_version(db_path) == "3b1e175a2be3"
        assert "asset_key" not in set(get_sqlite3_columns(db_path, "event_logs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()
        assert "asset_key" in set(get_sqlite3_columns(db_path, "event_logs"))

def test_0_6_4():
    src_dir = file_relative_path(__file__, "snapshot_0_6_4")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        runs = instance.get_runs()
        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_event_log_migration_regex(
                run_id="c7a6c4d7-6c88-46d0-8baa-d4937c3cefe5", current_revision=None
            ),
        ):
            for run in runs:
                instance.all_logs(run.run_id)

def test_execution_plan_snapshot_backcompat():
    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir) if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir, snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline, run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id
                )
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot
                )
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline, run, instance, raise_on_error=True)
                assert result.success

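# A hedged sketch of the _validate_execution_plan helper used above. The real
# helper asserts details specific to dynamic_pipeline's plan; this version only
# checks invariants that any freshly built or rebuilt plan should satisfy.
def _validate_execution_plan(plan):
    assert isinstance(plan, ExecutionPlan)
    assert plan.steps, "plan should contain at least one step"
    assert plan.step_keys_to_execute
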
def test_run_partition_data_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_post_schema_pre_data_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        from dagster.core.storage.runs.migration import RUN_PARTITIONS
        from dagster.core.storage.runs.sql_run_storage import SqlRunStorage

        # load db that has migrated schema, but not populated data for run partitions
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "375e95bad550"

        # Make sure the schema is migrated
        assert "partition" in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" in set(get_sqlite3_columns(db_path, "runs"))

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance._run_storage.upgrade()

            run_storage = instance._run_storage
            assert isinstance(run_storage, SqlRunStorage)

            partition_set_name = "ingest_and_train"
            partition_name = "2020-01-02"

            # ensure old tag-based reads are working
            assert not run_storage.has_built_index(RUN_PARTITIONS)
            assert len(run_storage._get_partition_runs(partition_set_name, partition_name)) == 2

            # turn on reads for the partition column, without migrating the data
            run_storage.mark_index_built(RUN_PARTITIONS)

            # ensure that no runs are returned because the data has not been migrated
            assert run_storage.has_built_index(RUN_PARTITIONS)
            assert len(run_storage._get_partition_runs(partition_set_name, partition_name)) == 0

            # actually migrate the data
            run_storage.build_missing_indexes(force_rebuild_all=True)

            # ensure that we get the same partitioned runs returned
            assert run_storage.has_built_index(RUN_PARTITIONS)
            assert len(run_storage._get_partition_runs(partition_set_name, partition_name)) == 2

def test_schedule_namedtuple_job_instigator_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_13_19_instigator_named_tuples/sqlite")
    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            states = instance.all_instigator_state()
            assert len(states) == 2
            check.is_list(states, of_type=InstigatorState)
            for state in states:
                assert state.instigator_type
                assert state.instigator_data
                ticks = instance.get_ticks(state.instigator_origin_id, state.selector_id)
                check.is_list(ticks, of_type=InstigatorTick)
                for tick in ticks:
                    assert tick.tick_data
                    assert tick.instigator_type
                    assert tick.instigator_name

def test_instigators_table_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_instigators_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")
        assert get_current_alembic_version(db_path) == "54666da3db5c"
        assert "instigators" not in get_sqlite3_tables(db_path)
        assert "selector_id" not in set(get_sqlite3_columns(db_path, "jobs"))
        assert "selector_id" not in set(get_sqlite3_columns(db_path, "job_ticks"))
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "instigators" in get_sqlite3_tables(db_path)
            assert "selector_id" in set(get_sqlite3_columns(db_path, "jobs"))
            assert "selector_id" in set(get_sqlite3_columns(db_path, "job_ticks"))

def test_0_8_0_scheduler_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_8_0_scheduler_change")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_schedule_storage_migration_regex(current_revision="da7cd32b690d"),
        ):
            instance.all_stored_schedule_state()

        instance.upgrade()

        # upgrade just drops tables, and user upgrade flow is cli entry - so
        # emulate by new-ing up instance which will create new tables
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        instance.all_stored_schedule_state()

def test_downgrade_and_upgrade():
    src_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with copy_directory(src_dir) as test_dir:
        # invariant check to make sure migration has not been run yet
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "9fe9e746268c"
        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev="9fe9e746268c")
        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1

def test_run_partition_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_pre_run_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "224640159acf"
        assert "partition" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" not in set(get_sqlite3_columns(db_path, "runs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        assert "partition" in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" in set(get_sqlite3_columns(db_path, "runs"))

        instance._run_storage._alembic_downgrade(rev="224640159acf")
        assert get_current_alembic_version(db_path) == "224640159acf"

        assert "partition" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" not in set(get_sqlite3_columns(db_path, "runs"))

def test_0_6_6_sqlite_migrate():
    src_dir = file_relative_path(__file__, "snapshot_0_6_6/sqlite")
    assert os.path.exists(file_relative_path(__file__, "snapshot_0_6_6/sqlite/runs.db"))
    assert not os.path.exists(file_relative_path(__file__, "snapshot_0_6_6/sqlite/history/runs.db"))

    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["89296095-892d-4a15-aa0d-9018d1580945"]

        instance._event_storage.get_logs_for_run("89296095-892d-4a15-aa0d-9018d1580945")

        assert not os.path.exists(os.path.join(test_dir, "runs.db"))
        assert os.path.exists(os.path.join(test_dir, "history/runs.db"))

def test_event_log_step_key_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_event_log_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # Make sure the schema is migrated
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["6405c4a0-3ccc-4600-af81-b5ee197f8528"]
        assert isinstance(instance._event_storage, SqlEventLogStorage)
        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(
            "6405c4a0-3ccc-4600-af81-b5ee197f8528"
        )
        assert len(events_by_id) == 40

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(
                "6405c4a0-3ccc-4600-af81-b5ee197f8528", record_id
            )
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log backfill migration
        migrate_event_log_data(instance=instance)

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(
                "6405c4a0-3ccc-4600-af81-b5ee197f8528", record_id
            )
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0

def test_0_6_6_sqlite_exc():
    src_dir = file_relative_path(__file__, "snapshot_0_6_6/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        runs = instance.get_runs()
        # Note that this is a deliberate choice -- old runs are simply invisible, and their
        # presence won't raise DagsterInstanceMigrationRequired. This is a reasonable choice since
        # the runs.db has moved and otherwise we would have to do a check for the existence of an
        # old runs.db every time we accessed the runs. Instead, we'll do this only in the upgrade
        # method.
        assert len(runs) == 0

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["89296095-892d-4a15-aa0d-9018d1580945"]

        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_event_log_migration_regex(
                run_id="89296095-892d-4a15-aa0d-9018d1580945", current_revision=None
            ),
        ):
            instance._event_storage.get_logs_for_run("89296095-892d-4a15-aa0d-9018d1580945")