Example 1
def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__,
                                 "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "b22f16781a7c"

        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)

        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "schedules" not in get_sqlite3_tables(db_path)
        assert "schedule_ticks" not in get_sqlite3_tables(db_path)

        assert "jobs" in get_sqlite3_tables(db_path)
        assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_instigator_state()) == 0
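This example, and most of the ones that follow, lean on a handful of sqlite inspection helpers (get_current_alembic_version, get_sqlite3_tables, get_sqlite3_columns, get_sqlite3_indexes) whose definitions are not shown. A minimal sketch of what such helpers might look like, assuming plain stdlib sqlite3 and Alembic's standard alembic_version bookkeeping table; the actual test utilities may differ:

import sqlite3


def get_current_alembic_version(db_path):
    # Alembic stores the current revision in a single-row table.
    with sqlite3.connect(db_path) as conn:
        return conn.execute("SELECT version_num FROM alembic_version").fetchone()[0]


def get_sqlite3_tables(db_path):
    with sqlite3.connect(db_path) as conn:
        rows = conn.execute("SELECT name FROM sqlite_master WHERE type = 'table'")
        return [row[0] for row in rows]


def get_sqlite3_columns(db_path, table_name):
    with sqlite3.connect(db_path) as conn:
        # PRAGMA table_info returns (cid, name, type, notnull, dflt_value, pk).
        return [row[1] for row in conn.execute(f"PRAGMA table_info({table_name})")]


def get_sqlite3_indexes(db_path, table_name):
    with sqlite3.connect(db_path) as conn:
        rows = conn.execute(
            "SELECT name FROM sqlite_master WHERE type = 'index' AND tbl_name = ?",
            (table_name,),
        )
        return [row[0] for row in rows]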
Example 2
def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__,
                                 "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "b22f16781a7c"

        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)

        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

        with pytest.raises(DagsterInstanceMigrationRequired):
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                instance.optimize_for_dagit(statement_timeout=500)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "schedules" not in get_sqlite3_tables(db_path)
        assert "schedule_ticks" not in get_sqlite3_tables(db_path)

        assert "jobs" in get_sqlite3_tables(db_path)
        assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_stored_job_state()) == 0
Example 3
def test_backcompat_get_asset_records():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    def _validate_materialization(asset_key, event, expected_tags):
        assert isinstance(event, EventLogEntry)
        assert event.dagster_event
        assert event.dagster_event.is_step_materialization
        assert event.dagster_event.step_materialization_data.materialization.asset_key == asset_key
        assert event.dagster_event.step_materialization_data.materialization.tags == expected_tags

    b = AssetKey("b")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            records = storage.get_asset_records([b])
            asset_entry = records[0].asset_entry
            assert asset_entry.asset_key == b
            _validate_materialization(b,
                                      asset_entry.last_materialization,
                                      expected_tags={})
Example 4
def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(
        __file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(
            old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        created_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert created_snapshot_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == created_snapshot_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)
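The snapshot ids compared here are 40-hex-character content hashes, consistent with sha1 over a serialized snapshot. A minimal sketch of the idea behind create_pipeline_snapshot_id, assuming a generic JSON serialization rather than Dagster's actual serialization format: any change to the serialized shape of the snapshot changes the id, which is exactly what this test asserts across versions.

import hashlib
import json


def create_snapshot_id(snapshot_dict):
    # Serialize deterministically (sort_keys) so equal snapshots hash to
    # equal ids, then take the sha1 hex digest as the id.
    serialized = json.dumps(snapshot_dict, sort_keys=True)
    return hashlib.sha1(serialized.encode("utf-8")).hexdigest()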
Example 5
def test_start_time_end_time():
    src_dir = file_relative_path(__file__, "snapshot_0_13_12_pre_add_start_time_and_end_time")
    with copy_directory(src_dir) as test_dir:

        @job
        def _test():
            pass

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))

        # this migration was optional, so make sure things work before migrating
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance.upgrade()

        # Make sure the schema is migrated
        assert "start_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance._run_storage._alembic_downgrade(rev="7f2b1a4ca7a5")

        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
Example 6
def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(
        __file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"
        with pytest.warns(
                UserWarning,
                match=re.escape(
                    '"input_hydration_schema_key" is deprecated and will be removed in 0.10.0, use '
                    '"loader_schema_key" instead.'),
        ):
            historical_pipeline = instance.get_historical_pipeline(
                old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(
            pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot, historical_pipeline)
Example 7
def test_asset_key_structure():
    src_dir = file_relative_path(__file__, "compat_tests/snapshot_0_9_16_asset_key_structure")
    with copy_directory(src_dir) as test_dir:
        asset_storage = ConsolidatedSqliteEventLogStorage(test_dir)
        asset_keys = asset_storage.get_all_asset_keys()
        assert len(asset_keys) == 5

        # get a structured asset key
        asset_key = AssetKey(["dashboards", "cost_dashboard"])

        # check that backcompat events are read
        assert asset_storage.has_asset_key(asset_key)
        events = asset_storage.get_asset_events(asset_key)
        assert len(events) == 1
        run_ids = asset_storage.get_asset_run_ids(asset_key)
        assert len(run_ids) == 1

        # check that backcompat events are merged with newly stored events
        run_id = "fake_run_id"
        asset_storage.store_event(_materialization_event_record(run_id, asset_key))
        assert asset_storage.has_asset_key(asset_key)
        events = asset_storage.get_asset_events(asset_key)
        assert len(events) == 2
        run_ids = asset_storage.get_asset_run_ids(asset_key)
        assert len(run_ids) == 2
Example 8
def test_event_log_asset_key_migration():
    src_dir = file_relative_path(
        __file__, "snapshot_0_9_22_lazy_asset_index_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(
            InstanceRef.from_dir(
                test_dir,
                overrides={
                    "event_log_storage": {
                        "module":
                        "dagster.core.storage.event_log.sqlite.consolidated_sqlite_event_log",
                        "class": "ConsolidatedSqliteEventLogStorage",
                        "config": {
                            "base_dir": os.path.join(test_dir, "history")
                        },
                    }
                },
            ))

        # ensure everything is upgraded
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        assert not instance._event_storage.has_secondary_index(
            SECONDARY_INDEX_ASSET_KEY)

        old_keys = instance.all_asset_keys()

        assert instance._event_storage.has_secondary_index(
            SECONDARY_INDEX_ASSET_KEY)

        new_keys = instance.all_asset_keys()

        assert set(old_keys) == set(new_keys)
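Every example wraps the snapshot fixture in copy_directory so that destructive migrations run against a throwaway copy rather than the checked-in files. A minimal sketch of such a context manager, assuming stdlib tempfile and shutil; the real helper may differ:

import os
import shutil
import tempfile
from contextlib import contextmanager


@contextmanager
def copy_directory(src):
    # Copy the snapshot fixture into a temp dir and hand back the copy,
    # so destructive migrations never touch the original files.
    with tempfile.TemporaryDirectory() as temp_dir:
        dst = os.path.join(temp_dir, os.path.basename(src))
        shutil.copytree(src, dst)
        yield dst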
Example 9
def test_asset_lazy_migration():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("a"))
        yield AssetMaterialization(AssetKey("b"))
        yield AssetMaterialization(AssetKey("c"))
        yield AssetMaterialization(AssetKey("d"))
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage
            assert not storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # run the schema migration without reindexing the asset keys
            storage.upgrade()
            assert storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetch all asset keys
            instance.all_asset_keys()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # wipe a, b in order to populate wipe_timestamp
            storage.wipe_asset(AssetKey("a"))
            storage.wipe_asset(AssetKey("b"))

            # materialize all the assets to populate materialization_timestamp
            my_job.execute_in_process(instance=instance)

            # still should not be migrated (on write)
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetching partial results should not trigger migration
            instance.get_asset_keys(prefix=["b"])
            instance.get_asset_keys(cursor=str(AssetKey("b")))
            instance.get_latest_materialization_events(
                asset_keys=[AssetKey("b")])

            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # on read, we should see that all the data has already been migrated and we can now mark
            # the asset key index as migrated
            instance.all_asset_keys()
            assert storage.has_secondary_index(ASSET_KEY_INDEX_COLS)
Example 10
def test_snapshot_0_7_6_pre_add_pipeline_snapshot():
    run_id = "fb0b3905-068b-4444-8f00-76fcbaef7e8b"
    src_dir = file_relative_path(
        __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with copy_directory(src_dir) as test_dir:
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_run_storage_migration_regex(
                    current_revision="9fe9e746268c"),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"
                } == set(get_sqlite3_columns(db_path, "snapshots"))

        assert len(instance.get_runs()) == 1

        run = instance.get_run_by_id(run_id)

        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)

        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id

        new_run = instance.get_run_by_id(new_run_id)

        assert new_run.pipeline_snapshot_id
Example 11
def test_0_10_6_add_bulk_actions_table():
    src_dir = file_relative_path(__file__, "snapshot_0_10_6_add_bulk_actions_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "bulk_actions" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "bulk_actions" in get_sqlite3_tables(db_path)
Example 12
def test_backcompat_asset_read():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized
    def _validate_instance_assets(instance):
        assert instance.all_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys(prefix=["d"]) == [AssetKey("d")]
        assert instance.get_asset_keys(limit=3) == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
        ]
        assert instance.get_asset_keys(cursor='["b"]', limit=3) == [
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
        ]

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.upgrade()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.reindex()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
Example 13
def test_get_materialization_count_by_partition(asset_aware_context):
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    d = AssetKey("c")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            materialization_count_by_key = storage.get_materialization_count_by_partition(
                [d])

            assert materialization_count_by_key.get(d) == {}

    a = AssetKey("no_materializations_asset")
    b = AssetKey("no_partitions_asset")
    c = AssetKey("two_partitions_asset")

    @op
    def materialize():
        yield AssetMaterialization(b)
        yield AssetMaterialization(c, partition="a")
        yield Output(None)

    @job
    def my_job():
        materialize()

    @op
    def materialize_two():
        yield AssetMaterialization(c, partition="a")
        yield AssetMaterialization(c, partition="b")
        yield Output(None)

    @job
    def job_two():
        materialize_two()

    with asset_aware_context() as ctx:
        instance, event_log_storage = ctx
        my_job.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c])

        assert materialization_count_by_key.get(a) == {}
        assert materialization_count_by_key.get(b) == {}
        assert materialization_count_by_key.get(c)["a"] == 1
        assert len(materialization_count_by_key.get(c)) == 1

        job_two.execute_in_process(instance=instance)
        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c])
        assert materialization_count_by_key.get(c)["a"] == 2
        assert materialization_count_by_key.get(c)["b"] == 1
Example 14
def test_0_11_0_add_asset_columns():
    src_dir = file_relative_path(__file__, "snapshot_0_11_0_pre_asset_details/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs", "index.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "last_materialization" not in set(get_sqlite3_columns(db_path, "asset_keys"))
        assert "last_run_id" not in set(get_sqlite3_columns(db_path, "asset_keys"))
        assert "asset_details" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "last_materialization" in set(get_sqlite3_columns(db_path, "asset_keys"))
            assert "last_run_id" in set(get_sqlite3_columns(db_path, "asset_keys"))
            assert "asset_details" in set(get_sqlite3_columns(db_path, "asset_keys"))
Example 15
def test_schedule_secondary_index_table_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_schedule_migration_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "0da417ae1b81"

        assert "secondary_indexes" not in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "secondary_indexes" in get_sqlite3_tables(db_path)
Example 16
def test_tick_selector_index_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_post_schema_pre_data_migration/sqlite")

    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "c892b3fe0a9f"

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            assert "idx_tick_selector_timestamp" not in get_sqlite3_indexes(db_path, "job_ticks")
            instance.upgrade()
            assert "idx_tick_selector_timestamp" in get_sqlite3_indexes(db_path, "job_ticks")
Example 17
def test_jobs_selector_id_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_post_schema_pre_data_migration/sqlite")
    import sqlalchemy as db

    from dagster.core.storage.schedules.migration import SCHEDULE_JOBS_SELECTOR_ID
    from dagster.core.storage.schedules.schema import InstigatorsTable, JobTable, JobTickTable

    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "c892b3fe0a9f"

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            # runs the required data migrations
            instance.upgrade()
            assert instance.schedule_storage.has_built_index(SCHEDULE_JOBS_SELECTOR_ID)
            legacy_count = len(instance.all_instigator_state())
            migrated_instigator_count = instance.schedule_storage.execute(
                db.select([db.func.count()]).select_from(InstigatorsTable)
            )[0][0]
            assert migrated_instigator_count == legacy_count

            migrated_job_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTable)
                .where(JobTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_job_count == legacy_count

            legacy_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()]).select_from(JobTickTable)
            )[0][0]
            assert legacy_tick_count > 0

            # tick migrations are optional
            migrated_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTickTable)
                .where(JobTickTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_tick_count == 0

            # run the optional migrations
            instance.reindex()

            migrated_tick_count = instance.schedule_storage.execute(
                db.select([db.func.count()])
                .select_from(JobTickTable)
                .where(JobTickTable.c.selector_id.isnot(None))
            )[0][0]
            assert migrated_tick_count == legacy_tick_count
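The db.select([db.func.count()]) calls above use the legacy list-argument form from SQLAlchemy 1.3; SQLAlchemy 1.4+ takes the columns as positional arguments instead. A small self-contained sketch of the newer spelling, using a hypothetical stand-in table:

import sqlalchemy as db

metadata = db.MetaData()
# Hypothetical stand-in for InstigatorsTable, just to show the query shape.
instigators = db.Table("instigators", metadata, db.Column("id", db.Integer, primary_key=True))

engine = db.create_engine("sqlite://")
metadata.create_all(engine)

with engine.connect() as conn:
    # 1.4+ spelling; under 1.3 this would be
    # db.select([db.func.count()]).select_from(instigators).
    count = conn.execute(db.select(db.func.count()).select_from(instigators)).scalar()
    assert count == 0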
Example 18
def test_event_log_asset_partition_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_pre_asset_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(
            test_dir, "history", "runs", "1a1d3c4b-1284-4c74-830c-c8988bd4d779.db"
        )
        assert get_current_alembic_version(db_path) == "c34498c29964"
        assert "partition" not in set(get_sqlite3_columns(db_path, "event_logs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        assert "partition" in set(get_sqlite3_columns(db_path, "event_logs"))
Example 19
def test_event_log_asset_key_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_7_8_pre_asset_key_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(
            test_dir, "history", "runs", "722183e4-119f-4a00-853f-e1257be82ddb.db"
        )
        assert get_current_alembic_version(db_path) == "3b1e175a2be3"
        assert "asset_key" not in set(get_sqlite3_columns(db_path, "event_logs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        assert "asset_key" in set(get_sqlite3_columns(db_path, "event_logs"))
Example 20
def test_0_6_4():
    src_dir = file_relative_path(__file__, "snapshot_0_6_4")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        runs = instance.get_runs()
        with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_event_log_migration_regex(
                    run_id="c7a6c4d7-6c88-46d0-8baa-d4937c3cefe5",
                    current_revision=None),
        ):
            for run in runs:
                instance.all_logs(run.run_id)
Example 21
def test_execution_plan_snapshot_backcompat():

    src_dir = file_relative_path(__file__, "test_execution_plan_snapshots/")
    snapshot_dirs = [
        f for f in os.listdir(src_dir)
        if not os.path.isfile(os.path.join(src_dir, f))
    ]
    for snapshot_dir_path in snapshot_dirs:
        print(f"Executing a saved run from {snapshot_dir_path}")  # pylint: disable=print-call

        with copy_directory(os.path.join(src_dir,
                                         snapshot_dir_path)) as test_dir:
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                runs = instance.get_runs()
                assert len(runs) == 1

                run = runs[0]
                assert run.status == PipelineRunStatus.NOT_STARTED

                the_pipeline = InMemoryPipeline(dynamic_pipeline)

                # First create a brand new plan from the pipeline and validate it
                new_plan = create_execution_plan(the_pipeline,
                                                 run_config=run.run_config)
                _validate_execution_plan(new_plan)

                # Create a snapshot and rebuild it, validate the rebuilt plan
                new_plan_snapshot = snapshot_from_execution_plan(
                    new_plan, run.pipeline_snapshot_id)
                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", new_plan_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Then validate the plan built from the historical snapshot on the run
                stored_snapshot = instance.get_execution_plan_snapshot(
                    run.execution_plan_snapshot_id)

                rebuilt_plan = ExecutionPlan.rebuild_from_snapshot(
                    "dynamic_pipeline", stored_snapshot)
                _validate_execution_plan(rebuilt_plan)

                # Finally, execute the run (using the historical execution plan snapshot)
                result = execute_run(the_pipeline,
                                     run,
                                     instance,
                                     raise_on_error=True)
                assert result.success
Example 22
def test_run_partition_data_migration():
    src_dir = file_relative_path(
        __file__, "snapshot_0_9_22_post_schema_pre_data_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        from dagster.core.storage.runs.sql_run_storage import SqlRunStorage
        from dagster.core.storage.runs.migration import RUN_PARTITIONS

        # load db that has migrated schema, but not populated data for run partitions
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "375e95bad550"

        # Make sure the schema is migrated
        assert "partition" in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" in set(get_sqlite3_columns(db_path, "runs"))

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            instance._run_storage.upgrade()

        run_storage = instance._run_storage
        assert isinstance(run_storage, SqlRunStorage)

        partition_set_name = "ingest_and_train"
        partition_name = "2020-01-02"

        # ensure old tag-based reads are working
        assert not run_storage.has_built_index(RUN_PARTITIONS)
        assert len(
            run_storage._get_partition_runs(partition_set_name,
                                            partition_name)) == 2

        # turn on reads for the partition column, without migrating the data
        run_storage.mark_index_built(RUN_PARTITIONS)

        # ensure that no runs are returned because the data has not been migrated
        assert run_storage.has_built_index(RUN_PARTITIONS)
        assert len(
            run_storage._get_partition_runs(partition_set_name,
                                            partition_name)) == 0

        # actually migrate the data
        run_storage.build_missing_indexes(force_rebuild_all=True)

        # ensure that we get the same partitioned runs returned
        assert run_storage.has_built_index(RUN_PARTITIONS)
        assert len(
            run_storage._get_partition_runs(partition_set_name,
                                            partition_name)) == 2
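The sequence above (tag-based reads, then mark_index_built without backfilling, then build_missing_indexes) illustrates a general cutover pattern: a marker flag selects the read path independently of whether the new column is populated, which is why flipping the marker early makes the reads return zero rows. A minimal sketch of that pattern, not Dagster's implementation; the "dagster/partition" tag name is the only piece taken from the real system:

class PartitionedRunStorage:
    """Toy storage with an old tag-based read path and a new column-based one."""

    def __init__(self, runs):
        self._runs = runs    # list of dicts with "tags" and, once backfilled, "partition"
        self._built = set()  # names of indexes marked as built

    def has_built_index(self, name):
        return name in self._built

    def mark_index_built(self, name):
        self._built.add(name)

    def build_missing_indexes(self):
        # Backfill the new column from the old tags, then flip the marker.
        for run in self._runs:
            run["partition"] = run["tags"].get("dagster/partition")
        self.mark_index_built("run_partitions")

    def get_partition_runs(self, partition):
        if self.has_built_index("run_partitions"):
            # New read path: only sees runs whose column was backfilled.
            return [r for r in self._runs if r.get("partition") == partition]
        # Old read path: derives the partition from tags on every read.
        return [r for r in self._runs if r["tags"].get("dagster/partition") == partition]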
Example 23
def test_schedule_namedtuple_job_instigator_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_13_19_instigator_named_tuples/sqlite")
    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            states = instance.all_instigator_state()
            assert len(states) == 2
            check.is_list(states, of_type=InstigatorState)
            for state in states:
                assert state.instigator_type
                assert state.instigator_data
                ticks = instance.get_ticks(state.instigator_origin_id, state.selector_id)
                check.is_list(ticks, of_type=InstigatorTick)
                for tick in ticks:
                    assert tick.tick_data
                    assert tick.instigator_type
                    assert tick.instigator_name
Example 24
def test_instigators_table_backcompat():
    src_dir = file_relative_path(__file__, "snapshot_0_14_6_instigators_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "54666da3db5c"

        assert "instigators" not in get_sqlite3_tables(db_path)
        assert "selector_id" not in set(get_sqlite3_columns(db_path, "jobs"))
        assert "selector_id" not in set(get_sqlite3_columns(db_path, "job_ticks"))

        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "instigators" in get_sqlite3_tables(db_path)
        assert "selector_id" in set(get_sqlite3_columns(db_path, "jobs"))
        assert "selector_id" in set(get_sqlite3_columns(db_path, "job_ticks"))
Example 25
def test_0_8_0_scheduler_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_8_0_scheduler_change")
    with copy_directory(src_dir) as test_dir:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_schedule_storage_migration_regex(
                    current_revision="da7cd32b690d"),
        ):
            instance.all_stored_schedule_state()

        instance.upgrade()

        # upgrade just drops the tables, and the user upgrade flow is a CLI entry
        # point, so emulate it by constructing a new instance, which creates the
        # new tables
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        instance.all_stored_schedule_state()
Example 26
def test_downgrade_and_upgrade():
    src_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with copy_directory(src_dir) as test_dir:
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev="9fe9e746268c")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"} == set(
            get_sqlite3_columns(db_path, "snapshots")
        )

        assert len(instance.get_runs()) == 1
Example 27
def test_run_partition_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_9_22_pre_run_partition/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "224640159acf"
        assert "partition" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" not in set(get_sqlite3_columns(db_path, "runs"))

        # Make sure the schema is migrated
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        assert "partition" in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" in set(get_sqlite3_columns(db_path, "runs"))

        instance._run_storage._alembic_downgrade(rev="224640159acf")
        assert get_current_alembic_version(db_path) == "224640159acf"

        assert "partition" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "partition_set" not in set(get_sqlite3_columns(db_path, "runs"))
Example 28
def test_0_6_6_sqlite_migrate():
    src_dir = file_relative_path(__file__, "snapshot_0_6_6/sqlite")
    assert os.path.exists(
        file_relative_path(__file__, "snapshot_0_6_6/sqlite/runs.db"))
    assert not os.path.exists(
        file_relative_path(__file__, "snapshot_0_6_6/sqlite/history/runs.db"))

    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["89296095-892d-4a15-aa0d-9018d1580945"]

        instance._event_storage.get_logs_for_run(
            "89296095-892d-4a15-aa0d-9018d1580945")

        assert not os.path.exists(os.path.join(test_dir, "runs.db"))
        assert os.path.exists(os.path.join(test_dir, "history/runs.db"))
Example 29
def test_event_log_step_key_migration():
    src_dir = file_relative_path(__file__, "snapshot_0_7_6_pre_event_log_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        # Make sure the schema is migrated
        instance.upgrade()

        runs = instance.get_runs()
        assert len(runs) == 1
        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["6405c4a0-3ccc-4600-af81-b5ee197f8528"]
        assert isinstance(instance._event_storage, SqlEventLogStorage)
        events_by_id = instance._event_storage.get_logs_for_run_by_log_id(
            "6405c4a0-3ccc-4600-af81-b5ee197f8528"
        )
        assert len(events_by_id) == 40

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(
                "6405c4a0-3ccc-4600-af81-b5ee197f8528", record_id
            )
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) == 0

        # run the event_log backfill migration
        migrate_event_log_data(instance=instance)

        step_key_records = []
        for record_id, _event in events_by_id.items():
            row_data = instance._event_storage.get_event_log_table_data(
                "6405c4a0-3ccc-4600-af81-b5ee197f8528", record_id
            )
            if row_data.step_key is not None:
                step_key_records.append(row_data)
        assert len(step_key_records) > 0
Example 30
def test_0_6_6_sqlite_exc():
    src_dir = file_relative_path(__file__, "snapshot_0_6_6/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        runs = instance.get_runs()
        # Note that this is a deliberate choice -- old runs are simply invisible, and their
        # presence won't raise DagsterInstanceMigrationRequired. This is a reasonable choice since
        # the runs.db has moved and otherwise we would have to check for an old
        # runs.db every time we accessed the runs. Instead, we do this only in the
        # upgrade method.
        assert len(runs) == 0

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ["89296095-892d-4a15-aa0d-9018d1580945"]

        with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_event_log_migration_regex(
                    run_id="89296095-892d-4a15-aa0d-9018d1580945",
                    current_revision=None),
        ):
            instance._event_storage.get_logs_for_run(
                "89296095-892d-4a15-aa0d-9018d1580945")