Example #1
def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__,
                                 "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

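        # confirm the copied snapshot is pinned at the pre-migration alembic revision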
        assert get_current_alembic_version(db_path) == "b22f16781a7c"

        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)

        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

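        # until the migration runs, using the instance should raise a migration-required error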
        with pytest.raises(DagsterInstanceMigrationRequired):
            with DagsterInstance.from_ref(
                    InstanceRef.from_dir(test_dir)) as instance:
                instance.optimize_for_dagit(statement_timeout=500)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "schedules" not in get_sqlite3_tables(db_path)
        assert "schedule_ticks" not in get_sqlite3_tables(db_path)

        assert "jobs" in get_sqlite3_tables(db_path)
        assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_stored_job_state()) == 0
Example #2
def test_0_10_0_schedule_wipe():
    src_dir = file_relative_path(__file__,
                                 "snapshot_0_10_0_wipe_schedules/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "schedules", "schedules.db")

        assert get_current_alembic_version(db_path) == "b22f16781a7c"

        assert "schedules" in get_sqlite3_tables(db_path)
        assert "schedule_ticks" in get_sqlite3_tables(db_path)

        assert "jobs" not in get_sqlite3_tables(db_path)
        assert "job_ticks" not in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()

        assert "schedules" not in get_sqlite3_tables(db_path)
        assert "schedule_ticks" not in get_sqlite3_tables(db_path)

        assert "jobs" in get_sqlite3_tables(db_path)
        assert "job_ticks" in get_sqlite3_tables(db_path)

        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as upgraded_instance:
            assert len(upgraded_instance.all_instigator_state()) == 0
Example #3
def test_get_run_by_id():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        assert instance.get_runs() == []
        pipeline_run = PipelineRun("foo_pipeline", "new_run")
        assert instance.get_run_by_id(pipeline_run.run_id) is None

        instance._run_storage.add_run(pipeline_run)  # pylint: disable=protected-access

        assert instance.get_runs() == [pipeline_run]

        assert instance.get_run_by_id(pipeline_run.run_id) == pipeline_run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun(pipeline_name="foo_pipeline", run_id="bar_run")

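        # MOCK_HAS_RUN_CALLED is assumed to be initialized to False at module scope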
        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)

        assert instance.get_run_by_id(run.run_id) is None

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun(pipeline_name="foo_pipeline", run_id="bar_run")

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun(pipeline_name="foo_pipeline", run_id=run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)
        assert instance.get_run_by_id(run.run_id) is None
Example #4
def test_get_or_create_run():
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')
        assert instance.get_or_create_run(run) == run
        assert instance.has_run(run.run_id)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            # This is uglier than we would like because there is no nonlocal keyword in py2
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun.create_empty_run('foo_pipeline', run_id))
                return False
            else:
                return self._run_storage.has_run(run_id)

        instance.has_run = types.MethodType(_has_run, instance)
        assert instance.get_or_create_run(run) == run

    # Run is created after we check whether it exists, but deleted before we can get it
    global MOCK_HAS_RUN_CALLED  # pylint:disable=global-statement
    MOCK_HAS_RUN_CALLED = False
    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        run = PipelineRun.create_empty_run('foo_pipeline', 'bar_run')

        def _has_run(self, run_id):
            global MOCK_HAS_RUN_CALLED  # pylint: disable=global-statement
            # pylint: disable=protected-access
            if not self._run_storage.has_run(
                    run_id) and not MOCK_HAS_RUN_CALLED:
                self._run_storage.add_run(
                    PipelineRun.create_empty_run('foo_pipeline', run_id))
                MOCK_HAS_RUN_CALLED = True
                return False
            elif self._run_storage.has_run(run_id) and MOCK_HAS_RUN_CALLED:
                MOCK_HAS_RUN_CALLED = False
                return True
            else:
                return False

        instance.has_run = types.MethodType(_has_run, instance)
        with pytest.raises(check.CheckError, match='Inconsistent run storage'):
            instance.get_or_create_run(run)
Example #5
def test_run_created_in_0_7_9_snapshot_id_change():
    src_dir = file_relative_path(
        __file__, "snapshot_0_7_9_shapshot_id_creation_change/sqlite")
    with copy_directory(src_dir) as test_dir:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = "88528edde2ed64da3c39cca0da8ba2f7586c1a5d"
        old_execution_plan_snapshot_id = "2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d"

        historical_pipeline = instance.get_historical_pipeline(
            old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        created_snapshot_id = create_pipeline_snapshot_id(pipeline_snapshot)
        assert created_snapshot_id != old_pipeline_snapshot_id

        # verify that both are accessible off of the historical pipeline
        assert historical_pipeline.computed_pipeline_snapshot_id == created_snapshot_id
        assert historical_pipeline.identifying_pipeline_snapshot_id == old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot)
Example #6
def test_backcompat_get_asset_records():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    def _validate_materialization(asset_key, event, expected_tags):
        assert isinstance(event, EventLogEntry)
        assert event.dagster_event
        assert event.dagster_event.is_step_materialization
        assert event.dagster_event.step_materialization_data.materialization.asset_key == asset_key
        assert event.dagster_event.step_materialization_data.materialization.tags == expected_tags

    b = AssetKey("b")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            records = storage.get_asset_records([b])
            asset_entry = records[0].asset_entry
            assert asset_entry.asset_key == b
            _validate_materialization(b,
                                      asset_entry.last_materialization,
                                      expected_tags={})
Example #7
def test_input_manager_with_failure():
    @root_input_manager
    def should_fail(_):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @solid(input_defs=[InputDefinition("_fail_input", root_manager_key="should_fail")])
    def fail_on_input(_, _fail_input):
        assert False, "should not be called"

    @pipeline(mode_defs=[ModeDefinition(resource_defs={"should_fail": should_fail})])
    def simple():
        fail_on_input()

    with tempfile.TemporaryDirectory() as tmpdir_path:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("fail_on_input").failure_data

        assert failure_data.error.cls_name == "Failure"

        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"
Example #8
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(
        __file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'

        pipeline_snapshot = instance.get_pipeline_snapshot(
            old_pipeline_snapshot_id)
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(
            pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExecutionPlanIndex(ep_snapshot,
                                  PipelineIndex(pipeline_snapshot))
Example #9
def test_snapshot_0_7_6_pre_add_pipeline_snapshot():
    run_id = "fb0b3905-068b-4444-8f00-76fcbaef7e8b"
    test_dir = file_relative_path(
        __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        @solid
        def noop_solid(_):
            pass

        @pipeline
        def noop_pipeline():
            noop_solid()

        with pytest.raises(
                DagsterInstanceMigrationRequired,
                match=_run_storage_migration_regex(
                    current_revision="9fe9e746268c"),
        ):
            execute_pipeline(noop_pipeline, instance=instance)

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"
                } == set(get_sqlite3_columns(db_path, "snapshots"))

        assert len(instance.get_runs()) == 1

        run = instance.get_run_by_id(run_id)

        assert run.run_id == run_id
        assert run.pipeline_snapshot_id is None

        result = execute_pipeline(noop_pipeline, instance=instance)

        assert result.success

        runs = instance.get_runs()
        assert len(runs) == 2

        new_run_id = result.run_id

        new_run = instance.get_run_by_id(new_run_id)

        assert new_run.pipeline_snapshot_id
Example #10
def test_0_6_6_sqlite_migrate():
    test_dir = file_relative_path(__file__, 'snapshot_0_6_6/sqlite')
    assert os.path.exists(
        file_relative_path(__file__, 'snapshot_0_6_6/sqlite/runs.db'))
    assert not os.path.exists(
        file_relative_path(__file__, 'snapshot_0_6_6/sqlite/history/runs.db'))

    with restore_directory(test_dir):
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        instance.upgrade()
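        # the upgrade relocates runs.db into the history/ subdirectory (verified below)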

        runs = instance.get_runs()
        assert len(runs) == 1

        run_ids = instance._event_storage.get_all_run_ids()
        assert run_ids == ['89296095-892d-4a15-aa0d-9018d1580945']

        instance._event_storage.get_logs_for_run(
            '89296095-892d-4a15-aa0d-9018d1580945')

        assert not os.path.exists(
            file_relative_path(__file__, 'snapshot_0_6_6/sqlite/runs.db'))
        assert os.path.exists(
            file_relative_path(__file__,
                               'snapshot_0_6_6/sqlite/history/runs.db'))
Example #11
def test_start_time_end_time():
    src_dir = file_relative_path(__file__, "snapshot_0_13_12_pre_add_start_time_and_end_time")
    with copy_directory(src_dir) as test_dir:

        @job
        def _test():
            pass

        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))

        # this migration was optional, so make sure things work before migrating
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance.upgrade()

        # Make sure the schema is migrated
        assert "start_time" in set(get_sqlite3_columns(db_path, "runs"))
        assert instance.get_run_records()
        assert instance.create_run_for_pipeline(_test)

        instance._run_storage._alembic_downgrade(rev="7f2b1a4ca7a5")

        assert get_current_alembic_version(db_path) == "7f2b1a4ca7a5"
        # the downgrade should restore the pre-migration schema
        assert "start_time" not in set(get_sqlite3_columns(db_path, "runs"))
        assert "end_time" not in set(get_sqlite3_columns(db_path, "runs"))
Example #12
def test_run_step_stats_with_retries():
    _called = None

    @pipeline
    def simple():
        @solid
        def should_succeed(context):
            context.log.info("succeed")
            return "yay"

        @solid(input_defs=[InputDefinition("_input", str)],
               output_defs=[OutputDefinition(str)])
        def should_retry(context, _input):
            raise RetryRequested(max_retries=3)

        @solid
        def should_not_execute(_, x):
            nonlocal _called  # write to the enclosing test's flag so the final assert is meaningful
            _called = True
            return x

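        # should_retry exhausts its retries and fails, so should_not_execute is skipped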
        should_not_execute(should_retry(should_succeed()))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple,
                                  instance=instance,
                                  raise_on_error=False)
        step_stats = instance.get_run_step_stats(
            result.run_id, step_keys=["should_retry.compute"])
        assert len(step_stats) == 1
        assert step_stats[0].step_key == "should_retry.compute"
        assert step_stats[0].status == StepEventStatus.FAILURE
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[0].attempts == 4
        assert not _called
Example #13
def test_run_created_in_0_7_9_snapshot_id_change():
    test_dir = file_relative_path(
        __file__, 'snapshot_0_7_9_shapshot_id_creation_change/sqlite')
    with restore_directory(test_dir):

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        # run_id = 'e297fa70-49e8-43f8-abfe-1634f02644f6'

        old_pipeline_snapshot_id = '88528edde2ed64da3c39cca0da8ba2f7586c1a5d'
        old_execution_plan_snapshot_id = '2246f8e5a10d21e15fbfa3773d7b2d0bc1fa9d3d'
        with pytest.warns(
                UserWarning,
                match=re.escape(
                    '"input_hydration_schema_key" is deprecated and will be removed in 0.10.0, use '
                    '"loader_schema_key" instead.'),
        ):
            historical_pipeline = instance.get_historical_pipeline(
                old_pipeline_snapshot_id)
        pipeline_snapshot = historical_pipeline.pipeline_snapshot
        ep_snapshot = instance.get_execution_plan_snapshot(
            old_execution_plan_snapshot_id)

        # It is the pipeline snapshot that changed
        # Verify that snapshot ids are not equal. This changed in 0.7.10
        assert create_pipeline_snapshot_id(
            pipeline_snapshot) != old_pipeline_snapshot_id

        # We also changed execution plan schema in 0.7.11.post1
        assert create_execution_plan_snapshot_id(
            ep_snapshot) != old_execution_plan_snapshot_id

        # This previously failed with a check error
        assert ExternalExecutionPlan(ep_snapshot, historical_pipeline)
Example #14
def test_0_6_4():
    instance = DagsterInstance.from_ref(
        InstanceRef.from_dir(file_relative_path(__file__, 'snapshot_0_6_4')))

    runs = instance.all_runs()
    for run in runs:
        instance.all_logs(run.run_id)
Example #15
def test_event_log_asset_key_migration():
    src_dir = file_relative_path(
        __file__, "snapshot_0_9_22_lazy_asset_index_migration/sqlite")
    with copy_directory(src_dir) as test_dir:
        instance = DagsterInstance.from_ref(
            InstanceRef.from_dir(
                test_dir,
                overrides={
                    "event_log_storage": {
                        "module":
                        "dagster.core.storage.event_log.sqlite.consolidated_sqlite_event_log",
                        "class": "ConsolidatedSqliteEventLogStorage",
                        "config": {
                            "base_dir": os.path.join(test_dir, "history")
                        },
                    }
                },
            ))

        # ensure everything is upgraded
        instance.upgrade()

        assert isinstance(instance._event_storage, SqlEventLogStorage)
        assert not instance._event_storage.has_secondary_index(
            SECONDARY_INDEX_ASSET_KEY)

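        # reading the asset keys triggers the lazy secondary-index migration (verified below)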
        old_keys = instance.all_asset_keys()

        assert instance._event_storage.has_secondary_index(
            SECONDARY_INDEX_ASSET_KEY)

        new_keys = instance.all_asset_keys()

        assert set(old_keys) == set(new_keys)
Example #16
def test_run_step_stats():
    @pipeline
    def simple():
        @solid
        def should_succeed(context):
            context.log.info('succeed')
            return 'yay'

        @solid(input_defs=[InputDefinition('_input', str)], output_defs=[OutputDefinition(str)])
        def should_fail(context, _input):
            context.log.info('fail')
            raise Exception('booo')

        @solid
        def should_skip(context, _input):
            context.log.info('skip')
            return _input

        should_skip(should_fail(should_succeed()))

    with seven.TemporaryDirectory() as tmpdir_path:
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))
        result = execute_pipeline(simple, instance=instance, raise_on_error=False)
        step_stats = sorted(instance.get_run_step_stats(result.run_id), key=lambda x: x.end_time)
        assert len(step_stats) == 3
        assert step_stats[0].step_key == 'should_succeed.compute'
        assert step_stats[0].status == StepEventStatus.SUCCESS
        assert step_stats[0].end_time > step_stats[0].start_time
        assert step_stats[1].step_key == 'should_fail.compute'
        assert step_stats[1].status == StepEventStatus.FAILURE
        assert step_stats[1].end_time > step_stats[0].start_time
        assert step_stats[2].step_key == 'should_skip.compute'
        assert step_stats[2].status == StepEventStatus.SKIPPED
        assert step_stats[2].end_time > step_stats[0].start_time
Example #17
def test_connection_leak(hostname, conn_string):
    num_instances = 20

    tempdir = tempfile.TemporaryDirectory()
    copies = []
    for _ in range(num_instances):
        copies.append(
            DagsterInstance.from_ref(
                InstanceRef.from_dir(tempdir.name,
                                     overrides=yaml.safe_load(
                                         full_pg_config(hostname)))))

    with get_conn(conn_string).cursor() as curs:
        # count open connections
        curs.execute("SELECT count(*) from pg_stat_activity")
        res = curs.fetchall()

    # This includes a number of internal connections, so just ensure it did not scale
    # with number of instances
    assert res[0][0] < num_instances

    for copy in copies:
        copy.dispose()

    tempdir.cleanup()
Example #18
def test_downgrade_and_upgrade():
    test_dir = file_relative_path(__file__, 'snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite')
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, 'history', 'runs.db')

        assert get_current_alembic_version(db_path) == '9fe9e746268c'

        assert 'snapshots' not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == 'c63a27054f08'

        assert 'snapshots' in get_sqlite3_tables(db_path)
        assert {'id', 'snapshot_id', 'snapshot_body', 'snapshot_type'} == set(
            get_sqlite3_columns(db_path, 'snapshots')
        )

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev='9fe9e746268c')

        assert get_current_alembic_version(db_path) == '9fe9e746268c'

        assert 'snapshots' not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert get_current_alembic_version(db_path) == 'c63a27054f08'

        assert 'snapshots' in get_sqlite3_tables(db_path)
        assert {'id', 'snapshot_id', 'snapshot_body', 'snapshot_type'} == set(
            get_sqlite3_columns(db_path, 'snapshots')
        )

        assert len(instance.get_runs()) == 1
Example #19
def config_defaults(base_dir):
    defaults = InstanceRef.config_defaults(base_dir)
    defaults["run_coordinator"] = ConfigurableClassData(
        "dagster.core.run_coordinator.queued_run_coordinator",
        "QueuedRunCoordinator",
        yaml.dump({}),
    )
    return defaults
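A hedged usage sketch for the override above; the call and the placeholder base directory are illustrative assumptions, not part of the original snippet:

# Illustrative only: call the config_defaults override and inspect the
# run_coordinator entry ("/tmp/dagster-home" is a placeholder base dir).
defaults = config_defaults("/tmp/dagster-home")
coordinator = defaults["run_coordinator"]
assert coordinator.module_name == "dagster.core.run_coordinator.queued_run_coordinator"
assert coordinator.class_name == "QueuedRunCoordinator"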
Example #20
def test_valid_managed_loggers_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__,
                                    "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_managed_loggers_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert instance.managed_python_loggers == ["root"]
Example #21
def test_valid_handler_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__,
                                    "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_handler_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert len(instance.get_handlers()) == 2
Example #22
def test_valid_log_level_instance_yaml():
    ref = InstanceRef.from_dir(
        base_dir=file_relative_path(__file__,
                                    "../../../docs_snippets/concepts/logging"),
        config_filename="python_logging_python_log_level_config.yaml",
    )
    instance = DagsterInstance.from_ref(ref)
    assert instance.python_log_level == "INFO"
Example #23
def test_downgrade_and_upgrade():
    test_dir = file_relative_path(
        __file__, "snapshot_0_7_6_pre_add_pipeline_snapshot/sqlite")
    with restore_directory(test_dir):
        # invariant check to make sure migration has not been run yet

        db_path = os.path.join(test_dir, "history", "runs.db")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        # Make sure the schema is migrated
        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"
                } == set(get_sqlite3_columns(db_path, "snapshots"))

        assert len(instance.get_runs()) == 1

        instance._run_storage._alembic_downgrade(rev="9fe9e746268c")

        assert get_current_alembic_version(db_path) == "9fe9e746268c"

        assert "snapshots" not in get_sqlite3_tables(db_path)

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        assert len(instance.get_runs()) == 1

        instance.upgrade()

        assert get_current_alembic_version(db_path) == "c63a27054f08"

        assert "snapshots" in get_sqlite3_tables(db_path)
        assert {"id", "snapshot_id", "snapshot_body", "snapshot_type"
                } == set(get_sqlite3_columns(db_path, "snapshots"))

        assert len(instance.get_runs()) == 1
Example #24
def test_asset_lazy_migration():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")
    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("a"))
        yield AssetMaterialization(AssetKey("b"))
        yield AssetMaterialization(AssetKey("c"))
        yield AssetMaterialization(AssetKey("d"))
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage
            assert not storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # run the schema migration without reindexing the asset keys
            storage.upgrade()
            assert storage.has_asset_key_index_cols()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetch all asset keys
            instance.all_asset_keys()
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # wipe a, b in order to populate wipe_timestamp
            storage.wipe_asset(AssetKey("a"))
            storage.wipe_asset(AssetKey("b"))

            # materialize all the assets to populate materialization_timestamp
            my_job.execute_in_process(instance=instance)

            # still should not be migrated (on write)
            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # fetching partial results should not trigger migration
            instance.get_asset_keys(prefix=["b"])
            instance.get_asset_keys(cursor=str(AssetKey("b")))
            instance.get_latest_materialization_events(
                asset_keys=[AssetKey("b")])

            assert not storage.has_secondary_index(ASSET_KEY_INDEX_COLS)

            # on read, we should see that all the data has already been migrated and we can now mark
            # the asset key index as migrated
            instance.all_asset_keys()
            assert storage.has_secondary_index(ASSET_KEY_INDEX_COLS)
Example #25
def test_backcompat_asset_read():
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    # should contain materialization events for asset keys a, b, c, d, e, f
    # events a and b have been wiped, but b has been rematerialized
    def _validate_instance_assets(instance):
        assert instance.all_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys() == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
            AssetKey("f"),
        ]
        assert instance.get_asset_keys(prefix=["d"]) == [AssetKey("d")]
        assert instance.get_asset_keys(limit=3) == [
            AssetKey("b"),
            AssetKey("c"),
            AssetKey("d"),
        ]
        assert instance.get_asset_keys(cursor='["b"]', limit=3) == [
            AssetKey("c"),
            AssetKey("d"),
            AssetKey("e"),
        ]

    @op
    def materialize():
        yield AssetMaterialization(AssetKey("e"))
        yield AssetMaterialization(AssetKey("f"))
        yield Output(None)

    @job
    def my_job():
        materialize()

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.upgrade()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
            instance.reindex()
            _validate_instance_assets(instance)
            my_job.execute_in_process(instance=instance)
            _validate_instance_assets(instance)
Example #26
def test_get_materialization_count_by_partition(asset_aware_context):
    src_dir = file_relative_path(
        __file__, "compat_tests/snapshot_0_11_0_asset_materialization")

    d = AssetKey("c")

    with copy_directory(src_dir) as test_dir:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(test_dir)) as instance:
            storage = instance.event_log_storage

            materialization_count_by_key = storage.get_materialization_count_by_partition(
                [d])

            assert materialization_count_by_key.get(d) == {}

    a = AssetKey("no_materializations_asset")
    b = AssetKey("no_partitions_asset")
    c = AssetKey("two_partitions_asset")

    @op
    def materialize():
        yield AssetMaterialization(b)
        yield AssetMaterialization(c, partition="a")
        yield Output(None)

    @job
    def my_job():
        materialize()

    @op
    def materialize_two():
        yield AssetMaterialization(c, partition="a")
        yield AssetMaterialization(c, partition="b")
        yield Output(None)

    @job
    def job_two():
        materialize_two()

    with asset_aware_context() as ctx:
        instance, event_log_storage = ctx
        my_job.execute_in_process(instance=instance)

        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c])

        assert materialization_count_by_key.get(a) == {}
        assert materialization_count_by_key.get(b) == {}
        assert materialization_count_by_key.get(c)["a"] == 1
        assert len(materialization_count_by_key.get(c)) == 1

        job_two.execute_in_process(instance=instance)
        materialization_count_by_key = event_log_storage.get_materialization_count_by_partition(
            [a, b, c])
        assert materialization_count_by_key.get(c)["a"] == 2
        assert materialization_count_by_key.get(c)["b"] == 1
Example #27
def test_0_8_0_scheduler_migration():
    test_dir = file_relative_path(__file__, 'snapshot_0_8_0_scheduler_change')
    with restore_directory(test_dir):

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))
        with pytest.raises(
            DagsterInstanceMigrationRequired,
            match=_schedule_storage_migration_regex(current_revision='da7cd32b690d'),
        ):
            instance.all_stored_schedule_state()

        instance.upgrade()

        # upgrade just drops tables, and user upgrade flow is cli entry - so
        # emulate by new-ing up instance which will create new tables
        instance = DagsterInstance.from_ref(InstanceRef.from_dir(test_dir))

        instance.all_stored_schedule_state()
Example #28
def test_output_manager_with_failure():
    _called_input_manager = False
    _called_solid = False

    @output_manager
    def should_fail(_, _obj):
        raise Failure(
            description="Foolure",
            metadata_entries=[
                EventMetadataEntry.text(label="label", text="text", description="description")
            ],
        )

    @input_manager
    def should_not_enter(_):
        nonlocal _called_input_manager  # record on the enclosing test's flag
        _called_input_manager = True

    @solid(output_defs=[OutputDefinition(manager_key="should_fail")])
    def emit_str(_):
        return "emit"

    @solid(
        input_defs=[
            InputDefinition(name="_input_str", dagster_type=str, manager_key="should_not_enter")
        ]
    )
    def should_not_call(_, _input_str):
        nonlocal _called_solid  # record on the enclosing test's flag
        _called_solid = True

    @pipeline(
        mode_defs=[
            ModeDefinition(
                resource_defs={"should_fail": should_fail, "should_not_enter": should_not_enter}
            )
        ]
    )
    def simple():
        should_not_call(emit_str())

    with tempfile.TemporaryDirectory() as tmpdir_path:

        instance = DagsterInstance.from_ref(InstanceRef.from_dir(tmpdir_path))

        result = execute_pipeline(simple, instance=instance, raise_on_error=False)

        assert not result.success

        failure_data = result.result_for_solid("emit_str").failure_data

        assert failure_data.error.cls_name == "Failure"

        assert failure_data.user_failure_data.description == "Foolure"
        assert failure_data.user_failure_data.metadata_entries[0].label == "label"
        assert failure_data.user_failure_data.metadata_entries[0].entry_data.text == "text"
        assert failure_data.user_failure_data.metadata_entries[0].description == "description"

        assert not _called_input_manager and not _called_solid
Example #29
def test_0_10_6_add_bulk_actions_table():
    src_dir = file_relative_path(__file__, "snapshot_0_10_6_add_bulk_actions_table/sqlite")
    with copy_directory(src_dir) as test_dir:
        db_path = os.path.join(test_dir, "history", "runs.db")
        assert get_current_alembic_version(db_path) == "0da417ae1b81"
        assert "bulk_actions" not in get_sqlite3_tables(db_path)
        with DagsterInstance.from_ref(InstanceRef.from_dir(test_dir)) as instance:
            instance.upgrade()
            assert "bulk_actions" in get_sqlite3_tables(db_path)
Example #30
def test_init_compute_log_with_bad_config_override():
    with seven.TemporaryDirectory() as tmpdir_path:
        with pytest.raises(DagsterInvalidConfigError,
                           match='Undefined field "garbage"'):
            DagsterInstance.from_ref(
                InstanceRef.from_dir(
                    tmpdir_path,
                    overrides={'compute_logs': {
                        'garbage': 'flargh'
                    }}))
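By contrast, a minimal sketch of a well-formed override. The compute log manager's module path, class name, and config key here are assumptions that follow the module/class/config override shape shown in Example #15; this is an illustration, not part of the original test:

def init_compute_log_with_valid_override_sketch():
    # Assumed module path, class name, and "base_dir" config key for the
    # local compute log manager; not taken from the original test.
    with seven.TemporaryDirectory() as tmpdir_path:
        with DagsterInstance.from_ref(
                InstanceRef.from_dir(
                    tmpdir_path,
                    overrides={
                        "compute_logs": {
                            "module": "dagster.core.storage.local_compute_log_manager",
                            "class": "LocalComputeLogManager",
                            "config": {"base_dir": tmpdir_path},
                        }
                    })) as instance:
            # a valid override should construct an instance without raising
            assert instance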