Example #1
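Requests a backfill of three partitions from "large_partition_set" and verifies that a single daemon iteration launches one run per partition.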
def test_large_backfill(external_repo_context):
    with instance_for_context(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "large_partition_set")
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="simple",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        list(
            execute_backfill_iteration(
                instance, grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon")))

        assert instance.get_runs_count() == 3
Example #2
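Steps the backfill iterator once, cancels the backfill, then verifies that draining the iterator launches no further runs once the status is CANCELED.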
def test_canceled_backfill():
    with instance_for_context(default_repo) as (
            instance,
            workspace,
            external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "simple_partition_set")
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="simple",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        iterator = execute_backfill_iteration(
            instance, workspace, get_default_daemon_logger("BackfillDaemon"))
        next(iterator)
        assert instance.get_runs_count() == 1
        backfill = instance.get_backfills()[0]
        assert backfill.status == BulkActionStatus.REQUESTED
        instance.update_backfill(
            backfill.with_status(BulkActionStatus.CANCELED))
        list(iterator)
        backfill = instance.get_backfill(backfill.backfill_id)
        assert backfill.status == BulkActionStatus.CANCELED
        assert instance.get_runs_count() == 1
Example #3
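Points a backfill at an unloadable partition-set origin and verifies that the daemon marks it FAILED with a SerializableErrorInfo instead of launching runs.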
def test_unloadable_backfill(external_repo_context):
    with instance_for_context(external_repo_context) as (
            instance,
            workspace,
            _external_repo,
    ):
        unloadable_origin = _unloadable_partition_set_origin()
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="simple",
                partition_set_origin=unloadable_origin,
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        list(
            execute_backfill_iteration(
                instance, workspace,
                get_default_daemon_logger("BackfillDaemon")))

        assert instance.get_runs_count() == 0
        backfill = instance.get_backfill("simple")
        assert backfill.status == BulkActionStatus.FAILED
        assert isinstance(backfill.error, SerializableErrorInfo)
Example #4
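The minimal happy path: three requested partitions produce three runs, each tagged with the backfill ID and its partition name.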
def test_simple_backfill(external_repo_context):
    with instance_for_context(external_repo_context) as (instance, external_repo):
        external_partition_set = external_repo.get_external_partition_set(
            "simple_partition_set")
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="simple",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        list(
            execute_backfill_iteration(
                instance, get_default_daemon_logger("BackfillDaemon")))

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
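        # get_runs() returns runs newest-first, hence the reversed unpacking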
        three, two, one = runs
        assert one.tags[BACKFILL_ID_TAG] == "simple"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert two.tags[BACKFILL_ID_TAG] == "simple"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert three.tags[BACKFILL_ID_TAG] == "simple"
        assert three.tags[PARTITION_NAME_TAG] == "three"
Example #5
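Runs a subselected pipeline to failure, then verifies that a from_failure backfill re-executes it while preserving the two-solid subselection.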
def test_backfill_from_failure_for_subselection():
    with instance_for_context(default_repo) as (
        instance,
        workspace,
        external_repo,
    ):
        partition = parallel_failure_partition_set.get_partition("one")
        run_config = parallel_failure_partition_set.run_config_for_partition(partition)
        tags = parallel_failure_partition_set.tags_for_partition(partition)
        external_partition_set = external_repo.get_external_partition_set(
            "parallel_failure_partition_set"
        )

        execute_pipeline(
            parallel_failure_pipeline,
            run_config=run_config,
            tags=tags,
            instance=instance,
            solid_selection=["fail_three", "success_four"],
            raise_on_error=False,
        )

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_finish(instance)
        run = instance.get_runs()[0]
        assert run.status == PipelineRunStatus.FAILURE

        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )

        list(
            execute_backfill_iteration(
                instance, workspace, get_default_daemon_logger("BackfillDaemon")
            )
        )
        assert instance.get_runs_count() == 2
        run = instance.get_runs(limit=1)[0]
        assert run.solids_to_execute
        assert run.solid_selection
        assert len(run.solids_to_execute) == 2
        assert len(run.solid_selection) == 2
Example #6
Runs one backfill iteration in a subprocess at a frozen pendulum test time, using the older (instance, logger) signature of execute_backfill_iteration.
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = pendulum.datetime(
        year=2021,
        month=2,
        day=17,
    ).in_tz("US/Central")
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime):
                list(
                    execute_backfill_iteration(
                        instance,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    ))
        finally:
            cleanup_test_instance(instance)
Example #7
The same subprocess harness, updated to pass a ProcessGrpcServerRegistry as the second argument.
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), ProcessGrpcServerRegistry(
                    wait_for_processes_on_exit=True) as grpc_server_registry:
                list(
                    execute_backfill_iteration(
                        instance,
                        grpc_server_registry,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    ))
        finally:
            cleanup_test_instance(instance)
Example #8
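The same subprocess harness as the two preceding examples, updated to the workspace-based signature via create_test_daemon_workspace().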
def _test_backfill_in_subprocess(instance_ref, debug_crash_flags):
    execution_datetime = to_timezone(
        create_pendulum_time(
            year=2021,
            month=2,
            day=17,
        ),
        "US/Central",
    )
    with DagsterInstance.from_ref(instance_ref) as instance:
        try:
            with pendulum.test(execution_datetime), create_test_daemon_workspace() as workspace:
                list(
                    execute_backfill_iteration(
                        instance,
                        workspace,
                        get_default_daemon_logger("BackfillDaemon"),
                        debug_crash_flags=debug_crash_flags,
                    )
                )
        finally:
            cleanup_test_instance(instance)
Example #9
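Derives partition names from a partitioned job's partitions_def and verifies each backfill run also carries the partition-set tag.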
def test_backfill_from_partitioned_job(external_repo_context):
    partition_name_list = [
        partition.name
        for partition in my_config.partitions_def.get_partitions()
    ]
    with instance_for_context(external_repo_context) as (
            instance,
            workspace,
            external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "comp_always_succeed_default_partition_set")
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="partition_schedule_from_job",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=partition_name_list[:3],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        list(
            execute_backfill_iteration(
                instance, workspace,
                get_default_daemon_logger("BackfillDaemon")))

        assert instance.get_runs_count() == 3
        runs = reversed(instance.get_runs())
        for idx, run in enumerate(runs):
            assert run.tags[BACKFILL_ID_TAG] == "partition_schedule_from_job"
            assert run.tags[PARTITION_NAME_TAG] == partition_name_list[idx]
            assert run.tags[PARTITION_SET_TAG] == "comp_always_succeed_default_partition_set"
Example #10
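Backfills full runs first, then issues a second backfill with reexecution_steps=["step_one"] and verifies that only that step executes in the partial runs.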
def test_partial_backfill(external_repo_context):
    with instance_for_context(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "partial_partition_set")

        # create full runs, where every step is executed
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="full",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0
        list(
            execute_backfill_iteration(
                instance, grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon")))
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
        three, two, one = runs

        assert one.tags[BACKFILL_ID_TAG] == "full"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert one.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, one, "step_one")
        assert step_succeeded(instance, one, "step_two")
        assert step_succeeded(instance, one, "step_three")

        assert two.tags[BACKFILL_ID_TAG] == "full"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert two.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, two, "step_one")
        assert step_succeeded(instance, two, "step_two")
        assert step_succeeded(instance, two, "step_three")

        assert three.tags[BACKFILL_ID_TAG] == "full"
        assert three.tags[PARTITION_NAME_TAG] == "three"
        assert three.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, three, "step_one")
        assert step_succeeded(instance, three, "step_two")
        assert step_succeeded(instance, three, "step_three")

        # delete one of the runs; the partial re-execution should still succeed because
        # the steps can be executed independently and require no input/output config
        instance.delete_run(one.run_id)
        assert instance.get_runs_count() == 2

        # create partial runs
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="partial",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=["step_one"],
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        list(
            execute_backfill_iteration(
                instance, grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon")))
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 5
        partial_filter = PipelineRunsFilter(tags={BACKFILL_ID_TAG: "partial"})
        assert instance.get_runs_count(filters=partial_filter) == 3
        runs = instance.get_runs(filters=partial_filter)
        three, two, one = runs

        assert one.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, one, "step_one")
        assert step_did_not_run(instance, one, "step_two")
        assert step_did_not_run(instance, one, "step_three")

        assert two.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, two, "step_one")
        assert step_did_not_run(instance, two, "step_two")
        assert step_did_not_run(instance, two, "step_three")

        assert three.status == PipelineRunStatus.SUCCESS
        assert step_succeeded(instance, three, "step_one")
        assert step_did_not_run(instance, three, "step_two")
        assert step_did_not_run(instance, three, "step_three")
Example #11
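Uses a flag file to force failures mid-pipeline, then verifies that a from_failure backfill re-executes only the failed step and its downstream step.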
def test_failure_backfill(external_repo_context):
    output_file = _failure_flag_file()
    with instance_for_context(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        external_partition_set = external_repo.get_external_partition_set(
            "conditionally_fail_partition_set")
        instance.add_backfill(
            PartitionBackfill(
                backfill_id="shouldfail",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=False,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))
        assert instance.get_runs_count() == 0

        try:
            touch_file(output_file)
            list(
                execute_backfill_iteration(
                    instance, grpc_server_registry,
                    get_default_daemon_logger("BackfillDaemon")))
            wait_for_all_runs_to_start(instance)
        finally:
            os.remove(output_file)

        assert instance.get_runs_count() == 3
        runs = instance.get_runs()
        three, two, one = runs
        assert one.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert one.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, one, "always_succeed")
        assert step_failed(instance, one, "conditionally_fail")
        assert step_did_not_run(instance, one, "after_failure")

        assert two.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert two.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, two, "always_succeed")
        assert step_failed(instance, two, "conditionally_fail")
        assert step_did_not_run(instance, two, "after_failure")

        assert three.tags[BACKFILL_ID_TAG] == "shouldfail"
        assert three.tags[PARTITION_NAME_TAG] == "three"
        assert three.status == PipelineRunStatus.FAILURE
        assert step_succeeded(instance, three, "always_succeed")
        assert step_failed(instance, three, "conditionally_fail")
        assert step_did_not_run(instance, three, "after_failure")

        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one", "two", "three"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            ))

        assert not os.path.isfile(_failure_flag_file())
        list(
            execute_backfill_iteration(
                instance, grpc_server_registry,
                get_default_daemon_logger("BackfillDaemon")))
        wait_for_all_runs_to_start(instance)

        assert instance.get_runs_count() == 6
        from_failure_filter = PipelineRunsFilter(
            tags={BACKFILL_ID_TAG: "fromfailure"})
        assert instance.get_runs_count(filters=from_failure_filter) == 3

        runs = instance.get_runs(filters=from_failure_filter)
        three, two, one = runs

        assert one.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert one.tags[PARTITION_NAME_TAG] == "one"
        assert one.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, one, "always_succeed")
        assert step_succeeded(instance, one, "conditionally_fail")
        assert step_succeeded(instance, one, "after_failure")

        assert two.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert two.tags[PARTITION_NAME_TAG] == "two"
        assert two.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, two, "always_succeed")
        assert step_succeeded(instance, two, "conditionally_fail")
        assert step_succeeded(instance, two, "after_failure")

        assert three.tags[BACKFILL_ID_TAG] == "fromfailure"
        assert three.tags[PARTITION_NAME_TAG] == "three"
        assert three.status == PipelineRunStatus.SUCCESS
        assert step_did_not_run(instance, three, "always_succeed")
        assert step_succeeded(instance, three, "conditionally_fail")
        assert step_succeeded(instance, three, "after_failure")
Example #12
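The BackfillDaemon's run_iteration with the workspace-based signature.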
def run_iteration(self, instance, workspace):
    yield from execute_backfill_iteration(instance, workspace, self._logger)
Example #13
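An older daemon variant that also receives a shutdown event and passes a gRPC server registry.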
def run_iteration(self, instance, daemon_shutdown_event, grpc_server_registry):
    yield from execute_backfill_iteration(instance, grpc_server_registry, self._logger)
Example #14
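The earliest variant, where execute_backfill_iteration takes only the instance and a logger.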
def run_iteration(self, instance):
    return execute_backfill_iteration(instance, self._logger)
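
For context, a minimal sketch of how a loop might drive run_iteration; the while loop, the daemon_loop name, the 30-second interval, and the error handling are illustrative assumptions, not Dagster's actual daemon scheduler:

import time

def daemon_loop(daemon, instance, workspace, interval_seconds=30):
    # Hypothetical driver loop: run_iteration is a generator, so exhausting
    # it performs one full pass over all REQUESTED backfills.
    while True:
        for error in daemon.run_iteration(instance, workspace):
            # Each iteration yields an error (or None) per unit of work.
            if error:
                print(error)
        time.sleep(interval_seconds)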