Example #1
def test_backfill_from_failure_for_subselection():
    with instance_for_context(default_repo) as (
        instance,
        workspace,
        external_repo,
    ):
        partition = parallel_failure_partition_set.get_partition("one")
        run_config = parallel_failure_partition_set.run_config_for_partition(partition)
        tags = parallel_failure_partition_set.tags_for_partition(partition)
        external_partition_set = external_repo.get_external_partition_set(
            "parallel_failure_partition_set"
        )

        execute_pipeline(
            parallel_failure_pipeline,
            run_config=run_config,
            tags=tags,
            instance=instance,
            solid_selection=["fail_three", "success_four"],
            raise_on_error=False,
        )

        assert instance.get_runs_count() == 1
        wait_for_all_runs_to_finish(instance)
        run = instance.get_runs()[0]
        assert run.status == PipelineRunStatus.FAILURE

        instance.add_backfill(
            PartitionBackfill(
                backfill_id="fromfailure",
                partition_set_origin=external_partition_set.get_external_origin(),
                status=BulkActionStatus.REQUESTED,
                partition_names=["one"],
                from_failure=True,
                reexecution_steps=None,
                tags=None,
                backfill_timestamp=pendulum.now().timestamp(),
            )
        )

        list(
            execute_backfill_iteration(
                instance, workspace, get_default_daemon_logger("BackfillDaemon")
            )
        )
        assert instance.get_runs_count() == 2
        run = instance.get_runs(limit=1)[0]
        assert run.solids_to_execute
        assert run.solid_selection
        assert len(run.solids_to_execute) == 2
        assert len(run.solid_selection) == 2
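The parallel_failure_pipeline and parallel_failure_partition_set fixtures are defined elsewhere in the suite. A minimal sketch of what they might look like, using the PartitionSetDefinition API from the same Dagster release line (the solid bodies and the partition list are invented for illustration):

from dagster import Partition, PartitionSetDefinition, pipeline, solid

@solid
def fail_three(_):
    # Fails on purpose so the from_failure backfill has a step to retry.
    raise Exception("deliberate failure")

@solid
def success_four(_):
    return 4

@pipeline
def parallel_failure_pipeline():
    fail_three()
    success_four()

parallel_failure_partition_set = PartitionSetDefinition(
    name="parallel_failure_partition_set",
    pipeline_name="parallel_failure_pipeline",
    partition_fn=lambda: [Partition("one")],
    run_config_fn_for_partition=lambda _partition: {},
)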
Example #2
def test_intermediate_storage_def_to_io_manager_def():
    called = {}

    @intermediate_storage()
    def no_config_intermediate_storage(init_context):
        called["ran"] = True
        object_store = InMemoryObjectStore()
        return build_intermediate_storage_from_object_store(
            object_store=object_store, init_context=init_context)

    @solid
    def return_one(_):
        return 1

    @pipeline(mode_defs=[
        ModeDefinition(
            resource_defs={
                "io_manager":
                io_manager_from_intermediate_storage(
                    no_config_intermediate_storage)
            })
    ])
    def foo():
        return_one()

    assert execute_pipeline(foo).success
Example #3
def test_intermediate_storage_reexecution():
    @solid
    def return_one(_):
        return 1

    @solid
    def plus_one(_, one):
        return one + 1

    @pipeline
    def foo():
        plus_one(return_one())

    run_config = {"intermediate_storage": {"filesystem": {}}}

    instance = DagsterInstance.ephemeral()
    result = execute_pipeline(foo, run_config=run_config, instance=instance)
    assert result.success
    reexecution_result = reexecute_pipeline(foo,
                                            run_config=run_config,
                                            parent_run_id=result.run_id,
                                            instance=instance)
    assert reexecution_result.success

    partial_reexecution_result = reexecute_pipeline(
        foo,
        run_config=run_config,
        step_selection=["plus_one"],
        parent_run_id=result.run_id,
        instance=instance,
    )
    assert partial_reexecution_result.success
Example #4
def test_dynamic(gcs_bucket):
    @solid(output_defs=[DynamicOutputDefinition()])
    def numbers(_):
        for i in range(3):
            yield DynamicOutput(i, mapping_key=str(i))

    @solid
    def echo(_, x):
        return x

    @pipeline(mode_defs=[
        ModeDefinition(resource_defs={
            "io_manager": gcs_pickle_io_manager,
            "gcs": gcs_resource
        })
    ])
    def dynamic():
        numbers().map(echo)

    result = execute_pipeline(dynamic,
                              run_config={
                                  "resources": {
                                      "io_manager": {
                                          "config": {
                                              "gcs_bucket": gcs_bucket
                                          }
                                      }
                                  }
                              })
    assert result.success
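gcs_bucket is a pytest fixture supplied by the surrounding suite; something along these lines would satisfy the test's signature (the bucket name is a placeholder, not from the original source):

import pytest

@pytest.fixture
def gcs_bucket():
    # Placeholder; the real suite points this at a bucket the test credentials can write to.
    return "my-gcs-test-bucket"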
Example #5
def test_execute_intervals():
    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipeline),
            instance=instance,
            run_config={
                "execution": {
                    "test_step_delegating_executor": {
                        "config": {
                            "check_step_health_interval_seconds": 60
                        }
                    }
                }
            },
        )
        TestStepHandler.wait_for_processes()

    assert result.success
    assert TestStepHandler.launch_step_count == 3
    assert TestStepHandler.terminate_step_count == 0
    # pipeline should complete before 60s
    assert TestStepHandler.check_step_health_count == 0

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipeline),
            instance=instance,
            run_config={
                "execution": {
                    "test_step_delegating_executor": {
                        "config": {
                            "check_step_health_interval_seconds": 0
                        }
                    }
                }
            },
        )
        TestStepHandler.wait_for_processes()

    assert result.success
    assert TestStepHandler.launch_step_count == 3
    assert TestStepHandler.terminate_step_count == 0
    # every step should get checked at least once
    assert TestStepHandler.check_step_health_count >= 3
Example #6
def test_intermediate_storage_deprecation_warning():
    @solid
    def return_one(_):
        return 1

    @pipeline
    def foo():
        return_one()

    with assert_no_warnings():
        execute_pipeline(foo)

    with pytest.warns(
        UserWarning,
        match=re.escape(
            "Intermediate Storages are deprecated in 0.10.0 and will be removed in 0.11.0."
        ),
    ):
        execute_pipeline(foo, run_config={"intermediate_storage": {"filesystem": {}}})
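assert_no_warnings is a suite helper; a self-contained stand-in with the same contract could look like this (a sketch, not necessarily the suite's actual implementation):

import warnings
from contextlib import contextmanager

@contextmanager
def assert_no_warnings():
    # Record every warning raised inside the block and fail if there were any.
    with warnings.catch_warnings(record=True) as record:
        warnings.simplefilter("always")
        yield
    assert not record, f"Unexpected warnings: {[str(w.message) for w in record]}"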
Example #7
def test_skipping():
    from .test_jobs import define_skipping_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_skipping_job),
            instance=instance,
        )
        TestStepHandler.wait_for_processes()

    assert result.success
Example #8
def test_dynamic_execute():
    from .test_jobs import define_dynamic_job

    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(define_dynamic_job),
            instance=instance,
        )
        TestStepHandler.wait_for_processes()

    assert result.success
    assert (len([
        e for e in result.event_list
        if e.event_type_value == DagsterEventType.STEP_START.value
    ]) == 11)
Example #9
def test_docker_executor():
    """
    Note that this test relies on having AWS credentials in the environment.
    """

    executor_config = {
        "execution": {
            "docker": {
                "config": {
                    "networks": ["container:test-postgres-db-docker"],
                    "env_vars": [
                        "AWS_ACCESS_KEY_ID",
                        "AWS_SECRET_ACCESS_KEY",
                    ],
                }
            }
        }
    }

    docker_image = get_test_project_docker_image()
    if IS_BUILDKITE:
        executor_config["execution"]["docker"]["config"][
            "registry"
        ] = get_buildkite_registry_config()
    else:
        find_local_test_image(docker_image)

    run_config = merge_dicts(
        merge_yamls(
            [
                os.path.join(get_test_project_environments_path(), "env.yaml"),
                os.path.join(get_test_project_environments_path(), "env_s3.yaml"),
            ]
        ),
        executor_config,
    )

    with environ({"DOCKER_LAUNCHER_NETWORK": "container:test-postgres-db-docker"}):
        with docker_postgres_instance() as instance:
            recon_pipeline = get_test_project_recon_pipeline("demo_pipeline_docker", docker_image)
            assert execute_pipeline(
                recon_pipeline, run_config=run_config, instance=instance
            ).success
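The environ helper above temporarily overrides environment variables for the duration of the block (Dagster ships such a helper in dagster.utils.test); a stdlib-only sketch of the same behavior:

import os
from contextlib import contextmanager

@contextmanager
def environ(env):
    # Set the given variables, then restore (or remove) them on exit.
    previous = {key: os.environ.get(key) for key in env}
    os.environ.update(env)
    try:
        yield
    finally:
        for key, value in previous.items():
            if value is None:
                os.environ.pop(key, None)
            else:
                os.environ[key] = value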
Example #10
def test_intermediate_storage_event_message():
    @solid
    def return_one(_):
        return 1

    @solid
    def plus_one(_, one):
        return one + 1

    @pipeline
    def foo():
        plus_one(return_one())

    run_config = {"intermediate_storage": {"filesystem": {}}}

    result = execute_pipeline(foo, run_config=run_config)

    for event in filter(lambda e: e.is_handled_output, result.event_list):
        assert "output manager" not in event.message

    for event in filter(lambda e: e.is_loaded_input, result.event_list):
        assert "input manager" not in event.message
Example #11
def test_execute_verify_step():
    TestStepHandler.reset()
    with instance_for_test() as instance:
        result = execute_pipeline(
            reconstructable(foo_pipeline_verify_step),
            instance=instance,
            run_config={
                "execution": {
                    "test_step_delegating_executor_verify_step": {
                        "config": {}
                    }
                }
            },
        )
        TestStepHandler.wait_for_processes()

    assert any(
        "Starting execution with step handler TestStepHandler" in (event.message or "")
        for event in result.event_list
    )
    assert result.success
    assert TestStepHandler.verify_step_count == 3
Example #12
def test_launch_once(external_repo_context, capfd):
    freeze_datetime = to_timezone(
        create_pendulum_time(
            year=2019,
            month=2,
            day=27,
            hour=23,
            minute=59,
            second=59,
            tz="UTC",
        ),
        "US/Central",
    )
    with instance_with_sensors(external_repo_context) as (
            instance,
            grpc_server_registry,
            external_repo,
    ):
        with pendulum.test(freeze_datetime):

            external_sensor = external_repo.get_external_sensor(
                "run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            evaluate_sensors(instance, grpc_server_registry)
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

        # run again (after 30 seconds), to ensure that the run key maintains idempotence
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            assert (
                'Skipping 1 run for sensor run_key_sensor already completed with run keys: ["only_once"]'
                in captured.out)

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

            # Sensor loop still executes
        freeze_datetime = freeze_datetime.add(seconds=30)
        with pendulum.test(freeze_datetime):
            evaluate_sensors(instance, grpc_server_registry)
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())

            assert len(ticks) == 3
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
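run_key_sensor is defined elsewhere in the suite; the idempotence this test exercises comes from the sensor emitting a constant run key, roughly like this (the pipeline name and run config are assumptions):

from dagster import RunRequest, sensor

@sensor(pipeline_name="the_pipeline")  # pipeline name assumed from the test above
def run_key_sensor(_context):
    # A fixed run_key means the daemon launches at most one run for this request;
    # later ticks that yield the same key are recorded as SKIPPED.
    yield RunRequest(run_key="only_once", run_config={})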
Example #13
def run_in_prod():
    execute_pipeline(download_pipeline, mode="prod")
Example #14
def test_local():
    result = execute_pipeline(download_pipeline, mode="unit_test")
    assert result.success
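Both examples above assume download_pipeline declares "prod" and "unit_test" modes. A sketch of that shape (the resource key and implementations are invented for illustration):

from dagster import ModeDefinition, ResourceDefinition, pipeline, solid

@solid(required_resource_keys={"storage"})
def download(context):
    # Reads/writes through whichever storage resource the selected mode provides.
    return context.resources.storage

@pipeline(
    mode_defs=[
        ModeDefinition(
            name="prod",
            resource_defs={"storage": ResourceDefinition.hardcoded_resource("s3://real-bucket")},
        ),
        ModeDefinition(
            name="unit_test",
            resource_defs={"storage": ResourceDefinition.hardcoded_resource("in-memory")},
        ),
    ]
)
def download_pipeline():
    download()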
Example #15
def test_launch_once(external_repo_context, capfd):
    freeze_datetime = pendulum.datetime(
        year=2019,
        month=2,
        day=27,
        hour=23,
        minute=59,
        second=59,
    ).in_tz("US/Central")
    with instance_with_sensors(external_repo_context) as (instance,
                                                          external_repo):
        with pendulum.test(freeze_datetime):

            external_sensor = external_repo.get_external_sensor(
                "run_key_sensor")
            instance.add_job_state(
                JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                         JobStatus.RUNNING))
            assert instance.get_runs_count() == 0
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 0

            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            wait_for_all_runs_to_start(instance)

            assert instance.get_runs_count() == 1
            run = instance.get_runs()[0]
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 1
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SUCCESS,
                expected_run_ids=[run.run_id],
            )

            # run again, ensure the run key prevents a second launch
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            assert instance.get_runs_count() == 1
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())
            assert len(ticks) == 2
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )
            captured = capfd.readouterr()
            assert (
                f"Run {run.run_id} already completed with the run key `only_once` for run_key_sensor"
                in captured.out)

            launched_run = instance.get_runs()[0]

            # Manually create a new run with the same tags
            execute_pipeline(
                the_pipeline,
                run_config=launched_run.run_config,
                tags=launched_run.tags,
                instance=instance,
            )

            # Sensor loop still executes
            list(
                execute_sensor_iteration(
                    instance, get_default_daemon_logger("SensorDaemon")))
            ticks = instance.get_job_ticks(
                external_sensor.get_external_origin_id())

            assert len(ticks) == 3
            validate_tick(
                ticks[0],
                external_sensor,
                freeze_datetime,
                JobTickStatus.SKIPPED,
            )