Example 1
def test_execute_step_verify_step():
    with get_foo_pipeline_handle() as pipeline_handle:
        runner = CliRunner()

        with instance_for_test(
                overrides={
                    "compute_logs": {
                        "module":
                        "dagster.core.storage.noop_compute_log_manager",
                        "class": "NoOpComputeLogManager",
                    }
                }) as instance:
            run = create_run_for_test(
                instance,
                pipeline_name="foo",
                run_id="new_run",
                run_config={"storage": {
                    "filesystem": {}
                }},
            )

            input_json = serialize_dagster_namedtuple(
                ExecuteStepArgs(
                    pipeline_origin=pipeline_handle.get_python_origin(),
                    pipeline_run_id=run.run_id,
                    step_keys_to_execute=None,
                    instance_ref=instance.get_ref(),
                ))

            # Check that verify succeeds for a step that hasn't been run (case 3)
            retries = Retries.from_config({"enabled": {}})
            assert verify_step(instance,
                               run,
                               retries,
                               step_keys_to_execute=["do_something"])

            # Check that verify fails when trying to retry with no original attempt (case 3)
            retries = Retries.from_config({"enabled": {}})
            retries.mark_attempt("do_something")
            assert not verify_step(
                instance, run, retries, step_keys_to_execute=["do_something"])

            # Test that trying to re-run a retry fails verify_step (case 2)
            with mock.patch("dagster.cli.api.get_step_stats_by_key"
                            ) as _step_stats_by_key:
                _step_stats_by_key.return_value = {
                    "do_something":
                    RunStepKeyStatsSnapshot(run_id=run.run_id,
                                            step_key="do_something",
                                            attempts=2)
                }

                retries = Retries.from_config({"enabled": {}})
                retries.mark_attempt("do_something")
                assert not verify_step(instance,
                                       run,
                                       retries,
                                       step_keys_to_execute=["do_something"])

            runner_execute_step(
                runner,
                [input_json],
            )

            # Check that verify fails for a step that has already run (case 1)
            retries = Retries.from_config({"enabled": {}})
            assert not verify_step(
                instance, run, retries, step_keys_to_execute=["do_something"])
Example 2
def test_mock_start_worker(worker_patch):
    with instance_for_test():
        start_worker("dagster_test_worker")
        assert_called(worker_patch)
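
For context, a minimal sketch of how the worker_patch fixture and assert_called helper used above could be defined with unittest.mock; the patch target is an assumption about dagster_celery internals, not a verified API:

import pytest
from unittest import mock

@pytest.fixture(name="worker_patch")
def worker_patch_fixture():
    # Hypothetical target: wherever start_worker actually spawns the celery
    # worker process. Patching it keeps the test hermetic and fast.
    with mock.patch("dagster_celery.cli.launch_background_worker") as patched:
        yield patched

def assert_called(mck):
    # Small compatibility helper; tests sometimes ship their own rather than
    # relying on Mock.assert_called, which is only on newer Python versions.
    assert mck.called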
Example 3
def test_partitions_for_hourly_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            # You can specify a start date with no timezone and it will be assumed to be
            # in the execution timezone

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            assert hourly_central_schedule.execution_timezone == "US/Central"

            _check_partitions(
                hourly_central_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            valid_time = create_pendulum_time(
                year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
            )
            context_with_valid_time = build_schedule_context(instance, valid_time)

            execution_data = hourly_central_schedule.get_execution_data(context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time": create_pendulum_time(
                    year=2019, month=1, day=27, hour=0, tz="US/Central"
                ).isoformat()
            }

            # You can specify a start date in a different timezone and it will be transformed into the
            # execution timezone
            start_date_with_different_timezone = create_pendulum_time(
                2019, 1, 1, 0, tz="US/Pacific"
            )

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule_with_timezone_start_time(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_central_schedule_with_timezone_start_time,
                HOURS_UNTIL_FEBRUARY_27 - 2,  # start date is two hours later since it's in PT
                to_timezone(start_date_with_different_timezone, "US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            # test partition_hours_offset=0

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
                partition_hours_offset=0,
            )
            def hourly_schedule_for_current_hour(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            valid_time = create_pendulum_time(
                year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
            )
            context_with_valid_time = build_schedule_context(instance, valid_time)

            execution_data = hourly_schedule_for_current_hour.get_execution_data(
                context_with_valid_time
            )
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time": create_pendulum_time(
                    year=2019, month=1, day=27, hour=1, tz="US/Central"
                ).isoformat()
            }

            # test partition_hours_offset=2

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
                partition_hours_offset=2,
            )
            def hourly_schedule_for_two_hours_ago(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            valid_time = create_pendulum_time(
                year=2019, month=1, day=27, hour=1, minute=25, tz="US/Central"
            )
            context_with_valid_time = build_schedule_context(instance, valid_time)

            execution_data = hourly_schedule_for_two_hours_ago.get_execution_data(
                context_with_valid_time
            )
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time": create_pendulum_time(
                    year=2019, month=1, day=26, hour=23, tz="US/Central"
                ).isoformat()
            }
Example 4
def test_engine_events(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=math_diamond, run_config=run_config)
        run_id = pipeline_run.run_id

        assert instance.get_run_by_id(
            run_id).status == PipelineRunStatus.NOT_STARTED

        with get_external_pipeline(
                pipeline_run.pipeline_name) as external_pipeline:
            instance.launch_run(pipeline_run.run_id, external_pipeline)
            finished_pipeline_run = poll_for_finished_run(instance, run_id)

            assert finished_pipeline_run
            assert finished_pipeline_run.run_id == run_id
            assert finished_pipeline_run.status == PipelineRunStatus.SUCCESS

            poll_for_event(instance,
                           run_id,
                           event_type="ENGINE_EVENT",
                           message="Process for pipeline exited")
            event_records = instance.all_logs(run_id)

            engine_events = _get_engine_events(event_records)

            if _is_multiprocess(run_config):
                messages = [
                    "Started process for pipeline",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Executing steps using multiprocess executor",
                    "Launching subprocess for return_one",
                    "Executing step return_one in subprocess",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    # multiply_by_2 and multiply_by_3 launch and execute in non-deterministic order
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "",
                    "Launching subprocess for add",
                    "Executing step add in subprocess",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Multiprocess executor: parent process exiting",
                    "Process for pipeline exited",
                ]
            else:
                messages = [
                    "Started process for pipeline",
                    "Starting initialization of resources",
                    "Finished initialization of resources",
                    "Executing steps in process",
                    "Finished steps in process",
                    "Process for pipeline exited",
                ]

            events_iter = iter(engine_events)
            assert len(engine_events) == len(messages)

            for message in messages:
                next_log = next(events_iter)
                assert message in next_log.message
Example 5
def test_partitions_for_hourly_schedule_decorators_with_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                pendulum.create(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            start_date = datetime(year=2019, month=1, day=1)

            # You can specify a start date with no timezone and it will be assumed to be
            # in the execution timezone

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            assert hourly_central_schedule.execution_timezone == "US/Central"

            _check_partitions(
                hourly_central_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )

            valid_time = pendulum.create(year=2019,
                                         month=1,
                                         day=27,
                                         hour=1,
                                         minute=25,
                                         tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_time)

            assert hourly_central_schedule.get_run_config(
                context_with_valid_time) == {
                    "hourly_time":
                    pendulum.create(year=2019,
                                    month=1,
                                    day=27,
                                    hour=0,
                                    tz="US/Central").isoformat()
                }

            assert hourly_central_schedule.should_execute(
                context_with_valid_time)

            # You can specify a start date in a different timezone and it will be transformed into the
            # execution timezone
            start_date_with_different_timezone = pendulum.create(
                2019, 1, 1, 0, tz="US/Pacific")

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date_with_different_timezone,
                execution_time=time(hour=0, minute=25),
                execution_timezone="US/Central",
            )
            def hourly_central_schedule_with_timezone_start_time(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_central_schedule_with_timezone_start_time,
                HOURS_UNTIL_FEBRUARY_27 -
                2,  # start date is two hours later since it's in PT
                start_date_with_different_timezone.in_tz("US/Central"),
                DEFAULT_HOURLY_FORMAT_WITH_TIMEZONE,
                relativedelta(hours=1),
            )
Example 6
def test_execute_canceled_state():
    def event_callback(_record):
        pass

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        with pytest.raises(DagsterInvariantViolationError):
            execute_run(
                InMemoryPipeline(pipeline_def),
                pipeline_run,
                instance=instance,
            )

        logs = instance.all_logs(pipeline_run.run_id)

        assert len(logs) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in logs[0].message)

        iter_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        iter_events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 iter_run,
                                 instance=instance))

        assert len(iter_events) == 1
        assert (
            "Not starting execution since the run was canceled before execution could start"
            in iter_events[0].message)
Example 7
def test_filesystem_persist_one_run(tmpdir):
    with instance_for_test(temp_dir=str(tmpdir)) as instance:
        do_test_single_write_read(instance)
Example 8
def instance_fixture():
    with instance_for_test() as instance:
        yield instance
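
A minimal sketch, assuming pytest, of how instance_fixture is typically registered and consumed; the fixture name "instance" and the test body are illustrative:

import pytest
from dagster.core.test_utils import instance_for_test

@pytest.fixture(name="instance")
def instance_fixture():
    # instance_for_test yields an ephemeral DagsterInstance backed by a
    # temporary directory, then tears it down when the test finishes.
    with instance_for_test() as instance:
        yield instance

def test_starts_empty(instance):
    # A fresh test instance has no run history.
    assert instance.get_runs() == []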
Example 9
def test_run_list():
    with instance_for_test():
        runner = CliRunner()
        result = runner.invoke(run_list_command)
        assert result.exit_code == 0
Example 10
def test_memoized_plan_inits_resources_once():
    @solid(output_defs=[OutputDefinition(io_manager_key="foo")], version="foo")
    def foo_solid():
        pass

    @solid(output_defs=[OutputDefinition(io_manager_key="bar")], version="bar")
    def bar_solid():
        pass

    foo_capture = []
    bar_capture = []
    resource_dep_capture = []
    default_capture = []

    @io_manager(required_resource_keys={"my_resource"})
    def foo_manager():
        foo_capture.append("entered")
        return VersionedInMemoryIOManager()

    @io_manager(required_resource_keys={"my_resource"})
    def bar_manager():
        bar_capture.append("entered")
        return VersionedInMemoryIOManager()

    @io_manager
    def default_manager():
        default_capture.append("entered")
        return VersionedInMemoryIOManager()

    @resource
    def my_resource():
        resource_dep_capture.append("entered")
        return None

    @pipeline(
        mode_defs=[
            ModeDefinition(
                name="fakemode",
                resource_defs={
                    "foo": foo_manager,
                    "bar": bar_manager,
                    "my_resource": my_resource,
                    "io_manager": default_manager,
                },
            ),
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def wrap_pipeline():
        foo_solid()
        foo_solid.alias("another_foo")()
        bar_solid()
        bar_solid.alias("another_bar")()

    with instance_for_test() as instance:
        create_execution_plan(wrap_pipeline, instance_ref=instance.get_ref())

    assert len(foo_capture) == 1
    assert len(bar_capture) == 1
    assert len(resource_dep_capture) == 1
    assert len(default_capture) == 0
Example 11
def test_execute_run_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventLogEntry)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {
                "callback": {}
            }},
            mode="default",
        )

        iterator = execute_run_iterator(InMemoryPipeline(pipeline_def),
                                        pipeline_run,
                                        instance=instance)

        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        events = [
            record.dagster_event for record in records
            if record.is_dagster_event
        ]
        messages = [
            record.user_message for record in records
            if not record.is_dagster_event
        ]
        pipeline_failure_events = [
            event for event in events if event.is_pipeline_failure
        ]
        assert len(pipeline_failure_events) == 1
        assert "GeneratorExit" in pipeline_failure_events[
            0].pipeline_failure_data.error.message
        assert len(
            [message for message in messages if message == "CLEANING A"]) > 0
        assert len(
            [message for message in messages if message == "CLEANING B"]) > 0

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
                check.CheckError,
                match=r"Pipeline run basic_resource_pipeline \({}\) in state"
                r" PipelineRunStatus.SUCCESS, expected NOT_STARTED or STARTING"
                .format(pipeline_run.run_id),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance)

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance))

        assert len(events) == 1
        assert (
            events[0].message ==
            "Not starting execution since the run was canceled before execution could start"
        )
Example 12
def test_memoized_plan_affected_by_resource_config():
    @solid(required_resource_keys={"my_resource"}, version="39")
    def solid_reqs_resource():
        pass

    @resource(version="42", config_schema={"foo": str})
    def basic():
        pass

    manager = VersionedInMemoryIOManager()

    @pipeline(
        mode_defs=[
            ModeDefinition(resource_defs={
                "my_resource":
                basic,
                "io_manager":
                IOManagerDefinition.hardcoded_io_manager(manager),
            }, )
        ],
        tags={MEMOIZED_RUN_TAG: "true"},
    )
    def my_pipeline():
        solid_reqs_resource()

    with instance_for_test() as instance:
        my_resource_config = {"foo": "bar"}
        run_config = {
            "resources": {
                "my_resource": {
                    "config": my_resource_config
                }
            }
        }

        unmemoized_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=instance.get_ref())

        assert unmemoized_plan.step_keys_to_execute == ["solid_reqs_resource"]

        step_output_handle = StepOutputHandle("solid_reqs_resource", "result")
        version = unmemoized_plan.get_version_for_step_output_handle(
            step_output_handle)

        manager.values[step_output_handle.step_key,
                       step_output_handle.output_name, version] = 5

        memoized_plan = create_execution_plan(my_pipeline,
                                              run_config=run_config,
                                              instance_ref=instance.get_ref())

        assert len(memoized_plan.step_keys_to_execute) == 0

        my_resource_config["foo"] = "baz"

        changed_config_plan = create_execution_plan(
            my_pipeline,
            run_config=run_config,
            instance_ref=instance.get_ref())

        assert changed_config_plan.step_keys_to_execute == [
            "solid_reqs_resource"
        ]
Example 13
def test_template_task_dag():
    dag = DAG(
        dag_id="dag",
        default_args=default_args,
        schedule_interval=None,
    )

    t1 = BashOperator(
        task_id="print_hello",
        bash_command="echo hello dagsir",
        dag=dag,
    )

    t2 = BashOperator(
        task_id="sleep",
        bash_command="sleep 2",
        dag=dag,
    )

    templated_command = """
    {% for i in range(5) %}
        echo '{{ ds }}'
        echo '{{ macros.ds_add(ds, 7)}}'
        echo '{{ params.my_param }}'
    {% endfor %}
    """

    t3 = BashOperator(
        task_id="templated",
        depends_on_past=False,
        bash_command=templated_command,
        params={"my_param": "Parameter I passed in"},
        dag=dag,
    )

    # pylint: disable=pointless-statement
    t1 >> [t2, t3]

    with instance_for_test() as instance:
        manager = instance.compute_log_manager

        execution_date = get_current_datetime_in_utc()
        execution_date_add_one_week = execution_date + datetime.timedelta(
            days=7)
        execution_date_iso = execution_date.strftime("%Y-%m-%d")
        execution_date_add_one_week_iso = execution_date_add_one_week.strftime(
            "%Y-%m-%d")

        result = execute_pipeline(
            make_dagster_pipeline_from_airflow_dag(
                dag=dag, tags={AIRFLOW_EXECUTION_DATE_STR:
                               execution_date_iso}),
            instance=instance,
        )

        compute_steps = [
            event.step_key for event in result.step_event_list
            if event.event_type == DagsterEventType.STEP_START
        ]

        assert compute_steps == [
            "airflow_print_hello",
            "airflow_sleep",
            "airflow_templated",
        ]

        for step_key in compute_steps:
            compute_io_path = manager.get_local_path(result.run_id, step_key,
                                                     ComputeIOType.STDOUT)
            assert os.path.exists(compute_io_path)
            stdout_file = open(compute_io_path, "r")
            file_contents = normalize_file_content(stdout_file.read())
            stdout_file.close()

            if step_key == "airflow_print_hello":
                assert file_contents.count(
                    "INFO - Running command: echo hello dagsir\n") == 1
                assert file_contents.count(
                    "INFO - Command exited with return code 0") == 1

            elif step_key == "airflow_sleep":
                assert file_contents.count(
                    "INFO - Running command: sleep 2\n") == 1
                assert file_contents.count("INFO - Output:\n") == 1
                assert file_contents.count(
                    "INFO - Command exited with return code 0") == 1

            elif step_key == "airflow_templated":
                assert (file_contents.count(
                    "INFO - Running command: \n    \n        "
                    "echo '{execution_date_iso}'\n        "
                    "echo '{execution_date_add_one_week_iso}'\n        "
                    "echo 'Parameter I passed in'\n    \n        "
                    "echo '{execution_date_iso}'\n        "
                    "echo '{execution_date_add_one_week_iso}'\n        "
                    "echo 'Parameter I passed in'\n    \n        "
                    "echo '{execution_date_iso}'\n        "
                    "echo '{execution_date_add_one_week_iso}'\n        "
                    "echo 'Parameter I passed in'\n    \n        "
                    "echo '{execution_date_iso}'\n        "
                    "echo '{execution_date_add_one_week_iso}'\n        "
                    "echo 'Parameter I passed in'\n    \n        "
                    "echo '{execution_date_iso}'\n        "
                    "echo '{execution_date_add_one_week_iso}'\n        "
                    "echo 'Parameter I passed in'\n    \n    \n".format(
                        execution_date_iso=execution_date_iso,
                        execution_date_add_one_week_iso=
                        execution_date_add_one_week_iso,
                    )) == 1)
                assert (file_contents.count(
                    "INFO - {execution_date_iso}\n".format(
                        execution_date_iso=execution_date_iso)) == 5)
                assert (file_contents.count(
                    "INFO - {execution_date_add_one_week_iso}\n".format(
                        execution_date_add_one_week_iso=
                        execution_date_add_one_week_iso)) == 5)
                assert file_contents.count(
                    "INFO - Parameter I passed in\n") == 5
                assert file_contents.count(
                    "INFO - Command exited with return code 0") == 1
Example 14
def test_terminated_run(get_external_pipeline, in_process):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline, run_config=None)

        with get_external_pipeline(
                pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            terminated_pipeline_run = poll_for_finished_run(instance,
                                                            run_id,
                                                            timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(instance,
                           run_id,
                           event_type="ENGINE_EVENT",
                           message="Process for pipeline exited")
            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Received pipeline termination request",
            )

            run_logs = instance.all_logs(run_id)
            event_types = [
                event.dagster_event.event_type_value for event in run_logs
            ]

            if in_process:
                poll_for_event(
                    instance,
                    run_id,
                    event_type="ENGINE_EVENT",
                    message="Pipeline was terminated successfully",
                )
                assert event_types == [
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "ENGINE_EVENT",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                ]
            else:
                poll_for_event(
                    instance,
                    run_id,
                    event_type="ENGINE_EVENT",
                    message="Pipeline execution terminated by interrupt",
                )
                assert event_types == [
                    "ENGINE_EVENT",
                    "PIPELINE_START",
                    "ENGINE_EVENT",
                    "STEP_START",
                    "ENGINE_EVENT",
                    "STEP_FAILURE",
                    "PIPELINE_FAILURE",
                    "ENGINE_EVENT",
                    "ENGINE_EVENT",
                ]
Example 15
def test_filtered_runs():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        run_id_1 = execute_pipeline(repo.get_pipeline("foo_pipeline"),
                                    instance=instance,
                                    tags={
                                        "run": "one"
                                    }).run_id
        run_id_2 = execute_pipeline(repo.get_pipeline("foo_pipeline"),
                                    instance=instance,
                                    tags={
                                        "run": "two"
                                    }).run_id
        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": {
                    "runIds": [run_id_1]
                }})
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 1
            assert run_ids[0] == run_id_1

            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={
                    "filter": {
                        "tags": [{
                            "key": "run",
                            "value": "one"
                        }]
                    }
                },
            )
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 1
            assert run_ids[0] == run_id_1

            # test multiple run ids
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": {
                    "runIds": [run_id_1, run_id_2]
                }})
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 2
            assert set(run_ids) == set([run_id_1, run_id_2])
Example 16
def test_run_wipe_incorrect_delete_message():
    with instance_for_test():
        runner = CliRunner()
        result = runner.invoke(run_wipe_command, input="WRONG\n")
        assert "Exiting without deleting all run history and event logs" in result.output
        assert result.exit_code == 0
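
A sketch of the complementary success path, assuming the command prompts for a literal DELETE confirmation; the keyword is inferred from the failure-path test above, not verified against the CLI:

def test_run_wipe_correct_delete_message():
    with instance_for_test():
        runner = CliRunner()
        # "DELETE" is the assumed confirmation string; the failure test above
        # shows that any other input aborts the wipe.
        result = runner.invoke(run_wipe_command, input="DELETE\n")
        assert result.exit_code == 0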
Example 17
def test_execute_mode_command():
    runner = CliRunner()

    with instance_for_test():
        add_result = runner_pipeline_execute(
            runner,
            [
                "-f",
                file_relative_path(__file__, "../../general_tests/test_repository.py"),
                "-a",
                "dagster_test_repository",
                "--config",
                file_relative_path(
                    __file__, "../../environments/multi_mode_with_resources/add_mode.yaml"
                ),
                "--mode",
                "add_mode",
                "-p",
                "multi_mode_with_resources",  # pipeline name
            ],
        )

        assert add_result

        mult_result = runner_pipeline_execute(
            runner,
            [
                "-f",
                file_relative_path(__file__, "../../general_tests/test_repository.py"),
                "-a",
                "dagster_test_repository",
                "--config",
                file_relative_path(
                    __file__, "../../environments/multi_mode_with_resources/mult_mode.yaml"
                ),
                "--mode",
                "mult_mode",
                "-p",
                "multi_mode_with_resources",  # pipeline name
            ],
        )

        assert mult_result

        double_adder_result = runner_pipeline_execute(
            runner,
            [
                "-f",
                file_relative_path(__file__, "../../general_tests/test_repository.py"),
                "-a",
                "dagster_test_repository",
                "--config",
                file_relative_path(
                    __file__, "../../environments/multi_mode_with_resources/double_adder_mode.yaml"
                ),
                "--mode",
                "double_adder_mode",
                "-p",
                "multi_mode_with_resources",  # pipeline name
            ],
        )

        assert double_adder_result
Example 18
def test_grpc_default_settings():
    with instance_for_test() as instance:
        assert (
            instance.code_server_process_startup_timeout
            == DEFAULT_LOCAL_CODE_SERVER_STARTUP_TIMEOUT
        )
Example 19
def test_execute_run_iterator():
    records = []

    def event_callback(record):
        assert isinstance(record, EventLogEntry)
        records.append(record)

    with instance_for_test() as instance:
        pipeline_def = PipelineDefinition(
            name="basic_resource_pipeline",
            solid_defs=[resource_solid],
            mode_defs=[
                ModeDefinition(
                    resource_defs={
                        "a": resource_a,
                        "b": resource_b
                    },
                    logger_defs={
                        "callback": construct_event_logger(event_callback)
                    },
                )
            ],
        )
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={"loggers": {
                "callback": {}
            }},
            mode="default",
        )

        iterator = execute_run_iterator(InMemoryPipeline(pipeline_def),
                                        pipeline_run,
                                        instance=instance)

        event_type = None
        while event_type != "STEP_START":
            event = next(iterator)
            event_type = event.event_type_value

        iterator.close()
        events = [
            record.dagster_event for record in records
            if record.is_dagster_event
        ]
        messages = [
            record.user_message for record in records
            if not record.is_dagster_event
        ]
        pipeline_failure_events = [
            event for event in events if event.is_pipeline_failure
        ]
        assert len(pipeline_failure_events) == 1
        assert "GeneratorExit" in pipeline_failure_events[
            0].pipeline_failure_data.error.message
        assert len(
            [message for message in messages if message == "CLEANING A"]) > 0
        assert len(
            [message for message in messages if message == "CLEANING B"]) > 0

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.SUCCESS)

        with pytest.raises(
                Exception,
                match=r"basic_resource_pipeline \({}\) started a new "
                r"run while the run was already in state DagsterRunStatus.SUCCESS."
                .format(pipeline_run.run_id),
        ):
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance)

        with instance_for_test(
                overrides={
                    "run_launcher": {
                        "module":
                        "dagster_tests.daemon_tests.test_monitoring_daemon",
                        "class": "TestRunLauncher",
                    },
                    "run_monitoring": {
                        "enabled": True
                    },
                }) as run_monitoring_instance:
            event = next(
                execute_run_iterator(InMemoryPipeline(pipeline_def),
                                     pipeline_run,
                                     instance=run_monitoring_instance))
            assert (
                "Ignoring a duplicate run that was started from somewhere other than the run monitor daemon"
                in event.message)

            with pytest.raises(
                    check.CheckError,
                    match=
                    r"in state DagsterRunStatus.SUCCESS, expected STARTED or STARTING "
                    r"because it's resuming from a run worker failure",
            ):
                execute_run_iterator(
                    InMemoryPipeline(pipeline_def),
                    pipeline_run,
                    instance=run_monitoring_instance,
                    resume_from_failure=True,
                )

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=pipeline_def,
            run_config={
                "loggers": {
                    "callback": {}
                }
            },
            mode="default",
        ).with_status(PipelineRunStatus.CANCELED)

        events = list(
            execute_run_iterator(InMemoryPipeline(pipeline_def),
                                 pipeline_run,
                                 instance=instance))

        assert len(events) == 1
        assert (
            events[0].message ==
            "Not starting execution since the run was canceled before execution could start"
        )
Example 20
def test_grpc_override_settings():
    with instance_for_test(overrides={"code_servers": {"local_startup_timeout": 60}}) as instance:
        assert instance.code_server_process_startup_timeout == 60
Example 21
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(
                pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            instance.launch_run(pipeline_run.run_id, external_pipeline)

            poll_for_step_start(instance, run_id)

            launcher = instance.run_launcher
            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            terminated_pipeline_run = poll_for_finished_run(instance,
                                                            run_id,
                                                            timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.CANCELED

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                _check_event_log_contains(
                    run_logs,
                    [
                        ("PIPELINE_CANCELING",
                         "Sending pipeline termination request."),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: received termination signal - forwarding to active child process",
                        ),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: interrupted all active child processes",
                        ),
                        ("STEP_FAILURE",
                         'Execution of step "sleepy_solid" failed.'),
                        (
                            "PIPELINE_CANCELED",
                            'Execution of pipeline "sleepy_pipeline" canceled.',
                        ),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
            else:
                _check_event_log_contains(
                    run_logs,
                    [
                        ("PIPELINE_CANCELING",
                         "Sending pipeline termination request."),
                        ("STEP_FAILURE",
                         'Execution of step "sleepy_solid" failed.'),
                        (
                            "PIPELINE_CANCELED",
                            'Execution of pipeline "sleepy_pipeline" canceled.',
                        ),
                        ("ENGINE_EVENT",
                         "Pipeline execution terminated by interrupt"),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
Example 22
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):

            context_without_time = ScheduleExecutionContext(instance, None)

            start_date = datetime(year=2019, month=1, day=1)

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
            )
            def hourly_foo_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_foo_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
                relativedelta(hours=1),
            )

            execution_data = hourly_foo_schedule.get_execution_data(
                context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=2,
                                     day=26,
                                     hour=23,
                                     tz="US/Central").isoformat()
            }

            # time that's invalid since it corresponds to a partition before the start date
            # should not execute and should yield a SkipReason if it tries to generate run config
            execution_time_with_invalid_partition = create_pendulum_time(
                year=2018,
                month=12,
                day=30,
                hour=3,
                minute=25,
                tz="US/Central")
            context_with_invalid_time = ScheduleExecutionContext(
                instance, execution_time_with_invalid_partition)

            execution_data = hourly_foo_schedule.get_execution_data(
                context_with_invalid_time)

            assert len(execution_data) == 1
            skip_data = execution_data[0]
            assert isinstance(skip_data, SkipReason)
            assert (
                "Partition selector did not return a partition. "
                "Make sure that the timezone on your partition set matches your execution timezone."
                in skip_data.skip_message)

            valid_time = create_pendulum_time(year=2019,
                                              month=1,
                                              day=27,
                                              hour=1,
                                              minute=25,
                                              tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_time)

            execution_data = hourly_foo_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "hourly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=27,
                                     hour=0,
                                     tz="US/Central").isoformat()
            }
Example 23
def test_partitions_for_hourly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                pendulum.create(2019, 2, 27, 0, 1, 1, tz="US/Central")):

            context_without_time = ScheduleExecutionContext(instance, None)

            start_date = datetime(year=2019, month=1, day=1)

            @hourly_schedule(
                pipeline_name="foo_pipeline",
                start_date=start_date,
                execution_time=time(hour=0, minute=25),
            )
            def hourly_foo_schedule(hourly_time):
                return {"hourly_time": hourly_time.isoformat()}

            _check_partitions(
                hourly_foo_schedule,
                HOURS_UNTIL_FEBRUARY_27,
                start_date,
                DEFAULT_HOURLY_FORMAT_WITHOUT_TIMEZONE,
                relativedelta(hours=1),
            )

            assert hourly_foo_schedule.get_run_config(
                context_without_time) == {
                    "hourly_time":
                    pendulum.create(year=2019,
                                    month=2,
                                    day=26,
                                    hour=23,
                                    tz="US/Central").isoformat()
                }
            assert hourly_foo_schedule.should_execute(context_without_time)

            # time that's invalid since it corresponds to a partition that hasn't happened yet
            # should not execute and should throw if it tries to generate run config
            execution_time_with_invalid_partition = datetime(year=2019,
                                                             month=2,
                                                             day=27,
                                                             hour=3,
                                                             minute=25)
            context_with_invalid_time = ScheduleExecutionContext(
                instance, execution_time_with_invalid_partition)

            assert not hourly_foo_schedule.should_execute(
                context_with_invalid_time)

            with pytest.raises(
                    DagsterInvariantViolationError,
                    match=
                    "The partition selection function `default_partition_selector` did not return a partition from PartitionSet hourly_foo_schedule_partitions",
            ):
                hourly_foo_schedule.get_run_config(context_with_invalid_time)

            valid_time = datetime(year=2019,
                                  month=1,
                                  day=27,
                                  hour=1,
                                  minute=25)
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_time)

            assert hourly_foo_schedule.get_run_config(
                context_with_valid_time) == {
                    "hourly_time":
                    pendulum.create(year=2019,
                                    month=1,
                                    day=27,
                                    hour=0,
                                    tz="US/Central").isoformat()
                }

            assert hourly_foo_schedule.should_execute(context_with_valid_time)
Example 24
def test_partitions_for_weekly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
                create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="US/Central")):
            context_without_time = ScheduleExecutionContext(instance, None)

            start_date = datetime(year=2019, month=1, day=1)

            @weekly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_week=3,
                start_date=start_date,
                execution_time=time(9, 30),
            )
            def weekly_foo_schedule(weekly_time):
                return {"weekly_time": weekly_time.isoformat()}

            valid_weekly_time = create_pendulum_time(year=2019,
                                                     month=1,
                                                     day=30,
                                                     hour=9,
                                                     minute=30,
                                                     tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_weekly_time)

            execution_data = weekly_foo_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "weekly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=22,
                                     tz="US/Central").isoformat()
            }

            execution_data = weekly_foo_schedule.get_execution_data(
                context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "weekly_time":
                create_pendulum_time(year=2019,
                                     month=2,
                                     day=19,
                                     tz="US/Central").isoformat()
            }

            _check_partitions(
                weekly_foo_schedule,
                8,
                pendulum.instance(start_date, tz="US/Central"),
                DEFAULT_DATE_FORMAT,
                relativedelta(weeks=1),
            )

            # Test partition_weeks_offset=0

            @weekly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_week=3,
                start_date=start_date,
                execution_time=time(9, 30),
                partition_weeks_offset=0,
            )
            def weekly_foo_same_week_schedule(weekly_time):
                return {"weekly_time": weekly_time.isoformat()}

            valid_weekly_time = create_pendulum_time(year=2019,
                                                     month=1,
                                                     day=30,
                                                     hour=9,
                                                     minute=30,
                                                     tz="US/Central")
            context_with_valid_time = ScheduleExecutionContext(
                instance, valid_weekly_time)

            execution_data = weekly_foo_same_week_schedule.get_execution_data(
                context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "weekly_time":
                create_pendulum_time(year=2019,
                                     month=1,
                                     day=29,
                                     tz="US/Central").isoformat()
            }
Example 25
def test_terminated_run(get_external_pipeline, run_config):  # pylint: disable=redefined-outer-name
    with instance_for_test() as instance:
        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=sleepy_pipeline,
            run_config=run_config,
        )

        with get_external_pipeline(
                pipeline_run.pipeline_name) as external_pipeline:
            run_id = pipeline_run.run_id

            assert instance.get_run_by_id(
                run_id).status == PipelineRunStatus.NOT_STARTED

            launcher = instance.run_launcher
            launcher.launch_run(instance, pipeline_run, external_pipeline)

            poll_for_step_start(instance, run_id)

            assert launcher.can_terminate(run_id)
            assert launcher.terminate(run_id)

            terminated_pipeline_run = poll_for_finished_run(instance,
                                                            run_id,
                                                            timeout=30)
            terminated_pipeline_run = instance.get_run_by_id(run_id)
            assert terminated_pipeline_run.status == PipelineRunStatus.FAILURE

            poll_for_event(
                instance,
                run_id,
                event_type="ENGINE_EVENT",
                message="Process for pipeline exited",
            )

            run_logs = instance.all_logs(run_id)

            if _is_multiprocess(run_config):
                _check_event_log(
                    run_logs,
                    [
                        ("ENGINE_EVENT", "Started process for pipeline"),
                        ("PIPELINE_START",
                         'Started execution of pipeline "sleepy_pipeline".'),
                        ("ENGINE_EVENT",
                         "Executing steps using multiprocess executor"),
                        ("ENGINE_EVENT",
                         "Launching subprocess for sleepy_solid.compute"),
                        ("ENGINE_EVENT",
                         "Executing step sleepy_solid.compute in subprocess"),
                        ("STEP_START",
                         'Started execution of step "sleepy_solid.compute".'),
                        ("ENGINE_EVENT",
                         "Received pipeline termination request"),
                        (
                            "ENGINE_EVENT",
                            "Multiprocess executor: received termination signal - forwarding to active child process",
                        ),
                        ("STEP_FAILURE",
                         'Execution of step "sleepy_solid.compute" failed.'),
                        (
                            "PIPELINE_FAILURE",
                            'Execution of pipeline "sleepy_pipeline" failed. An exception was thrown during execution.',
                        ),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
            else:
                _check_event_log(
                    run_logs,
                    [
                        ("ENGINE_EVENT", "Started process for pipeline"),
                        ("PIPELINE_START",
                         'Started execution of pipeline "sleepy_pipeline".'),
                        ("ENGINE_EVENT", "Executing steps in process"),
                        ("STEP_START",
                         'Started execution of step "sleepy_solid.compute".'),
                        ("ENGINE_EVENT",
                         "Received pipeline termination request"),
                        ("STEP_FAILURE",
                         'Execution of step "sleepy_solid.compute" failed.'),
                        ("PIPELINE_FAILURE",
                         'Execution of pipeline "sleepy_pipeline" failed.'),
                        ("ENGINE_EVENT",
                         "Pipeline execution terminated by interrupt"),
                        ("ENGINE_EVENT", "Process for pipeline exited"),
                    ],
                )
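
The _check_event_log helper is not shown in this listing. Below is a minimal sketch of what a helper like it might do, assuming run_logs is a list of dagster event-log records exposing is_dagster_event, dagster_event, and user_message (these attribute names are assumptions; the real helper lives elsewhere in the test suite):

def _check_event_log_sketch(run_logs, expected):
    # Keep only structured dagster events, then compare each
    # (event type, message fragment) pair against the expectations in order.
    structured = [record for record in run_logs if record.is_dagster_event]
    assert len(structured) == len(expected)
    for record, (event_type, message) in zip(structured, expected):
        assert record.dagster_event.event_type_value == event_type
        assert message in record.user_message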
Example 26
def test_list_command_cli():
    with instance_for_test():

        runner = CliRunner()

        result = runner.invoke(
            pipeline_list_command,
            ["-f", file_relative_path(__file__, "test_cli_commands.py"), "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                file_relative_path(__file__, "test_cli_commands.py"),
                "-a",
                "bar",
                "-d",
                os.path.dirname(__file__),
            ],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands", "-a", "bar"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-w", file_relative_path(__file__, "workspace.yaml")]
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-w",
                file_relative_path(__file__, "workspace.yaml"),
                "-w",
                file_relative_path(__file__, "override.yaml"),
            ],
        )
        assert_correct_extra_repository_output(result)

        result = runner.invoke(
            pipeline_list_command,
            [
                "-f",
                "foo.py",
                "-m",
                "dagster_tests.cli_tests.command_tests.test_cli_commands",
                "-a",
                "bar",
            ],
        )
        assert result.exit_code == 2

        result = runner.invoke(
            pipeline_list_command,
            ["-m", "dagster_tests.cli_tests.command_tests.test_cli_commands"],
        )
        assert_correct_bar_repository_output(result)

        result = runner.invoke(
            pipeline_list_command, ["-f", file_relative_path(__file__, "test_cli_commands.py")]
        )
        assert_correct_bar_repository_output(result)
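
For reference, the CliRunner invocations above correspond to shell commands of the following form (assuming the standard dagster console entry point; paths abbreviated):

# dagster pipeline list -f test_cli_commands.py -a bar
# dagster pipeline list -m dagster_tests.cli_tests.command_tests.test_cli_commands -a bar
# dagster pipeline list -w workspace.yaml
# Combining -f and -m in one invocation is rejected, which is why the mixed
# invocation above asserts exit code 2.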
Example 27
def test_start_mock_worker_config_from_yaml(worker_patch):
    with instance_for_test():
        args = ["-y", file_relative_path(__file__, "engine_config.yaml")]
        start_worker("dagster_test_worker", args=args)
        assert_called(worker_patch)
Example 28
def test_run_groups_over_time():
    with instance_for_test() as instance:
        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context_at_time_1:

            result = execute_dagster_graphql(context_at_time_1,
                                             ALL_RUN_GROUPS_QUERY)
            assert result.data
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t1_runs = {
                run["runId"]: run
                for group in result.data["runGroupsOrError"]["results"]
                for run in group["runs"]
            }

            # test full_evolve_run_id
            assert t1_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            # test foo_run_id
            assert t1_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }

            # test evolve_a_run_id
            assert t1_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }
            assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # test evolve_b_run_id
            assert t1_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }

        with define_out_of_process_context(__file__, "get_repo_at_time_2",
                                           instance) as context_at_time_2:
            result = execute_dagster_graphql(context_at_time_2,
                                             ALL_RUN_GROUPS_QUERY)
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t2_runs = {
                run["runId"]: run
                for group in result.data["runGroupsOrError"]["results"]
                for run in group["runs"]
            }

            # test full_evolve_run_id
            assert t2_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            # test evolve_a_run_id
            assert t2_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }
            assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # names same
            assert (t1_runs[full_evolve_run_id]["pipeline"]["name"] ==
                    t2_runs[evolve_a_run_id]["pipeline"]["name"])

            # snapshots differ
            assert (t1_runs[full_evolve_run_id]["pipelineSnapshotId"] !=
                    t2_runs[evolve_a_run_id]["pipelineSnapshotId"])

            # the pipeline was renamed at time 2, but the run's historical
            # snapshot keeps the original name
            assert t2_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }
            # the subset is no longer valid at time 2 (solid_B was renamed),
            # but the run's historical snapshot is unchanged
            assert t2_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }
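
The ALL_RUN_GROUPS_QUERY string itself is not shown in this listing. Inferred purely from the assertions above, it presumably selects something like the sketch below (field names taken from the asserted response shape; the real query in the dagster-graphql test suite may differ):

ALL_RUN_GROUPS_QUERY_SKETCH = """
{
  runGroupsOrError {
    ... on RunGroups {
      results {
        runs {
          runId
          pipelineSnapshotId
          pipeline { __typename name solidSelection }
        }
      }
    }
  }
}
"""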
Example 29
def test_partitions_for_monthly_schedule_decorators_without_timezone():
    with instance_for_test() as instance:
        with pendulum.test(
            to_timezone(create_pendulum_time(2019, 2, 27, 0, 1, 1, tz="UTC"), "US/Eastern")
        ):
            context_without_time = build_schedule_context(instance)

            start_date = datetime(year=2019, month=1, day=1)

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
            )
            def monthly_foo_schedule(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            valid_monthly_time = create_pendulum_time(
                year=2019, month=2, day=3, hour=9, minute=30, tz="UTC"
            )
            context_with_valid_time = build_schedule_context(instance, valid_monthly_time)

            execution_data = monthly_foo_schedule.get_execution_data(context_with_valid_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time": create_pendulum_time(
                    year=2019, month=1, day=1, tz="UTC"
                ).isoformat()
            }

            execution_data = monthly_foo_schedule.get_execution_data(context_without_time)
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time": create_pendulum_time(
                    year=2019, month=1, day=1, tz="UTC"
                ).isoformat()
            }

            _check_partitions(
                monthly_foo_schedule,
                1,
                pendulum.instance(start_date, tz="UTC"),
                DEFAULT_MONTHLY_FORMAT,
                relativedelta(months=1),
            )

            # test partition_months_offset=0

            @monthly_schedule(
                pipeline_name="foo_pipeline",
                execution_day_of_month=3,
                start_date=start_date,
                execution_time=time(9, 30),
                partition_months_offset=0,
            )
            def monthly_foo_schedule_same_month(monthly_time):
                return {"monthly_time": monthly_time.isoformat()}

            valid_monthly_time = create_pendulum_time(
                year=2019, month=2, day=3, hour=9, minute=30, tz="UTC"
            )
            context_with_valid_time = build_schedule_context(instance, valid_monthly_time)

            execution_data = monthly_foo_schedule_same_month.get_execution_data(
                context_with_valid_time
            )
            assert len(execution_data) == 1
            assert isinstance(execution_data[0], RunRequest)
            assert execution_data[0].run_config == {
                "monthly_time": create_pendulum_time(
                    year=2019, month=2, day=1, tz="UTC"
                ).isoformat()
            }
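
As with the weekly case, the month-offset arithmetic above can be illustrated with a hypothetical plain-Python helper (not a dagster API): an execution in month M targets the partition for month M - partition_months_offset.

from datetime import date

def expected_monthly_partition(execution_date, months_offset=1):
    # Step back by the configured number of months, rolling the year over
    # when the month index drops below January.
    year, month = execution_date.year, execution_date.month
    month -= months_offset
    while month < 1:
        month += 12
        year -= 1
    return date(year, month, 1)

# Matches the assertions above: the default offset of 1 targets the previous
# month's partition, offset 0 stays in the current month.
assert expected_monthly_partition(date(2019, 2, 3), months_offset=1) == date(2019, 1, 1)
assert expected_monthly_partition(date(2019, 2, 3), months_offset=0) == date(2019, 2, 1)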
Example 30
@pytest.fixture  # the function yields, so it is presumably a pytest fixture
# whose decorator was stripped by the snippet extractor
def instance():
    overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "MockedRunLauncher"},
    }
    with instance_for_test(overrides=overrides) as inst:
        yield inst
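
Hypothetical usage of the fixture above: pytest injects the yielded instance, whose run launcher is the MockedRunLauncher named in the overrides (the test below is illustrative, not from the original suite).

def test_instance_uses_mocked_launcher(instance):  # pylint: disable=redefined-outer-name
    # The override replaces the default launcher with the in-memory mock.
    assert type(instance.run_launcher).__name__ == "MockedRunLauncher"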