Example #1
def test_engine_error():
    with pytest.raises(DagsterSubprocessError):
        with seven.TemporaryDirectory() as tempdir:
            with instance_for_test_tempdir(tempdir) as instance:
                storage = os.path.join(tempdir, "flakey_storage")
                execute_pipeline(
                    ReconstructablePipeline.for_file(REPO_FILE,
                                                     "engine_error"),
                    run_config={
                        "storage": {
                            "filesystem": {
                                "config": {
                                    "base_dir": storage
                                }
                            }
                        },
                        "execution": {
                            "celery": {
                                "config": {
                                    "config_source": {
                                        "task_always_eager": True
                                    }
                                }
                            }
                        },
                        "solids": {
                            "destroy": {
                                "config": storage
                            }
                        },
                    },
                    instance=instance,
                )
Example #2
def test_execute_on_dask_local():
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:
            result = execute_pipeline(
                reconstructable(dask_engine_pipeline),
                run_config={
                    "intermediate_storage": {
                        "filesystem": {
                            "config": {
                                "base_dir": tempdir
                            }
                        }
                    },
                    "execution": {
                        "dask": {
                            "config": {
                                "cluster": {
                                    "local": {
                                        "timeout": 30
                                    }
                                }
                            }
                        }
                    },
                },
                instance=instance,
            )
            assert result.result_for_solid("simple").output_value() == 1
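The dask_engine_pipeline referenced above is not shown. A hypothetical definition consistent with the final assertion (a solid named simple that returns 1) could be:

from dagster import pipeline, solid

@solid
def simple(_):
    return 1

@pipeline
def dask_engine_pipeline():
    simple()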
Example #3
def graphql_postgres_instance(overrides):
    with tempfile.TemporaryDirectory() as temp_dir:
        with TestPostgresInstance.docker_service_up_or_skip(
            file_relative_path(__file__, "docker-compose.yml"),
            "test-postgres-db-graphql",
        ) as pg_conn_string:
            TestPostgresInstance.clean_run_storage(pg_conn_string)
            TestPostgresInstance.clean_event_log_storage(pg_conn_string)
            TestPostgresInstance.clean_schedule_storage(pg_conn_string)
            with instance_for_test_tempdir(
                temp_dir,
                overrides=merge_dicts(
                    {
                        "run_storage": {
                            "module": "dagster_postgres.run_storage.run_storage",
                            "class": "PostgresRunStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                        "event_log_storage": {
                            "module": "dagster_postgres.event_log.event_log",
                            "class": "PostgresEventLogStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                        "schedule_storage": {
                            "module": "dagster_postgres.schedule_storage.schedule_storage",
                            "class": "PostgresScheduleStorage",
                            "config": {"postgres_url": pg_conn_string},
                        },
                    },
                    overrides if overrides else {},
                ),
            ) as instance:
                yield instance
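graphql_postgres_instance is a bare generator here; in the original suite it is presumably wrapped as a context manager or pytest fixture. A hedged consumption sketch under that assumption:

from contextlib import contextmanager

graphql_postgres_instance_cm = contextmanager(graphql_postgres_instance)

def test_postgres_backed_instance():
    with graphql_postgres_instance_cm(overrides=None) as instance:
        # the merged overrides point all three storages at Postgres
        assert instance.run_storage is not None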
Example #4
def test_dagster_telemetry_unset(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_attribute = "foo_pipeline"
                pipeline_name = "foo"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        path_to_file("test_cli_commands.py"), "-a",
                        pipeline_attribute
                    ],
                )

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(1)
                        assert message.get("repo_hash") == hash_name(
                            get_ephemeral_repository_name(pipeline_name))
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
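hash_name and EXPECTED_KEYS come from the telemetry test module and are not shown. hash_name is presumably a one-way digest so telemetry never reports raw names; a sketch of that assumption:

import hashlib

def hash_name(name):
    # hypothetical implementation: report an irreversible hash, never the raw name
    return hashlib.sha256(name.encode("utf-8")).hexdigest()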
Example #5
def test_terminate_pipeline_on_celery(rabbitmq):
    with start_celery_worker():
        with seven.TemporaryDirectory() as tempdir:
            pipeline_def = ReconstructablePipeline.for_file(
                REPO_FILE, "interrupt_pipeline")

            with instance_for_test_tempdir(tempdir) as instance:
                run_config = {
                    "intermediate_storage": {
                        "filesystem": {
                            "config": {
                                "base_dir": tempdir
                            }
                        }
                    },
                    "execution": {
                        "celery": {}
                    },
                }

                results = []
                result_types = []
                interrupt_thread = None

                try:
                    for result in execute_pipeline_iterator(
                            pipeline=pipeline_def,
                            run_config=run_config,
                            instance=instance,
                    ):
                        # Interrupt once the first step starts
                        if (result.event_type == DagsterEventType.STEP_START
                                and not interrupt_thread):
                            interrupt_thread = Thread(target=send_interrupt,
                                                      args=())
                            interrupt_thread.start()

                        results.append(result)
                        result_types.append(result.event_type)

                    assert False  # execution should have been interrupted before finishing
                except DagsterExecutionInterruptedError:
                    pass

                interrupt_thread.join()

                # At least one step succeeded (the one that was running when the interrupt fired)
                assert DagsterEventType.STEP_SUCCESS in result_types

                # At least one step was revoked (and there were no step failure events)
                revoke_steps = [
                    result for result in results
                    if result.event_type == DagsterEventType.ENGINE_EVENT
                    and "was revoked." in result.message
                ]

                assert len(revoke_steps) > 0

                # The overall pipeline failed
                assert DagsterEventType.PIPELINE_FAILURE in result_types
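send_interrupt is not defined in this excerpt; a plausible sketch of such a helper (an assumption, not the original):

import os
import signal

def send_interrupt():
    # deliver SIGINT to the current process, as if Ctrl-C had been pressed
    os.kill(os.getpid(), signal.SIGINT)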
Example #6
def test_logs_in_start_execution_predefined():
    variables = seven.json.dumps({
        "executionParams": {
            "selector": {
                "repositoryLocationName": "test_cli_location",
                "repositoryName": "test",
                "pipelineName": "math",
            },
            "runConfigData": {
                "solids": {
                    "add_one": {
                        "inputs": {
                            "num": {
                                "value": 123
                            }
                        }
                    }
                }
            },
            "mode": "default",
        }
    })

    workspace_path = file_relative_path(__file__, "./cli_test_workspace.yaml")
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "run_launcher": {
                        "module":
                        "dagster.core.launcher.sync_in_memory_run_launcher",
                        "class": "SyncInMemoryRunLauncher",
                    }
                },
        ) as instance:
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            result = runner.invoke(ui, [
                "-w", workspace_path, "-v", variables, "-p",
                "launchPipelineExecution"
            ])
            assert result.exit_code == 0
            result_data = json.loads(result.output.strip("\n").split("\n")[-1])
            assert (result_data["data"]["launchPipelineExecution"]
                    ["__typename"] == "LaunchPipelineRunSuccess")
            run_id = result_data["data"]["launchPipelineExecution"]["run"][
                "runId"]

            # allow FS events to flush
            retries = 5
            while retries != 0 and not _is_done(instance, run_id):
                time.sleep(0.333)
                retries -= 1

            # assert that the watching run storage captured the run correctly from the other process
            run = instance.get_run_by_id(run_id)

            assert run.status == PipelineRunStatus.SUCCESS
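_is_done is another unshown helper; given how the retry loop polls it, a plausible sketch is:

def _is_done(instance, run_id):
    # hypothetical: the run has landed in run storage and reached a terminal status
    return instance.has_run(run_id) and instance.get_run_by_id(run_id).is_finished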
Example #7
def test_run_priority_pipeline(rabbitmq):
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:
            low_done = threading.Event()
            hi_done = threading.Event()

            # enqueue low-priority tasks
            low_thread = threading.Thread(
                target=execute_on_thread,
                args=("low_pipeline", low_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {
                        DAGSTER_CELERY_RUN_PRIORITY_TAG: "-3"
                    }
                },
            )
            low_thread.daemon = True
            low_thread.start()

            time.sleep(1)  # sleep so that we don't hit any sqlite concurrency issues

            # enqueue hi-priority tasks
            hi_thread = threading.Thread(
                target=execute_on_thread,
                args=("hi_pipeline", hi_done, instance.get_ref()),
                kwargs={
                    "tempdir": tempdir,
                    "tags": {
                        DAGSTER_CELERY_RUN_PRIORITY_TAG: "3"
                    }
                },
            )
            hi_thread.daemon = True
            hi_thread.start()

            time.sleep(5)  # sleep to give queue time to prioritize tasks

            with start_celery_worker():
                while not low_done.is_set() or not hi_done.is_set():
                    time.sleep(1)

                low_runs = instance.get_runs(filters=PipelineRunsFilter(
                    pipeline_name="low_pipeline"))
                assert len(low_runs) == 1
                low_run = low_runs[0]
                lowstats = instance.get_run_stats(low_run.run_id)
                hi_runs = instance.get_runs(filters=PipelineRunsFilter(
                    pipeline_name="hi_pipeline"))
                assert len(hi_runs) == 1
                hi_run = hi_runs[0]
                histats = instance.get_run_stats(hi_run.run_id)

                assert lowstats.start_time < histats.start_time
                assert lowstats.end_time > histats.end_time
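execute_on_thread is not shown. A hypothetical sketch consistent with the arguments passed above: rebuild the instance from its ref inside the worker thread, run the named pipeline on Celery, and signal completion:

from dagster import DagsterInstance, execute_pipeline

def execute_on_thread(pipeline_name, done_event, instance_ref, tempdir=None, tags=None):
    try:
        instance = DagsterInstance.from_ref(instance_ref)
        execute_pipeline(
            ReconstructablePipeline.for_file(REPO_FILE, pipeline_name),
            run_config={
                "intermediate_storage": {"filesystem": {"config": {"base_dir": tempdir}}},
                "execution": {"celery": {}},
            },
            instance=instance,
            tags=tags,
        )
    finally:
        done_event.set()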
Example #8
def _default_cli_test_instance_tempdir(temp_dir, overrides=None):
    default_overrides = {
        "run_launcher": {"module": "dagster.core.test_utils", "class": "MockedRunLauncher",}
    }
    with instance_for_test_tempdir(
        temp_dir, overrides=merge_dicts(default_overrides, (overrides if overrides else {}))
    ) as instance:
        with mock.patch("dagster.core.instance.DagsterInstance.get") as _instance:
            _instance.return_value = instance
            yield instance
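The mock.patch redirects DagsterInstance.get(), so CLI code paths that construct their own instance receive the tempdir-backed test instance instead. A hedged usage sketch:

import tempfile
from contextlib import contextmanager

from dagster import DagsterInstance

def test_cli_sees_test_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with contextmanager(_default_cli_test_instance_tempdir)(temp_dir) as instance:
            # global lookups now resolve to the mocked instance
            assert DagsterInstance.get() is instance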
Example #9
def _sqlite_instance_with_default_hijack():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "backfill": {
                        "daemon_enabled": True
                    },
                },
        ) as instance:
            yield instance
Example #10
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator.queued_run_coordinator",
                    "class": "QueuedRunCoordinator",
                },
            },
        ) as instance:
            yield instance
Example #11
def dagster_cli_runner():
    with tempfile.TemporaryDirectory() as dagster_home_temp:
        with instance_for_test_tempdir(
                dagster_home_temp,
                overrides={
                    "run_launcher": {
                        "module":
                        "dagster.core.launcher.sync_in_memory_run_launcher",
                        "class": "SyncInMemoryRunLauncher",
                    }
                },
        ):
            yield CliRunner(env={"DAGSTER_HOME": dagster_home_temp})
Example #12
def _sqlite_instance_with_default_hijack():
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {"module": "dagster", "class": "DefaultRunLauncher"},
            },
        ) as instance:
            yield instance
Example #13
def test_dagit_logs(
    server_mock,
    caplog,
):
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            workspace_path = file_relative_path(__file__,
                                                "telemetry_repository.yaml")
            result = runner.invoke(
                ui,
                ["-w", workspace_path],
            )
            assert result.exit_code == 0, str(result.exception)

            expected_repo_stats = {
                hash_name("test_repository"): 1,
                hash_name("dagster_test_repository"): 4,
            }
            actions = set()
            for record in caplog.records:
                message = json.loads(record.getMessage())
                actions.add(message.get("action"))
                if message.get("action") == UPDATE_REPO_STATS:
                    assert message.get("pipeline_name_hash") == ""
                    repo_hash = message.get("repo_hash")

                    assert repo_hash in expected_repo_stats
                    expected_num_pipelines_in_repo = expected_repo_stats.get(
                        repo_hash)
                    assert message.get("num_pipelines_in_repo") == str(
                        expected_num_pipelines_in_repo)

                assert set(message.keys()) == set([
                    "action",
                    "client_time",
                    "elapsed_time",
                    "event_id",
                    "instance_id",
                    "pipeline_name_hash",
                    "num_pipelines_in_repo",
                    "repo_hash",
                    "python_version",
                    "metadata",
                    "version",
                ])

            assert actions == set([START_DAGIT_WEBSERVER, UPDATE_REPO_STATS])
            assert len(caplog.records) == 3
            assert server_mock.call_args_list == [mock.call()]
Example #14
def graphql_in_process_context():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "scheduler": {
                        "module": "dagster.utils.test",
                        "class": "FilesystemTestScheduler",
                        "config": {
                            "base_dir": temp_dir
                        },
                    }
                },
        ) as instance:
            yield define_test_in_process_context(instance)
Example #15
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {
                    "module": "dagster.core.launcher.sync_in_memory_run_launcher",
                    "class": "SyncInMemoryRunLauncher",
                },
            },
        ) as instance:
            yield instance
Example #16
def create_test_instance_and_storage():
    with tempfile.TemporaryDirectory() as tmpdir_path:
        with instance_for_test_tempdir(
                tmpdir_path,
                overrides={
                    "event_log_storage": {
                        "module":
                        "dagster_tests.core_tests.storage_tests.test_polling_event_watcher",
                        "class": "SqlitePollingEventLogStorage",
                        "config": {
                            "base_dir": tmpdir_path
                        },
                    }
                },
        ) as instance:
            yield (instance, instance._event_storage)  # pylint: disable=protected-access
Example #17
def _readonly_sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_launcher": {
                    "module": "dagster.core.test_utils",
                    "class": "ExplodingRunLauncher",
                },
            },
        ) as instance:
            yield instance
Example #18
def test_create_app_with_workspace_and_scheduler():
    with load_workspace_from_yaml_paths(
        [file_relative_path(__file__, "./workspace.yaml")]) as workspace:
        with tempfile.TemporaryDirectory() as temp_dir:
            with instance_for_test_tempdir(
                    temp_dir,
                    overrides={
                        "scheduler": {
                            "module": "dagster.utils.test",
                            "class": "FilesystemTestScheduler",
                            "config": {
                                "base_dir": temp_dir
                            },
                        }
                    },
            ) as instance:
                assert create_app_from_workspace(workspace, instance)
Example #19
def _sqlite_instance():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
            temp_dir,
            overrides={
                "scheduler": {
                    "module": "dagster.utils.test",
                    "class": "FilesystemTestScheduler",
                    "config": {"base_dir": temp_dir},
                },
                "run_coordinator": {
                    "module": "dagster.core.run_coordinator.queued_run_coordinator",
                    "class": "QueuedRunCoordinator",
                },
            },
        ) as instance:
            yield instance
Example #20
def test_invalid_instance_run(get_external_pipeline):
    with seven.TemporaryDirectory() as temp_dir:
        correct_run_storage_dir = os.path.join(temp_dir, "history", "")
        wrong_run_storage_dir = os.path.join(temp_dir, "wrong", "")

        with environ({"RUN_STORAGE_ENV": correct_run_storage_dir}):
            with instance_for_test_tempdir(
                    temp_dir,
                    overrides={
                        "run_storage": {
                            "module": "dagster.core.storage.runs",
                            "class": "SqliteRunStorage",
                            "config": {
                                "base_dir": {
                                    "env": "RUN_STORAGE_ENV"
                                }
                            },
                        }
                    },
            ) as instance:
                pipeline_run = instance.create_run_for_pipeline(
                    pipeline_def=noop_pipeline)

                # Server won't be able to load the run from run storage
                with environ({"RUN_STORAGE_ENV": wrong_run_storage_dir}):
                    with get_external_pipeline(
                            pipeline_run.pipeline_name) as external_pipeline:

                        with pytest.raises(
                                DagsterLaunchFailedError,
                                match=re.escape(
                                    "gRPC server could not load run {run_id} in order to execute it"
                                    .format(run_id=pipeline_run.run_id)),
                        ):
                            instance.launch_run(
                                run_id=pipeline_run.run_id,
                                external_pipeline=external_pipeline,
                            )

                        failed_run = instance.get_run_by_id(
                            pipeline_run.run_id)
                        assert failed_run.status == PipelineRunStatus.FAILURE
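The {"env": "RUN_STORAGE_ENV"} form tells Dagster to resolve base_dir from that environment variable when the instance config is loaded, which is why repointing the variable at wrong_run_storage_dir leaves the gRPC server unable to find the run.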
Example #21
def mysql_instance_for_test(dunder_file, container_name, overrides=None):
    with TemporaryDirectory() as temp_dir:
        with TestMySQLInstance.docker_service_up_or_skip(
                file_relative_path(dunder_file, "docker-compose.yml"),
                container_name,
        ) as mysql_conn_string:
            TestMySQLInstance.clean_run_storage(mysql_conn_string)
            TestMySQLInstance.clean_event_log_storage(mysql_conn_string)
            TestMySQLInstance.clean_schedule_storage(mysql_conn_string)
            with instance_for_test_tempdir(
                    temp_dir,
                    overrides=merge_dicts(
                        {
                            "run_storage": {
                                "module":
                                "dagster_mysql.run_storage.run_storage",
                                "class": "MySQLRunStorage",
                                "config": {
                                    "mysql_url": mysql_conn_string
                                },
                            },
                            "event_log_storage": {
                                "module": "dagster_mysql.event_log.event_log",
                                "class": "MySQLEventLogStorage",
                                "config": {
                                    "mysql_url": mysql_conn_string
                                },
                            },
                            "schedule_storage": {
                                "module":
                                "dagster_mysql.schedule_storage.schedule_storage",
                                "class": "MySQLScheduleStorage",
                                "config": {
                                    "mysql_url": mysql_conn_string
                                },
                            },
                        },
                        overrides if overrides else {},
                    ),
            ) as instance:
                yield instance
Example #22
File: test_stdout.py  Project: xjhc/dagster
def test_compute_log_base_with_spaces():
    with tempfile.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(
                temp_dir,
                overrides={
                    "compute_logs": {
                        "module": "dagster.core.storage.local_compute_log_manager",
                        "class": "LocalComputeLogManager",
                        "config": {
                            "base_dir": os.path.join(temp_dir, "base with spaces")
                        },
                    }
                },
        ) as instance:
            pipeline_name = "foo_pipeline"
            pipeline_run = create_run_for_test(instance,
                                               pipeline_name=pipeline_name)

            step_keys = ["A", "B", "C"]

            with instance.compute_log_manager.watch(pipeline_run):
                print("outer 1")  # pylint: disable=print-call
                print("outer 2")  # pylint: disable=print-call
                print("outer 3")  # pylint: disable=print-call

                for step_key in step_keys:
                    inner_step(instance, pipeline_run, step_key)

            for step_key in step_keys:
                stdout = instance.compute_log_manager.read_logs_file(
                    pipeline_run.run_id, step_key, ComputeIOType.STDOUT)
                assert normalize_file_content(
                    stdout.data) == expected_inner_output(step_key)

            full_out = instance.compute_log_manager.read_logs_file(
                pipeline_run.run_id, pipeline_name, ComputeIOType.STDOUT)

            assert normalize_file_content(full_out.data).startswith(
                expected_outer_prefix())
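inner_step, normalize_file_content, expected_inner_output, and expected_outer_prefix live in the test module and are not shown. A hypothetical sketch of inner_step, assuming watch() accepts a per-step key:

def inner_step(instance, pipeline_run, step_key):
    # hypothetical: capture this step's stdout under its own compute-log key
    with instance.compute_log_manager.watch(pipeline_run, step_key=step_key):
        print("inner {}".format(step_key))  # pylint: disable=print-call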
Example #23
def test_engine_error():
    with seven.mock.patch(
        "dagster.core.execution.context.system.SystemExecutionContextData.raise_on_error",
        return_value=True,
    ):
        with pytest.raises(DagsterSubprocessError):
            with tempfile.TemporaryDirectory() as tempdir:
                with instance_for_test_tempdir(tempdir) as instance:
                    storage = os.path.join(tempdir, "flakey_storage")
                    execute_pipeline(
                        ReconstructablePipeline.for_file(REPO_FILE, "engine_error"),
                        run_config={
                            "intermediate_storage": {
                                "filesystem": {"config": {"base_dir": storage}}
                            },
                            "execution": {
                                "celery": {"config": {"config_source": {"task_always_eager": True}}}
                            },
                            "solids": {"destroy": {"config": storage}},
                        },
                        instance=instance,
                    )
Example #24
def test_repo_stats(caplog):
    with seven.TemporaryDirectory() as temp_dir:
        with instance_for_test_tempdir(temp_dir):
            runner = CliRunner(env={"DAGSTER_HOME": temp_dir})
            with pushd(path_to_file("")):
                pipeline_name = "multi_mode_with_resources"
                result = runner.invoke(
                    pipeline_execute_command,
                    [
                        "-f",
                        file_relative_path(
                            __file__,
                            "../../general_tests/test_repository.py"),
                        "-a",
                        "dagster_test_repository",
                        "-p",
                        pipeline_name,
                        "--preset",
                        "add",
                        "--tags",
                        '{ "foo": "bar" }',
                    ],
                )

                assert result.exit_code == 0, result.stdout

                for record in caplog.records:
                    message = json.loads(record.getMessage())
                    if message.get("action") == UPDATE_REPO_STATS:
                        assert message.get("pipeline_name_hash") == hash_name(
                            pipeline_name)
                        assert message.get("num_pipelines_in_repo") == str(4)
                        assert message.get("repo_hash") == hash_name(
                            "dagster_test_repository")
                    assert set(message.keys()) == EXPECTED_KEYS

                assert len(caplog.records) == 5
                assert result.exit_code == 0
Example #25
def test_filesystem_persist_one_run(tmpdir):
    with instance_for_test_tempdir(str(tmpdir)) as instance:
        do_test_single_write_read(instance)
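Here tmpdir is pytest's built-in fixture, a py.path.local object rather than a string, hence the str() conversion before it reaches instance_for_test_tempdir.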
Example #26
def dagster_cli_runner():
    with seven.TemporaryDirectory() as dagster_home_temp:
        with instance_for_test_tempdir(dagster_home_temp):
            yield CliRunner(env={"DAGSTER_HOME": dagster_home_temp})
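Like the other generator helpers, this one is presumably registered as a pytest fixture in the original module; a sketch of that assumption:

import pytest

# hedged sketch: register the generator as a fixture under an explicit name
dagster_cli_runner_fixture = pytest.fixture(dagster_cli_runner)

def test_runner_has_dagster_home(dagster_cli_runner_fixture):
    # hypothetical check: the runner was built with DAGSTER_HOME in its env
    assert "DAGSTER_HOME" in dagster_cli_runner_fixture.env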
Example #27
def test_interrupt_multiproc():
    with seven.TemporaryDirectory() as tempdir:
        with instance_for_test_tempdir(tempdir) as instance:

            file_1 = os.path.join(tempdir, "file_1")
            file_2 = os.path.join(tempdir, "file_2")
            file_3 = os.path.join(tempdir, "file_3")
            file_4 = os.path.join(tempdir, "file_4")

            # launch a thread that waits until the file is written to launch an interrupt
            Thread(target=_send_kbd_int,
                   args=([file_1, file_2, file_3, file_4], )).start()

            results = []

            received_interrupt = False

            try:
                # launch a pipeline that writes a file and loops infinitely
                # next time the launched thread wakes up it will send a keyboard
                # interrupt
                for result in execute_pipeline_iterator(
                        reconstructable(write_files_pipeline),
                        run_config={
                            "solids": {
                                "write_1": {
                                    "config": {
                                        "tempfile": file_1
                                    }
                                },
                                "write_2": {
                                    "config": {
                                        "tempfile": file_2
                                    }
                                },
                                "write_3": {
                                    "config": {
                                        "tempfile": file_3
                                    }
                                },
                                "write_4": {
                                    "config": {
                                        "tempfile": file_4
                                    }
                                },
                            },
                            "execution": {
                                "multiprocess": {
                                    "config": {
                                        "max_concurrent": 4
                                    }
                                }
                            },
                            "intermediate_storage": {
                                "filesystem": {}
                            },
                        },
                        instance=instance,
                ):
                    results.append(result)
                assert False  # should never reach
            except DagsterExecutionInterruptedError:
                received_interrupt = True

            assert received_interrupt
            assert [result.event_type for result in results
                    ].count(DagsterEventType.STEP_FAILURE) == 4
            assert DagsterEventType.PIPELINE_FAILURE in [
                result.event_type for result in results
            ]
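_send_kbd_int is not shown; a plausible sketch, assuming it waits for all four files to appear and then interrupts the main thread:

import os
import signal
import time

def _send_kbd_int(temp_files):
    # hypothetical: poll until every solid has written its file, then send SIGINT
    while not all(os.path.exists(path) for path in temp_files):
        time.sleep(0.1)
    os.kill(os.getpid(), signal.SIGINT)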