Example #1
def test_asset_batching():
    with instance_for_test() as instance:
        repo = get_asset_repo()
        foo_job = repo.get_job("foo_job")
        for _ in range(3):
            foo_job.execute_in_process(instance=instance)
        with define_out_of_process_context(__file__, "asset_repo",
                                           instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                ASSET_RUNS_QUERY,
                variables={"assetKey": {
                    "path": ["foo"]
                }})
            assert result.data
            assert "assetOrError" in result.data
            assert "assetMaterializations" in result.data["assetOrError"]
            materializations = result.data["assetOrError"][
                "assetMaterializations"]
            assert len(materializations) == 3
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
            assert counts.get("DagsterInstance.get_run_records") == 1
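The query constants these examples execute are defined alongside the tests and are not shown here. For orientation, a minimal sketch of what ASSET_RUNS_QUERY might look like, inferred from the assetKey variable and the assetOrError/assetMaterializations fields the assertions read (the AssetKeyInput type name is an assumption):

# Hypothetical reconstruction; field and type names are inferred from the
# assertions above, not copied from the dagster-graphql source.
ASSET_RUNS_QUERY = """
query AssetRunsQuery($assetKey: AssetKeyInput!) {
  assetOrError(assetKey: $assetKey) {
    ... on Asset {
      assetMaterializations {
        runId
      }
    }
  }
}
"""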
Example #2
def test_filtered_runs():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        run_id_1 = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "one"}
        ).run_id
        _run_id_2 = execute_pipeline(
            repo.get_pipeline("foo_pipeline"), instance=instance, tags={"run": "two"}
        ).run_id
        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            result = execute_dagster_graphql(
                context, FILTERED_RUN_QUERY, variables={"filter": {"runId": run_id_1}}
            )
            assert result.data
            run_ids = [run["runId"] for run in result.data["pipelineRunsOrError"]["results"]]
            assert len(run_ids) == 1
            assert run_ids[0] == run_id_1

            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": {"tags": [{"key": "run", "value": "one"}]}},
            )
            assert result.data
            run_ids = [run["runId"] for run in result.data["pipelineRunsOrError"]["results"]]
            assert len(run_ids) == 1
            assert run_ids[0] == run_id_1
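Similarly, a hedged sketch of FILTERED_RUN_QUERY, inferred from the filter variable and the pipelineRunsOrError results the test reads (the PipelineRunsFilter input name is an assumption); Examples #4 and #6 reuse the same constant:

# Hypothetical reconstruction, not the actual test-suite constant.
FILTERED_RUN_QUERY = """
query FilteredRunsQuery($filter: PipelineRunsFilter!) {
  pipelineRunsOrError(filter: $filter) {
    ... on PipelineRuns {
      results {
        runId
      }
    }
  }
}
"""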
Example #3
def test_run_groups():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")

        root_run_ids = [
            execute_pipeline(foo_pipeline, instance=instance).run_id
            for _ in range(3)
        ]

        for _ in range(5):
            for root_run_id in root_run_ids:
                execute_pipeline(
                    foo_pipeline,
                    tags={
                        PARENT_RUN_ID_TAG: root_run_id,
                        ROOT_RUN_ID_TAG: root_run_id
                    },
                    instance=instance,
                )

        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context_at_time_1:
            result = execute_dagster_graphql(context_at_time_1,
                                             ALL_RUN_GROUPS_QUERY)

            assert result.data
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 3
            for run_group in result.data["runGroupsOrError"]["results"]:
                assert run_group["rootRunId"] in root_run_ids
                assert len(run_group["runs"]) == 6
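A sketch of ALL_RUN_GROUPS_QUERY consistent with the assertions here; Example #20 also reads pipeline and pipelineSnapshotId from each run, so the real constant likely selects more fields than shown:

# Hypothetical reconstruction covering only the fields asserted in this example.
ALL_RUN_GROUPS_QUERY = """
{
  runGroupsOrError {
    results {
      rootRunId
      runs {
        runId
      }
    }
  }
}
"""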
Example #4
def test_filtered_runs_multiple_statuses():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        _ = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED).run_id
        run_id_2 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.FAILURE).run_id
        run_id_3 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.SUCCESS).run_id
        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={"filter": {
                    "statuses": ["FAILURE", "SUCCESS"]
                }},
            )
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 2
            assert run_id_2 in run_ids
            assert run_id_3 in run_ids
Example #5
def test_paginated_runs_query():
    with instance_for_test() as instance:
        repo = get_repo()
        _ = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED).run_id
        run_id_2 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.FAILURE).run_id
        run_id_3 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.SUCCESS).run_id
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                PAGINATED_RUNS_QUERY,
                variables={
                    "cursor": run_id_3,
                    "limit": 1
                },
            )
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 1
            assert run_ids[0] == run_id_2
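A sketch of PAGINATED_RUNS_QUERY. Note the paging semantics the test relies on: runs come back newest-first and the cursor run itself is excluded, so cursor=run_id_3 with limit=1 yields run_id_2:

# Hypothetical reconstruction; argument names are inferred from the variables
# dict passed above.
PAGINATED_RUNS_QUERY = """
query PaginatedRunsQuery($cursor: String, $limit: Int) {
  pipelineRunsOrError(cursor: $cursor, limit: $limit) {
    ... on PipelineRuns {
      results {
        runId
      }
    }
  }
}
"""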
Example #6
def test_filtered_runs_multiple_filters():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()

        started_run_with_tags = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.STARTED, tags={"foo": "bar"}
        )
        failed_run_with_tags = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.FAILURE, tags={"foo": "bar"}
        )
        started_run_without_tags = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED,
            tags={"baz": "boom"},
        )

        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            result = execute_dagster_graphql(
                context,
                FILTERED_RUN_QUERY,
                variables={
                    "filter": {"statuses": ["STARTED"], "tags": [{"key": "foo", "value": "bar"}]}
                },
            )
            assert result.data
            run_ids = [run["runId"] for run in result.data["pipelineRunsOrError"]["results"]]
            assert len(run_ids) == 1
            assert started_run_with_tags.run_id in run_ids
            assert failed_run_with_tags.run_id not in run_ids
            assert started_run_without_tags.run_id not in run_ids
Example #7
def test_repository_batching():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        evolving_pipeline = repo.get_pipeline("evolving_pipeline")
        foo_run_ids = [execute_pipeline(foo_pipeline, instance=instance).run_id for _ in range(3)]
        evolving_run_ids = [
            execute_pipeline(evolving_pipeline, instance=instance).run_id for _ in range(2)
        ]
        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            traced_counter.set(Counter())
            result = execute_dagster_graphql(
                context,
                REPOSITORY_RUNS_QUERY,
                variables={"repositorySelector": infer_repository_selector(context)},
            )
            assert result.data
            assert "repositoryOrError" in result.data
            assert "pipelines" in result.data["repositoryOrError"]
            pipelines = result.data["repositoryOrError"]["pipelines"]
            assert len(pipelines) == 2
            pipeline_runs = {pipeline["name"]: pipeline["runs"] for pipeline in pipelines}
            assert len(pipeline_runs["foo_pipeline"]) == 3
            assert len(pipeline_runs["evolving_pipeline"]) == 2
            assert set(foo_run_ids) == set(run["runId"] for run in pipeline_runs["foo_pipeline"])
            assert set(evolving_run_ids) == set(
                run["runId"] for run in pipeline_runs["evolving_pipeline"]
            )
            counter = traced_counter.get()
            counts = counter.counts()
            assert counts
            assert len(counts) == 1
            # We should have a single batch call to fetch run records, instead of 3 separate calls
            # to fetch run records (which is fetched to instantiate GrapheneRun)
            assert counts.get("DagsterInstance.get_run_records") == 1
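A sketch of REPOSITORY_RUNS_QUERY, which nests runs under each pipeline of the selected repository (the RepositorySelector input name is an assumption):

# Hypothetical reconstruction based on the asserted result shape.
REPOSITORY_RUNS_QUERY = """
query RepositoryRunsQuery($repositorySelector: RepositorySelector!) {
  repositoryOrError(repositorySelector: $repositorySelector) {
    ... on Repository {
      pipelines {
        name
        runs {
          runId
        }
      }
    }
  }
}
"""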
Example #8
def test_repositories_query():
    with instance_for_test() as instance:
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(context, REPOSITORIES_QUERY)
            assert not result.errors
            assert result.data
            repositories = result.data["repositoriesOrError"]["nodes"]
            assert len(repositories) == 1
            assert repositories[0]["name"] == "my_repo"
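A sketch of REPOSITORIES_QUERY (the RepositoryConnection type name is an assumption):

# Hypothetical reconstruction based on the asserted result shape.
REPOSITORIES_QUERY = """
{
  repositoriesOrError {
    ... on RepositoryConnection {
      nodes {
        name
      }
    }
  }
}
"""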
Example #9
def test_pipelines_query():
    with instance_for_test() as instance:
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                PIPELINES_QUERY,
                variables={
                    "repositoryLocationName": "test_location",
                    "repositoryName": "my_repo",
                },
            )
            assert not result.errors
            assert result.data
            pipelines = result.data["repositoryOrError"]["pipelines"]
            assert len(pipelines) == 2
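A sketch of PIPELINES_QUERY; since the test passes two flat string variables rather than a selector object, the query presumably assembles the selector inline:

# Hypothetical reconstruction; the inline selector shape is an assumption.
PIPELINES_QUERY = """
query PipelinesQuery($repositoryLocationName: String!, $repositoryName: String!) {
  repositoryOrError(
    repositorySelector: {
      repositoryLocationName: $repositoryLocationName
      repositoryName: $repositoryName
    }
  ) {
    ... on Repository {
      pipelines {
        name
      }
    }
  }
}
"""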
Example #10
def test_filtered_runs_count():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.STARTED
        )
        instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"), status=PipelineRunStatus.FAILURE
        )
        with define_out_of_process_context(__file__, "get_repo_at_time_1", instance) as context:
            result = execute_dagster_graphql(
                context, FILTERED_RUN_COUNT_QUERY, variables={"filter": {"statuses": ["FAILURE"]}}
            )
            assert result.data
            count = result.data["pipelineRunsOrError"]["count"]
            assert count == 1
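A sketch of FILTERED_RUN_COUNT_QUERY, which swaps the results list for a count:

# Hypothetical reconstruction based on the asserted result shape.
FILTERED_RUN_COUNT_QUERY = """
query FilteredRunCountQuery($filter: PipelineRunsFilter!) {
  pipelineRunsOrError(filter: $filter) {
    ... on PipelineRuns {
      count
    }
  }
}
"""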
Example #11
def test_run_group_not_found():
    with instance_for_test() as instance:
        with define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance
        ) as context_at_time_1:

            result = execute_dagster_graphql(
                context_at_time_1, RUN_GROUP_QUERY, variables={"runId": "foo"},
            )
            assert result.data
            assert result.data["runGroupOrError"]
            assert result.data["runGroupOrError"]["__typename"] == "RunGroupNotFoundError"
            assert result.data["runGroupOrError"]["runId"] == "foo"
            assert result.data["runGroupOrError"][
                "message"
            ] == "Run group of run {run_id} could not be found.".format(run_id="foo")
Example #12
def test_run_group():
    with instance_for_test() as instance:
        repo = get_repo_at_time_1()
        foo_pipeline = repo.get_pipeline("foo_pipeline")
        runs = [execute_pipeline(foo_pipeline, instance=instance)]
        root_run_id = runs[-1].run_id
        for _ in range(3):
            # https://github.com/dagster-io/dagster/issues/2433
            run = instance.create_run_for_pipeline(
                foo_pipeline,
                parent_run_id=root_run_id,
                root_run_id=root_run_id,
                tags={
                    PARENT_RUN_ID_TAG: root_run_id,
                    ROOT_RUN_ID_TAG: root_run_id
                },
            )
            execute_run(InMemoryPipeline(foo_pipeline), run, instance)
            runs.append(run)

        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context_at_time_1:
            result_one = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": root_run_id},
            )
            assert result_one.data["runGroupOrError"][
                "__typename"] == "RunGroup"

            assert len(result_one.data["runGroupOrError"]["runs"]) == 4

            result_two = execute_dagster_graphql(
                context_at_time_1,
                RUN_GROUP_QUERY,
                variables={"runId": runs[-1].run_id},
            )
            assert result_two.data["runGroupOrError"][
                "__typename"] == "RunGroup"
            assert len(result_two.data["runGroupOrError"]["runs"]) == 4

            assert (result_one.data["runGroupOrError"]["rootRunId"] ==
                    result_two.data["runGroupOrError"]["rootRunId"])
            assert (result_one.data["runGroupOrError"]["runs"] ==
                    result_two.data["runGroupOrError"]["runs"])
Example #13
def test_run_group_not_found():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)
        context_at_time_1 = define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance)

        result = execute_dagster_graphql(
            context_at_time_1,
            RUN_GROUP_QUERY,
            variables={"runId": "foo"},
        )
        assert result.data
        assert result.data["runGroupOrError"]
        assert result.data["runGroupOrError"][
            "__typename"] == "RunGroupNotFoundError"
        assert result.data["runGroupOrError"]["runId"] == "foo"
        assert result.data["runGroupOrError"][
            "message"] == "Run group of run {run_id} could not be found.".format(
                run_id="foo")
Example #14
def test_launch_preset_mutation():
    with instance_for_test() as instance:
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                LAUNCH_PIPELINE_PRESET,
                variables={
                    "repositoryLocationName": "test_location",
                    "repositoryName": "my_repo",
                    "pipelineName": "foo_pipeline",
                    "presetName": "my_preset",
                },
            )
            assert not result.errors
            assert result.data
            run = result.data["launchPipelineExecution"]["run"]
            assert run
            assert run["runId"]
Example #15
def test_runs_query():
    with instance_for_test() as instance:
        repo = get_repo()
        run_id_1 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.STARTED).run_id
        run_id_2 = instance.create_run_for_pipeline(
            repo.get_pipeline("foo_pipeline"),
            status=PipelineRunStatus.FAILURE).run_id
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(context, RUNS_QUERY)
            assert result.data
            run_ids = [
                run["runId"]
                for run in result.data["pipelineRunsOrError"]["results"]
            ]
            assert len(run_ids) == 2
            assert run_ids[0] == run_id_2
            assert run_ids[1] == run_id_1
Example #16
def test_launch_mutation_error():
    with instance_for_test() as instance:
        with define_out_of_process_context(__file__, "get_repo",
                                           instance) as context:
            result = execute_dagster_graphql(
                context,
                LAUNCH_PIPELINE,
                variables={
                    "repositoryLocationName": "test_location",
                    "repositoryName": "my_repo",
                    "pipelineName": "foo_pipeline",
                    "runConfigData": {
                        "invalid": "config"
                    },
                    "mode": "default",
                },
            )
            assert not result.errors
            assert result.data
            errors = result.data["launchPipelineExecution"]["errors"]
            assert len(errors) == 1
            message = errors[0]["message"]
            assert message
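Both launch examples presumably build on dagster's launchPipelineExecution mutation; a hedged sketch of the LAUNCH_PIPELINE variant used here, with the executionParams shape assumed from the variables passed in:

# Hypothetical reconstruction; the executionParams field names are assumptions.
LAUNCH_PIPELINE = """
mutation LaunchPipeline(
  $repositoryLocationName: String!
  $repositoryName: String!
  $pipelineName: String!
  $runConfigData: RunConfigData
  $mode: String!
) {
  launchPipelineExecution(
    executionParams: {
      selector: {
        repositoryLocationName: $repositoryLocationName
        repositoryName: $repositoryName
        pipelineName: $pipelineName
      }
      runConfigData: $runConfigData
      mode: $mode
    }
  ) {
    ... on LaunchPipelineRunSuccess {
      run {
        runId
      }
    }
    ... on PipelineConfigValidationInvalid {
      errors {
        message
      }
    }
  }
}
"""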
Example #17
def define_test_out_of_process_context(instance):
    check.inst_param(instance, "instance", DagsterInstance)
    with define_out_of_process_context(__file__, main_repo_name(),
                                       instance) as context:
        yield context
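This helper is a generator, so it is presumably decorated with @contextlib.contextmanager at its definition site; assuming that, callers would use it as:

# Assumes the @contextlib.contextmanager decorator is applied in the source
# module (hypothetical usage, not shown in the example above).
with define_test_out_of_process_context(instance) as context:
    result = execute_dagster_graphql(context, REPOSITORIES_QUERY)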
Example #18
def define_test_out_of_process_context(instance):
    check.inst_param(instance, "instance", DagsterInstance)
    return define_out_of_process_context(__file__, "test_repo", instance)
Example #19
def test_runs_over_time():
    with seven.TemporaryDirectory() as temp_dir:
        instance = DagsterInstance.local_temp(temp_dir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_out_of_process_context(
            __file__, "get_repo_at_time_1", instance)

        result = execute_dagster_graphql(context_at_time_1, ALL_RUNS_QUERY)
        assert result.data

        t1_runs = {
            run["runId"]: run
            for run in result.data["pipelineRunsOrError"]["results"]
        }

        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }

        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

        context_at_time_2 = define_out_of_process_context(
            __file__, "get_repo_at_time_2", instance)

        result = execute_dagster_graphql(context_at_time_2, ALL_RUNS_QUERY)
        assert result.data

        t2_runs = {
            run["runId"]: run
            for run in result.data["pipelineRunsOrError"]["results"]
        }

        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        # pipeline name changed at time 2; the run still reports its historical snapshot
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }
        # subset no longer valid at time 2 (solid_B was renamed); the run still reports its historical subset
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }
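A sketch of ALL_RUNS_QUERY, inferred from the pipeline sub-objects compared above (the PipelineReference interface name is an assumption):

# Hypothetical reconstruction based on the asserted result shape.
ALL_RUNS_QUERY = """
{
  pipelineRunsOrError {
    results {
      runId
      pipeline {
        __typename
        ... on PipelineReference {
          name
          solidSelection
        }
      }
    }
  }
}
"""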
Example #20
def test_run_groups_over_time():
    with instance_for_test() as instance:
        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        with define_out_of_process_context(__file__, "get_repo_at_time_1",
                                           instance) as context_at_time_1:

            result = execute_dagster_graphql(context_at_time_1,
                                             ALL_RUN_GROUPS_QUERY)
            assert result.data
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t1_runs = {
                run["runId"]: run
                for group in result.data["runGroupsOrError"]["results"]
                for run in group["runs"]
            }

            # test full_evolve_run_id
            assert t1_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            # test foo_run_id
            assert t1_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }

            # test evolve_a_run_id
            assert t1_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }
            assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # test evolve_b_run_id
            assert t1_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }

        with define_out_of_process_context(__file__, "get_repo_at_time_2",
                                           instance) as context_at_time_2:
            result = execute_dagster_graphql(context_at_time_2,
                                             ALL_RUN_GROUPS_QUERY)
            assert "runGroupsOrError" in result.data
            assert "results" in result.data["runGroupsOrError"]
            assert len(result.data["runGroupsOrError"]["results"]) == 4

            t2_runs = {
                run["runId"]: run
                for group in result.data["runGroupsOrError"]["results"]
                for run in group["runs"]
            }

            # test full_evolve_run_id
            assert t2_runs[full_evolve_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": None,
            }

            # test evolve_a_run_id
            assert t2_runs[evolve_a_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_A"],
            }
            assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

            # the full run and the solid_A subset run share the same pipeline name
            assert (t1_runs[full_evolve_run_id]["pipeline"]["name"] ==
                    t2_runs[evolve_a_run_id]["pipeline"]["name"])

            # but their pipeline snapshots differ
            assert (t1_runs[full_evolve_run_id]["pipelineSnapshotId"] !=
                    t2_runs[evolve_a_run_id]["pipelineSnapshotId"])

            # pipeline name changed at time 2; the run still reports its historical snapshot
            assert t2_runs[foo_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "foo_pipeline",
                "solidSelection": None,
            }
            # subset no longer valid at time 2 (solid_B was renamed); the run still reports its historical subset
            assert t2_runs[evolve_b_run_id]["pipeline"] == {
                "__typename": "PipelineSnapshot",
                "name": "evolving_pipeline",
                "solidSelection": ["solid_B"],
            }