Example #1
    def test_retry_early_terminate(self, graphql_context):
        instance = graphql_context.instance
        selector = infer_pipeline_selector(
            graphql_context, "retry_multi_input_early_terminate_pipeline"
        )
        run_id = make_new_run_id()
        execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {
                        "solids": {
                            "get_input_one": {"config": {"wait_to_terminate": True}},
                            "get_input_two": {"config": {"wait_to_terminate": True}},
                        },
                    },
                    "executionMetadata": {"runId": run_id},
                }
            },
        )
        # Wait until the first step succeeded
        while instance.get_run_stats(run_id).steps_succeeded < 1:
            sleep(0.1)
        # Terminate the current pipeline run at the second step
        graphql_context.instance.run_launcher.terminate(run_id)

        records = instance.all_logs(run_id)

        # The first step should succeed, the second should fail or not start,
        # and the following steps should not appear in records
        assert step_did_succeed_in_records(records, "return_one")
        assert not step_did_fail_in_records(records, "return_one")
        assert any(
            [
                step_did_fail_in_records(records, "get_input_one"),
                step_did_not_run_in_records(records, "get_input_one"),
            ]
        )
        assert step_did_not_run_in_records(records, "get_input_two")
        assert step_did_not_run_in_records(records, "sum_inputs")

        # Wait for the original run to finish
        poll_for_finished_run(instance, run_id, timeout=30)
        assert instance.get_run_by_id(run_id).status == PipelineRunStatus.CANCELED

        # Start retry
        new_run_id = make_new_run_id()

        execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PIPELINE_REEXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "mode": "default",
                    "selector": selector,
                    "runConfigData": {
                        "solids": {
                            "get_input_one": {"config": {"wait_to_terminate": False}},
                            "get_input_two": {"config": {"wait_to_terminate": False}},
                        },
                    },
                    "executionMetadata": {
                        "runId": new_run_id,
                        "rootRunId": run_id,
                        "parentRunId": run_id,
                        "tags": [{"key": RESUME_RETRY_TAG, "value": "true"}],
                    },
                }
            },
        )

        retry_records = instance.all_logs(new_run_id)
        # The first step should not run and the other three steps should succeed in retry
        assert step_did_not_run_in_records(retry_records, "return_one")
        assert step_did_succeed_in_records(retry_records, "get_input_one")
        assert step_did_succeed_in_records(retry_records, "get_input_two")
        assert step_did_succeed_in_records(retry_records, "sum_inputs")
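Note: the busy-wait above polls instance.get_run_stats(run_id) with no upper bound. A minimal sketch of a bounded variant, assuming only the get_run_stats API used in this example (the helper name is hypothetical):

from time import sleep, time


def wait_for_steps_succeeded(instance, run_id, min_steps=1, timeout=30):
    # Hypothetical helper: poll run stats until at least `min_steps` steps have
    # succeeded, failing after `timeout` seconds instead of spinning forever.
    deadline = time() + timeout
    while instance.get_run_stats(run_id).steps_succeeded < min_steps:
        if time() > deadline:
            raise Exception(
                "Timed out waiting for {} step(s) of run {}".format(min_steps, run_id)
            )
        sleep(0.1)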
Example #2
def test_successful_pipeline_reexecution(snapshot):
    run_id = str(uuid.uuid4())
    result_one = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {
                    'runId': run_id
                },
                'mode': 'default',
            }
        },
    )

    assert (result_one.data['startPipelineExecution']['__typename'] ==
            'StartPipelineExecutionSuccess')

    snapshot.assert_match(result_one.data)

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]''')

    store = FileSystemIntermediateStore(run_id)
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame)) == expected_value_repr)

    new_run_id = str(uuid.uuid4())

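    # Re-execute only sum_sq_solid.compute, reusing the stored output of
    # sum_solid.compute from the first run via reexecutionConfig.stepOutputHandles.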
    result_two = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': new_run_id
                },
                'mode': 'default',
            },
            'reexecutionConfig': {
                'previousRunId':
                run_id,
                'stepOutputHandles': [{
                    'stepKey': 'sum_solid.compute',
                    'outputName': 'result'
                }],
            },
        },
    )

    query_result = result_two.data['startPipelineExecution']
    assert query_result['__typename'] == 'StartPipelineExecutionSuccess'
    logs = query_result['run']['logs']['nodes']

    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    assert not get_step_output_event(logs, 'sum_solid.compute')
    assert get_step_output_event(logs, 'sum_sq_solid.compute')

    snapshot.assert_match(result_two.data)

    store = FileSystemIntermediateStore(new_run_id)
    assert not store.has_intermediate(None, 'sum_solid.inputs.num.read',
                                      'input_thunk_output')
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame)) == expected_value_repr)
Example #3
def test_production_query(graphql_context):
    result = execute_dagster_graphql(graphql_context, PRODUCTION_QUERY)

    assert not result.errors
    assert result.data
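Note: most examples on this page repeat the same two checks after calling execute_dagster_graphql. A small sketch of a reusable wrapper, assuming only the execute_dagster_graphql signature shown in these examples (the helper name is hypothetical):

def execute_and_check(context, query, variables=None):
    # Hypothetical convenience wrapper: fail fast on GraphQL errors and return
    # only the data payload, mirroring the assertions used throughout this page.
    result = execute_dagster_graphql(context, query, variables)
    assert not result.errors, result.errors
    assert result.data
    return result.data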
Example #4
    def test_asset_in_progress(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "hanging_job")
        run_id = "foo"

        with safe_tempfile_path() as path:
            result = execute_dagster_graphql(
                graphql_context,
                LAUNCH_PIPELINE_EXECUTION_MUTATION,
                variables={
                    "executionParams": {
                        "selector": selector,
                        "mode": "default",
                        "runConfigData": {
                            "resources": {
                                "hanging_asset_resource": {
                                    "config": {
                                        "file": path
                                    }
                                }
                            }
                        },
                        "executionMetadata": {
                            "runId": run_id
                        },
                    }
                },
            )

            assert not result.errors
            assert result.data

            # ensure the execution has happened
            while not os.path.exists(path):
                time.sleep(0.1)

            result = execute_dagster_graphql(
                graphql_context,
                GET_ASSET_IN_PROGRESS_RUNS,
                variables={
                    "repositorySelector":
                    infer_repository_selector(graphql_context)
                },
            )
            graphql_context.instance.run_launcher.terminate(run_id)

            assert result.data
            assert result.data["repositoryOrError"]
            assert result.data["repositoryOrError"]["inProgressRunsByStep"]

            in_progress_runs_by_step = result.data["repositoryOrError"][
                "inProgressRunsByStep"]

            assert len(in_progress_runs_by_step) == 2

            hanging_asset_status = in_progress_runs_by_step[0]
            never_runs_asset_status = in_progress_runs_by_step[1]
            # graphql endpoint returns unordered list of steps
            # swap if never_runs_asset_status is first in list
            if hanging_asset_status["stepKey"] != "hanging_asset":
                never_runs_asset_status, hanging_asset_status = (
                    hanging_asset_status,
                    never_runs_asset_status,
                )

            assert hanging_asset_status["stepKey"] == "hanging_asset"
            assert len(hanging_asset_status["inProgressRuns"]) == 1
            assert hanging_asset_status["inProgressRuns"][0]["runId"] == run_id
            assert len(hanging_asset_status["unstartedRuns"]) == 0

            assert never_runs_asset_status["stepKey"] == "never_runs_asset"
            assert len(never_runs_asset_status["inProgressRuns"]) == 0
            assert len(never_runs_asset_status["unstartedRuns"]) == 1
            assert never_runs_asset_status["unstartedRuns"][0][
                "runId"] == run_id
Example #5
    def execute(self, gql_query, variable_values=None):
        return execute_dagster_graphql(
            graphql_context,
            gql_query,
            variable_values,
        ).data
Example #6
def test_type_rendering(graphql_context):
    selector = infer_pipeline_selector(graphql_context, "more_complicated_nested_config")
    result = execute_dagster_graphql(graphql_context, TYPE_RENDER_QUERY, {"selector": selector})
    assert not result.errors
    assert result.data
Example #7
    def test_load_workspace(self, graphql_context):
        # Add an error origin
        original_origins = location_origins_from_yaml_paths(
            [file_relative_path(__file__, "multi_location.yaml")]
        )
        with mock.patch(
            "dagster.core.workspace.load_target.location_origins_from_yaml_paths",
        ) as origins_mock:
            original_origins.append(
                ManagedGrpcPythonEnvRepositoryLocationOrigin(
                    location_name="error_location",
                    loadable_target_origin=LoadableTargetOrigin(
                        python_file="made_up_file.py", executable_path=sys.executable
                    ),
                )
            )

            origins_mock.return_value = original_origins

            reload_time = time.time()

            new_context = graphql_context.reload_workspace()

            result = execute_dagster_graphql(new_context, WORKSPACE_QUERY)

            assert result
            assert result.data
            assert result.data["workspaceOrError"]
            assert result.data["workspaceOrError"]["__typename"] == "Workspace", str(result.data)

            nodes = result.data["workspaceOrError"]["locationEntries"]

            assert len(nodes) == 3

            assert all([node["__typename"] == "WorkspaceLocationEntry" for node in nodes]), str(
                nodes
            )

            success_nodes = [
                node
                for node in nodes
                if node["locationOrLoadError"]["__typename"] == "RepositoryLocation"
            ]
            assert len(success_nodes) == 2

            failures = [
                node for node in nodes if node["locationOrLoadError"]["__typename"] == "PythonError"
            ]
            assert len(failures) == 1
            failure_node = failures[0]

            assert failure_node["name"] == "error_location"
            assert failure_node["loadStatus"] == "LOADED"
            assert "No such file or directory" in failure_node["locationOrLoadError"]["message"]

            for node in nodes:
                assert node["loadStatus"] == "LOADED"
                update_time = node["updatedTimestamp"]
                assert update_time >= reload_time and update_time <= time.time()

                metadatas = node["displayMetadata"]
                metadata_dict = {metadata["key"]: metadata["value"] for metadata in metadatas}

                assert (
                    "python_file" in metadata_dict
                    or "module_name" in metadata_dict
                    or "package_name" in metadata_dict
                )
Example #8
    def test_reload_workspace(self, graphql_context):
        result = execute_dagster_graphql(graphql_context,
                                         RELOAD_WORKSPACE_QUERY)

        assert result
        assert result.data
        assert result.data["reloadWorkspace"]
        assert result.data["reloadWorkspace"][
            "__typename"] == "RepositoryLocationConnection"

        nodes = result.data["reloadWorkspace"]["nodes"]

        assert len(nodes) == 2

        assert all(
            [node["__typename"] == "RepositoryLocation" for node in nodes])

        original_origins = location_origins_from_yaml_paths(
            [file_relative_path(__file__, "multi_location.yaml")])

        # patch the origin loader so the workspace contents can be changed
        with mock.patch(
                "dagster.cli.workspace.cli_target.location_origins_from_yaml_paths",
        ) as origins_mock:

            # simulate removing an origin, reload

            origins_mock.return_value = original_origins[0:1]
            result = execute_dagster_graphql(graphql_context,
                                             RELOAD_WORKSPACE_QUERY)

            assert result
            assert result.data
            assert result.data["reloadWorkspace"]
            assert result.data["reloadWorkspace"][
                "__typename"] == "RepositoryLocationConnection"

            nodes = result.data["reloadWorkspace"]["nodes"]

            assert len(nodes) == 1

            assert all([
                node["__typename"] == "RepositoryLocation"
                and node["loadStatus"] == "LOADED" for node in nodes
            ])

            # Simulate adding an origin with an error, reload

            original_origins.append(
                ManagedGrpcPythonEnvRepositoryLocationOrigin(
                    location_name="error_location",
                    loadable_target_origin=LoadableTargetOrigin(
                        python_file="made_up_file.py",
                        executable_path=sys.executable),
                ))

            origins_mock.return_value = original_origins

            result = execute_dagster_graphql(graphql_context,
                                             RELOAD_WORKSPACE_QUERY)

            assert result
            assert result.data
            assert result.data["reloadWorkspace"]
            assert result.data["reloadWorkspace"][
                "__typename"] == "RepositoryLocationConnection"

            nodes = result.data["reloadWorkspace"]["nodes"]
            assert len(nodes) == 3

            assert (len([
                node for node in nodes
                if node["__typename"] == "RepositoryLocation"
                and node["loadStatus"] == "LOADED"
            ]) == 2)
            failures = [
                node for node in nodes
                if node["__typename"] == "RepositoryLocationLoadFailure"
            ]
            assert len(failures) == 1
            assert failures[0]["name"] == "error_location"
            assert failures[0]["loadStatus"] == "LOADED"

            # Add another origin without an error, reload

            original_origins.append(
                original_origins[0]._replace(location_name="location_copy"))
            origins_mock.return_value = original_origins

            result = execute_dagster_graphql(graphql_context,
                                             RELOAD_WORKSPACE_QUERY)

            nodes = result.data["reloadWorkspace"]["nodes"]
            assert len(nodes) == 4

            assert len([
                node for node in nodes
                if node["__typename"] == "RepositoryLocation"
            ]) == 3
            failures = [
                node for node in nodes
                if node["__typename"] == "RepositoryLocationLoadFailure"
            ]
            assert len(failures) == 1

            assert "location_copy" in [node["name"] for node in nodes]
            assert original_origins[0].location_name in [
                node["name"] for node in nodes
            ]

            # Finally, update one of the origins' location names

            original_origins[0] = original_origins[0]._replace(
                location_name="new_location_name")

            result = execute_dagster_graphql(graphql_context,
                                             RELOAD_WORKSPACE_QUERY)

            nodes = result.data["reloadWorkspace"]["nodes"]
            assert len(nodes) == 4

            assert len([
                node for node in nodes
                if node["__typename"] == "RepositoryLocation"
            ]) == 3
            failures = [
                node for node in nodes
                if node["__typename"] == "RepositoryLocationLoadFailure"
            ]
            assert len(failures) == 1

            assert "new_location_name" in [node["name"] for node in nodes]
Example #9
def test_query_all_solids(graphql_context, snapshot):
    result = execute_dagster_graphql(graphql_context, all_solids_query())
    snapshot.assert_match(result.data)
Example #10
    def test_out_of_process_reload_location(self, graphql_context):
        result = execute_dagster_graphql(graphql_context,
                                         RELOAD_REPOSITORY_LOCATION_QUERY,
                                         {"repositoryLocationName": "test"})

        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        assert result.data["reloadRepositoryLocation"][
            "__typename"] == "RepositoryLocation"
        assert result.data["reloadRepositoryLocation"]["name"] == "test"
        repositories = result.data["reloadRepositoryLocation"]["repositories"]
        assert len(repositories) == 1
        assert repositories[0]["name"] == "test_repo"

        assert result.data["reloadRepositoryLocation"][
            "isReloadSupported"] is True

        with mock.patch(
                # note that it is where the function is *used* that needs to be mocked,
                # not where it is defined.
                # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
                "dagster.core.host_representation.repository_location.sync_list_repositories_grpc"
        ) as cli_command_mock:

            with mock.patch(
                    # note that it is where the function is *used* that needs to be mocked,
                    # not where it is defined.
                    # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
                    "dagster.core.host_representation.repository_location.sync_get_streaming_external_repositories_data_grpc"
            ) as external_repository_mock:

                @repository
                def new_repo():
                    return []

                new_repo_data = external_repository_data_from_def(new_repo)

                external_repository_mock.return_value = {
                    "new_repo": new_repo_data
                }

                cli_command_mock.return_value = ListRepositoriesResponse(
                    repository_symbols=[],
                    executable_path=sys.executable,
                    repository_code_pointer_dict={
                        "new_repo":
                        CodePointer.from_python_file(__file__, "new_repo",
                                                     None)
                    },
                )

                result = execute_dagster_graphql(
                    graphql_context,
                    RELOAD_REPOSITORY_LOCATION_QUERY,
                    {"repositoryLocationName": "test"},
                )

                assert cli_command_mock.call_count == 1
                assert external_repository_mock.call_count == 1

                repositories = result.data["reloadRepositoryLocation"][
                    "repositories"]
                assert len(repositories) == 1
                assert repositories[0]["name"] == "new_repo"
Example #11
    def test_reload_failure(self, graphql_context):
        result = execute_dagster_graphql(graphql_context,
                                         RELOAD_REPOSITORY_LOCATION_QUERY,
                                         {"repositoryLocationName": "test"})

        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        assert result.data["reloadRepositoryLocation"][
            "__typename"] == "RepositoryLocation"
        assert result.data["reloadRepositoryLocation"]["name"] == "test"
        repositories = result.data["reloadRepositoryLocation"]["repositories"]
        assert len(repositories) == 1
        assert repositories[0]["name"] == "test_repo"
        assert result.data["reloadRepositoryLocation"][
            "isReloadSupported"] is True

        with mock.patch(
                # note that it is where the function is *used* that needs to be mocked,
                # not where it is defined.
                # see https://docs.python.org/3/library/unittest.mock.html#where-to-patch
                "dagster.core.host_representation.repository_location.sync_list_repositories_grpc"
        ) as cli_command_mock:
            cli_command_mock.side_effect = Exception(
                "Mocked repository load failure")

            result = execute_dagster_graphql(
                graphql_context,
                RELOAD_REPOSITORY_LOCATION_QUERY,
                {"repositoryLocationName": "test"},
            )

            assert result
            assert result.data
            assert result.data["reloadRepositoryLocation"]
            assert (result.data["reloadRepositoryLocation"]["__typename"] ==
                    "RepositoryLocationLoadFailure")
            assert result.data["reloadRepositoryLocation"]["name"] == "test"
            assert (
                "Mocked repository load failure"
                in result.data["reloadRepositoryLocation"]["error"]["message"])

            # Verify failure is idempotent
            result = execute_dagster_graphql(
                graphql_context,
                RELOAD_REPOSITORY_LOCATION_QUERY,
                {"repositoryLocationName": "test"},
            )

            assert result
            assert result.data
            assert result.data["reloadRepositoryLocation"]
            assert (result.data["reloadRepositoryLocation"]["__typename"] ==
                    "RepositoryLocationLoadFailure")
            assert result.data["reloadRepositoryLocation"]["name"] == "test"
            assert (
                "Mocked repository load failure"
                in result.data["reloadRepositoryLocation"]["error"]["message"])

        # can be reloaded again successfully
        result = execute_dagster_graphql(
            graphql_context,
            RELOAD_REPOSITORY_LOCATION_QUERY,
            {"repositoryLocationName": "test"},
        )

        assert result
        assert result.data
        assert result.data["reloadRepositoryLocation"]
        assert result.data["reloadRepositoryLocation"][
            "__typename"] == "RepositoryLocation"
        assert result.data["reloadRepositoryLocation"]["name"] == "test"
        assert result.data["reloadRepositoryLocation"][
            "loadStatus"] == "LOADED"
        repositories = result.data["reloadRepositoryLocation"]["repositories"]
        assert len(repositories) == 1
        assert repositories[0]["name"] == "test_repo"
        assert result.data["reloadRepositoryLocation"][
            "isReloadSupported"] is True
Example #12
    def test_launch_from_failure(self, graphql_context):
        repository_selector = infer_repository_selector(graphql_context)
        partition_set_selector = {
            "repositorySelector": repository_selector,
            "partitionSetName": "chained_integer_partition",
        }

        # trigger failure in the conditionally_fail solid

        output_file = os.path.join(
            get_system_temp_directory(),
            "chained_failure_pipeline_conditionally_fail")
        try:
            with open(output_file, "w"):
                result = execute_dagster_graphql_and_finish_runs(
                    graphql_context,
                    LAUNCH_PARTITION_BACKFILL_MUTATION,
                    variables={
                        "backfillParams": {
                            "selector": partition_set_selector,
                            "partitionNames": ["2", "3"],
                        }
                    },
                )
        finally:
            os.remove(output_file)

        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"

        # re-execute from failure (without the failure file)
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": partition_set_selector,
                    "partitionNames": ["2", "3"],
                    "fromFailure": True,
                }
            },
        )

        assert not result.errors
        assert result.data
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "PartitionBackfillSuccess"
        backfill_id = result.data["launchPartitionBackfill"]["backfillId"]

        result = execute_dagster_graphql(graphql_context,
                                         PARTITION_PROGRESS_QUERY,
                                         variables={"backfillId": backfill_id})
        assert not result.errors
        assert result.data
        assert result.data["partitionBackfillOrError"][
            "__typename"] == "PartitionBackfill"
        assert result.data["partitionBackfillOrError"]["status"] == "REQUESTED"
        assert result.data["partitionBackfillOrError"]["isPersisted"]
        assert result.data["partitionBackfillOrError"]["numRequested"] == 0
        assert result.data["partitionBackfillOrError"]["numTotal"] == 2
        assert result.data["partitionBackfillOrError"]["fromFailure"]
Example #13
def test_execute_hammer_through_dagit():
    recon_repo = ReconstructableRepository.for_file(
        file_relative_path(
            __file__, '../../../dagster-test/dagster_test/toys/hammer.py'),
        'hammer_pipeline',
    )
    instance = DagsterInstance.local_temp()

    context = DagsterGraphQLContext(
        locations=[InProcessRepositoryLocation(recon_repo)],
        instance=instance,
    )

    selector = infer_pipeline_selector(context, 'hammer_pipeline')

    executor = SyncExecutor()

    variables = {
        'executionParams': {
            'runConfigData': {
                'storage': {
                    'filesystem': {}
                },
                'execution': {
                    'dask': {
                        'config': {
                            'cluster': {
                                'local': {}
                            }
                        }
                    }
                },
            },
            'selector': selector,
            'mode': 'default',
        }
    }

    start_pipeline_result = graphql(
        request_string=LAUNCH_PIPELINE_EXECUTION_MUTATION,
        schema=create_schema(),
        context=context,
        variables=variables,
        executor=executor,
    )

    if start_pipeline_result.errors:
        raise Exception('{}'.format(start_pipeline_result.errors))

    run_id = start_pipeline_result.data['launchPipelineExecution']['run'][
        'runId']

    context.drain_outstanding_executions()

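    # Subscribe to the run's log stream and collect the emitted event types.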
    subscription = execute_dagster_graphql(context,
                                           SUBSCRIPTION_QUERY,
                                           variables={'runId': run_id})

    subscribe_results = []
    subscription.subscribe(subscribe_results.append)

    messages = [
        x['__typename']
        for x in subscribe_results[0].data['pipelineRunLogs']['messages']
    ]

    assert 'PipelineStartEvent' in messages
    assert 'PipelineSuccessEvent' in messages
Example #14
def test_basic_execute_plan_with_materialization(graphql_context):
    selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
    with get_temp_file_name() as out_csv_path:

        run_config = {
            'solids': {
                'sum_solid': {
                    'inputs': {
                        'num': file_relative_path(__file__, '../data/num.csv')
                    },
                    'outputs': [{
                        'result': out_csv_path
                    }],
                }
            }
        }

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': selector,
                'runConfigData': run_config,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlanOrError']['steps']

        assert set([step_data['key'] for step_data in steps_data]) == set([
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ])

        instance = graphql_context.instance

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, run_config=run_config)

        result = execute_dagster_graphql(
            graphql_context,
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': selector,
                    'runConfigData': run_config,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {
                        'runId': pipeline_run.run_id
                    },
                    'mode': 'default',
                },
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure the materialization event was emitted
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][
            0]
        assert metadata_entry['path'] == out_csv_path
Example #15
def test_basic_execute_plan_with_materialization():
    with get_temp_file_name() as out_csv_path:

        environment_dict = {
            'solids': {
                'sum_solid': {
                    'inputs': {
                        'num': file_relative_path(__file__, '../data/num.csv')
                    },
                    'outputs': [{
                        'result': out_csv_path
                    }],
                }
            }
        }

        instance = DagsterInstance.ephemeral()

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTION_PLAN_QUERY,
            variables={
                'pipeline': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData': environment_dict,
                'mode': 'default',
            },
        )

        steps_data = result.data['executionPlan']['steps']

        assert [step_data['key'] for step_data in steps_data] == [
            'sum_solid.compute',
            'sum_sq_solid.compute',
        ]

        pipeline_run = instance.create_run_for_pipeline(
            pipeline_def=csv_hello_world, environment_dict=environment_dict)

        result = execute_dagster_graphql(
            define_test_context(instance=instance),
            EXECUTE_PLAN_QUERY,
            variables={
                'executionParams': {
                    'selector': {
                        'name': 'csv_hello_world'
                    },
                    'environmentConfigData': environment_dict,
                    'stepKeys': ['sum_solid.compute', 'sum_sq_solid.compute'],
                    'executionMetadata': {
                        'runId': pipeline_run.run_id
                    },
                    'mode': 'default',
                }
            },
        )

        assert result.data

        step_mat_event = None

        for message in result.data['executePlan']['stepEvents']:
            if message['__typename'] == 'StepMaterializationEvent':
                # ensure only one event
                assert step_mat_event is None
                step_mat_event = message

        # ensure the materialization event was emitted
        assert step_mat_event
        assert step_mat_event['materialization']
        assert len(step_mat_event['materialization']['metadataEntries']) == 1
        metadata_entry = step_mat_event['materialization']['metadataEntries'][
            0]
        assert metadata_entry['path'] == out_csv_path
Example #16
def test_query_get_solid_exists(graphql_context):
    result = execute_dagster_graphql(graphql_context, get_solid_query_exists())

    assert not result.errors
    assert result.data['usedSolid']['definition']['name'] == 'sum_solid'
Example #17
    def test_run_not_found(self, graphql_context):
        result = execute_dagster_graphql(
            graphql_context, RUN_CANCELLATION_QUERY, variables={"runId": "nope"}
        )
        assert (
            result.data["terminatePipelineExecution"]["__typename"]
            == "PipelineRunNotFoundError"
        )
Example #18
    def test_run_not_found(self, graphql_context):
        result = execute_dagster_graphql(
            graphql_context, RUN_CANCELLATION_QUERY, variables={'runId': 'nope'}
        )
        assert (
            result.data['terminatePipelineExecution']['__typename']
            == 'PipelineRunNotFoundError'
        )
Example #19
    def test_basic_start_pipeline_execution_with_preset_failure(self, graphql_context):
        subset_selector = infer_pipeline_selector(
            graphql_context, "csv_hello_world", ["sum_sq_solid"]
        )

        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={"executionParams": {"selector": subset_selector, "preset": "test_inline",}},
        )

        # while illegally defining selector.solid_selection
        assert not result.errors
        assert result.data
        assert (
            result.data["launchPipelineExecution"]["__typename"]
            == "ConflictingExecutionParamsError"
        )
        assert (
            result.data["launchPipelineExecution"]["message"]
            == "Invalid ExecutionParams. Cannot define selector.solid_selection when using a preset."
        )

        # while illegally defining runConfigData
        selector = infer_pipeline_selector(graphql_context, "csv_hello_world")
        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "preset": "test_inline",
                    "runConfigData": csv_hello_world_solids_config(),
                }
            },
        )

        assert not result.errors
        assert result.data
        assert (
            result.data["launchPipelineExecution"]["__typename"]
            == "ConflictingExecutionParamsError"
        )
        assert (
            result.data["launchPipelineExecution"]["message"]
            == "Invalid ExecutionParams. Cannot define runConfigData when using a preset."
        )

        # while illegally defining mode
        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                "executionParams": {
                    "selector": selector,
                    "preset": "test_inline",
                    "mode": "default",
                }
            },
        )

        assert not result.errors
        assert result.data
        assert (
            result.data["launchPipelineExecution"]["__typename"]
            == "ConflictingExecutionParamsError"
        )
        assert (
            result.data["launchPipelineExecution"]["message"]
            == "Invalid ExecutionParams. Cannot define mode when using a preset."
        )
Example #20
    def test_get_runs_over_graphql(self, graphql_context):
        # This import needs to be local: importing it at module scope breaks
        # other code in this file, which reads its own source to load a repo
        from .utils import sync_execute_get_run_log_data

        selector = infer_pipeline_selector(graphql_context,
                                           "multi_mode_with_resources")

        payload_one = sync_execute_get_run_log_data(
            context=graphql_context,
            variables={
                "executionParams": {
                    "selector": selector,
                    "mode": "add_mode",
                    "runConfigData": {
                        "resources": {
                            "op": {
                                "config": 2
                            }
                        }
                    },
                    "executionMetadata": {
                        "tags": [{
                            "key": "fruit",
                            "value": "apple"
                        }]
                    },
                }
            },
        )
        run_id_one = payload_one["run"]["runId"]

        read_context = graphql_context

        result = execute_dagster_graphql(read_context,
                                         RUNS_QUERY,
                                         variables={"selector": selector})

        runs = result.data["pipelineOrError"]["runs"]
        assert len(runs) == 1

        tags = runs[0]["tags"]
        assert len(tags) == 1

        assert tags[0]["key"] == "fruit"
        assert tags[0]["value"] == "apple"

        payload_two = sync_execute_get_run_log_data(
            context=graphql_context,
            variables={
                "executionParams": {
                    "selector": selector,
                    "mode": "add_mode",
                    "runConfigData": {
                        "resources": {
                            "op": {
                                "config": 3
                            }
                        }
                    },
                    "executionMetadata": {
                        "tags": [{
                            "key": "veggie",
                            "value": "carrot"
                        }]
                    },
                }
            },
        )

        run_id_two = payload_two["run"]["runId"]

        result = execute_dagster_graphql(read_context,
                                         RUNS_QUERY,
                                         variables={"selector": selector})

        runs = result.data["pipelineOrError"]["runs"]
        assert len(runs) == 2

        all_tags_result = execute_dagster_graphql(read_context, ALL_TAGS_QUERY)
        tags = all_tags_result.data["pipelineRunTags"]

        assert len(tags) == 2
        tags_dict = {item["key"]: item["values"] for item in tags}

        assert tags_dict == {
            "fruit": ["apple"],
            "veggie": ["carrot"],
        }

        # delete the second run
        result = execute_dagster_graphql(read_context,
                                         DELETE_RUN_MUTATION,
                                         variables={"runId": run_id_two})
        assert result.data["deletePipelineRun"][
            "__typename"] == "DeletePipelineRunSuccess"
        assert result.data["deletePipelineRun"]["runId"] == run_id_two

        # query it back out
        result = execute_dagster_graphql(read_context,
                                         RUNS_QUERY,
                                         variables={"selector": selector})

        # first is the same
        run_one_data = _get_runs_data(result, run_id_one)
        assert run_one_data

        # second is gone
        run_two_data = _get_runs_data(result, run_id_two)
        assert run_two_data is None

        # try to delete the second run again
        execute_dagster_graphql(read_context,
                                DELETE_RUN_MUTATION,
                                variables={"runId": run_id_two})

        result = execute_dagster_graphql(read_context,
                                         DELETE_RUN_MUTATION,
                                         variables={"runId": run_id_two})
        assert result.data["deletePipelineRun"][
            "__typename"] == "PipelineRunNotFoundError"
Example #21
    def test_asset_partitions_in_pipeline(self, graphql_context):
        selector = infer_pipeline_selector(graphql_context, "two_assets_job")
        result = execute_dagster_graphql(
            graphql_context,
            GET_ASSET_PARTITIONS_FROM_KEYS,
            variables={"pipelineSelector": selector},
        )

        assert result.data
        assert result.data["pipelineOrError"]
        assert result.data["pipelineOrError"]["assetNodes"]
        assert len(result.data["pipelineOrError"]["assetNodes"]) == 2
        asset_node = result.data["pipelineOrError"]["assetNodes"][0]
        assert asset_node["partitionKeys"] == []

        selector = infer_pipeline_selector(graphql_context,
                                           "static_partitioned_assets_job")
        result = execute_dagster_graphql(
            graphql_context,
            GET_ASSET_PARTITIONS_FROM_KEYS,
            variables={"pipelineSelector": selector},
        )

        assert result.data
        assert result.data["pipelineOrError"]
        assert result.data["pipelineOrError"]["assetNodes"]
        assert len(result.data["pipelineOrError"]["assetNodes"]) == 2
        asset_node = result.data["pipelineOrError"]["assetNodes"][0]
        assert asset_node["partitionKeys"] and asset_node["partitionKeys"] == [
            "a",
            "b",
            "c",
            "d",
        ]
        asset_node = result.data["pipelineOrError"]["assetNodes"][1]
        assert asset_node["partitionKeys"] and asset_node["partitionKeys"] == [
            "a",
            "b",
            "c",
            "d",
        ]

        selector = infer_pipeline_selector(graphql_context,
                                           "time_partitioned_assets_job")
        result = execute_dagster_graphql(
            graphql_context,
            GET_ASSET_PARTITIONS_FROM_KEYS,
            variables={"pipelineSelector": selector},
        )

        assert result.data
        assert result.data["pipelineOrError"]
        assert result.data["pipelineOrError"]["assetNodes"]
        assert len(result.data["pipelineOrError"]["assetNodes"]) == 2
        asset_node = result.data["pipelineOrError"]["assetNodes"][0]

        # The test partition set starts at "2021-05-05-01:00"; since partitions
        # are hourly, there should be more than 100 partition keys by now.
        assert asset_node["partitionKeys"] and len(
            asset_node["partitionKeys"]) > 100
        assert asset_node["partitionKeys"][0] == "2021-05-05-01:00"
        assert asset_node["partitionKeys"][1] == "2021-05-05-02:00"
Example #22
def test_run_groups_over_time():
    with seven.TemporaryDirectory() as tempdir:
        instance = DagsterInstance.local_temp(tempdir=tempdir)

        repo_1 = get_repo_at_time_1()

        full_evolve_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline"), instance=instance).run_id
        foo_run_id = execute_pipeline(repo_1.get_pipeline("foo_pipeline"),
                                      instance=instance).run_id
        evolve_a_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_A"}),
            instance=instance,
        ).run_id
        evolve_b_run_id = execute_pipeline(
            repo_1.get_pipeline("evolving_pipeline").get_pipeline_subset_def(
                {"solid_B"}),
            instance=instance,
        ).run_id

        context_at_time_1 = define_context_for_file(__file__,
                                                    "get_repo_at_time_1",
                                                    instance)

        result = execute_dagster_graphql(context_at_time_1,
                                         ALL_RUN_GROUPS_QUERY)
        assert result.data
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t1_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t1_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test foo_run_id
        assert t1_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t1_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t1_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # test evolve_b_run_id
        assert t1_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }

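        # Build a second context against the evolved repo and re-query the run groups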
        context_at_time_2 = define_context_for_file(__file__,
                                                    "get_repo_at_time_2",
                                                    instance)

        result = execute_dagster_graphql(context_at_time_2,
                                         ALL_RUN_GROUPS_QUERY)
        assert "runGroupsOrError" in result.data
        assert "results" in result.data["runGroupsOrError"]
        assert len(result.data["runGroupsOrError"]["results"]) == 4

        t2_runs = {
            run["runId"]: run
            for group in result.data["runGroupsOrError"]["results"]
            for run in group["runs"]
        }

        # test full_evolve_run_id
        assert t2_runs[full_evolve_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": None,
        }

        # test evolve_a_run_id
        assert t2_runs[evolve_a_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_A"],
        }
        assert t2_runs[evolve_a_run_id]["pipelineSnapshotId"]

        # names same
        assert (t1_runs[full_evolve_run_id]["pipeline"]["name"] ==
                t2_runs[evolve_a_run_id]["pipeline"]["name"])

        # snapshots differ
        assert (t1_runs[full_evolve_run_id]["pipelineSnapshotId"] !=
                t2_runs[evolve_a_run_id]["pipelineSnapshotId"])

        # pipeline name changed
        assert t2_runs[foo_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "foo_pipeline",
            "solidSelection": None,
        }
        # subset no longer valid - b renamed
        assert t2_runs[evolve_b_run_id]["pipeline"] == {
            "__typename": "PipelineSnapshot",
            "name": "evolving_pipeline",
            "solidSelection": ["solid_B"],
        }
Example #23
def test_get_all_partition_sets(graphql_context, snapshot):
    result = execute_dagster_graphql(graphql_context, GET_PARTITION_SETS_QUERY)

    assert result.data
    snapshot.assert_match(result.data)
Example #24
def test_successful_pipeline_reexecution(snapshot):
    def sanitize_result_data(result_data):
        if isinstance(result_data, dict):
            if 'path' in result_data:
                result_data['path'] = 'DUMMY_PATH'
            result_data = {
                k: sanitize_result_data(v)
                for k, v in result_data.items()
            }
        elif isinstance(result_data, list):
            for i in range(len(result_data)):
                result_data[i] = sanitize_result_data(result_data[i])
        else:
            pass
        return result_data

    run_id = make_new_run_id()
    instance = DagsterInstance.ephemeral()
    result_one = execute_dagster_graphql(
        define_test_context(instance=instance),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {
                    'runId': run_id
                },
                'mode': 'default',
            }
        },
    )

    assert (result_one.data['startPipelineExecution']['__typename'] ==
            'StartPipelineExecutionSuccess')

    snapshot.assert_match(sanitize_result_data(result_one.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]''')

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        run_id)
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame).obj) == expected_value_repr)

    new_run_id = make_new_run_id()

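    # Re-execute only sum_sq_solid.compute against the same instance, pointing
    # at the first run via retryRunId so its intermediates can be reused.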
    result_two = execute_dagster_graphql(
        define_test_context(instance=instance),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': new_run_id
                },
                'mode': 'default',
                'retryRunId': run_id,
            }
        },
    )

    query_result = result_two.data['startPipelineExecution']
    assert query_result['__typename'] == 'StartPipelineExecutionSuccess'
    logs = query_result['run']['logs']['nodes']

    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    assert not get_step_output_event(logs, 'sum_solid.compute')
    assert get_step_output_event(logs, 'sum_sq_solid.compute')

    snapshot.assert_match(sanitize_result_data(result_two.data))

    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        new_run_id)
    assert not store.has_intermediate(None, 'sum_solid.inputs.num.read',
                                      'input_thunk_output')
    assert store.has_intermediate(None, 'sum_solid.compute')
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame).obj) == expected_value_repr)
Example #25
    def test_get_partition_status(self, graphql_context):
        repository_selector = infer_repository_selector(graphql_context)
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": {
                        "repositorySelector": repository_selector,
                        "partitionSetName": "integer_partition",
                    },
                    "partitionNames": ["2", "3"],
                    "forceSynchronousSubmission": True,
                }
            },
        )
        assert not result.errors
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "LaunchBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 2

        result = execute_dagster_graphql(
            graphql_context,
            query=GET_PARTITION_SET_STATUS_QUERY,
            variables={
                "partitionSetName": "integer_partition",
                "repositorySelector": repository_selector,
            },
        )
        assert not result.errors
        assert result.data
        partitionStatuses = result.data["partitionSetOrError"][
            "partitionStatusesOrError"]["results"]
        assert len(partitionStatuses) == 10
        for partitionStatus in partitionStatuses:
            if partitionStatus["partitionName"] in ("2", "3"):
                assert partitionStatus["runStatus"] == "SUCCESS"
            else:
                assert partitionStatus["runStatus"] is None

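        # Backfill all ten partitions and confirm every partition reports SUCCESS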
        result = execute_dagster_graphql_and_finish_runs(
            graphql_context,
            LAUNCH_PARTITION_BACKFILL_MUTATION,
            variables={
                "backfillParams": {
                    "selector": {
                        "repositorySelector": repository_selector,
                        "partitionSetName": "integer_partition",
                    },
                    "partitionNames": [str(num) for num in range(10)],
                    "forceSynchronousSubmission": True,
                }
            },
        )
        assert not result.errors
        assert result.data["launchPartitionBackfill"][
            "__typename"] == "LaunchBackfillSuccess"
        assert len(
            result.data["launchPartitionBackfill"]["launchedRunIds"]) == 10

        result = execute_dagster_graphql(
            graphql_context,
            query=GET_PARTITION_SET_STATUS_QUERY,
            variables={
                "partitionSetName": "integer_partition",
                "repositorySelector": repository_selector,
            },
        )
        assert not result.errors
        assert result.data
        partitionStatuses = result.data["partitionSetOrError"][
            "partitionStatusesOrError"]["results"]
        assert len(partitionStatuses) == 10
        for partitionStatus in partitionStatuses:
            assert partitionStatus["runStatus"] == "SUCCESS"
Example #26
    def test_basic_start_pipeline_execution_with_preset_failure(self, graphql_context):
        subset_selector = infer_pipeline_selector(
            graphql_context, 'csv_hello_world', ['sum_sq_solid']
        )

        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={'executionParams': {'selector': subset_selector, 'preset': 'test_inline',}},
        )

        # while illegally defining selector.solid_selection
        assert not result.errors
        assert result.data
        assert (
            result.data['launchPipelineExecution']['__typename']
            == 'ConflictingExecutionParamsError'
        )
        assert (
            result.data['launchPipelineExecution']['message']
            == 'Invalid ExecutionParams. Cannot define selector.solid_selection when using a preset.'
        )

        # while illegally defining runConfigData
        selector = infer_pipeline_selector(graphql_context, 'csv_hello_world')
        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': selector,
                    'preset': 'test_inline',
                    'runConfigData': csv_hello_world_solids_config(),
                }
            },
        )

        assert not result.errors
        assert result.data
        assert (
            result.data['launchPipelineExecution']['__typename']
            == 'ConflictingExecutionParamsError'
        )
        assert (
            result.data['launchPipelineExecution']['message']
            == 'Invalid ExecutionParams. Cannot define runConfigData when using a preset.'
        )

        # while illegally defining mode
        result = execute_dagster_graphql(
            graphql_context,
            LAUNCH_PIPELINE_EXECUTION_MUTATION,
            variables={
                'executionParams': {
                    'selector': selector,
                    'preset': 'test_inline',
                    'mode': 'default',
                }
            },
        )

        assert not result.errors
        assert result.data
        assert (
            result.data['launchPipelineExecution']['__typename']
            == 'ConflictingExecutionParamsError'
        )
        assert (
            result.data['launchPipelineExecution']['message']
            == 'Invalid ExecutionParams. Cannot define mode when using a preset.'
        )
Beispiel #27
0
def test_pipeline_reexecution_info_query(snapshot):
    context = define_context()

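    # Run the full pipeline once so the partial re-execution below has a previous run to reference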
    run_id = str(uuid.uuid4())
    execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'executionMetadata': {
                    'runId': run_id
                },
                'mode': 'default',
            }
        },
    )

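    # Re-execute only sum_sq_solid.compute, reading the sum_solid.compute output recorded by the previous run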
    new_run_id = str(uuid.uuid4())
    execute_dagster_graphql(
        context,
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': new_run_id
                },
                'mode': 'default',
            },
            'reexecutionConfig': {
                'previousRunId': run_id,
                'stepOutputHandles': [{
                    'stepKey': 'sum_solid.compute',
                    'outputName': 'result'
                }],
            },
        },
    )

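    # The original run executed the full plan (stepKeysToExecute is None); the re-execution recorded explicit step keys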
    result_one = execute_dagster_graphql(context,
                                         PIPELINE_REEXECUTION_INFO_QUERY,
                                         variables={'runId': run_id})
    query_result_one = result_one.data['pipelineRunOrError']
    assert query_result_one['__typename'] == 'PipelineRun'
    assert query_result_one['stepKeysToExecute'] is None

    result_two = execute_dagster_graphql(context,
                                         PIPELINE_REEXECUTION_INFO_QUERY,
                                         variables={'runId': new_run_id})
    query_result_two = result_two.data['pipelineRunOrError']
    assert query_result_two['__typename'] == 'PipelineRun'
    stepKeysToExecute = query_result_two['stepKeysToExecute']
    assert stepKeysToExecute is not None
    snapshot.assert_match(stepKeysToExecute)
Beispiel #28
0
def test_successful_two_part_execute_plan(snapshot):
    instance = DagsterInstance.local_temp()
    environment_dict = csv_hello_world_solids_config_fs_storage()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=csv_hello_world, environment_dict=environment_dict)
    result_one = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData': environment_dict,
                'stepKeys': ['sum_solid.compute'],
                'executionMetadata': {
                    'runId': pipeline_run.run_id
                },
                'mode': 'default',
            }
        },
    )

    assert result_one.data['executePlan']['__typename'] == 'ExecutePlanSuccess'

    snapshot.assert_match(clean_log_messages(result_one.data))

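    # Execute the second part of the plan against the same run, picking up the intermediate written by sum_solid.compute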
    result_two = execute_dagster_graphql(
        define_test_context(instance=instance),
        EXECUTE_PLAN_QUERY,
        variables={
            'executionParams': {
                'selector': {
                    'name': 'csv_hello_world'
                },
                'environmentConfigData':
                csv_hello_world_solids_config_fs_storage(),
                'stepKeys': ['sum_sq_solid.compute'],
                'executionMetadata': {
                    'runId': pipeline_run.run_id
                },
                'mode': 'default',
            }
        },
    )

    query_result = result_two.data['executePlan']
    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'csv_hello_world'
    assert query_result['hasFailures'] is False
    step_events = query_result['stepEvents']
    assert [se['__typename'] for se in step_events] == [
        'ExecutionStepStartEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepInputEvent',
        'ExecutionStepOutputEvent',
        'ObjectStoreOperationEvent',
        'ExecutionStepSuccessEvent',
    ]
    assert step_events[0]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[1]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[2]['step']['key'] == 'sum_sq_solid.compute'
    assert step_events[3]['outputName'] == 'result'
    assert step_events[4]['step']['key'] == 'sum_sq_solid.compute'

    snapshot.assert_match(clean_log_messages(result_two.data))

    expected_value_repr = (
        '''[OrderedDict([('num1', '1'), ('num2', '2'), ('sum', 3), '''
        '''('sum_sq', 9)]), OrderedDict([('num1', '3'), ('num2', '4'), ('sum', 7), '''
        '''('sum_sq', 49)])]''')

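    # The sum_sq_solid.compute output should have been persisted to the filesystem intermediate store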
    store = build_fs_intermediate_store(instance.intermediates_directory,
                                        pipeline_run.run_id)
    assert store.has_intermediate(None, 'sum_sq_solid.compute')
    assert (str(
        store.get_intermediate(None, 'sum_sq_solid.compute',
                               PoorMansDataFrame).obj) == expected_value_repr)
Beispiel #29
0
def test_sensor_tick_range(graphql_context):
    external_repository = graphql_context.get_repository_location(
        main_repo_location_name()).get_repository(main_repo_name())
    graphql_context.instance.reconcile_scheduler_state(external_repository)

    sensor_name = "always_no_config_sensor"
    external_sensor = external_repository.get_external_sensor(sensor_name)
    sensor_selector = infer_sensor_selector(graphql_context, sensor_name)

    # test with no job state
    result = execute_dagster_graphql(
        graphql_context,
        GET_SENSOR_TICK_RANGE_QUERY,
        variables={
            "sensorSelector": sensor_selector,
            "dayRange": None,
            "dayOffset": None
        },
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 0

    # turn the sensor on
    graphql_context.instance.add_job_state(
        JobState(external_sensor.get_external_origin(), JobType.SENSOR,
                 JobStatus.RUNNING))

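    # Create three ticks at frozen times: roughly two days ago, one day ago, and one hour ago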
    now = pendulum.now("US/Central")
    one = now.subtract(days=2).subtract(hours=1)
    with pendulum.test(one):
        _create_tick(graphql_context.instance)

    two = now.subtract(days=1).subtract(hours=1)
    with pendulum.test(two):
        _create_tick(graphql_context.instance)

    three = now.subtract(hours=1)
    with pendulum.test(three):
        _create_tick(graphql_context.instance)

    result = execute_dagster_graphql(
        graphql_context,
        GET_SENSOR_TICK_RANGE_QUERY,
        variables={
            "sensorSelector": sensor_selector,
            "dayRange": None,
            "dayOffset": None
        },
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 3

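    # With dayRange=1, only the most recent tick (three) falls inside the window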
    result = execute_dagster_graphql(
        graphql_context,
        GET_SENSOR_TICK_RANGE_QUERY,
        variables={
            "sensorSelector": sensor_selector,
            "dayRange": 1,
            "dayOffset": None
        },
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 1
    assert result.data["sensorOrError"]["sensorState"]["ticks"][0][
        "timestamp"] == three.timestamp()

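    # With dayRange=1 and dayOffset=1, only the tick from the previous day (two) is returned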
    result = execute_dagster_graphql(
        graphql_context,
        GET_SENSOR_TICK_RANGE_QUERY,
        variables={
            "sensorSelector": sensor_selector,
            "dayRange": 1,
            "dayOffset": 1
        },
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 1
    assert result.data["sensorOrError"]["sensorState"]["ticks"][0][
        "timestamp"] == two.timestamp()

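    # With dayRange=2, the two most recent ticks fall inside the window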
    result = execute_dagster_graphql(
        graphql_context,
        GET_SENSOR_TICK_RANGE_QUERY,
        variables={
            "sensorSelector": sensor_selector,
            "dayRange": 2,
            "dayOffset": None,
        },
    )
    assert len(result.data["sensorOrError"]["sensorState"]["ticks"]) == 2
Beispiel #30
0
def test_smoke_test_dagster_type_system(graphql_context):
    result = execute_dagster_graphql(graphql_context, ALL_RUNTIME_TYPES_QUERY)

    assert not result.errors
    assert result.data