Example 1
def execute_execute_plan_mutation(handle, variables, instance_ref=None):
    instance = (
        DagsterInstance.from_ref(instance_ref) if instance_ref else DagsterInstance.ephemeral()
    )
    res = execute_query(
        handle, EXECUTE_PLAN_MUTATION, variables, use_sync_executor=True, instance=instance
    )
    handle_execution_errors(res, 'executePlan')
    return handle_execute_plan_result(res)
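
For orientation, a minimal usage sketch of the mutation helper above. The shape of the variables payload mirrors the executionParams dict used in the tests later in this collection; the pipeline name, step key, and run id shown here are purely illustrative, and `handle` is assumed to already point at the target repository.

# Hypothetical usage sketch -- 'handle' and 'run_id' are assumed to exist already;
# the pipeline and step names below are placeholders.
variables = {
    'executionParams': {
        'selector': {'name': 'my_pipeline'},
        'environmentConfigData': {'storage': {'filesystem': {}}},
        'mode': 'default',
        'executionMetadata': {'runId': run_id},
        'stepKeys': ['my_solid.compute'],
    }
}
step_events = execute_execute_plan_mutation(handle, variables)
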
Example 2
def execute_start_pipeline_execution_query(handle, variables):
    res = execute_query(
        handle,
        START_PIPELINE_EXECUTION_QUERY,
        variables,
        raise_on_error=True,
        use_sync_executor=True,
    )
    handle_start_pipeline_execution_errors(res)
    return handle_start_pipeline_execution_result(res)
Example 3
def query_on_dask_worker(handle, query, variables, dependencies):  # pylint: disable=unused-argument
    '''Note that we need to pass "dependencies" to ensure Dask sequences futures during task
    scheduling, even though we do not use this argument within the function.

    We also pass in 'raise_on_error' here, because otherwise (currently) very little information
    is propagated to the dask master from the workers about the state of execution; we should at
    least inform the user of exceptions.
    '''
    res = execute_query(handle, query, variables, raise_on_error=True)
    handle_errors(res)
    return handle_result(res)
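
The docstring above motivates the otherwise unused dependencies argument. A minimal sketch of how this function might be submitted from the Dask client side, assuming a running dask.distributed Client; the names for the upstream futures are illustrative only.

from dask.distributed import Client

# Assumes a reachable Dask scheduler; 'handle', 'query', and 'variables' are as in the
# examples above, and 'upstream_futures' stands in for the futures of upstream steps,
# passed only so Dask schedules this task after them.
client = Client()
future = client.submit(query_on_dask_worker, handle, query, variables, upstream_futures)
result = future.result()
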
Example 4
def execute_execute_plan_mutation_raw(recon_repo, variables, instance_ref=None):
    """The underlying mutation returns the DagsterEventRecords serialized as strings, rather
    than dict representations of the DagsterEvents, thus "raw". This method in turn returns a
    stream of DagsterEventRecords, not DagsterEvents."""

    instance = (
        DagsterInstance.from_ref(instance_ref) if instance_ref else DagsterInstance.ephemeral()
    )
    res = execute_query(recon_repo, RAW_EXECUTE_PLAN_MUTATION, variables, instance=instance)
    handle_execution_errors(res, "executePlan")
    return handle_execute_plan_result_raw(res)
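
Since the method returns a stream of event records rather than a materialized result, a caller would typically just iterate over it. A minimal sketch, assuming recon_repo and variables are prepared as in the other examples:

# Illustrative only -- 'recon_repo' and 'variables' are assumed to be set up as above.
for event_record in execute_execute_plan_mutation_raw(recon_repo, variables):
    # each item is a DagsterEventRecord, not a DagsterEvent
    print(event_record)
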
Example 5
def execute_start_pipeline_execution_mutation(handle, variables, instance_ref=None):
    res = execute_query(
        handle,
        START_PIPELINE_EXECUTION_MUTATION,
        variables,
        use_sync_executor=True,
        instance=DagsterInstance.from_ref(instance_ref)
        if instance_ref
        else DagsterInstance.ephemeral(),
    )
    handle_execution_errors(res, 'startPipelineExecution')
    return handle_start_pipeline_execution_result(res)
Example 6
def test_all_step_events():  # pylint: disable=too-many-locals
    pipeline = reconstructable(define_test_events_pipeline)
    pipeline_def = pipeline.get_definition()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline_def.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }
            res = execute_query(
                pipeline.get_reconstructable_repository(),
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
Example 7
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {
                        'name': pipeline.name
                    },
                    'environmentConfigData': {
                        'storage': {
                            'filesystem': {}
                        }
                    },
                    'mode': mode,
                    'executionMetadata': {
                        'runId': run_config.run_id
                    },
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(handle,
                                START_PIPELINE_EXECUTION_MUTATION,
                                variables,
                                instance=instance)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run'][
                    'logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name) for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res['errors'])

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = instance.all_logs(run_config.run_id)
            for log in logs:
                if not log.dagster_event or (
                        DagsterEventType(log.dagster_event.event_type_value)
                        not in STEP_EVENTS.union(
                            set([DagsterEventType.ENGINE_EVENT]))):
                    continue
                if log.dagster_event.step_key:
                    key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                else:
                    key = log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
Example 8
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            pipeline_run_storage = PipelineRunStorage()

            res = execute_query(handle, query, variables, pipeline_run_storage=pipeline_run_storage)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
Example 9
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()

    workspace = workspace_from_load_target(
        PythonFileTarget(__file__,
                         define_test_events_pipeline.__name__,
                         working_directory=None),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName":
                        get_ephemeral_repository_name(pipeline_def.name),
                        "repositoryName":
                        get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName":
                        pipeline_def.name,
                    },
                    "runConfigData": {
                        "storage": {
                            "filesystem": {}
                        }
                    },
                    "mode": mode,
                    "executionMetadata": {
                        "runId": pipeline_run.run_id
                    },
                    "stepKeys": [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {
                    DagsterEventType(e.event_type_value)
                    for e in events
                }
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (DagsterEventType(
                log.dagster_event.event_type_value) not in STEP_EVENTS.union(
                    set([DagsterEventType.ENGINE_EVENT]))):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {
        DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED
    }