Example #1
def test_can_handle_all_step_events():
    '''Ensure that we catch the case when new step events are added: every step event type
    must be handled by the event parsing. This does not check that the event parsing itself
    works correctly.
    '''
    handled = set(HANDLED_EVENTS.values())
    # The distinction between "step events" and "pipeline events" needs to be reexamined
    assert handled == STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
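
This exhaustiveness check is a general pattern: compare the values covered by a handler table against the full enum, so that adding a new enum member without a handler fails the test immediately. A minimal self-contained sketch of the same idea (the enum and handler table below are hypothetical stand-ins, not Dagster's actual HANDLED_EVENTS):

from enum import Enum

class EventType(Enum):
    STEP_START = 'STEP_START'
    STEP_SUCCESS = 'STEP_SUCCESS'
    STEP_FAILURE = 'STEP_FAILURE'

# Maps GraphQL typenames to the event types they are parsed into.
HANDLERS = {
    'ExecutionStepStartEvent': EventType.STEP_START,
    'ExecutionStepSuccessEvent': EventType.STEP_SUCCESS,
    'ExecutionStepFailureEvent': EventType.STEP_FAILURE,
}

def test_handlers_cover_all_event_types():
    # Fails as soon as a member is added to EventType without a handler entry.
    assert set(HANDLERS.values()) == set(EventType)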
Example #2
def test_all_step_events():  # pylint: disable=too-many-locals
    pipeline = reconstructable(define_test_events_pipeline)
    pipeline_def = pipeline.get_definition()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline_def.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }
            res = execute_query(
                pipeline.get_reconstructable_repository(),
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
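
The test's core bookkeeping is a single defaultdict used as a signed multiset: every event observed in the GraphQL response decrements a per-(step key, event type) counter, every event read back from run storage increments it, and the two sources agree exactly when everything cancels. A stripped-down sketch of that reconciliation (the event tuples are illustrative):

from collections import defaultdict

def sources_agree(graphql_events, storage_events):
    counts = defaultdict(int)
    for step_key, event_type in graphql_events:
        counts[step_key + '.' + event_type] -= 1
    for step_key, event_type in storage_events:
        counts[step_key + '.' + event_type] += 1
    # All-zero counts mean the two sources match as multisets.
    return all(count == 0 for count in counts.values())

assert sources_agree(
    [('solid_a.compute', 'STEP_START'), ('solid_a.compute', 'STEP_SUCCESS')],
    [('solid_a.compute', 'STEP_START'), ('solid_a.compute', 'STEP_SUCCESS')],
)

Note that the versions above assert only that the counts sum to zero, which is weaker: a missing event of one type can cancel an extra event of another. Checking that every individual count is zero, as in the sketch, is the stricter variant.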
Example #3
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }

            pipeline_run_storage = PipelineRunStorage()

            res = execute_query(handle, query, variables, pipeline_run_storage=pipeline_run_storage)

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
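
Each version walks execution_plan.topological_step_levels(), which groups steps into levels so that a step appears one level after its deepest upstream dependency and steps within a level are mutually independent. A minimal sketch of that leveling over a plain dependency dict (topological_levels is a hypothetical stand-in for the ExecutionPlan method):

from collections import defaultdict

def topological_levels(deps):
    # deps maps node -> set of upstream nodes it depends on.
    depth = {}

    def depth_of(node):
        if node not in depth:
            depth[node] = 1 + max((depth_of(up) for up in deps[node]), default=-1)
        return depth[node]

    levels = defaultdict(list)
    for node in deps:
        levels[depth_of(node)].append(node)
    return [levels[i] for i in sorted(levels)]

assert topological_levels({'a': set(), 'b': {'a'}, 'c': {'a'}, 'd': {'b', 'c'}}) == [
    ['a'],
    ['b', 'c'],
    ['d'],
]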
Example #4
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_MUTATION, variables, instance=instance
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = instance.all_logs(run_config.run_id)
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value)
                    not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
                ):
                    continue
                if log.dagster_event.step_key:
                    key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                else:
                    key = log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
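
The run-storage pass filters structured log records by round-tripping the persisted string value back through the enum, so an unknown value raises rather than being skipped silently. A small sketch of that filter (the record type and enum members are stand-ins for Dagster's log records):

from enum import Enum
from typing import NamedTuple, Optional

class EventType(Enum):
    STEP_START = 'STEP_START'
    ENGINE_EVENT = 'ENGINE_EVENT'
    PIPELINE_START = 'PIPELINE_START'

STEP_EVENTS = frozenset([EventType.STEP_START])

class LogRecord(NamedTuple):
    event_type_value: Optional[str]  # raw string as persisted in run storage

def is_relevant(record):
    if record.event_type_value is None:
        return False
    # EventType(...) raises ValueError on unknown values, surfacing new
    # event types loudly instead of silently dropping them.
    return EventType(record.event_type_value) in STEP_EVENTS | {EventType.ENGINE_EVENT}

assert is_relevant(LogRecord('STEP_START'))
assert is_relevant(LogRecord('ENGINE_EVENT'))
assert not is_relevant(LogRecord('PIPELINE_START'))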
Example #5
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()

    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__, working_directory=None),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:

            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName": get_ephemeral_repository_name(pipeline_def.name),
                        "repositoryName": get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName": pipeline_def.name,
                    },
                    "runConfigData": {"storage": {"filesystem": {}}},
                    "mode": mode,
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "stepKeys": [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}