def test_all_step_events():  # pylint: disable=too-many-locals
    pipeline = reconstructable(define_test_events_pipeline)
    pipeline_def = pipeline.get_definition()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline_def.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }
            res = execute_query(
                pipeline.get_reconstructable_repository(),
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrementing all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types.
    # Why are these retry events not handled? Because right now there is no way to configure
    # retries on executePlan -- this needs to change, and we should separate the ExecutionParams
    # that get sent to executePlan from those that get sent to startPipelineExecution and friends.
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            pipeline_run_storage = PipelineRunStorage()
            res = execute_query(
                handle, query, variables, pipeline_run_storage=pipeline_run_storage
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
        ):
            continue
        key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_MUTATION, variables, instance=instance
            )

            # go through the same dict, decrement all the event records we've seen from the GraphQL
            # response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(run_config.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()
    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__, working_directory=None),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName": get_ephemeral_repository_name(pipeline_def.name),
                        "repositoryName": get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName": pipeline_def.name,
                    },
                    "runConfigData": {"storage": {"filesystem": {}}},
                    "mode": mode,
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "stepKeys": [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrementing all the event records we've seen from the
            # GraphQL response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1

                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types.
    # Why are these retry events not handled? Because right now there is no way to configure
    # retries on executePlan -- this needs to change, and we should separate the ExecutionParams
    # that get sent to executePlan from those that get sent to startPipelineExecution and friends.
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}