def execute_execute_plan_mutation(handle, variables, instance_ref=None):
    instance = (
        DagsterInstance.from_ref(instance_ref) if instance_ref else DagsterInstance.ephemeral()
    )
    res = execute_query(
        handle, EXECUTE_PLAN_MUTATION, variables, use_sync_executor=True, instance=instance
    )
    handle_execution_errors(res, 'executePlan')
    return handle_execute_plan_result(res)
def execute_start_pipeline_execution_query(handle, variables):
    res = execute_query(
        handle,
        START_PIPELINE_EXECUTION_QUERY,
        variables,
        raise_on_error=True,
        use_sync_executor=True,
    )
    handle_start_pipeline_execution_errors(res)
    return handle_start_pipeline_execution_result(res)
def query_on_dask_worker(handle, query, variables, dependencies):  # pylint: disable=unused-argument
    '''Note that we need to pass "dependencies" to ensure Dask sequences futures during task
    scheduling, even though we do not use this argument within the function.

    We also pass in 'raise_on_error' here, because otherwise (currently) very little information
    is propagated to the dask master from the workers about the state of execution; we should at
    least inform the user of exceptions.
    '''
    res = execute_query(handle, query, variables, raise_on_error=True)
    handle_errors(res)
    return handle_result(res)
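# A rough usage sketch (an assumption, not taken from the source) of how query_on_dask_worker
# might be submitted to a dask.distributed Client so that the otherwise-unused "dependencies"
# argument enforces ordering: each step's future receives the futures of its upstream steps as an
# argument, so Dask will not schedule it until those futures resolve. The names `client`,
# `handle`, and `step_queries` are hypothetical placeholders.
def submit_steps_in_order(client, handle, step_queries):
    # step_queries is assumed to be a list of (step_key, query, variables, upstream_step_keys)
    futures = {}
    for step_key, query, variables, upstream_step_keys in step_queries:
        upstream_futures = [futures[key] for key in upstream_step_keys if key in futures]
        # Passing upstream futures as the "dependencies" argument makes Dask wait on them,
        # even though query_on_dask_worker never reads that argument.
        futures[step_key] = client.submit(
            query_on_dask_worker, handle, query, variables, upstream_futures
        )
    return futures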
def execute_execute_plan_mutation_raw(recon_repo, variables, instance_ref=None):
    """The underlying mutation returns the DagsterEventRecords serialized as strings, rather than
    dict representations of the DagsterEvents, thus "raw".

    This method in turn returns a stream of DagsterEventRecords, not DagsterEvents."""
    instance = (
        DagsterInstance.from_ref(instance_ref) if instance_ref else DagsterInstance.ephemeral()
    )
    res = execute_query(recon_repo, RAW_EXECUTE_PLAN_MUTATION, variables, instance=instance)
    handle_execution_errors(res, "executePlan")
    return handle_execute_plan_result_raw(res)
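# A minimal consumption sketch (an assumption, not taken from the source): per the docstring
# above, execute_execute_plan_mutation_raw returns DagsterEventRecords rather than DagsterEvents,
# so a caller can inspect each record as it comes back. The `records_to_event_types` helper name
# is hypothetical; reading record.dagster_event.event_type_value mirrors how the tests below read
# the same attributes off records pulled from run storage.
def records_to_event_types(recon_repo, variables, instance_ref=None):
    event_types = []
    records = execute_execute_plan_mutation_raw(recon_repo, variables, instance_ref=instance_ref)
    for record in records:
        # Skip records that do not wrap a DagsterEvent (e.g. plain log messages).
        if record.dagster_event:
            event_types.append(record.dagster_event.event_type_value)
    return event_types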
def execute_start_pipeline_execution_mutation(handle, variables, instance_ref=None):
    res = execute_query(
        handle,
        START_PIPELINE_EXECUTION_MUTATION,
        variables,
        use_sync_executor=True,
        instance=DagsterInstance.from_ref(instance_ref)
        if instance_ref
        else DagsterInstance.ephemeral(),
    )
    handle_execution_errors(res, 'startPipelineExecution')
    return handle_start_pipeline_execution_result(res)
def test_all_step_events():  # pylint: disable=too-many-locals
    pipeline = reconstructable(define_test_events_pipeline)
    pipeline_def = pipeline.get_definition()
    mode = pipeline_def.get_default_mode_name()
    instance = DagsterInstance.ephemeral()
    execution_plan = create_execution_plan(pipeline, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline_def.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': pipeline_run.run_id},
                    'stepKeys': [step.key],
                }
            }
            res = execute_query(
                pipeline.get_reconstructable_repository(),
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                assert 'data' in res, res
                assert 'executePlan' in res['data'], res
                assert 'stepEvents' in res['data']['executePlan'], res
                step_events = res['data']['executePlan']['stepEvents']

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    run_config = RunConfig(mode=mode)
    execution_plan = create_execution_plan(pipeline, {}, run_config=run_config)
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            instance = DagsterInstance.ephemeral()
            res = execute_query(
                handle, START_PIPELINE_EXECUTION_MUTATION, variables, instance=instance
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + '.' + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res['errors'])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(run_config.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    handle = ExecutionTargetHandle.for_pipeline_fn(define_test_events_pipeline)
    pipeline = handle.build_pipeline_definition()
    mode = pipeline.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline, {}, mode=mode)
    step_levels = execution_plan.topological_step_levels()
    run_config = RunConfig(
        executor_config=InProcessExecutorConfig(raise_on_error=False),
        storage_mode=RunStorageMode.FILESYSTEM,
    )

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        'LogMessageEvent',
        'PipelineStartEvent',
        'PipelineSuccessEvent',
        'PipelineInitFailureEvent',
        'PipelineFailureEvent',
    }

    step_event_fragment = get_step_event_fragment()
    log_message_event_fragment = get_log_message_event_fragment()
    query = '\n'.join(
        (
            PIPELINE_EXECUTION_QUERY_TEMPLATE.format(
                step_event_fragment=step_event_fragment.include_key,
                log_message_event_fragment=log_message_event_fragment.include_key,
            ),
            step_event_fragment.fragment,
            log_message_event_fragment.fragment,
        )
    )

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                'executionParams': {
                    'selector': {'name': pipeline.name},
                    'environmentConfigData': {'storage': {'filesystem': {}}},
                    'mode': mode,
                    'executionMetadata': {'runId': run_config.run_id},
                    'stepKeys': [step.key],
                }
            }
            pipeline_run_storage = PipelineRunStorage()
            res = execute_query(
                handle, query, variables, pipeline_run_storage=pipeline_run_storage
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get('errors'):
                run_logs = res['data']['startPipelineExecution']['run']['logs']['nodes']

                events = [
                    dagster_event_from_dict(e, pipeline.name)
                    for e in run_logs
                    if e['__typename'] not in ignored_events
                ]

                for event in events:
                    key = event.step_key + '.' + event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}

            # build up a dict, incrementing all the event records we've produced in the run storage
            logs = pipeline_run_storage.get_run_by_id(run_config.run_id).all_logs()
            for log in logs:
                if not log.dagster_event or (
                    DagsterEventType(log.dagster_event.event_type_value) not in STEP_EVENTS
                ):
                    continue
                key = log.dagster_event.step_key + '.' + log.dagster_event.event_type_value
                event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    assert not unhandled_events
def test_all_step_events():  # pylint: disable=too-many-locals
    instance = DagsterInstance.ephemeral()
    workspace = workspace_from_load_target(
        PythonFileTarget(__file__, define_test_events_pipeline.__name__, working_directory=None),
        instance,
    )
    pipeline_def = define_test_events_pipeline()
    mode = pipeline_def.get_default_mode_name()
    execution_plan = create_execution_plan(pipeline_def, mode=mode)
    pipeline_run = instance.create_run_for_pipeline(
        pipeline_def=pipeline_def, execution_plan=execution_plan, mode=mode
    )
    step_levels = execution_plan.topological_step_levels()

    unhandled_events = STEP_EVENTS.copy()

    # Exclude types that are not step events
    ignored_events = {
        "LogMessageEvent",
        "PipelineStartEvent",
        "PipelineSuccessEvent",
        "PipelineInitFailureEvent",
        "PipelineFailureEvent",
    }

    event_counts = defaultdict(int)

    for step_level in step_levels:
        for step in step_level:
            variables = {
                "executionParams": {
                    "selector": {
                        "repositoryLocationName": get_ephemeral_repository_name(pipeline_def.name),
                        "repositoryName": get_ephemeral_repository_name(pipeline_def.name),
                        "pipelineName": pipeline_def.name,
                    },
                    "runConfigData": {"storage": {"filesystem": {}}},
                    "mode": mode,
                    "executionMetadata": {"runId": pipeline_run.run_id},
                    "stepKeys": [step.key],
                },
            }
            res = execute_query(
                workspace,
                EXECUTE_PLAN_MUTATION,
                variables,
                instance=instance,
            )

            # go through the same dict, decrement all the event records we've seen from the
            # GraphQL response
            if not res.get("errors"):
                assert "data" in res, res
                assert "executePlan" in res["data"], res
                assert "stepEvents" in res["data"]["executePlan"], res
                step_events = res["data"]["executePlan"]["stepEvents"]

                events = [
                    dagster_event_from_dict(e, pipeline_def.name)
                    for e in step_events
                    if e["__typename"] not in ignored_events
                ]

                for event in events:
                    if event.step_key:
                        key = event.step_key + "." + event.event_type_value
                    else:
                        key = event.event_type_value
                    event_counts[key] -= 1
                unhandled_events -= {DagsterEventType(e.event_type_value) for e in events}
            else:
                raise Exception(res["errors"])

    # build up a dict, incrementing all the event records we've produced in the run storage
    logs = instance.all_logs(pipeline_run.run_id)
    for log in logs:
        if not log.dagster_event or (
            DagsterEventType(log.dagster_event.event_type_value)
            not in STEP_EVENTS.union(set([DagsterEventType.ENGINE_EVENT]))
        ):
            continue
        if log.dagster_event.step_key:
            key = log.dagster_event.step_key + "." + log.dagster_event.event_type_value
        else:
            key = log.dagster_event.event_type_value
        event_counts[key] += 1

    # Ensure we've processed all the events that were generated in the run storage
    assert sum(event_counts.values()) == 0

    # Ensure we've handled the universe of event types
    # Why are these retry events not handled? Because right now there is no way to configure retries
    # on executePlan -- this needs to change, and we should separate the ExecutionParams that get
    # sent to executePlan from those that get sent to startPipelineExecution and friends
    assert unhandled_events == {DagsterEventType.STEP_UP_FOR_RETRY, DagsterEventType.STEP_RESTARTED}