def test_success_whole_execution_plan(snapshot):
    """Execute the entire pandas_hello_world plan via GraphQL and verify success.

    Passing ``stepKeys: None`` asks executePlan to run every step; the test then
    checks the success payload, snapshots the response, and confirms that both
    transform steps persisted intermediates to filesystem storage.
    """
    run_id = str(uuid.uuid4())
    result = execute_dagster_graphql(
        define_context(),
        EXECUTE_PLAN_QUERY,
        variables={
            'pipelineName': 'pandas_hello_world',
            'config': pandas_hello_world_solids_config_fs_storage(),
            'stepKeys': None,  # None means "execute the whole plan"
            'executionMetadata': {'runId': run_id},
        },
    )

    plan_payload = result.data['executePlan']
    assert plan_payload['__typename'] == 'ExecutePlanSuccess'
    assert plan_payload['pipeline']['name'] == 'pandas_hello_world'
    assert plan_payload['hasFailures'] is False

    # Index the emitted step events by step key so membership checks are O(1).
    events_by_key = {}
    for event in plan_payload['stepEvents']:
        events_by_key[event['step']['key']] = event

    for expected_key in ('sum_solid.transform', 'sum_sq_solid.transform'):
        assert expected_key in events_by_key

    snapshot.assert_match(result.data)

    # Filesystem storage was configured, so both transforms left intermediates.
    assert has_filesystem_intermediate(run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
def test_using_file_system_for_subplan_multiprocessing():
    """Run a plan in two multiprocess stages, chaining intermediates on disk.

    Both stages share one ``run_id`` so the second stage can read the first
    stage's output from filesystem storage.
    """
    pipeline = define_inty_pipeline()
    environment_dict = {'storage': {'filesystem': {}}}
    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)
    assert execution_plan.get_step_by_key('return_one.transform')

    run_id = str(uuid.uuid4())

    def run_subset(step_keys):
        # Same run_id each time: intermediates from earlier stages stay visible.
        return list(
            execute_plan(
                execution_plan,
                environment_dict=environment_dict,
                run_config=RunConfig(
                    run_id=run_id,
                    executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
                ),
                step_keys_to_execute=step_keys,
            )
        )

    first_stage_events = run_subset(['return_one.transform'])
    assert get_step_output(first_stage_events, 'return_one.transform')
    assert has_filesystem_intermediate(run_id, 'return_one.transform')
    assert get_filesystem_intermediate(run_id, 'return_one.transform', Int) == 1

    second_stage_events = run_subset(['add_one.transform'])
    assert get_step_output(second_stage_events, 'add_one.transform')
    assert has_filesystem_intermediate(run_id, 'add_one.transform')
    assert get_filesystem_intermediate(run_id, 'add_one.transform', Int) == 2
def test_successful_two_part_execute_plan(snapshot):
    """Execute a plan in two GraphQL calls under one run_id.

    The first call runs the input-read and sum steps; the second runs only
    sum_sq, which must find the upstream intermediate in filesystem storage.
    """
    run_id = str(uuid.uuid4())

    def execute_subset(step_keys):
        # Both halves share pipeline, config, and run_id; only stepKeys differ.
        return execute_dagster_graphql(
            define_context(),
            EXECUTE_PLAN_QUERY,
            variables={
                'pipelineName': 'pandas_hello_world',
                'config': pandas_hello_world_solids_config_fs_storage(),
                'stepKeys': step_keys,
                'executionMetadata': {'runId': run_id},
            },
        )

    result_one = execute_subset(['sum_solid.inputs.num.read', 'sum_solid.transform'])
    assert result_one.data['executePlan']['__typename'] == 'ExecutePlanSuccess'
    snapshot.assert_match(result_one.data)

    result_two = execute_subset(['sum_sq_solid.transform'])
    plan_payload = result_two.data['executePlan']
    assert plan_payload['__typename'] == 'ExecutePlanSuccess'
    assert plan_payload['pipeline']['name'] == 'pandas_hello_world'
    assert plan_payload['hasFailures'] is False

    # The single-step run emits exactly start -> output -> success.
    start_event, output_event, success_event = plan_payload['stepEvents'][:3]
    assert start_event['__typename'] == 'ExecutionStepStartEvent'
    assert start_event['step']['key'] == 'sum_sq_solid.transform'
    assert output_event['__typename'] == 'ExecutionStepOutputEvent'
    assert output_event['outputName'] == 'result'
    assert success_event['__typename'] == 'ExecutionStepSuccessEvent'
    snapshot.assert_match(result_two.data)

    # str() of the stored pandas DataFrame intermediate.
    # NOTE(review): reconstructed from a whitespace-mangled literal — the column
    # names and values are certain, the exact padding follows pandas' default
    # repr; confirm against a live run.
    expected_value_repr = '''   num1  num2  sum  sum_sq
0     1     2    3       9
1     3     4    7      49'''

    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )
def test_successful_pipeline_reexecution(snapshot):
    """Re-execute one step of a finished run, reusing the prior run's output.

    A full run materializes all intermediates; the re-execution then runs only
    sum_sq_solid.transform, wiring sum_solid.transform's output from the
    previous run via reexecutionConfig.
    """
    # str() of the stored pandas DataFrame intermediate.
    # NOTE(review): reconstructed from a whitespace-mangled literal — column
    # names and values are certain, padding follows pandas' default repr;
    # confirm against a live run.
    expected_value_repr = '''   num1  num2  sum  sum_sq
0     1     2    3       9
1     3     4    7      49'''

    def start_execution(extra_variables):
        # Both executions share the pipeline selector and config.
        variables = {
            'pipeline': {'name': 'pandas_hello_world'},
            'config': pandas_hello_world_solids_config_fs_storage(),
        }
        variables.update(extra_variables)
        return execute_dagster_graphql(
            define_context(), START_PIPELINE_EXECUTION_SNAPSHOT_QUERY, variables=variables
        )

    run_id = str(uuid.uuid4())
    result_one = start_execution({'executionMetadata': {'runId': run_id}})
    assert (
        result_one.data['startPipelineExecution']['__typename']
        == 'StartPipelineExecutionSuccess'
    )
    snapshot.assert_match(result_one.data)

    # The initial full run persisted every intermediate.
    assert has_filesystem_intermediate(run_id, 'sum_solid.inputs.num.read', 'input_thunk_output')
    assert has_filesystem_intermediate(run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )

    new_run_id = str(uuid.uuid4())
    result_two = start_execution(
        {
            'stepKeys': ['sum_sq_solid.transform'],
            'executionMetadata': {'runId': new_run_id},
            'reexecutionConfig': {
                'previousRunId': run_id,
                'stepOutputHandles': [
                    {'stepKey': 'sum_solid.transform', 'outputName': 'result'}
                ],
            },
        }
    )

    reexecution_payload = result_two.data['startPipelineExecution']
    assert reexecution_payload['__typename'] == 'StartPipelineExecutionSuccess'

    logs = reexecution_payload['run']['logs']['nodes']
    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    # Only the requested step actually executed; the upstream output was reused.
    assert not get_step_output_event(logs, 'sum_solid.transform')
    assert get_step_output_event(logs, 'sum_sq_solid.transform')
    snapshot.assert_match(result_two.data)

    # The re-execution never ran the input thunk, so that intermediate is absent,
    # while the copied upstream output and the fresh sum_sq result are present.
    assert not has_filesystem_intermediate(
        new_run_id, 'sum_solid.inputs.num.read', 'input_thunk_output'
    )
    assert has_filesystem_intermediate(new_run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(new_run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(new_run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )