Code example #1
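# NOTE: these snippets are excerpted from test modules; besides stdlib `uuid`,
# they rely on helpers from the dagster / dagster-graphql test suites
# (execute_dagster_graphql, define_context, EXECUTE_PLAN_QUERY,
# pandas_hello_world_solids_config_fs_storage, has_filesystem_intermediate,
# get_filesystem_intermediate, ...), whose import paths vary by version.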
def test_success_whole_execution_plan(snapshot):
    run_id = str(uuid.uuid4())
    result = execute_dagster_graphql(
        define_context(),
        EXECUTE_PLAN_QUERY,
        variables={
            'pipelineName': 'pandas_hello_world',
            'config': pandas_hello_world_solids_config_fs_storage(),
            'stepKeys': None,
            'executionMetadata': {
                'runId': run_id
            },
        },
    )

    query_result = result.data['executePlan']

    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'pandas_hello_world'
    assert query_result['hasFailures'] is False
    # Index the emitted step events by step key for easy lookup.
    step_events = {
        step_event['step']['key']: step_event
        for step_event in query_result['stepEvents']
    }
    assert 'sum_solid.transform' in step_events
    assert 'sum_sq_solid.transform' in step_events

    snapshot.assert_match(result.data)
    # With filesystem storage configured, each step's output is persisted
    # under the run id and remains inspectable after the run completes.
    assert has_filesystem_intermediate(run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
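The tests here reference EXECUTE_PLAN_QUERY without showing it. Below is a rough sketch of its shape, reconstructed only from the variables and fields these tests actually use; the variable types are guesses, and the real query in the dagster-graphql test suite may differ.

# Sketch only: field selections are taken from the assertions in these tests;
# argument and variable types are assumptions.
EXECUTE_PLAN_QUERY = '''
mutation (
    $pipelineName: String!
    $config: PipelineConfig
    $stepKeys: [String!]
    $executionMetadata: ExecutionMetadata
) {
    executePlan(
        pipelineName: $pipelineName
        config: $config
        stepKeys: $stepKeys
        executionMetadata: $executionMetadata
    ) {
        __typename
        ... on ExecutePlanSuccess {
            pipeline { name }
            hasFailures
            stepEvents {
                __typename
                step { key }
                ... on ExecutionStepOutputEvent { outputName }
            }
        }
    }
}
'''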
Code example #2
def test_using_file_system_for_subplan_multiprocessing():
    pipeline = define_inty_pipeline()

    environment_dict = {'storage': {'filesystem': {}}}

    execution_plan = create_execution_plan(pipeline, environment_dict=environment_dict)

    assert execution_plan.get_step_by_key('return_one.transform')

    step_keys = ['return_one.transform']

    run_id = str(uuid.uuid4())

    # First invocation: execute only the 'return_one.transform' step with the
    # multiprocess executor, persisting its output to filesystem storage.
    return_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
            ),
            step_keys_to_execute=step_keys,
        )
    )

    assert get_step_output(return_one_step_events, 'return_one.transform')
    assert has_filesystem_intermediate(run_id, 'return_one.transform')
    assert get_filesystem_intermediate(run_id, 'return_one.transform', Int) == 1

    # Second invocation under the same run id: 'add_one.transform' reads the
    # upstream value (1) from filesystem storage rather than recomputing it.
    add_one_step_events = list(
        execute_plan(
            execution_plan,
            environment_dict=environment_dict,
            run_config=RunConfig(
                run_id=run_id,
                executor_config=MultiprocessExecutorConfig(define_inty_pipeline),
            ),
            step_keys_to_execute=['add_one.transform'],
        )
    )

    assert get_step_output(add_one_step_events, 'add_one.transform')
    assert has_filesystem_intermediate(run_id, 'add_one.transform')
    assert get_filesystem_intermediate(run_id, 'add_one.transform', Int) == 2
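Code example #2 leans on the get_step_output helper to pick a step's output event out of the event stream. A plausible sketch of such a helper follows; the attribute names (is_successful_output, step_key, step_output_data) are assumptions, not the actual dagster test-suite implementation.

# Sketch of a helper that finds the output event for a given step, assuming
# the event attribute names shown here (they may differ across versions).
def get_step_output(step_events, step_key, output_name='result'):
    for step_event in step_events:
        if (
            step_event.is_successful_output
            and step_event.step_key == step_key
            and step_event.step_output_data.output_name == output_name
        ):
            return step_event
    return None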
Code example #3
def test_successful_two_part_execute_plan(snapshot):
    run_id = str(uuid.uuid4())
    result_one = execute_dagster_graphql(
        define_context(),
        EXECUTE_PLAN_QUERY,
        variables={
            'pipelineName': 'pandas_hello_world',
            'config': pandas_hello_world_solids_config_fs_storage(),
            'stepKeys': ['sum_solid.inputs.num.read', 'sum_solid.transform'],
            'executionMetadata': {
                'runId': run_id
            },
        },
    )

    assert result_one.data['executePlan']['__typename'] == 'ExecutePlanSuccess'

    snapshot.assert_match(result_one.data)

    result_two = execute_dagster_graphql(
        define_context(),
        EXECUTE_PLAN_QUERY,
        variables={
            'pipelineName': 'pandas_hello_world',
            'config': pandas_hello_world_solids_config_fs_storage(),
            'stepKeys': ['sum_sq_solid.transform'],
            'executionMetadata': {
                'runId': run_id
            },
        },
    )

    query_result = result_two.data['executePlan']
    assert query_result['__typename'] == 'ExecutePlanSuccess'
    assert query_result['pipeline']['name'] == 'pandas_hello_world'
    assert query_result['hasFailures'] is False
    # A single-step run should emit exactly three events for
    # 'sum_sq_solid.transform': start, then output, then success.
    step_events = query_result['stepEvents']
    assert step_events[0]['__typename'] == 'ExecutionStepStartEvent'
    assert step_events[0]['step']['key'] == 'sum_sq_solid.transform'
    assert step_events[1]['__typename'] == 'ExecutionStepOutputEvent'
    assert step_events[1]['outputName'] == 'result'
    assert step_events[2]['__typename'] == 'ExecutionStepSuccessEvent'

    snapshot.assert_match(result_two.data)

    expected_value_repr = '''   num1  num2  sum  sum_sq
0     1     2    3       9
1     3     4    7      49'''

    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )
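Several of these tests call pandas_hello_world_solids_config_fs_storage to build their environment config. A rough sketch of what it might return, inferring the input data from the expected dataframe above; the CSV path and exact solid/input names are assumptions.

def pandas_hello_world_solids_config_fs_storage():
    # Sketch only: the real helper lives in the dagster-graphql test suite.
    # 'num.csv' (hypothetical path) would hold columns num1,num2 with rows
    # (1,2) and (3,4), yielding the sum/sum_sq values asserted above.
    return {
        'solids': {
            'sum_solid': {'inputs': {'num': {'csv': {'path': 'num.csv'}}}}
        },
        # Filesystem storage is what makes intermediates queryable by run id.
        'storage': {'filesystem': {}},
    }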
Code example #4
def test_successful_pipeline_reexecution(snapshot):
    run_id = str(uuid.uuid4())
    result_one = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'pipeline': {'name': 'pandas_hello_world'},
            'config': pandas_hello_world_solids_config_fs_storage(),
            'executionMetadata': {'runId': run_id},
        },
    )

    assert (
        result_one.data['startPipelineExecution']['__typename'] == 'StartPipelineExecutionSuccess'
    )

    snapshot.assert_match(result_one.data)

    expected_value_repr = '''   num1  num2  sum  sum_sq
0     1     2    3       9
1     3     4    7      49'''

    assert has_filesystem_intermediate(run_id, 'sum_solid.inputs.num.read', 'input_thunk_output')
    assert has_filesystem_intermediate(run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )

    new_run_id = str(uuid.uuid4())

    result_two = execute_dagster_graphql(
        define_context(),
        START_PIPELINE_EXECUTION_SNAPSHOT_QUERY,
        variables={
            'pipeline': {'name': 'pandas_hello_world'},
            'config': pandas_hello_world_solids_config_fs_storage(),
            # Re-execute only the final step under a fresh run id...
            'stepKeys': ['sum_sq_solid.transform'],
            'executionMetadata': {'runId': new_run_id},
            # ...sourcing its input from the previous run's persisted
            # output of sum_solid.transform.
            'reexecutionConfig': {
                'previousRunId': run_id,
                'stepOutputHandles': [{'stepKey': 'sum_solid.transform', 'outputName': 'result'}],
            },
        },
    )

    query_result = result_two.data['startPipelineExecution']
    assert query_result['__typename'] == 'StartPipelineExecutionSuccess'
    logs = query_result['run']['logs']['nodes']

    assert isinstance(logs, list)
    assert has_event_of_type(logs, 'PipelineStartEvent')
    assert has_event_of_type(logs, 'PipelineSuccessEvent')
    assert not has_event_of_type(logs, 'PipelineFailureEvent')

    # sum_solid.transform's output was copied from the previous run rather
    # than recomputed, so the new run's logs contain no output event for it.
    assert not get_step_output_event(logs, 'sum_solid.transform')
    assert get_step_output_event(logs, 'sum_sq_solid.transform')

    snapshot.assert_match(result_two.data)

    assert not has_filesystem_intermediate(
        new_run_id, 'sum_solid.inputs.num.read', 'input_thunk_output'
    )
    assert has_filesystem_intermediate(new_run_id, 'sum_solid.transform')
    assert has_filesystem_intermediate(new_run_id, 'sum_sq_solid.transform')
    assert (
        str(get_filesystem_intermediate(new_run_id, 'sum_sq_solid.transform', DataFrame))
        == expected_value_repr
    )
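Code example #4 also uses two log-inspection helpers. Plausible sketches follow, grounded only in the fields these tests assert on (__typename, step.key, outputName); they are assumptions about the dagster-graphql test utilities, not the real code.

# Sketch: true if any log node in the GraphQL result has the given typename.
def has_event_of_type(logs, message_type):
    return any(log['__typename'] == message_type for log in logs)


# Sketch: return the output event for a step from the GraphQL log nodes,
# or None if the step emitted no such event in this run.
def get_step_output_event(logs, step_key, output_name='result'):
    for log in logs:
        if (
            log['__typename'] == 'ExecutionStepOutputEvent'
            and log['step']['key'] == step_key
            and log['outputName'] == output_name
        ):
            return log
    return None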