Example #1
def test_multiprocessing_execution_for_composite_solid():
    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        }
    }

    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'composite_pipeline')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS

    environment_dict = {
        'solids': {
            'composite_with_nested_config_solid': {
                'solids': {'node_a': {'config': {'foo': 'baz'}}, 'node_b': {'config': {'bar': 3}}}
            }
        },
        'execution': {'multiprocess': {}},
        'storage': {'filesystem': {}},
    }

    run_id = make_new_run_id()

    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=composite_pipeline.name,
            run_id=run_id,
            selector=ExecutionSelector('nonce'),
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, composite_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.SUCCESS
Example #2
        def python_callable(ts, dag_run, **kwargs):  # pylint: disable=unused-argument
            run_id = dag_run.run_id

            # TODO: https://github.com/dagster-io/dagster/issues/1342
            redacted = construct_variables(mode, 'REDACTED', pipeline_name,
                                           run_id, ts, step_keys)
            logging.info('Executing GraphQL query: {query}\n'.format(
                query=EXECUTE_PLAN_MUTATION) + 'with variables:\n' +
                         seven.json.dumps(redacted, indent=2))
            instance = DagsterInstance.from_ref(
                instance_ref) if instance_ref else None
            if instance:
                instance.get_or_create_run(
                    PipelineRun(
                        pipeline_name=pipeline_name,
                        run_id=run_id,
                        environment_dict=environment_dict,
                        mode=mode,
                        selector=ExecutionSelector(pipeline_name),
                        reexecution_config=None,
                        step_keys_to_execute=None,
                        tags=None,
                        status=PipelineRunStatus.MANAGED,
                    ))

            events = execute_execute_plan_mutation(
                handle,
                construct_variables(mode, environment_dict, pipeline_name,
                                    run_id, ts, step_keys),
                instance_ref=instance_ref,
            )

            check_events_for_skips(events)

            return events
Example #3
def test_failing():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'failing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': file_relative_path(__file__, 'data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=failing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run,
                                       instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(run_id)
Example #4
def execute_remote_pipeline_run(host,
                                pipeline_name,
                                environment_dict=None,
                                tags=None,
                                solid_subset=None,
                                mode=None):
    check.str_param(host, 'host')
    check.str_param(pipeline_name, 'pipeline_name')
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

    selector = ExecutionSelector(pipeline_name, solid_subset)
    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=mode,
        execution_metadata=ExecutionMetadata(run_id=None, tags=tags or {}),
        step_keys=None,
        previous_run_id=None,
    )

    result = execute_query_against_remote(
        host,
        START_PIPELINE_EXECUTION_MUTATION,
        variables=json.dumps(
            {'executionParams': execution_params.to_graphql_input()}),
    )

    return result
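
A minimal call sketch for the helper above, assuming a locally running dagit host; the host string, pipeline name, config, and tags are placeholders, not values from the source:

# Hypothetical usage sketch: every literal below is a placeholder.
result = execute_remote_pipeline_run(
    host='http://localhost:3000',   # assumed dagit endpoint
    pipeline_name='my_pipeline',    # hypothetical pipeline name
    environment_dict={'solids': {}},
    tags={'owner': 'data-team'},
    mode='default',
)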
Example #5
def pipeline_launch_command(env, preset_name, mode, **kwargs):
    env = list(check.opt_tuple_param(env, 'env', default=(), of_type=str))

    pipeline = create_pipeline_from_cli_args(kwargs)

    instance = DagsterInstance.get()

    if preset_name:
        if env:
            raise click.UsageError('Can not use --preset with --env.')

        if mode:
            raise click.UsageError('Can not use --preset with --mode.')

        preset = pipeline.get_preset(preset_name)
    else:
        preset = None

    run_tags = get_tags_from_args(kwargs)

    run = PipelineRun(
        pipeline_name=pipeline.name,
        run_id=make_new_run_id(),
        selector=ExecutionSelector(pipeline.name,
                                   preset.solid_subset if preset else None),
        environment_dict=preset.environment_dict
        if preset else load_yaml_from_glob_list(env),
        mode=(preset.mode if preset else mode) or 'default',
        status=PipelineRunStatus.NOT_STARTED,
        tags=run_tags,
    )

    return instance.launch_run(run)
Example #6
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        previous_run_id='previousID',
    )
    for field in run:
        # ensure we have a test value to round trip for each field
        assert field

    exec_params = execution_params_from_pipeline_run(run)
    assert run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert run == pipeline_run_from_execution_params(exec_params_gql)

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    exec_params = execution_params_from_pipeline_run(empty_run)
    assert empty_run == pipeline_run_from_execution_params(exec_params)

    exec_params_gql = execution_params_from_graphql(
        exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    assert empty_run == pipeline_run_from_execution_params(exec_params_gql)
Example #7
def execute_partition_set(partition_set, partition_filter, instance=None):
    check.inst_param(partition_set, 'partition_set', PartitionSetDefinition)
    check.callable_param(partition_filter, 'partition_filter')
    check.opt_inst_param(instance, 'instance', DagsterInstance)

    candidate_partitions = partition_set.get_partitions()
    partitions = partition_filter(candidate_partitions)

    instance = instance or DagsterInstance.ephemeral()

    for partition in partitions:
        run = PipelineRun(
            pipeline_name=partition_set.pipeline_name,
            run_id=make_new_run_id(),
            selector=ExecutionSelector(partition_set.pipeline_name),
            environment_dict=partition_set.environment_dict_for_partition(
                partition),
            mode='default',
            tags=merge_dicts({'dagster/backfill': 'custom'},
                             partition_set.tags_for_partition(partition)),
            status=PipelineRunStatus.NOT_STARTED,
        )

        # Remove once we can handle synchronous execution... currently limited by sqlite
        time.sleep(0.1)

        instance.run_launcher.launch_run(run)
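
For illustration, a hedged sketch of invoking the function above; the partition set and the filter (keep only the most recent candidate) are assumptions, not part of the source:

# Hypothetical usage: daily_partition_set is a placeholder PartitionSetDefinition.
execute_partition_set(
    daily_partition_set,
    partition_filter=lambda candidates: candidates[-1:],  # launch only the latest partition
    instance=DagsterInstance.ephemeral(),
)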
Example #8
def test_execution_crash():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(__file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {'sum_solid': {'inputs': {'num': file_relative_path(__file__, 'data/num.csv')}}}
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=crashy_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        )
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run, instance)
    execution_manager.join()
    assert instance.get_run_by_id(run_id).status == PipelineRunStatus.FAILURE
    last_log = instance.all_logs(run_id)[-1]

    assert last_log.message.startswith(
        'Exception: Pipeline execution process for {run_id} unexpectedly exited\n'.format(
            run_id=run_id
        )
    )
Example #9
def test_running():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': file_relative_path(__file__, 'data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=passing_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, passing_pipeline, pipeline_run,
                                       instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.SUCCESS
    events = instance.all_logs(pipeline_run.run_id)
    assert events

    engine_events = get_events_of_type(events, DagsterEventType.ENGINE_EVENT)
    assert (len([
        ev for ev in engine_events
        if 'SubprocessExecutionManager' in ev.message
    ]) == 3)  # starting, started, exit
Example #10
def test_execution_crash():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'crashy_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': file_relative_path(__file__, 'data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=crashy_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, crashy_pipeline, pipeline_run,
                                       instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.FAILURE
    crash_log = instance.all_logs(pipeline_run.run_id)[
        -2]  # last message is pipeline failure, second to last is...

    assert crash_log.message.startswith(
        '[SubprocessExecutionManager] Pipeline execution process for {run_id} unexpectedly exited'
        .format(run_id=pipeline_run.run_id))
Example #11
def test_failing():
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'failing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': file_relative_path(__file__, 'data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run_for_pipeline(
        pipeline=failing_pipeline,
        selector=selector,
        environment_dict=environment_dict,
    )
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle, failing_pipeline, pipeline_run,
                                       instance)
    execution_manager.join()
    assert instance.get_run_by_id(
        pipeline_run.run_id).status == PipelineRunStatus.FAILURE
    assert instance.all_logs(pipeline_run.run_id)
Example #12
    def __init__(
        self,
        name,
        cron_schedule,
        pipeline_name,
        environment_dict=None,
        environment_dict_fn=None,
        tags=None,
        tags_fn=None,
        solid_subset=None,
        mode="default",
        should_execute=None,
        environment_vars=None,
    ):
        check.str_param(name, 'name')
        check.str_param(cron_schedule, 'cron_schedule')
        check.str_param(pipeline_name, 'pipeline_name')
        check.opt_dict_param(environment_dict, 'environment_dict')
        check.opt_callable_param(environment_dict_fn, 'environment_dict_fn')
        check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
        check.opt_callable_param(tags_fn, 'tags_fn')
        check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
        mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)
        check.opt_callable_param(should_execute, 'should_execute')
        check.opt_dict_param(environment_vars, 'environment_vars', key_type=str, value_type=str)

        if environment_dict_fn and environment_dict:
            raise DagsterInvalidDefinitionError(
                'Attempted to provide both environment_dict_fn and environment_dict as arguments'
                ' to ScheduleDefinition. Must provide only one of the two.'
            )

        if tags_fn and tags:
            raise DagsterInvalidDefinitionError(
                'Attempted to provide both tags_fn and tags as arguments'
                ' to ScheduleDefinition. Must provide only one of the two.'
            )

        if not environment_dict and not environment_dict_fn:
            environment_dict_fn = lambda _context: {}

        if not tags and not tags_fn:
            tags_fn = lambda _context: {}

        if not should_execute:
            should_execute = lambda _context: True

        self._schedule_definition_data = ScheduleDefinitionData(
            name=check.str_param(name, 'name'),
            cron_schedule=check.str_param(cron_schedule, 'cron_schedule'),
            environment_vars=check.opt_dict_param(environment_vars, 'environment_vars'),
        )

        self._environment_dict = environment_dict
        self._environment_dict_fn = environment_dict_fn
        self._tags = tags
        self._tags_fn = tags_fn
        self._should_execute = should_execute
        self._mode = mode
        self._selector = ExecutionSelector(pipeline_name, solid_subset)
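
A hedged construction sketch for the definition above; the schedule name, cron string, and callables are invented, and it respects the rule that environment_dict and environment_dict_fn (likewise tags and tags_fn) are mutually exclusive:

# Hypothetical usage: provide either static config or a config callable, never both.
nightly = ScheduleDefinition(
    name='nightly_report',            # placeholder name
    cron_schedule='0 2 * * *',        # every day at 02:00
    pipeline_name='report_pipeline',  # placeholder pipeline
    environment_dict_fn=lambda _context: {'solids': {}},  # built fresh per tick
    tags_fn=lambda _context: {'scheduled': 'true'},
)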
Example #13
def start_scheduled_execution(graphene_info, schedule_name):
    from dagster_graphql.schema.roots import create_execution_metadata

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    schedule = get_dagster_schedule(graphene_info, schedule_name)
    schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)

    schedule_context = ScheduleExecutionContext(graphene_info.context.instance)

    # Run should_execute and halt if it returns False
    if not schedule_def.should_execute(schedule_context):
        return graphene_info.schema.type_named(
            'ScheduledExecutionBlocked'
        )(message=
          'Schedule {schedule_name} did not run because the should_execute did not return'
          ' True'.format(schedule_name=schedule_name))

    # Get environment_dict
    environment_dict = schedule_def.get_environment_dict(schedule_context)
    tags = schedule_def.get_tags(schedule_context)

    check.invariant('dagster/schedule_id' not in tags)
    tags['dagster/schedule_id'] = schedule.schedule_id

    check.invariant('dagster/schedule_name' not in tags)
    tags['dagster/schedule_name'] = schedule_def.name

    execution_metadata_tags = [{
        'key': key,
        'value': value
    } for key, value in tags.items()]
    execution_params = merge_dicts(
        schedule_def.execution_params,
        {'executionMetadata': {
            'tags': execution_metadata_tags
        }})

    selector = ExecutionSelector(
        execution_params['selector']['name'],
        execution_params['selector'].get('solidSubset'))

    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=execution_params.get('mode'),
        execution_metadata=create_execution_metadata(
            execution_params.get('executionMetadata')),
        step_keys=execution_params.get('stepKeys'),
        previous_run_id=None,
    )

    # Launch run if run launcher is defined
    run_launcher = graphene_info.context.instance.run_launcher
    if run_launcher:
        return launch_pipeline_execution(graphene_info, execution_params)

    return start_pipeline_execution(graphene_info, execution_params)
Example #14
def start_scheduled_execution(graphene_info, schedule_name):
    from dagster_graphql.schema.roots import create_execution_metadata

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    schedule = get_dagster_schedule(graphene_info, schedule_name)
    schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)

    # Run should_execute and halt if it returns False
    should_execute = schedule_def.should_execute
    if not should_execute():
        return graphene_info.schema.type_named('ScheduledExecutionBlocked')(
            message='Schedule {schedule_name} did not run because the should_execute did not return '
            'True'.format(schedule_name=schedule_name)
        )

    # Add dagster/schedule_id tag to executionMetadata
    execution_params = merge_dicts(
        {'executionMetadata': {'tags': []}}, schedule_def.execution_params
    )

    # Check that the dagster/schedule_id tag is not already set
    check.invariant(
        not any(
            tag['key'] == 'dagster/schedule_id'
            for tag in execution_params['executionMetadata']['tags']
        ),
        "Tag dagster/schedule_id tag is already defined in executionMetadata.tags",
    )

    # Check that the dagster/schedule_name tag is not already set
    check.invariant(
        not any(
            tag['key'] == 'dagster/schedule_name'
            for tag in execution_params['executionMetadata']['tags']
        ),
        "Tag dagster/schedule_name tag is already defined in executionMetadata.tags",
    )

    execution_params['executionMetadata']['tags'].append(
        {'key': 'dagster/schedule_id', 'value': schedule.schedule_id}
    )

    execution_params['executionMetadata']['tags'].append(
        {'key': 'dagster/schedule_name', 'value': schedule.name}
    )

    selector = execution_params['selector']
    execution_params = ExecutionParams(
        selector=ExecutionSelector(selector['name'], selector.get('solidSubset')),
        environment_dict=execution_params.get('environmentConfigData'),
        mode=execution_params.get('mode'),
        execution_metadata=create_execution_metadata(execution_params.get('executionMetadata')),
        step_keys=execution_params.get('stepKeys'),
        previous_run_id=None,
    )

    return start_pipeline_execution(graphene_info, execution_params)
Example #15
    def __new__(
        cls,
        pipeline_name,
        run_id,
        environment_dict,
        mode,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        pipeline_snapshot_id=None,
        ## GRAVEYARD BELOW
        # see https://github.com/dagster-io/dagster/issues/2372 for explanation
        previous_run_id=None,
    ):
        from dagster.core.definitions.pipeline import ExecutionSelector

        root_run_id = check.opt_str_param(root_run_id, 'root_run_id')
        parent_run_id = check.opt_str_param(parent_run_id, 'parent_run_id')

        check.invariant(
            (root_run_id is not None and parent_run_id is not None)
            or (root_run_id is None and parent_run_id is None),
            ('Must set both root_run_id and parent_run_id when creating a PipelineRun that '
             'belongs to a run group'),
        )

        # Historical runs may have previous_run_id set, in which case
        # that previous ID becomes both the root and the parent
        if previous_run_id:
            if not (parent_run_id and root_run_id):
                parent_run_id = previous_run_id
                root_run_id = previous_run_id

        return super(PipelineRun, cls).__new__(
            cls,
            pipeline_name=check.str_param(pipeline_name, 'pipeline_name'),
            run_id=check.str_param(run_id, 'run_id'),
            environment_dict=check.opt_dict_param(environment_dict,
                                                  'environment_dict',
                                                  key_type=str),
            mode=check.str_param(mode, 'mode'),
            selector=check.opt_inst_param(selector, 'selector',
                                          ExecutionSelector,
                                          ExecutionSelector(pipeline_name)),
            step_keys_to_execute=None
            if step_keys_to_execute is None else check.list_param(
                step_keys_to_execute, 'step_keys_to_execute', of_type=str),
            status=check.opt_inst_param(status, 'status', PipelineRunStatus,
                                        PipelineRunStatus.NOT_STARTED),
            tags=check.opt_dict_param(tags, 'tags', key_type=str),
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot_id=check.opt_str_param(pipeline_snapshot_id,
                                                     'pipeline_snapshot_id'),
        )
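
A short sketch of the graveyard branch above: when only previous_run_id is supplied, it is copied into both root_run_id and parent_run_id. All values are placeholders:

# Hypothetical: a historical run record carrying only previous_run_id.
run = PipelineRun(
    pipeline_name='legacy_pipeline',  # placeholder
    run_id='new_run_id',
    environment_dict={},
    mode='default',
    previous_run_id='old_run_id',
)
assert run.root_run_id == 'old_run_id'
assert run.parent_run_id == 'old_run_id'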
Example #16
def get_runtime_type(graphene_info, pipeline_name, type_name):
    pipeline = get_dagster_pipeline_from_selector(
        graphene_info, ExecutionSelector(pipeline_name))

    if not pipeline.has_runtime_type(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=pipeline, runtime_type_name=type_name))

    return to_dauphin_runtime_type(pipeline.runtime_type_named(type_name))
Example #17
def execution_params_from_graphql(graphql_execution_params):
    return ExecutionParams(
        selector=ExecutionSelector.from_dict(
            graphql_execution_params.get('selector')),
        environment_dict=graphql_execution_params.get('environmentConfigData')
        or {},
        mode=graphql_execution_params.get('mode'),
        execution_metadata=create_execution_metadata(
            graphql_execution_params.get('executionMetadata')),
        step_keys=graphql_execution_params.get('stepKeys'),
    )
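
For reference, a hedged sketch of the camelCase payload this converter reads; the keys mirror the .get calls above, and the concrete values are invented:

# Hypothetical GraphQL input matching the keys read above.
graphql_execution_params = {
    'selector': {'name': 'my_pipeline', 'solidSubset': None},  # placeholder selector dict
    'environmentConfigData': {'solids': {}},
    'mode': 'default',
    'executionMetadata': None,  # optional; normalized by create_execution_metadata
    'stepKeys': None,
}
params = execution_params_from_graphql(graphql_execution_params)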
Example #18
    def create_empty_run(pipeline_name, run_id, environment_dict=None, tags=None):
        from dagster.core.definitions.pipeline import ExecutionSelector

        return PipelineRun(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode='default',
            selector=ExecutionSelector(pipeline_name),
            step_keys_to_execute=None,
            tags=tags,
            status=PipelineRunStatus.NOT_STARTED,
        )
Example #19
    def create_run_for_pipeline(
        self,
        pipeline,
        execution_plan=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap.execution_plan_snapshot import snapshot_from_execution_plan

        check.inst_param(pipeline, 'pipeline', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        if execution_plan is None:
            execution_plan = create_execution_plan(
                pipeline,
                environment_dict=environment_dict,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        return self.get_or_create_run(
            pipeline_name=pipeline.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=check.opt_str_param(mode,
                                     'mode',
                                     default=pipeline.get_default_mode_name()),
            selector=check.opt_inst_param(
                selector,
                'selector',
                ExecutionSelector,
                default=ExecutionSelector(name=pipeline.name),
            ),
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline.get_pipeline_snapshot_id()),
        )
Example #20
def get_dagster_type(graphene_info, pipeline_name, type_name):
    dauphin_pipeline = get_dauphin_pipeline_from_selector(
        graphene_info, ExecutionSelector(pipeline_name))
    pipeline_index = dauphin_pipeline.get_pipeline_index()

    if not pipeline_index.has_dagster_type_name(type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('RuntimeTypeNotFoundError')(
                pipeline=dauphin_pipeline, runtime_type_name=type_name))

    return to_dauphin_dagster_type(
        pipeline_index.pipeline_snapshot,
        pipeline_index.get_dagster_type_from_name(type_name).key,
    )
Example #21
def build_run(
    run_id, pipeline_name, mode='default', tags=None, status=PipelineRunStatus.NOT_STARTED
):
    return PipelineRun(
        pipeline_name=pipeline_name,
        run_id=run_id,
        environment_dict=None,
        mode=mode,
        selector=ExecutionSelector(pipeline_name),
        reexecution_config=None,
        step_keys_to_execute=None,
        tags=tags,
        status=status,
    )
Example #22
def test_running():
    run_id = make_new_run_id()
    handle = ExecutionTargetHandle.for_pipeline_python_file(
        __file__, 'passing_pipeline')
    environment_dict = {
        'solids': {
            'sum_solid': {
                'inputs': {
                    'num': script_relative_path('data/num.csv')
                }
            }
        }
    }
    selector = ExecutionSelector('csv_hello_world')

    instance = DagsterInstance.local_temp()
    pipeline_run = instance.create_run(
        PipelineRun(
            pipeline_name=passing_pipeline.name,
            run_id=run_id,
            selector=selector,
            environment_dict=environment_dict,
            mode='default',
            reexecution_config=None,
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.NOT_STARTED,
        ))
    execution_manager = SubprocessExecutionManager(instance)
    execution_manager.execute_pipeline(handle,
                                       passing_pipeline,
                                       pipeline_run,
                                       instance,
                                       raise_on_error=False)
    execution_manager.join()
    assert instance.get_run(run_id).status == PipelineRunStatus.SUCCESS
    events = instance.all_logs(run_id)
    assert events

    process_start_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_START)
    assert len(process_start_events) == 1

    process_started_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_STARTED)
    assert len(process_started_events) == 1

    process_exited_events = get_events_of_type(
        events, DagsterEventType.PIPELINE_PROCESS_EXITED)
    assert len(process_exited_events) == 1
Example #23
def create_execution_params(graphene_info, graphql_execution_params):

    preset_name = graphql_execution_params.get('preset')
    if preset_name:
        check.invariant(
            not graphql_execution_params.get('environmentConfigData'),
            'Invalid ExecutionParams. Cannot define environment_dict when using preset',
        )
        check.invariant(
            not graphql_execution_params.get('mode'),
            'Invalid ExecutionParams. Cannot define mode when using preset',
        )

        selector = graphql_execution_params['selector'].to_selector()
        check.invariant(
            not selector.solid_subset,
            'Invalid ExecutionParams. Cannot define selector.solid_subset when using preset',
        )
        dauphin_pipeline = get_dauphin_pipeline_reference_from_selector(graphene_info, selector)
        pipeline = dauphin_pipeline.get_dagster_pipeline()

        if not pipeline.has_preset(preset_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('PresetNotFoundError')(
                    preset=preset_name, selector=selector
                )
            )

        preset = pipeline.get_preset(preset_name)

        return ExecutionParams(
            selector=ExecutionSelector(selector.name, preset.solid_subset),
            environment_dict=preset.environment_dict,
            mode=preset.mode,
            execution_metadata=ExecutionMetadata(run_id=None, tags={}),
            step_keys=graphql_execution_params.get('stepKeys'),
            previous_run_id=graphql_execution_params.get('retryRunId'),
        )

    return ExecutionParams(
        selector=graphql_execution_params['selector'].to_selector(),
        environment_dict=graphql_execution_params.get('environmentConfigData'),
        mode=graphql_execution_params.get('mode'),
        execution_metadata=create_execution_metadata(
            graphql_execution_params.get('executionMetadata')
        ),
        step_keys=graphql_execution_params.get('stepKeys'),
        previous_run_id=graphql_execution_params.get('retryRunId'),
    )
Example #24
def get_config_type(graphene_info, pipeline_name, config_type_name, mode):
    check.str_param(pipeline_name, 'pipeline_name')
    check.str_param(config_type_name, 'config_type_name')
    check.opt_str_param(mode, 'mode')

    pipeline = get_dagster_pipeline_from_selector(
        graphene_info, ExecutionSelector(pipeline_name))
    environment_schema = create_environment_schema(pipeline, mode)
    if not environment_schema.has_config_type(config_type_name):
        raise UserFacingGraphQLError(
            graphene_info.schema.type_named('ConfigTypeNotFoundError')(
                pipeline=pipeline, config_type_name=config_type_name))

    return to_dauphin_config_type(
        environment_schema.config_type_named(config_type_name))
Example #25
def _get_partition_sets(graphene_info, pipeline_name):
    partition_sets = graphene_info.context.get_all_partition_sets()

    if pipeline_name:
        pipeline_def = get_pipeline_def_from_selector(
            graphene_info, ExecutionSelector(pipeline_name))
        matching_partition_sets = filter(
            lambda partition_set: partition_set.pipeline_name == pipeline_def.name,
            partition_sets)
    else:
        matching_partition_sets = partition_sets

    return [
        graphene_info.schema.type_named('PartitionSet')(partition_set)
        for partition_set in matching_partition_sets
    ]
Example #26
def test_roundtrip_run():
    run_with_snapshot = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
        root_run_id='previousID',
        parent_run_id='previousID',
        pipeline_snapshot_id='pipey_mcpipeface_snapshot_id',
        execution_plan_snapshot_id='mcexecutionplanface_snapshot_id',
    )
    for field in run_with_snapshot:
        # ensure we have a test value to round trip for each field
        assert field

    # The invariant that all the execution parameter structs and the
    # pipeline run can be constructed from each other is no longer
    # true. Clients of the GraphQL API cannot know the value of the
    # pipeline_snapshot_id prior to execution, because it is
    # constructed on the server. Hence these roundtrip tests
    # do not include snapshot_id

    run = run_with_snapshot._replace(pipeline_snapshot_id=None, execution_plan_snapshot_id=None)

    exec_params = execution_params_from_pipeline_run(run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(run, key) == value

    empty_run = PipelineRun(pipeline_name='foo', run_id='bar', mode='default')
    exec_params = execution_params_from_pipeline_run(empty_run)
    for key, value in pipeline_run_args_from_execution_params(exec_params).items():
        assert getattr(empty_run, key) == value

    exec_params_gql = execution_params_from_graphql(exec_params.to_graphql_input())
    assert exec_params_gql == exec_params
    for key, value in pipeline_run_args_from_execution_params(exec_params_gql).items():
        assert getattr(empty_run, key) == value
Example #27
def test_roundtrip_run():
    run = PipelineRun(
        pipeline_name='pipey_mcpipeface',
        run_id='8675309',
        environment_dict={'good': True},
        mode='default',
        selector=ExecutionSelector('pipey_mcpipeface'),
        step_keys_to_execute=['step_1', 'step_2', 'step_3'],
        tags={'tag_it': 'bag_it'},
        status=PipelineRunStatus.NOT_STARTED,
    )
    assert run == pipeline_run_from_execution_params(
        execution_params_from_pipeline_run(run))

    empty_run = PipelineRun.create_empty_run('foo', 'bar')
    assert empty_run == pipeline_run_from_execution_params(
        execution_params_from_pipeline_run(empty_run))
Example #28
    def __new__(
        cls,
        pipeline_name,
        run_id,
        environment_dict,
        mode,
        selector=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        previous_run_id=None,
        pipeline_snapshot_id=None,
    ):
        from dagster.core.definitions.pipeline import ExecutionSelector

        tags = check.opt_dict_param(tags, 'tags', key_type=str)
        selector = check.opt_inst_param(selector, 'selector', ExecutionSelector)
        if not selector:
            selector = ExecutionSelector(pipeline_name)

        if not status:
            status = PipelineRunStatus.NOT_STARTED

        return super(PipelineRun, cls).__new__(
            cls,
            pipeline_name=check.str_param(pipeline_name, 'pipeline_name'),
            run_id=check.str_param(run_id, 'run_id'),
            environment_dict=check.opt_dict_param(
                environment_dict, 'environment_dict', key_type=str
            ),
            mode=check.str_param(mode, 'mode'),
            selector=selector,
            step_keys_to_execute=None
            if step_keys_to_execute is None
            else check.list_param(step_keys_to_execute, 'step_keys_to_execute', of_type=str),
            status=status,
            tags=check.opt_dict_param(tags, 'tags', key_type=str),
            root_run_id=check.opt_str_param(root_run_id, 'root_run_id'),
            parent_run_id=check.opt_str_param(parent_run_id, 'parent_run_id'),
            previous_run_id=check.opt_str_param(previous_run_id, 'previous_run_id'),
            pipeline_snapshot_id=check.opt_str_param(pipeline_snapshot_id, 'pipeline_snapshot_id'),
        )
Example #29
def invoke_steps_within_python_operator(invocation_args, ts, dag_run,
                                        **kwargs):  # pylint: disable=unused-argument
    mode = invocation_args.mode
    pipeline_name = invocation_args.pipeline_name
    step_keys = invocation_args.step_keys
    instance_ref = invocation_args.instance_ref
    environment_dict = invocation_args.environment_dict
    handle = invocation_args.handle
    pipeline_snapshot = invocation_args.pipeline_snapshot
    execution_plan_snapshot = invocation_args.execution_plan_snapshot

    run_id = dag_run.run_id

    variables = construct_variables(mode, environment_dict, pipeline_name,
                                    run_id, step_keys)
    variables = add_airflow_tags(variables, ts)

    logging.info('Executing GraphQL query: {query}\n'.format(
        query=EXECUTE_PLAN_MUTATION) + 'with variables:\n' +
                 seven.json.dumps(variables, indent=2))
    instance = DagsterInstance.from_ref(instance_ref) if instance_ref else None
    if instance:
        instance.get_or_create_run(
            pipeline_name=pipeline_name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=mode,
            selector=ExecutionSelector(pipeline_name),
            step_keys_to_execute=None,
            tags=None,
            status=PipelineRunStatus.MANAGED,
            pipeline_snapshot=pipeline_snapshot,
            execution_plan_snapshot=execution_plan_snapshot,
        )

    events = execute_execute_plan_mutation(
        handle,
        variables,
        instance_ref=instance_ref,
    )
    check_events_for_failures(events)
    check_events_for_skips(events)
    return events
Example #30
def create_execution_params(graphene_info, graphql_execution_params):

    preset_name = graphql_execution_params.get('preset')
    if preset_name:
        check.invariant(
            not graphql_execution_params.get('environmentConfigData'),
            'Invalid ExecutionParams. Cannot define environment_dict when using preset',
        )
        check.invariant(
            not graphql_execution_params.get('mode'),
            'Invalid ExecutionParams. Cannot define mode when using preset',
        )

        selector = graphql_execution_params['selector'].to_selector()
        check.invariant(
            not selector.solid_subset,
            'Invalid ExecutionParams. Cannot define selector.solid_subset when using preset',
        )

        external_pipeline = get_external_pipeline_or_raise(
            graphene_info, selector.name)

        if not external_pipeline.has_preset(preset_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('PresetNotFoundError')(
                    preset=preset_name, selector=selector))

        preset = external_pipeline.get_preset(preset_name)

        return ExecutionParams(
            selector=ExecutionSelector(selector.name, preset.solid_subset),
            environment_dict=preset.environment_dict,
            mode=preset.mode,
            execution_metadata=create_execution_metadata(
                graphql_execution_params.get('executionMetadata')),
            step_keys=graphql_execution_params.get('stepKeys'),
        )

    return execution_params_from_graphql(graphql_execution_params)