Example #1
def create_execution_metadata(graphql_execution_metadata):
    # Flatten the GraphQL tag list ([{'key': ..., 'value': ...}]) into a dict;
    # fall back to empty metadata when no executionMetadata input is given.
    return (ExecutionMetadata(
        graphql_execution_metadata.get('runId'),
        {
            t['key']: t['value']
            for t in graphql_execution_metadata.get('tags', [])
        },
    ) if graphql_execution_metadata else ExecutionMetadata(run_id=None,
                                                           tags={}))
Example #2
def create_execution_metadata(graphql_execution_metadata):
    # Like the variant in Example #1, but also threads through the root and
    # parent run ids used to track retry / re-execution lineage.
    return (ExecutionMetadata(
        run_id=graphql_execution_metadata.get("runId"),
        tags={
            t["key"]: t["value"]
            for t in graphql_execution_metadata.get("tags", [])
        },
        root_run_id=graphql_execution_metadata.get("rootRunId"),
        parent_run_id=graphql_execution_metadata.get("parentRunId"),
    ) if graphql_execution_metadata else ExecutionMetadata(run_id=None,
                                                           tags={}))
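
A minimal usage sketch for the variant above. The payload dict is hypothetical, shaped after the keys the function reads ('runId', 'tags', 'rootRunId', 'parentRunId'), and it assumes ExecutionMetadata exposes its fields as attributes, as the keyword construction suggests:

# Hypothetical GraphQL 'executionMetadata' input.
graphql_execution_metadata = {
    'runId': 'abc123',
    'tags': [{'key': 'owner', 'value': 'ops'}],
    'rootRunId': None,
    'parentRunId': None,
}
metadata = create_execution_metadata(graphql_execution_metadata)
assert metadata.run_id == 'abc123'
assert metadata.tags == {'owner': 'ops'}

# A missing payload falls back to empty metadata.
empty = create_execution_metadata(None)
assert empty.run_id is None and empty.tags == {}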
Example #3
def execute_remote_pipeline_run(host,
                                pipeline_name,
                                environment_dict=None,
                                tags=None,
                                solid_subset=None,
                                mode=None):
    # Assumes the surrounding dagster-graphql module provides json, check,
    # ExecutionSelector, ExecutionParams, ExecutionMetadata, DEFAULT_MODE_NAME,
    # START_PIPELINE_EXECUTION_MUTATION, and execute_query_against_remote.
    check.str_param(host, 'host')
    check.str_param(pipeline_name, 'pipeline_name')
    environment_dict = check.opt_dict_param(environment_dict,
                                            'environment_dict',
                                            key_type=str)
    check.opt_dict_param(tags, 'tags', key_type=str, value_type=str)
    check.opt_nullable_list_param(solid_subset, 'solid_subset', of_type=str)
    mode = check.opt_str_param(mode, 'mode', DEFAULT_MODE_NAME)

    selector = ExecutionSelector(pipeline_name, solid_subset)
    execution_params = ExecutionParams(
        selector=selector,
        environment_dict=environment_dict,
        mode=mode,
        execution_metadata=ExecutionMetadata(run_id=None, tags=tags or {}),
        step_keys=None,
        previous_run_id=None,
    )

    result = execute_query_against_remote(
        host,
        START_PIPELINE_EXECUTION_MUTATION,
        variables=json.dumps(
            {'executionParams': execution_params.to_graphql_input()}),
    )

    return result
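
A hypothetical call against a remote host, using only the parameters defined in the signature above; the host URL and pipeline name are placeholders:

result = execute_remote_pipeline_run(
    host='http://localhost:3000',
    pipeline_name='my_pipeline',
    environment_dict={'solids': {}},  # optional run config
    tags={'owner': 'ops'},            # optional str-to-str run tags
    mode=None,                        # falls back to DEFAULT_MODE_NAME
)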
Example #4
def execution_params_from_pipeline_run(run):
    check.inst_param(run, 'run', PipelineRun)

    return ExecutionParams(
        mode=run.mode,
        step_keys=run.step_keys_to_execute,
        environment_dict=run.environment_dict,
        selector=run.selector,
        execution_metadata=ExecutionMetadata(run_id=run.run_id, tags=run.tags),
    )
Example #5
def create_execution_params(graphene_info, graphql_execution_params):
    # A preset is mutually exclusive with explicit config, mode, and solid subset.
    preset_name = graphql_execution_params.get('preset')
    if preset_name:
        check.invariant(
            not graphql_execution_params.get('environmentConfigData'),
            'Invalid ExecutionParams. Cannot define environment_dict when using preset',
        )
        check.invariant(
            not graphql_execution_params.get('mode'),
            'Invalid ExecutionParams. Cannot define mode when using preset',
        )

        selector = graphql_execution_params['selector'].to_selector()
        check.invariant(
            not selector.solid_subset,
            'Invalid ExecutionParams. Cannot define selector.solid_subset when using preset',
        )
        dauphin_pipeline = get_dauphin_pipeline_reference_from_selector(graphene_info, selector)
        pipeline = dauphin_pipeline.get_dagster_pipeline()

        if not pipeline.has_preset(preset_name):
            raise UserFacingGraphQLError(
                graphene_info.schema.type_named('PresetNotFoundError')(
                    preset=preset_name, selector=selector
                )
            )

        preset = pipeline.get_preset(preset_name)

        return ExecutionParams(
            selector=ExecutionSelector(selector.name, preset.solid_subset),
            environment_dict=preset.environment_dict,
            mode=preset.mode,
            execution_metadata=ExecutionMetadata(run_id=None, tags={}),
            step_keys=graphql_execution_params.get('stepKeys'),
            previous_run_id=graphql_execution_params.get('retryRunId'),
        )

    return ExecutionParams(
        selector=graphql_execution_params['selector'].to_selector(),
        environment_dict=graphql_execution_params.get('environmentConfigData'),
        mode=graphql_execution_params.get('mode'),
        execution_metadata=create_execution_metadata(
            graphql_execution_params.get('executionMetadata')
        ),
        step_keys=graphql_execution_params.get('stepKeys'),
        previous_run_id=graphql_execution_params.get('retryRunId'),
    )
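
A sketch of the two input shapes the function above accepts; the keys mirror what the function reads, and selector_input stands in for a dauphin input object exposing to_selector():

# Preset-based params: environmentConfigData, mode, and a solid subset
# must be absent, since the preset supplies them.
preset_params = {
    'selector': selector_input,  # stand-in for a dauphin input with .to_selector()
    'preset': 'dev',
    'stepKeys': None,
    'retryRunId': None,
}

# Explicit params: config, mode, and execution metadata are given directly.
explicit_params = {
    'selector': selector_input,
    'environmentConfigData': {'solids': {}},
    'mode': 'default',
    'executionMetadata': {'runId': None, 'tags': []},
    'stepKeys': ['my_solid.compute'],
    'retryRunId': None,
}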
Example #6
def execution_params_from_pipeline_run(context, run):
    check.inst_param(run, 'run', PipelineRun)

    return ExecutionParams(
        mode=run.mode,
        step_keys=run.step_keys_to_execute,
        environment_dict=run.environment_dict,
        selector=PipelineSelector.legacy(context, run.pipeline_name,
                                         run.solid_subset),
        execution_metadata=ExecutionMetadata(
            run_id=run.run_id,
            tags=run.tags,
            root_run_id=run.root_run_id,
            parent_run_id=run.parent_run_id,
        ),
    )

def start_scheduled_execution(graphene_info, schedule_name):
    '''
    When a scheduler ticks and needs to run for a given schedule, it issues a
    START_SCHEDULED_EXECUTION mutation with just the schedule name. The mutation is
    resolved entirely by this method.
    '''

    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.str_param(schedule_name, 'schedule_name')

    tick = None
    try:
        # We first load the repository and schedule definition to create
        # and store a ScheduleTick.
        # If this fails, this error should be sent to the file based scheduler logs.
        external_repository = graphene_info.context.get_external_repository()
        repository_name = external_repository.name
        schedule_def = get_dagster_schedule_def(graphene_info, schedule_name)
        cron_schedule = "Unknown" if not schedule_def else schedule_def.cron_schedule
        tick = graphene_info.context.instance.create_schedule_tick(
            repository_name,
            ScheduleTickData(
                schedule_name=schedule_name,
                cron_schedule=cron_schedule,
                timestamp=time.time(),
                status=ScheduleTickStatus.STARTED,
            ),
        )

        # Run should_execute and halt if it returns False
        schedule_context = ScheduleExecutionContext(
            graphene_info.context.instance)
        with user_code_error_boundary(
                ScheduleExecutionError,
                lambda:
                'Error occurred during the execution of should_execute for schedule '
                '{schedule_name}'.format(schedule_name=schedule_def.name),
        ):
            should_execute = schedule_def.should_execute(schedule_context)

        if not should_execute:
            # Update tick to skipped state and return
            tick = tick.with_status(ScheduleTickStatus.SKIPPED)
            graphene_info.context.instance.update_schedule_tick(
                repository_name, tick)
            # Return the skipped-specific GraphQL response.
            return graphene_info.schema.type_named('ScheduledExecutionBlocked')(
                message='Schedule {schedule_name} did not run because should_execute did not'
                ' return True'.format(schedule_name=schedule_name))

        errors = []

        environment_dict = {}
        schedule_tags = {}
        try:
            with user_code_error_boundary(
                    ScheduleExecutionError,
                    lambda:
                    'Error occurred during the execution of environment_dict_fn for schedule '
                    '{schedule_name}'.format(schedule_name=schedule_def.name),
            ):
                environment_dict = schedule_def.get_environment_dict(
                    schedule_context)
        except DagsterUserCodeExecutionError:
            error_data = serializable_error_info_from_exc_info(sys.exc_info())
            errors.append(error_data)

        try:
            with user_code_error_boundary(
                    ScheduleExecutionError,
                    lambda:
                    'Error occurred during the execution of tags_fn for schedule '
                    '{schedule_name}'.format(schedule_name=schedule_def.name),
            ):
                schedule_tags = schedule_def.get_tags(schedule_context)
        except DagsterUserCodeExecutionError:
            error_data = serializable_error_info_from_exc_info(sys.exc_info())
            errors.append(error_data)

        external_pipeline = get_external_pipeline_or_raise(
            graphene_info, schedule_def.selector.name,
            schedule_def.selector.solid_subset)
        pipeline_tags = external_pipeline.tags or {}
        check_tags(pipeline_tags, 'pipeline_tags')
        tags = merge_dicts(pipeline_tags, schedule_tags)

        selector = schedule_def.selector
        mode = schedule_def.mode

        execution_params = ExecutionParams(
            selector=selector,
            environment_dict=environment_dict,
            mode=mode,
            execution_metadata=ExecutionMetadata(tags=tags, run_id=None),
            step_keys=None,
        )

        run, result = _execute_schedule(graphene_info, external_pipeline,
                                        execution_params, errors)
        graphene_info.context.instance.update_schedule_tick(
            repository_name,
            tick.with_status(ScheduleTickStatus.SUCCESS, run_id=run.run_id),
        )

        return result

    except Exception:  # pylint: disable=broad-except
        error_data = serializable_error_info_from_exc_info(sys.exc_info())

        if tick:
            graphene_info.context.instance.update_schedule_tick(
                repository_name,
                tick.with_status(ScheduleTickStatus.FAILURE, error=error_data),
            )

        raise
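
Per the docstring, the scheduler triggers this resolver with a mutation carrying only the schedule name. A hypothetical client-side sketch follows; the GraphQL field and selection names are assumptions, not taken from the actual schema:

# Assumed shape of the mutation the scheduler would send; only the
# schedule name is required, per the resolver's docstring.
START_SCHEDULED_EXECUTION_MUTATION = '''
mutation StartScheduledExecution($scheduleName: String!) {
  startScheduledExecution(scheduleName: $scheduleName) {
    __typename
  }
}
'''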