Example #1
def get_subset_external_pipeline(context, selector):
    from ..schema.pipelines.pipeline_errors import GrapheneInvalidSubsetError
    from ..schema.pipelines.pipeline import GraphenePipeline

    check.inst_param(selector, "selector", PipelineSelector)

    repository_location = context.get_repository_location(
        selector.location_name)
    external_repository = repository_location.get_repository(
        selector.repository_name)

    try:
        subset_result = repository_location.get_subset_external_pipeline_result(
            selector)
    except Exception:  # pylint: disable=broad-except
        error_info = serializable_error_info_from_exc_info(sys.exc_info())
        raise UserFacingGraphQLError(
            GrapheneInvalidSubsetError(
                message="{message}{cause_message}".format(
                    message=error_info.message,
                    cause_message="\n{}".format(error_info.cause.message)
                    if error_info.cause else "",
                ),
                pipeline=GraphenePipeline(
                    context.get_full_external_pipeline(selector)),
            ))

    return ExternalPipeline(
        subset_result.external_pipeline_data,
        repository_handle=external_repository.handle,
    )
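The same three-step pattern recurs throughout the examples on this page: build a PipelineSelector, ask the repository location for a subset result, then wrap the result in an ExternalPipeline. A stripped-down sketch, assuming repo_location and external_repo have already been resolved and using a hypothetical pipeline name:

# 1. Describe which pipeline (and which solid subset) to load.
selector = PipelineSelector(
    location_name=repo_location.name,
    repository_name=external_repo.name,
    pipeline_name="my_pipeline",  # hypothetical name, not from the snippets
    solid_selection=None,  # None loads the full pipeline
)

# 2. Ask the repository location to compute the subset.
subset_result = repo_location.get_subset_external_pipeline_result(selector)

# 3. Wrap the returned data in an ExternalPipeline host representation.
external_pipeline = ExternalPipeline(
    subset_result.external_pipeline_data,
    repository_handle=external_repo.handle,
)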
Example #2
    def get_external_pipeline(self):
        if self._cached_external_pipeline is None:
            from dagster.core.host_representation import ExternalPipeline

            self._cached_external_pipeline = ExternalPipeline.from_pipeline_def(
                self)
        return self._cached_external_pipeline
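A minimal usage sketch, assuming pipeline_def is an object exposing the cached accessor above (the variable name is illustrative): repeated calls return the same ExternalPipeline, built once via from_pipeline_def and then served from the cache.

external_a = pipeline_def.get_external_pipeline()
external_b = pipeline_def.get_external_pipeline()
assert external_a is external_b  # the second call hits the cache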
Example #3
def external_pipeline_from_location_handle(repository_location_handle,
                                           external_pipeline_origin,
                                           solid_selection):
    check.inst_param(repository_location_handle, "repository_location_handle",
                     RepositoryLocationHandle)
    check.inst_param(external_pipeline_origin, "external_pipeline_origin",
                     ExternalPipelineOrigin)

    repo_location = repository_location_handle.create_location()
    repo_name = external_pipeline_origin.external_repository_origin.repository_name
    pipeline_name = external_pipeline_origin.pipeline_name

    check.invariant(
        repo_location.has_repository(repo_name),
        "Could not find repository {repo_name} in location {repo_location_name}"
        .format(repo_name=repo_name, repo_location_name=repo_location.name),
    )
    external_repo = repo_location.get_repository(repo_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
Example #4
def external_pipeline_from_location_handle(repository_location_handle,
                                           pipeline_name, solid_selection):
    check.inst_param(repository_location_handle, "repository_location_handle",
                     RepositoryLocationHandle)

    repo_location = RepositoryLocation.from_handle(repository_location_handle)
    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
Example #5
def external_pipeline_from_run(pipeline_run):
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    external_pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)

    with RepositoryLocationHandle.create_from_repository_location_origin(
            external_pipeline_origin.external_repository_origin.repository_location_origin
    ) as repo_location_handle:
        repo_location = RepositoryLocation.from_handle(repo_location_handle)

        repo_dict = repo_location.get_repositories()
        check.invariant(
            len(repo_dict) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        external_repo = next(iter(repo_dict.values()))

        pipeline_selector = PipelineSelector(
            location_name=repo_location.name,
            repository_name=external_repo.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )

        subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
            pipeline_selector)
        external_pipeline = ExternalPipeline(
            subset_pipeline_result.external_pipeline_data,
            external_repo.handle,
        )
        # Yield (rather than return) inside the location handle's context so it
        # stays open while the caller uses the external pipeline.
        yield external_pipeline
Example #6
    def get_external_pipeline(self, name, solid_subset):
        check.str_param(name, 'name')
        check.list_param(solid_subset, 'solid_subset', of_type=str)

        return ExternalPipeline.from_pipeline_def(
            self.get_reconstructable_pipeline(name).get_definition(),
            solid_subset=solid_subset,
        )
Example #7
    def get_external_pipeline(self, handle, solid_subset):
        check.inst_param(handle, 'handle', PipelineHandle)
        check.invariant(
            handle.environment_name == self.name,
            'Received invalid handle, environment name mismatch',
        )
        return ExternalPipeline.from_pipeline_def(
            self.get_reconstructable_pipeline(
                handle.pipeline_name).get_definition(), solid_subset)
Example #8
def external_pipeline_from_recon_pipeline(recon_pipeline, solid_selection, repository_handle):
    if solid_selection:
        sub_pipeline = recon_pipeline.subset_for_execution(solid_selection)
        pipeline_def = sub_pipeline.get_definition()
    else:
        pipeline_def = recon_pipeline.get_definition()

    return ExternalPipeline(
        external_pipeline_data_from_def(pipeline_def), repository_handle=repository_handle,
    )
Example #9
def external_pipeline_from_recon_pipeline(recon_pipeline, solid_subset,
                                          repository_handle):
    full_pipeline_def = recon_pipeline.get_definition()

    pipeline_def = (full_pipeline_def.subset_for_execution(solid_subset)
                    if solid_subset else full_pipeline_def)

    return ExternalPipeline(
        external_pipeline_data_from_def(pipeline_def),
        repository_handle=repository_handle,
    )
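A minimal call sketch for the helper shown in the two variants above, using the keyword names from Example #8; recon_pipeline and repo_handle are assumed to be an existing ReconstructablePipeline and RepositoryHandle:

external_pipeline = external_pipeline_from_recon_pipeline(
    recon_pipeline,  # assumed ReconstructablePipeline
    solid_selection=["solid_a", "solid_b"],  # or None for the full pipeline
    repository_handle=repo_handle,  # assumed RepositoryHandle
)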
Example #10
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    context.logger.info(
        f"Checking for new runs for sensor: {external_sensor.name}")
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data else None,
        job_state.job_specific_data.last_run_key
        if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            f"Failed to resolve sensor for {external_sensor.name} : {sensor_runtime_data.error.to_string()}"
        )
        context.update_state(JobTickStatus.FAILURE,
                             error=sensor_runtime_data.error)
        # Consumed as a generator: bare yields hand control back to the caller
        # between units of work (e.g. after a state update or run launch).
        yield
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}")
            context.update_state(JobTickStatus.SKIPPED,
                                 skip_reason=sensor_runtime_data.skip_message)
        else:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        yield
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    skipped_runs = []
    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(context, instance, repo_location,
                                        external_sensor, external_pipeline,
                                        run_request)

        if isinstance(run, SkippedSensorRun):
            skipped_runs.append(run)
            yield
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                f"Run {run.run_id} created successfully but failed to launch: "
                f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
            )
        yield

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if skipped_runs:
        run_keys = [skipped.run_key for skipped in skipped_runs]
        skipped_count = len(skipped_runs)
        context.logger.info(
            f"Skipping {skipped_count} {'run' if skipped_count == 1 else 'runs'} for sensor "
            f"{external_sensor.name} already completed with run keys: {seven.json.dumps(run_keys)}"
        )

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)

    yield
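Because _evaluate_sensor is written as a generator, the caller drives it one checkpoint at a time; a minimal consumption sketch (the housekeeping hook is an assumption, not part of the snippets):

for _ in _evaluate_sensor(context, instance, repo_location,
                          external_repo, external_sensor, job_state):
    heartbeat()  # assumed housekeeping between yielded units of work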
Example #11
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data else None,
        job_state.job_specific_data.last_run_key
        if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            "Failed to resolve sensor for {sensor_name} : {error_info}".format(
                sensor_name=external_sensor.name,
                error_info=sensor_runtime_data.error.to_string(),
            ))
        context.update_state(JobTickStatus.FAILURE,
                             error=sensor_runtime_data.error)
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}")
            context.update_state(JobTickStatus.SKIPPED,
                                 skip_reason=sensor_runtime_data.skip_message)
        else:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(context, instance, repo_location,
                                        external_sensor, external_pipeline,
                                        run_request)

        if not run:
            # we already found and resolved a run
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                "Run {run_id} created successfully but failed to launch.".
                format(run_id=run.run_id))

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)
Example #12
File: pipeline.py Project: drat/dagster
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    if not subset_pipeline_result.success:
        raise DagsterLaunchFailedError(
            "Failed to load external pipeline subset: {error_message}".format(
                error_message=subset_pipeline_result.error.message),
            serializable_error_info=subset_pipeline_result.error,
        )

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
    )
    if isinstance(external_execution_plan, ExecutionPlanSnapshotErrorData):
        raise DagsterLaunchFailedError(
            "Failed to load external execution plan",
            serializable_error_info=external_execution_plan.error,
        )
    else:
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
    )
Example #13
def _schedule_runs_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_context,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=schedule_time,
    )

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        logger.error(
            f"Failed to fetch schedule data for {external_schedule.name}: {error.to_string()}"
        )
        tick_context.update_state(JobTickStatus.FAILURE, error=error)
        return

    if not schedule_execution_data.run_requests:
        logger.info(
            f"No run requests returned for {external_schedule.name}, skipping")

        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        return

    for run_request in schedule_execution_data.run_requests:
        run = _get_existing_run_for_request(instance, external_schedule,
                                            schedule_time, run_request)
        if run:
            if run.status != PipelineRunStatus.NOT_STARTED:
                # A run already exists and was launched for this time period,
                # but the scheduler must have crashed before the tick could be put
                # into a SUCCESS state

                logger.info(
                    f"Run {run.run_id} already completed for this execution of {external_schedule.name}"
                )
                tick_context.add_run(run_id=run.run_id,
                                     run_key=run_request.run_key)
                continue
            else:
                logger.info(
                    f"Run {run.run_id} already created for this execution of {external_schedule.name}"
                )
        else:
            run = _create_scheduler_run(
                instance,
                logger,
                schedule_time,
                repo_location,
                external_schedule,
                external_pipeline,
                run_request,
            )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

        if run.status != PipelineRunStatus.FAILURE:
            try:
                instance.submit_run(run.run_id, external_pipeline)
                logger.info(
                    f"Completed scheduled launch of run {run.run_id} for {schedule_name}"
                )
            except Exception:  # pylint: disable=broad-except
                logger.error(
                    f"Run {run.run_id} created successfully but failed to launch."
                )

        _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
        tick_context.add_run(run_id=run.run_id, run_key=run_request.run_key)
        _check_for_debug_crash(debug_crash_flags, "RUN_ADDED")

    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
    tick_context.update_state(JobTickStatus.SUCCESS)
Example #14
def _check_execute_external_pipeline_args(
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
) -> Tuple[Dict[str, object], str, Mapping[str, object], Optional[List[str]]]:
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    run_config = check.opt_dict_param(run_config, "run_config")
    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.invariant(
        not (mode is not None and preset is not None),
        "You may set only one of `mode` (got {mode}) or `preset` (got {preset})."
        .format(mode=mode, preset=preset),
    )

    tags = check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)

    if preset is not None:
        pipeline_preset = external_pipeline.get_preset(preset)

        if pipeline_preset.run_config is not None:
            check.invariant(
                (not run_config) or (pipeline_preset.run_config == run_config),
                "The environment set in preset '{preset}' does not agree with the environment "
                "passed in the `run_config` argument.".format(preset=preset),
            )

            run_config = pipeline_preset.run_config

        # load solid_selection from preset
        if pipeline_preset.solid_selection is not None:
            check.invariant(
                solid_selection is None
                or solid_selection == pipeline_preset.solid_selection,
                "The solid_selection set in preset '{preset}', {preset_subset}, does not agree with "
                "the `solid_selection` argument: {solid_selection}".format(
                    preset=preset,
                    preset_subset=pipeline_preset.solid_selection,
                    solid_selection=solid_selection,
                ),
            )
            solid_selection = pipeline_preset.solid_selection

        check.invariant(
            mode is None or mode == pipeline_preset.mode,
            "Mode {mode} does not agree with the mode set in preset '{preset}': "
            "('{preset_mode}')".format(preset=preset,
                                       preset_mode=pipeline_preset.mode,
                                       mode=mode),
        )

        mode = pipeline_preset.mode

        tags = merge_dicts(pipeline_preset.tags, tags)

    if mode is not None:
        if not external_pipeline.has_mode(mode):
            raise DagsterInvariantViolationError((
                "You have attempted to execute pipeline {name} with mode {mode}. "
                "Available modes: {modes}").format(
                    name=external_pipeline.name,
                    mode=mode,
                    modes=external_pipeline.available_modes,
                ))
    else:
        if len(external_pipeline.available_modes) > 1:
            raise DagsterInvariantViolationError((
                "Pipeline {name} has multiple modes (Available modes: {modes}) and you have "
                "attempted to execute it without specifying a mode. Set "
                "mode property on the PipelineRun object.").format(
                    name=external_pipeline.name,
                    modes=external_pipeline.available_modes))
        mode = external_pipeline.get_default_mode_name()

    tags = merge_dicts(external_pipeline.tags, tags)

    return (
        run_config,
        mode,
        tags,
        solid_selection,
    )
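A minimal sketch of how the checker above might be invoked, assuming external_pipeline is an already-loaded ExternalPipeline; either mode or preset may be supplied, but not both:

# Hypothetical call: normalize run arguments from a preset before creating a run.
run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
    external_pipeline,
    run_config={},  # empty here, so the preset's run_config (if any) is used
    mode=None,
    preset="dev",  # assumed preset name defined on the pipeline
    tags=None,
    solid_selection=None,
)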
Example #15
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(tags=merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat()},
    ))
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if len(existing_runs):
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]

        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state

            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_name))
            tick_holder.update_with_status(JobTickStatus.SUCCESS,
                                           run_id=run.run_id)

            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_name))
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(tick_holder.status != JobTickStatus.STARTED
                        and tick_holder.status != JobTickStatus.SUCCESS)
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.submit_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}"
                .format(run_id=run_to_launch.run_id,
                        schedule_name=schedule_name))
        except Exception:  # pylint: disable=broad-except
            logger.error(
                "Run {run_id} created successfully but failed to launch.".
                format(run_id=run_to_launch.run_id))

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")

    tick_holder.update_with_status(JobTickStatus.SUCCESS,
                                   run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
Example #16
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    schedule_state,
    schedule_time_utc,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = schedule_state.name

    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))

    external_schedule = external_repo.get_external_schedule(schedule_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(tags=merge_dicts(
        PipelineRun.tags_for_schedule(schedule_state),
        {SCHEDULED_EXECUTION_TIME_TAG: schedule_time_utc.isoformat()},
    ))
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if len(existing_runs):
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]

        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state

            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_state.name))
            tick_holder.update_with_status(ScheduleTickStatus.SUCCESS,
                                           run_id=run.run_id)

            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_state.name))
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time_utc,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(tick_holder.status != ScheduleTickStatus.STARTED
                        and tick_holder.status != ScheduleTickStatus.SUCCESS)
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.launch_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}"
                .format(run_id=run_to_launch.run_id,
                        schedule_name=schedule_name))
        except Exception as e:  # pylint: disable=broad-except
            if not isinstance(e, KeyboardInterrupt):
                error = serializable_error_info_from_exc_info(sys.exc_info())
                instance.report_engine_event(
                    error.message,
                    run_to_launch,
                    EngineEventData.engine_error(error),
                )
                instance.report_run_failed(run_to_launch)
                logger.error(
                    "Run {run_id} created successfully but failed to launch.".
                    format(run_id=run_to_launch.run_id))

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")

    tick_holder.update_with_status(ScheduleTickStatus.SUCCESS,
                                   run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
Example #17
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #18
def _create_external_pipeline_run(
    instance: DagsterInstance,
    repo_location: RepositoryLocation,
    external_repo: ExternalRepository,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
    run_id: Optional[str],
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config", key_type=str)

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    external_pipeline = repo_location.get_external_pipeline(pipeline_selector)

    pipeline_mode = mode or external_pipeline.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=execution_plan_snapshot.step_keys_to_execute,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )