Example #1
def pipeline_selector_from_graphql(context, data):
    from dagster_graphql.implementation.context import DagsterGraphQLContext

    check.inst_param(context, 'context', DagsterGraphQLContext)

    # legacy case
    if data.get('name'):
        check.invariant(
            data.get('repositoryLocationName') is None
            and data.get('repositoryName') is None
            and data.get('pipelineName') is None,
            'Invalid legacy PipelineSelector, contains modern name fields',
        )

        return legacy_pipeline_selector(
            context,
            name=data['name'],
            solid_selection=data.get('solidSelection'),
        )

    # can be removed once DauphinPipelineSelector fields
    # can be made NonNull
    check.invariant(
        data.get('repositoryLocationName') and data.get('repositoryName')
        and data.get('pipelineName'),
        'Invalid PipelineSelector, must have all name fields',
    )

    return PipelineSelector(
        location_name=data['repositoryLocationName'],
        repository_name=data['repositoryName'],
        pipeline_name=data['pipelineName'],
        solid_selection=data.get('solidSelection'),
    )
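
The two branches above accept differently shaped GraphQL payloads: the legacy branch keys off a bare "name" field, while the modern branch requires all three name fields. A minimal sketch of each shape (the field values are hypothetical):

# Hypothetical payloads for the two branches above; the invariants reject
# any payload that mixes the legacy "name" field with the modern fields.
legacy_payload = {
    "name": "my_pipeline",
    "solidSelection": ["add_one", "add_two"],
}
modern_payload = {
    "repositoryLocationName": "local",
    "repositoryName": "my_repo",
    "pipelineName": "my_pipeline",
}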
Example #2
def pipeline_selector_from_graphql(data):
    return PipelineSelector(
        location_name=data["repositoryLocationName"],
        repository_name=data["repositoryName"],
        pipeline_name=data.get("pipelineName") or data.get("jobName"),
        solid_selection=data.get("solidSelection"),
    )
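
Example #2 drops the context argument and the invariant checks, and accepts the newer "jobName" key as a fallback for "pipelineName". A self-contained sketch of that behavior, with a namedtuple standing in for dagster's PipelineSelector (the payload values are hypothetical):

from collections import namedtuple

# Stand-in for dagster's PipelineSelector, for illustration only
PipelineSelector = namedtuple(
    "PipelineSelector",
    ["location_name", "repository_name", "pipeline_name", "solid_selection"],
)

def pipeline_selector_from_graphql(data):
    return PipelineSelector(
        location_name=data["repositoryLocationName"],
        repository_name=data["repositoryName"],
        pipeline_name=data.get("pipelineName") or data.get("jobName"),
        solid_selection=data.get("solidSelection"),
    )

selector = pipeline_selector_from_graphql({
    "repositoryLocationName": "local",
    "repositoryName": "my_repo",
    "jobName": "my_job",
})
assert selector.pipeline_name == "my_job"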
Example #3
def pipeline_selector_from_graphql(context, data):
    from dagster_graphql.implementation.context import DagsterGraphQLContext

    check.inst_param(context, "context", DagsterGraphQLContext)

    return PipelineSelector(
        location_name=data["repositoryLocationName"],
        repository_name=data["repositoryName"],
        pipeline_name=data["pipelineName"],
        solid_selection=data.get("solidSelection"),
    )
Example #4
def legacy_pipeline_selector(context, name, solid_selection):
    from dagster_graphql.implementation.context import DagsterGraphQLContext

    check.inst_param(context, 'context', DagsterGraphQLContext)

    return PipelineSelector(
        location_name=context.legacy_location.name,
        repository_name=context.legacy_external_repository.name,
        pipeline_name=name,
        solid_selection=solid_selection,
    )
Example #5
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    schedule_state,
    schedule_time_utc,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = schedule_state.name

    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))

    external_schedule = external_repo.get_external_schedule(schedule_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(tags=merge_dicts(
        PipelineRun.tags_for_schedule(schedule_state),
        {SCHEDULED_EXECUTION_TIME_TAG: schedule_time_utc.isoformat()},
    ))
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if existing_runs:
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]

        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state

            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_state.name))
            tick_holder.update_with_status(ScheduleTickStatus.SUCCESS,
                                           run_id=run.run_id)

            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_state.name))
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time_utc,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(tick_holder.status != ScheduleTickStatus.STARTED
                        and tick_holder.status != ScheduleTickStatus.SUCCESS)
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.launch_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}"
                .format(run_id=run_to_launch.run_id,
                        schedule_name=schedule_name))
        except Exception:  # pylint: disable=broad-except
            # KeyboardInterrupt derives from BaseException, so it never reaches
            # this handler and needs no special-casing
            error = serializable_error_info_from_exc_info(sys.exc_info())
            instance.report_engine_event(
                error.message,
                run_to_launch,
                EngineEventData.engine_error(error),
            )
            instance.report_run_failed(run_to_launch)
            logger.error(
                "Run {run_id} created successfully but failed to launch.".format(
                    run_id=run_to_launch.run_id))

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")

    tick_holder.update_with_status(ScheduleTickStatus.SUCCESS,
                                   run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
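
The PipelineRunsFilter query above is what makes the scheduler idempotent across crashes: a run created for a given scheduled time can be found again by its tags on the next attempt. A minimal sketch of that dedup logic over plain dicts (the tag keys and run shape are assumptions for illustration, not dagster's actual constants):

SCHEDULE_NAME_TAG = "dagster/schedule_name"  # assumed tag key
SCHEDULED_EXECUTION_TIME_TAG = "dagster/scheduled_execution_time"  # assumed tag key

def find_existing_run(runs, schedule_name, execution_time_iso):
    matches = [
        run for run in runs
        if run["tags"].get(SCHEDULE_NAME_TAG) == schedule_name
        and run["tags"].get(SCHEDULED_EXECUTION_TIME_TAG) == execution_time_iso
    ]
    # mirrors check.invariant(len(existing_runs) == 1) above
    assert len(matches) <= 1, "at most one run per schedule per scheduled time"
    return matches[0] if matches else None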
Example #6
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    context.logger.info(
        f"Checking for new runs for sensor: {external_sensor.name}")
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data else None,
        job_state.job_specific_data.last_run_key
        if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            f"Failed to resolve sensor for {external_sensor.name}: {sensor_runtime_data.error.to_string()}"
        )
        context.update_state(JobTickStatus.FAILURE,
                             error=sensor_runtime_data.error)
        yield
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}")
            context.update_state(JobTickStatus.SKIPPED,
                                 skip_reason=sensor_runtime_data.skip_message)
        else:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        yield
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    skipped_runs = []
    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(context, instance, repo_location,
                                        external_sensor, external_pipeline,
                                        run_request)

        if isinstance(run, SkippedSensorRun):
            skipped_runs.append(run)
            yield
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                f"Run {run.run_id} created successfully but failed to launch: "
                f"{str(serializable_error_info_from_exc_info(sys.exc_info()))}"
            )
        yield

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if skipped_runs:
        run_keys = [skipped.run_key for skipped in skipped_runs]
        skipped_count = len(skipped_runs)
        context.logger.info(
            f"Skipping {skipped_count} {'run' if skipped_count == 1 else 'runs'} for sensor "
            f"{external_sensor.name} already completed with run keys: {seven.json.dumps(run_keys)}"
        )

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)

    yield
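
Unlike Example #7 below, this version is a generator: it yields after each unit of work so the caller (the daemon loop) can regain control between run submissions, for example to record heartbeats. A toy sketch of that cooperative pattern (submit_one is a hypothetical stand-in for instance.submit_run):

def submit_one(request):
    # stand-in for instance.submit_run; returns None on success
    return None if request != "bad" else f"failed to launch {request}"

def evaluate(requests):
    for request in requests:
        error_info = submit_one(request)
        yield error_info  # hand control back to the daemon loop after each step

for error_info in evaluate(["a", "bad", "c"]):
    if error_info:
        print(f"step error: {error_info}")  # the loop continues past a failed step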
Example #7
def _evaluate_sensor(
    context,
    instance,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data else None,
        job_state.job_specific_data.last_run_key
        if job_state.job_specific_data else None,
    )
    if isinstance(sensor_runtime_data, ExternalSensorExecutionErrorData):
        context.logger.error(
            "Failed to resolve sensor for {sensor_name}: {error_info}".format(
                sensor_name=external_sensor.name,
                error_info=sensor_runtime_data.error.to_string(),
            ))
        context.update_state(JobTickStatus.FAILURE,
                             error=sensor_runtime_data.error)
        return

    assert isinstance(sensor_runtime_data, ExternalSensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping: "
                f"{sensor_runtime_data.skip_message}")
            context.update_state(JobTickStatus.SKIPPED,
                                 skip_reason=sensor_runtime_data.skip_message)
        else:
            context.logger.info(
                f"Sensor returned false for {external_sensor.name}, skipping")
            context.update_state(JobTickStatus.SKIPPED)
        return

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_sensor.pipeline_name,
        solid_selection=external_sensor.solid_selection,
    )
    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    for run_request in sensor_runtime_data.run_requests:
        run = _get_or_create_sensor_run(context, instance, repo_location,
                                        external_sensor, external_pipeline,
                                        run_request)

        if not run:
            # a run for this run key already exists, so there is nothing new to launch
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, external_pipeline)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:  # pylint: disable=broad-except
            context.logger.error(
                "Run {run_id} created successfully but failed to launch.".format(
                    run_id=run.run_id))

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if context.run_count:
        context.update_state(JobTickStatus.SUCCESS)
    else:
        context.update_state(JobTickStatus.SKIPPED)
Example #8
def _schedule_runs_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_context,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=schedule_time,
    )

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        logger.error(
            f"Failed to fetch schedule data for {external_schedule.name}: {error.to_string()}"
        )
        tick_context.update_state(JobTickStatus.FAILURE, error=error)
        return

    if not schedule_execution_data.run_requests:
        logger.info(
            f"No run requests returned for {external_schedule.name}, skipping")

        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        return

    for run_request in schedule_execution_data.run_requests:
        run = _get_existing_run_for_request(instance, external_schedule,
                                            schedule_time, run_request)
        if run:
            if run.status != PipelineRunStatus.NOT_STARTED:
                # A run already exists and was launched for this time period,
                # but the scheduler must have crashed before the tick could be put
                # into a SUCCESS state

                logger.info(
                    f"Run {run.run_id} already completed for this execution of {external_schedule.name}"
                )
                tick_context.add_run(run_id=run.run_id,
                                     run_key=run_request.run_key)
                continue
            else:
                logger.info(
                    f"Run {run.run_id} already created for this execution of {external_schedule.name}"
                )
        else:
            run = _create_scheduler_run(
                instance,
                logger,
                schedule_time,
                repo_location,
                external_schedule,
                external_pipeline,
                run_request,
            )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

        if run.status != PipelineRunStatus.FAILURE:
            try:
                instance.submit_run(run.run_id, external_pipeline)
                logger.info(
                    f"Completed scheduled launch of run {run.run_id} for {schedule_name}"
                )
            except Exception:  # pylint: disable=broad-except
                logger.error(
                    f"Run {run.run_id} created successfully but failed to launch."
                )

        _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
        tick_context.add_run(run_id=run.run_id, run_key=run_request.run_key)
        _check_for_debug_crash(debug_crash_flags, "RUN_ADDED")

    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
    tick_context.update_state(JobTickStatus.SUCCESS)
Example #9
def _evaluate_sensor(
    context,
    instance,
    workspace,
    repo_location,
    external_repo,
    external_sensor,
    job_state,
    sensor_debug_crash_flags=None,
):
    context.logger.info(
        f"Checking for new runs for sensor: {external_sensor.name}")
    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        external_repo.handle,
        external_sensor.name,
        job_state.job_specific_data.last_tick_timestamp
        if job_state.job_specific_data else None,
        job_state.job_specific_data.last_run_key
        if job_state.job_specific_data else None,
        job_state.job_specific_data.cursor
        if job_state.job_specific_data else None,
    )

    yield

    assert isinstance(sensor_runtime_data, SensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.pipeline_run_reactions:
            for pipeline_run_reaction in sensor_runtime_data.pipeline_run_reactions:
                origin_run_id = pipeline_run_reaction.pipeline_run.run_id
                if pipeline_run_reaction.error:
                    context.logger.error(
                        f"Got a reaction request for run {origin_run_id} but execution errored: {pipeline_run_reaction.error}"
                    )
                    context.update_state(
                        TickStatus.FAILURE,
                        cursor=sensor_runtime_data.cursor,
                        error=pipeline_run_reaction.error,
                    )
                else:
                    # log to the original pipeline run
                    message = (
                        f'Sensor "{external_sensor.name}" acted on run status '
                        f"{pipeline_run_reaction.pipeline_run.status.value} of run {origin_run_id}."
                    )
                    instance.report_engine_event(
                        message=message,
                        pipeline_run=pipeline_run_reaction.pipeline_run)
                    context.logger.info(
                        f"Completed a reaction request for run {origin_run_id}: {message}"
                    )
                    context.update_state(
                        TickStatus.SUCCESS,
                        cursor=sensor_runtime_data.cursor,
                        origin_run_id=origin_run_id,
                    )
        elif sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor {external_sensor.name} skipped: {sensor_runtime_data.skip_message}"
            )
            context.update_state(
                TickStatus.SKIPPED,
                skip_reason=sensor_runtime_data.skip_message,
                cursor=sensor_runtime_data.cursor,
            )
        else:
            context.logger.info(
                f"No run requests returned for {external_sensor.name}, skipping"
            )
            context.update_state(TickStatus.SKIPPED,
                                 cursor=sensor_runtime_data.cursor)

        yield
        return

    skipped_runs = []
    for run_request in sensor_runtime_data.run_requests:

        target_data = external_sensor.get_target_data(run_request.job_name)

        pipeline_selector = PipelineSelector(
            location_name=repo_location.name,
            repository_name=external_repo.name,
            pipeline_name=target_data.pipeline_name,
            solid_selection=target_data.solid_selection,
        )
        external_pipeline = repo_location.get_external_pipeline(
            pipeline_selector)
        run = _get_or_create_sensor_run(
            context,
            instance,
            repo_location,
            external_sensor,
            external_pipeline,
            run_request,
            target_data,
        )

        if isinstance(run, SkippedSensorRun):
            skipped_runs.append(run)
            yield
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        error_info = None

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, workspace)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            context.logger.error(
                f"Run {run.run_id} created successfully but failed to launch: "
                f"{str(error_info)}")

        yield error_info

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run(run_id=run.run_id, run_key=run_request.run_key)

    if skipped_runs:
        run_keys = [skipped.run_key for skipped in skipped_runs]
        skipped_count = len(skipped_runs)
        context.logger.info(
            f"Skipping {skipped_count} {'run' if skipped_count == 1 else 'runs'} for sensor "
            f"{external_sensor.name} already completed with run keys: {seven.json.dumps(run_keys)}"
        )

    if context.run_count:
        context.update_state(TickStatus.SUCCESS,
                             cursor=sensor_runtime_data.cursor)
    else:
        context.update_state(TickStatus.SKIPPED,
                             cursor=sensor_runtime_data.cursor)

    yield
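
Note that every terminal update_state call above passes sensor_runtime_data.cursor, so the cursor advances whether the tick succeeds or skips. A toy sketch of why that threading matters (the state dict and cursor format are hypothetical stand-ins for the stored instigator state):

state = {"cursor": None}

def evaluate_tick(cursor):
    # pretend the sensor consumed events up to a new offset
    return "1" if cursor is None else str(int(cursor) + 1)

for _ in range(3):
    state["cursor"] = evaluate_tick(state["cursor"])

print(state["cursor"])  # "3": each tick resumes from the previously stored cursor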
Example #10
def _schedule_run_at_time(
    instance,
    logger,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_holder,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    # Rule out the case where the scheduler crashed between creating a run for this time
    # and launching it
    runs_filter = PipelineRunsFilter(tags=merge_dicts(
        PipelineRun.tags_for_schedule(external_schedule),
        {SCHEDULED_EXECUTION_TIME_TAG: schedule_time.in_tz("UTC").isoformat()},
    ))
    existing_runs = instance.get_runs(runs_filter)

    run_to_launch = None

    if existing_runs:
        check.invariant(len(existing_runs) == 1)

        run = existing_runs[0]

        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this time period,
            # but the scheduler must have crashed before the tick could be put
            # into a SUCCESS state

            logger.info(
                "Run {run_id} already completed for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_name))
            tick_holder.update_with_status(JobTickStatus.SUCCESS,
                                           run_id=run.run_id)

            return
        else:
            logger.info(
                "Run {run_id} already created for this execution of {schedule_name}"
                .format(run_id=run.run_id, schedule_name=schedule_name))
            run_to_launch = run
    else:
        run_to_launch = _create_scheduler_run(
            instance,
            logger,
            schedule_time,
            repo_location,
            external_repo,
            external_schedule,
            external_pipeline,
            tick_holder,
        )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

    if not run_to_launch:
        check.invariant(tick_holder.status != JobTickStatus.STARTED
                        and tick_holder.status != JobTickStatus.SUCCESS)
        return

    if run_to_launch.status != PipelineRunStatus.FAILURE:
        try:
            instance.submit_run(run_to_launch.run_id, external_pipeline)
            logger.info(
                "Completed scheduled launch of run {run_id} for {schedule_name}"
                .format(run_id=run_to_launch.run_id,
                        schedule_name=schedule_name))
        except Exception:  # pylint: disable=broad-except
            logger.error(
                "Run {run_id} created successfully but failed to launch.".format(
                    run_id=run_to_launch.run_id))

    _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")

    tick_holder.update_with_status(JobTickStatus.SUCCESS,
                                   run_id=run_to_launch.run_id)
    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
Example #11
def _evaluate_sensor(
    context,
    instance,
    workspace,
    external_sensor,
    state,
    sensor_debug_crash_flags=None,
):
    context.logger.info(
        f"Checking for new runs for sensor: {external_sensor.name}")

    sensor_origin = external_sensor.get_external_origin()
    repository_handle = external_sensor.handle.repository_handle
    repo_location = workspace.get_location(
        sensor_origin.external_repository_origin.repository_location_origin.location_name
    )

    sensor_runtime_data = repo_location.get_external_sensor_execution_data(
        instance,
        repository_handle,
        external_sensor.name,
        state.instigator_data.last_tick_timestamp
        if state.instigator_data else None,
        state.instigator_data.last_run_key if state.instigator_data else None,
        state.instigator_data.cursor if state.instigator_data else None,
    )

    yield

    assert isinstance(sensor_runtime_data, SensorExecutionData)
    if not sensor_runtime_data.run_requests:
        if sensor_runtime_data.pipeline_run_reactions:
            for pipeline_run_reaction in sensor_runtime_data.pipeline_run_reactions:
                origin_run_id = pipeline_run_reaction.pipeline_run.run_id
                if pipeline_run_reaction.error:
                    context.logger.error(
                        f"Got a reaction request for run {origin_run_id} but execution errored: {pipeline_run_reaction.error}"
                    )
                    context.update_state(
                        TickStatus.FAILURE,
                        cursor=sensor_runtime_data.cursor,
                        error=pipeline_run_reaction.error,
                    )
                    # Since run status sensors have side effects that we don't want to repeat,
                    # we still want to update the cursor, even though the tick failed
                    context.set_should_update_cursor_on_failure(True)
                else:
                    # Use status from the PipelineRunReaction object if it is from a new enough
                    # version (0.14.4) to be set (the status on the PipelineRun object itself
                    # may have since changed)
                    status = (pipeline_run_reaction.run_status.value
                              if pipeline_run_reaction.run_status else
                              pipeline_run_reaction.pipeline_run.status.value)
                    # log to the original pipeline run
                    message = (
                        f'Sensor "{external_sensor.name}" acted on run status '
                        f"{status} of run {origin_run_id}.")
                    instance.report_engine_event(
                        message=message,
                        pipeline_run=pipeline_run_reaction.pipeline_run)
                    context.logger.info(
                        f"Completed a reaction request for run {origin_run_id}: {message}"
                    )
                    context.update_state(
                        TickStatus.SUCCESS,
                        cursor=sensor_runtime_data.cursor,
                        origin_run_id=origin_run_id,
                    )
        elif sensor_runtime_data.skip_message:
            context.logger.info(
                f"Sensor {external_sensor.name} skipped: {sensor_runtime_data.skip_message}"
            )
            context.update_state(
                TickStatus.SKIPPED,
                skip_reason=sensor_runtime_data.skip_message,
                cursor=sensor_runtime_data.cursor,
            )
        else:
            context.logger.info(
                f"No run requests returned for {external_sensor.name}, skipping"
            )
            context.update_state(TickStatus.SKIPPED,
                                 cursor=sensor_runtime_data.cursor)

        yield
        return

    skipped_runs = []
    existing_runs_by_key = _fetch_existing_runs(
        instance, external_sensor, sensor_runtime_data.run_requests)

    for run_request in sensor_runtime_data.run_requests:
        target_data = external_sensor.get_target_data(run_request.job_name)

        pipeline_selector = PipelineSelector(
            location_name=repo_location.name,
            repository_name=sensor_origin.external_repository_origin.repository_name,
            pipeline_name=target_data.pipeline_name,
            solid_selection=target_data.solid_selection,
        )
        external_pipeline = repo_location.get_external_pipeline(
            pipeline_selector)
        run = _get_or_create_sensor_run(
            context,
            instance,
            repo_location,
            external_sensor,
            external_pipeline,
            run_request,
            target_data,
            existing_runs_by_key,
        )

        if isinstance(run, SkippedSensorRun):
            skipped_runs.append(run)
            context.add_run_info(run_id=None, run_key=run_request.run_key)
            yield
            continue

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_CREATED")

        error_info = None

        try:
            context.logger.info("Launching run for {sensor_name}".format(
                sensor_name=external_sensor.name))
            instance.submit_run(run.run_id, workspace)
            context.logger.info(
                "Completed launch of run {run_id} for {sensor_name}".format(
                    run_id=run.run_id, sensor_name=external_sensor.name))
        except Exception:
            error_info = serializable_error_info_from_exc_info(sys.exc_info())
            context.logger.error(
                f"Run {run.run_id} created successfully but failed to launch: "
                f"{str(error_info)}")

        yield error_info

        _check_for_debug_crash(sensor_debug_crash_flags, "RUN_LAUNCHED")

        context.add_run_info(run_id=run.run_id, run_key=run_request.run_key)

    if skipped_runs:
        run_keys = [skipped.run_key for skipped in skipped_runs]
        skipped_count = len(skipped_runs)
        context.logger.info(
            f"Skipping {skipped_count} {'run' if skipped_count == 1 else 'runs'} for sensor "
            f"{external_sensor.name} already completed with run keys: {seven.json.dumps(run_keys)}"
        )

    if context.run_count:
        context.update_state(TickStatus.SUCCESS,
                             cursor=sensor_runtime_data.cursor)
    else:
        context.update_state(TickStatus.SKIPPED,
                             cursor=sensor_runtime_data.cursor)

    yield
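
Example #11 also adds _fetch_existing_runs, which batches the run-key lookups into a single query up front instead of querying per run request. A sketch of that dedup map over plain dicts (the "dagster/run_key" tag name and run shape are assumptions for illustration):

RUN_KEY_TAG = "dagster/run_key"  # assumed tag key

def fetch_existing_runs_by_key(runs, run_keys):
    wanted = {key for key in run_keys if key}
    return {
        run["tags"][RUN_KEY_TAG]: run
        for run in runs
        if run["tags"].get(RUN_KEY_TAG) in wanted
    }

runs = [{"run_id": "abc123", "tags": {RUN_KEY_TAG: "2021-01-01"}}]
existing = fetch_existing_runs_by_key(runs, ["2021-01-01", "2021-01-02"])
assert "2021-01-01" in existing and "2021-01-02" not in existing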
Example #12
def _schedule_runs_at_time(
    instance,
    logger,
    workspace,
    repo_location,
    external_repo,
    external_schedule,
    schedule_time,
    tick_context,
    debug_crash_flags,
):
    schedule_name = external_schedule.name

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    external_pipeline = repo_location.get_external_pipeline(pipeline_selector)

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=schedule_time,
    )
    yield

    if not schedule_execution_data.run_requests:
        if schedule_execution_data.skip_message:
            logger.info(
                f"Schedule {external_schedule.name} skipped: {schedule_execution_data.skip_message}"
            )
        else:
            logger.info(
                f"No run requests returned for {external_schedule.name}, skipping"
            )

        # Update tick to skipped state and return
        tick_context.update_state(
            TickStatus.SKIPPED,
            skip_reason=schedule_execution_data.skip_message)
        return

    for run_request in schedule_execution_data.run_requests:
        run = _get_existing_run_for_request(instance, external_schedule,
                                            schedule_time, run_request)
        if run:
            if run.status != PipelineRunStatus.NOT_STARTED:
                # A run already exists and was launched for this time period,
                # but the scheduler must have crashed or errored before the tick could be put
                # into a SUCCESS state

                logger.info(
                    f"Run {run.run_id} already completed for this execution of {external_schedule.name}"
                )
                tick_context.add_run(run_id=run.run_id,
                                     run_key=run_request.run_key)
                yield
                continue
            else:
                logger.info(
                    f"Run {run.run_id} already created for this execution of {external_schedule.name}"
                )
        else:
            run = _create_scheduler_run(
                instance,
                schedule_time,
                repo_location,
                external_schedule,
                external_pipeline,
                run_request,
            )

        _check_for_debug_crash(debug_crash_flags, "RUN_CREATED")

        if run.status != PipelineRunStatus.FAILURE:
            try:
                instance.submit_run(run.run_id, workspace)
                logger.info(
                    f"Completed scheduled launch of run {run.run_id} for {schedule_name}"
                )
            except Exception:
                error_info = serializable_error_info_from_exc_info(
                    sys.exc_info())
                logger.error(
                    f"Run {run.run_id} created successfully but failed to launch: {str(error_info)}"
                )
                yield error_info

        _check_for_debug_crash(debug_crash_flags, "RUN_LAUNCHED")
        tick_context.add_run(run_id=run.run_id, run_key=run_request.run_key)
        _check_for_debug_crash(debug_crash_flags, "RUN_ADDED")
        yield

    _check_for_debug_crash(debug_crash_flags, "TICK_SUCCESS")
    tick_context.update_state(TickStatus.SUCCESS)