Example #1
def _create_sensor_run(instance, repo_location, external_sensor, external_pipeline, run_request):
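    """Build the execution plan snapshot for the sensor's target pipeline,
    merge pipeline, run-request, and sensor tags, and create a NOT_STARTED
    run on the instance."""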
    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline, run_request.run_config, external_sensor.mode, step_keys_to_execute=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags), PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    return instance.create_run(
        pipeline_name=external_sensor.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_sensor.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=external_sensor.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )
Example #2
def _get_or_create_sensor_run(context, instance, repo_location,
                              external_sensor, external_pipeline, run_request):
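    """Deduplicate sensor runs by run key: reuse an existing NOT_STARTED run,
    skip when a run for this key has already progressed past NOT_STARTED, and
    otherwise (or when no run key is set) create a new run."""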

    if not run_request.run_key:
        return _create_sensor_run(instance, repo_location, external_sensor,
                                  external_pipeline, run_request)

    existing_runs = instance.get_runs(
        PipelineRunsFilter(
            tags=merge_dicts(
                PipelineRun.tags_for_sensor(external_sensor),
                {RUN_KEY_TAG: run_request.run_key},
            )
        )
    )

    if len(existing_runs):
        run = existing_runs[0]
        if run.status != PipelineRunStatus.NOT_STARTED:
            # A run already exists and was launched for this run key,
            # but the sensor daemon must have crashed before the tick could
            # be put into a SUCCESS state
            return SkippedSensorRun(run_key=run_request.run_key,
                                    existing_run=run)
        else:
            context.logger.info(
                f"Run {run.run_id} already created with the run key "
                f"`{run_request.run_key}` for {external_sensor.name}")
            return run

    context.logger.info(f"Creating new run for {external_sensor.name}")

    return _create_sensor_run(instance, repo_location, external_sensor,
                              external_pipeline, run_request)
Example #3
def test_queue_from_schedule_and_sensor(instance, foo_example_workspace, foo_example_repo):
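    """Submit one run directly and rely on the schedule and sensor to submit
    two more, then assert that all three pass through the queued run
    coordinator and finish successfully."""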
    external_schedule = foo_example_repo.get_external_schedule("always_run_schedule")
    external_sensor = foo_example_repo.get_external_sensor("always_on_sensor")
    external_pipeline = foo_example_repo.get_full_external_pipeline("foo_pipeline")

    instance.start_schedule_and_update_storage_state(external_schedule)
    instance.start_sensor(external_sensor)

    with start_daemon(timeout=180):
        run = create_run(instance, external_pipeline)
        instance.submit_run(run.run_id, foo_example_workspace)

        runs = [
            poll_for_finished_run(instance, run.run_id),
            poll_for_finished_run(instance, run_tags=PipelineRun.tags_for_sensor(external_sensor)),
            poll_for_finished_run(
                instance,
                run_tags=PipelineRun.tags_for_schedule(external_schedule),
                timeout=90,
            ),
        ]

        for run in runs:
            logs = instance.all_logs(run.run_id)
            assert_events_in_order(
                logs,
                [
                    "PIPELINE_ENQUEUED",
                    "PIPELINE_DEQUEUED",
                    "PIPELINE_STARTING",
                    "PIPELINE_START",
                    "PIPELINE_SUCCESS",
                ],
            )
Example #4
def _create_sensor_run(
    instance, repo_location, external_sensor, external_pipeline, run_request, target_data
):
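    """Like the variant in Example #1, but resolves pipeline name, mode, and
    solid selection from target_data and logs a SENSOR_RUN_CREATED telemetry
    action with hashed identifiers before creating the run."""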
    from dagster.daemon.daemon import get_telemetry_daemon_session_id

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_request.run_config,
        target_data.mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    log_action(
        instance,
        SENSOR_RUN_CREATED,
        metadata={
            "DAEMON_SESSION_ID": get_telemetry_daemon_session_id(),
            "SENSOR_NAME_HASH": hash_name(external_sensor.name),
            "pipeline_name_hash": hash_name(external_pipeline.name),
            "repo_hash": hash_name(repo_location.name),
        },
    )

    return instance.create_run(
        pipeline_name=target_data.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=target_data.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        status=PipelineRunStatus.NOT_STARTED,
        solid_selection=target_data.solid_selection,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #5
def test_queue_from_schedule_and_sensor(tmpdir, foo_example_repo):
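    """Provision a DAGSTER_HOME under tmpdir configured with a
    QueuedRunCoordinator, then run the same queue assertions as the
    workspace-fixture variant in Example #3."""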
    dagster_home_path = tmpdir.strpath
    with setup_instance(
            dagster_home_path,
            """run_coordinator:
    module: dagster.core.run_coordinator
    class: QueuedRunCoordinator
    config:
        dequeue_interval_seconds: 1
    """,
    ) as instance:
        external_schedule = foo_example_repo.get_external_schedule(
            "never_run_schedule")
        external_sensor = foo_example_repo.get_external_sensor(
            "never_on_sensor")

        foo_pipeline_handle = PipelineHandle("foo_pipeline",
                                             foo_example_repo.handle)

        instance.start_schedule_and_update_storage_state(external_schedule)
        instance.start_sensor(external_sensor)

        with start_daemon(timeout=180):
            run = create_run(instance, foo_pipeline_handle)
            with external_pipeline_from_run(run) as external_pipeline:
                instance.submit_run(run.run_id, external_pipeline)

                runs = [
                    poll_for_finished_run(instance, run.run_id),
                    poll_for_finished_run(
                        instance,
                        run_tags=PipelineRun.tags_for_sensor(external_sensor)),
                    poll_for_finished_run(
                        instance,
                        run_tags=PipelineRun.tags_for_schedule(
                            external_schedule),
                        timeout=90,
                    ),
                ]

                for run in runs:
                    logs = instance.all_logs(run.run_id)
                    assert_events_in_order(
                        logs,
                        [
                            "PIPELINE_ENQUEUED",
                            "PIPELINE_DEQUEUED",
                            "PIPELINE_STARTING",
                            "PIPELINE_START",
                            "PIPELINE_SUCCESS",
                        ],
                    )
Example #6
def _fetch_existing_runs(instance, external_sensor, run_requests):
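    """Map each run key in run_requests to an existing run with that key,
    using a single bucketed query when the run storage supports it and
    falling back to one limit-1 query per run key otherwise."""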
    run_keys = [
        run_request.run_key for run_request in run_requests
        if run_request.run_key
    ]

    if not run_keys:
        return {}

    existing_runs = {}

    if instance.supports_bucket_queries:
        runs = instance.get_runs(
            filters=RunsFilter(tags=PipelineRun.tags_for_sensor(external_sensor)),
            bucket_by=TagBucket(
                tag_key=RUN_KEY_TAG,
                bucket_limit=1,
                tag_values=run_keys,
            ),
        )
        for run in runs:
            tags = run.tags or {}
            run_key = tags.get(RUN_KEY_TAG)
            existing_runs[run_key] = run
        return existing_runs

    else:
        for run_key in run_keys:
            runs = instance.get_runs(
                filters=RunsFilter(tags=merge_dicts(
                    PipelineRun.tags_for_sensor(external_sensor),
                    {RUN_KEY_TAG: run_key},
                )),
                limit=1,
            )
            if runs:
                existing_runs[run_key] = runs[0]
    return existing_runs
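
For context, here is a minimal sketch of how a caller might consume the
mapping returned by _fetch_existing_runs. The driver function below is an
illustration only (it pairs this helper with the five-argument
_create_sensor_run from Example #1); it is not taken from the Dagster source.

def _submit_run_requests(instance, repo_location, external_sensor,
                         external_pipeline, run_requests):
    # Hypothetical driver: create one run per request, skipping any run key
    # that _fetch_existing_runs says has already produced a run.
    existing_runs = _fetch_existing_runs(instance, external_sensor, run_requests)
    for run_request in run_requests:
        if run_request.run_key and run_request.run_key in existing_runs:
            continue  # deduplicated by run key
        _create_sensor_run(instance, repo_location, external_sensor,
                           external_pipeline, run_request)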
Example #7
def _create_sensor_run(context, instance, repo_location, external_sensor,
                       external_pipeline, run_request):
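    """Variant that tolerates execution-plan failures: the run is still
    created, but with FAILURE status, and the captured errors are reported
    as engine events before the run is marked failed."""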
    execution_plan_errors = []
    execution_plan_snapshot = None
    try:
        external_execution_plan = repo_location.get_external_execution_plan(
            external_pipeline,
            run_request.run_config,
            external_sensor.mode,
            step_keys_to_execute=None,
        )
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
    except DagsterSubprocessError as e:
        execution_plan_errors.extend(e.subprocess_error_infos)
    except Exception:  # pylint: disable=broad-except
        execution_plan_errors.append(
            serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(
        merge_dicts(pipeline_tags, run_request.tags),
        PipelineRun.tags_for_sensor(external_sensor),
    )
    if run_request.run_key:
        tags[RUN_KEY_TAG] = run_request.run_key

    run = instance.create_run(
        pipeline_name=external_sensor.pipeline_name,
        run_id=None,
        run_config=run_request.run_config,
        mode=external_sensor.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_sensor.solid_selection,
        status=(PipelineRunStatus.FAILURE if len(execution_plan_errors) > 0
                else PipelineRunStatus.NOT_STARTED),
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
    )

    if len(execution_plan_errors) > 0:
        for error in execution_plan_errors:
            instance.report_engine_event(
                error.message,
                run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(run)
        context.logger.error(
            "Failed to fetch execution plan for {sensor_name}: {error_string}".format(
                sensor_name=external_sensor.name,
                error_string="\n".join(
                    error.to_string() for error in execution_plan_errors
                ),
            )
        )

    return run