Example #1
def external_pipeline_from_location_handle(repository_location_handle,
                                           external_pipeline_origin,
                                           solid_selection):
    check.inst_param(repository_location_handle, "repository_location_handle",
                     RepositoryLocationHandle)
    check.inst_param(external_pipeline_origin, "external_pipeline_origin",
                     ExternalPipelineOrigin)

    repo_location = repository_location_handle.create_location()
    repo_name = external_pipeline_origin.external_repository_origin.repository_name
    pipeline_name = external_pipeline_origin.pipeline_name

    check.invariant(
        repo_location.has_repository(repo_name),
        "Could not find repository {repo_name} in location {repo_location_name}"
        .format(repo_name=repo_name, repo_location_name=repo_location.name),
    )
    external_repo = repo_location.get_repository(repo_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
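A minimal usage sketch for this helper, assuming the origin is taken from a stored run (the context-managed handle creation is the same call shown in Example #3; `pipeline_run` is a hypothetical PipelineRun):

# Hypothetical call site: resolve a run's origin back to an ExternalPipeline.
origin = pipeline_run.external_pipeline_origin
with RepositoryLocationHandle.create_from_repository_location_origin(
        origin.external_repository_origin.repository_location_origin) as handle:
    external_pipeline = external_pipeline_from_location_handle(
        handle, origin, pipeline_run.solid_selection)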
Example #2
def external_pipeline_from_location_handle(repository_location_handle,
                                           pipeline_name, solid_selection):
    check.inst_param(repository_location_handle, "repository_location_handle",
                     RepositoryLocationHandle)

    repo_location = RepositoryLocation.from_handle(repository_location_handle)
    repo_dict = repo_location.get_repositories()
    check.invariant(
        len(repo_dict) == 1,
        "Reconstructed repository location should have exactly one repository",
    )
    external_repo = next(iter(repo_dict.values()))

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )
    return external_pipeline
Example #3
from contextlib import contextmanager


@contextmanager  # assumed: the function yields, so it is used via "with" (see the sketch after this example)
def external_pipeline_from_run(pipeline_run):
    check.inst_param(pipeline_run, "pipeline_run", PipelineRun)
    external_pipeline_origin = check.inst(
        pipeline_run.external_pipeline_origin, ExternalPipelineOrigin)

    with RepositoryLocationHandle.create_from_repository_location_origin(
            external_pipeline_origin.external_repository_origin.repository_location_origin
    ) as repo_location_handle:
        repo_location = RepositoryLocation.from_handle(repo_location_handle)

        repo_dict = repo_location.get_repositories()
        check.invariant(
            len(repo_dict) == 1,
            "Reconstructed repository location should have exactly one repository",
        )
        external_repo = next(iter(repo_dict.values()))

        pipeline_selector = PipelineSelector(
            location_name=repo_location.name,
            repository_name=external_repo.name,
            pipeline_name=pipeline_run.pipeline_name,
            solid_selection=pipeline_run.solid_selection,
        )

        subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
            pipeline_selector)
        external_pipeline = ExternalPipeline(
            subset_pipeline_result.external_pipeline_data,
            external_repo.handle,
        )
        yield external_pipeline
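Because this variant yields rather than returns, it is consumed with a `with` statement. A minimal call-site sketch, assuming the `@contextmanager` decorator above and a stored PipelineRun named `pipeline_run`:

# Hypothetical usage: the location handle is cleaned up when the block exits.
with external_pipeline_from_run(pipeline_run) as external_pipeline:
    print(external_pipeline.name)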
Example #4
File: api.py Project: xjhc/dagster
def _launch_scheduled_executions(instance, repo_location, external_repo,
                                 external_schedule, tick_context):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    if not schedule_execution_data.run_requests:
        # Update tick to skipped state and return
        tick_context.update_state(JobTickStatus.SKIPPED)
        tick_context.stream.send(ScheduledExecutionSkipped())
        return

    for run_request in schedule_execution_data.run_requests:
        _launch_run(instance, repo_location, external_schedule,
                    external_pipeline, tick_context, run_request)

    tick_context.update_state(JobTickStatus.SUCCESS)
Example #5
def trigger_execution(graphene_info, trigger_selector):
    check.inst_param(graphene_info, "graphene_info", ResolveInfo)
    check.inst_param(trigger_selector, "trigger_selector", TriggerSelector)
    location = graphene_info.context.get_repository_location(
        trigger_selector.location_name)
    repository = location.get_repository(trigger_selector.repository_name)

    matches = [
        triggered_execution for triggered_execution in
        repository.get_external_triggered_executions()
        if triggered_execution.name == trigger_selector.trigger_name
    ]

    launched_run_ids = []
    for external_triggered_execution in matches:
        external_pipeline = repository.get_full_external_pipeline(
            external_triggered_execution.pipeline_name)
        result = graphene_info.context.get_external_triggered_execution_param_data(
            repository.handle, external_triggered_execution.name)
        if isinstance(result, ExternalExecutionParamsErrorData):
            continue

        assert isinstance(result, ExternalExecutionParamsData)

        pipeline_selector = PipelineSelector(
            location_name=location.name,
            repository_name=repository.name,
            pipeline_name=external_pipeline.name,
            solid_selection=external_triggered_execution.solid_selection,
        )
        execution_params = ExecutionParams(
            selector=pipeline_selector,
            run_config=result.run_config,
            mode=external_triggered_execution.mode,
            execution_metadata=ExecutionMetadata(run_id=None,
                                                 tags=result.tags),
            step_keys=None,
        )
        pipeline_run = create_valid_pipeline_run(graphene_info,
                                                 external_pipeline,
                                                 execution_params)
        graphene_info.context.instance.launch_run(pipeline_run.run_id,
                                                  external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return graphene_info.schema.type_named("TriggerExecutionSuccess")(
        launched_run_ids=launched_run_ids)
Example #6
def external_pipeline_from_location(repo_location, external_pipeline_origin,
                                    solid_selection):
    check.inst_param(repo_location, "repository_location", RepositoryLocation)
    check.inst_param(external_pipeline_origin, "external_pipeline_origin",
                     ExternalPipelineOrigin)

    repo_name = external_pipeline_origin.external_repository_origin.repository_name
    pipeline_name = external_pipeline_origin.pipeline_name

    check.invariant(
        repo_location.has_repository(repo_name),
        "Could not find repository {repo_name} in location {repo_location_name}"
        .format(repo_name=repo_name, repo_location_name=repo_location.name),
    )
    external_repo = repo_location.get_repository(repo_name)

    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    return repo_location.get_external_pipeline(pipeline_selector)
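Unlike the handle-based variants above, this version delegates subsetting to repo_location.get_external_pipeline instead of wrapping the subset result by hand. A minimal call-site sketch, assuming an already-loaded RepositoryLocation and a run that carries an ExternalPipelineOrigin:

# Hypothetical usage; `repo_location` and `pipeline_run` are assumed to exist.
external_pipeline = external_pipeline_from_location(
    repo_location, pipeline_run.external_pipeline_origin,
    pipeline_run.solid_selection)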
Example #7
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(pipeline_selector)

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #8
def to_selector(self):
    return PipelineSelector(self.location_name, self.repository_name,
                            self.pipeline_name, None)
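The snippet above is a method shown without its enclosing class. A self-contained sketch, assuming a hypothetical namedtuple-style holder for the three naming fields (the real owner of to_selector is not shown here):

from collections import namedtuple

# Hypothetical container, for illustration only.
class PipelineTarget(namedtuple("PipelineTarget",
                                "location_name repository_name pipeline_name")):
    def to_selector(self):
        # solid_selection=None means the full pipeline, with no solid subsetting.
        return PipelineSelector(self.location_name, self.repository_name,
                                self.pipeline_name, None)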
Example #9
def _create_external_pipeline_run(
    instance,
    repo_location,
    external_repo,
    external_pipeline,
    run_config,
    mode,
    preset,
    tags,
    solid_selection,
    run_id,
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config")

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    if not subset_pipeline_result.success:
        raise DagsterLaunchFailedError(
            "Failed to load external pipeline subset: {error_message}".format(
                error_message=subset_pipeline_result.error.message),
            serializable_error_info=subset_pipeline_result.error,
        )

    external_pipeline_subset = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    pipeline_mode = mode or external_pipeline_subset.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline_subset,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
    )
    if isinstance(external_execution_plan, ExecutionPlanSnapshotErrorData):
        raise DagsterLaunchFailedError(
            "Failed to load external execution plan",
            serializable_error_info=external_execution_plan.error,
        )
    else:
        execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline_subset.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline_subset.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline_subset.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline_subset.get_external_origin(),
    )
Example #10
def _launch_scheduled_execution(instance, repo_location, external_repo,
                                external_schedule, tick, stream):
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=external_schedule.pipeline_name,
        solid_selection=external_schedule.solid_selection,
    )

    subset_pipeline_result = repo_location.get_subset_external_pipeline_result(
        pipeline_selector)
    external_pipeline = ExternalPipeline(
        subset_pipeline_result.external_pipeline_data,
        external_repo.handle,
    )

    schedule_execution_data = repo_location.get_external_schedule_execution_data(
        instance=instance,
        repository_handle=external_repo.handle,
        schedule_name=external_schedule.name,
        schedule_execution_data_mode=ScheduleExecutionDataMode.LAUNCH_SCHEDULED_EXECUTION,
        scheduled_execution_time=None,  # No way to know this in general for this scheduler
    )

    run_config = {}
    schedule_tags = {}
    execution_plan_snapshot = None
    errors = []

    if isinstance(schedule_execution_data, ExternalScheduleExecutionErrorData):
        error = schedule_execution_data.error
        tick.update_with_status(ScheduleTickStatus.FAILURE, error=error)
        stream.send(ScheduledExecutionFailed(run_id=None, errors=[error]))
        return
    elif not schedule_execution_data.should_execute:
        # Update tick to skipped state and return
        tick.update_with_status(ScheduleTickStatus.SKIPPED)
        stream.send(ScheduledExecutionSkipped())
        return
    else:
        run_config = schedule_execution_data.run_config
        schedule_tags = schedule_execution_data.tags
        try:
            external_execution_plan = repo_location.get_external_execution_plan(
                external_pipeline,
                run_config,
                external_schedule.mode,
                step_keys_to_execute=None,
            )
            execution_plan_snapshot = external_execution_plan.execution_plan_snapshot
        except DagsterSubprocessError as e:
            errors.extend(e.subprocess_error_infos)
        except Exception:  # pylint: disable=broad-except
            errors.append(serializable_error_info_from_exc_info(sys.exc_info()))

    pipeline_tags = external_pipeline.tags or {}
    check_tags(pipeline_tags, "pipeline_tags")
    tags = merge_dicts(pipeline_tags, schedule_tags)

    # Enter the run in the DB with the information we have
    possibly_invalid_pipeline_run = instance.create_run(
        pipeline_name=external_schedule.pipeline_name,
        run_id=None,
        run_config=run_config,
        mode=external_schedule.mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=None,
        solid_selection=external_pipeline.solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
    )

    tick.update_with_status(ScheduleTickStatus.SUCCESS,
                            run_id=possibly_invalid_pipeline_run.run_id)

    # If there were errors, inject them into the event log and fail the run
    if len(errors) > 0:
        for error in errors:
            instance.report_engine_event(
                error.message,
                possibly_invalid_pipeline_run,
                EngineEventData.engine_error(error),
            )
        instance.report_run_failed(possibly_invalid_pipeline_run)
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=errors))
        return

    try:
        launched_run = instance.launch_run(
            possibly_invalid_pipeline_run.run_id, external_pipeline)
    except Exception:  # pylint: disable=broad-except
        # Capture the launch failure as a serializable error for the stream.
        error = serializable_error_info_from_exc_info(sys.exc_info())
        stream.send(
            ScheduledExecutionFailed(
                run_id=possibly_invalid_pipeline_run.run_id, errors=[error]))
        return

    stream.send(ScheduledExecutionSuccess(run_id=launched_run.run_id))
    return
Example #11
def _create_external_pipeline_run(
    instance: DagsterInstance,
    repo_location: RepositoryLocation,
    external_repo: ExternalRepository,
    external_pipeline: ExternalPipeline,
    run_config: Dict[str, object],
    mode: Optional[str],
    preset: Optional[str],
    tags: Optional[Mapping[str, object]],
    solid_selection: Optional[List[str]],
    run_id: Optional[str],
):
    check.inst_param(instance, "instance", DagsterInstance)
    check.inst_param(repo_location, "repo_location", RepositoryLocation)
    check.inst_param(external_repo, "external_repo", ExternalRepository)
    check.inst_param(external_pipeline, "external_pipeline", ExternalPipeline)
    check.opt_dict_param(run_config, "run_config", key_type=str)

    check.opt_str_param(mode, "mode")
    check.opt_str_param(preset, "preset")
    check.opt_dict_param(tags, "tags", key_type=str)
    check.opt_list_param(solid_selection, "solid_selection", of_type=str)
    check.opt_str_param(run_id, "run_id")

    run_config, mode, tags, solid_selection = _check_execute_external_pipeline_args(
        external_pipeline,
        run_config,
        mode,
        preset,
        tags,
        solid_selection,
    )

    pipeline_name = external_pipeline.name
    pipeline_selector = PipelineSelector(
        location_name=repo_location.name,
        repository_name=external_repo.name,
        pipeline_name=pipeline_name,
        solid_selection=solid_selection,
    )

    external_pipeline = repo_location.get_external_pipeline(pipeline_selector)

    pipeline_mode = mode or external_pipeline.get_default_mode_name()

    external_execution_plan = repo_location.get_external_execution_plan(
        external_pipeline,
        run_config,
        pipeline_mode,
        step_keys_to_execute=None,
        known_state=None,
        instance=instance,
    )
    execution_plan_snapshot = external_execution_plan.execution_plan_snapshot

    return instance.create_run(
        pipeline_name=pipeline_name,
        run_id=run_id,
        run_config=run_config,
        mode=pipeline_mode,
        solids_to_execute=external_pipeline.solids_to_execute,
        step_keys_to_execute=execution_plan_snapshot.step_keys_to_execute,
        solid_selection=solid_selection,
        status=None,
        root_run_id=None,
        parent_run_id=None,
        tags=tags,
        pipeline_snapshot=external_pipeline.pipeline_snapshot,
        execution_plan_snapshot=execution_plan_snapshot,
        parent_pipeline_snapshot=external_pipeline.parent_pipeline_snapshot,
        external_pipeline_origin=external_pipeline.get_external_origin(),
        pipeline_code_origin=external_pipeline.get_python_origin(),
    )
Example #12
def create_and_launch_partition_backfill(graphene_info, backfill_params):
    partition_set_selector = backfill_params.get("selector")
    partition_set_name = partition_set_selector.get("partitionSetName")
    repository_selector = RepositorySelector.from_graphql_input(
        partition_set_selector.get("repositorySelector"))
    location = graphene_info.context.get_repository_location(
        repository_selector.location_name)
    repository = location.get_repository(repository_selector.repository_name)
    matches = [
        partition_set
        for partition_set in repository.get_external_partition_sets()
        if partition_set.name == partition_set_name
    ]
    if not matches:
        return graphene_info.schema.type_named("PartitionSetNotFoundError")(
            partition_set_name)

    check.invariant(
        len(matches) == 1,
        "Partition set names must be unique: found {num} matches for {partition_set_name}"
        .format(num=len(matches), partition_set_name=partition_set_name),
    )

    external_partition_set = next(iter(matches))
    external_pipeline = repository.get_full_external_pipeline(
        external_partition_set.pipeline_name)
    pipeline_selector = PipelineSelector(
        location_name=location.name,
        repository_name=repository.name,
        pipeline_name=external_pipeline.name,
        solid_selection=external_partition_set.solid_selection,
    )

    partition_names = backfill_params.get("partitionNames")

    backfill_id = make_new_backfill_id()
    result = graphene_info.context.get_external_partition_set_execution_param_data(
        repository.handle, partition_set_name, partition_names)

    if isinstance(result, ExternalPartitionExecutionErrorData):
        return graphene_info.schema.type_named("PythonError")(result.error)

    assert isinstance(result, ExternalPartitionSetExecutionParamData)

    launched_run_ids = []
    execution_param_list = _build_execution_param_list_for_backfill(
        graphene_info.context.instance,
        result.partition_data,
        backfill_id,
        backfill_params,
        pipeline_selector,
        external_partition_set,
    )

    for execution_params in execution_param_list:
        pipeline_run = create_valid_pipeline_run(graphene_info,
                                                 external_pipeline,
                                                 execution_params)
        graphene_info.context.instance.launch_run(pipeline_run.run_id,
                                                  external_pipeline)
        launched_run_ids.append(pipeline_run.run_id)

    return graphene_info.schema.type_named("PartitionBackfillSuccess")(
        backfill_id=backfill_id, launched_run_ids=launched_run_ids)