Example #1
File: __init__.py  Project: xyzlat/dagster
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        solid_selection=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        # Note that solids_to_execute is what actually selects the solid subset
        # to run; it is the frozenset successor of the earlier solid_subset.
        # solid_selection is optional and is not converted to solids_to_execute
        # here, i.e. this function does not evaluate solid queries; it only
        # passes the raw user selection further down.
        check.opt_set_param(solids_to_execute,
                            'solids_to_execute',
                            of_type=str)
        check.opt_list_param(solid_selection, 'solid_selection', of_type=str)

        if solids_to_execute:
            if isinstance(pipeline_def, PipelineSubsetDefinition):
                # for the case when pipeline_def is created by ExecutablePipeline or ExternalPipeline
                check.invariant(
                    solids_to_execute == pipeline_def.solids_to_execute,
                    'Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} '
                    'that conflicts with solids_to_execute arg {solids_to_execute}'
                    .format(
                        pipeline_solids_to_execute=str_format_list(
                            pipeline_def.solids_to_execute),
                        solids_to_execute=str_format_list(solids_to_execute),
                    ),
                )
            else:
                # for cases when `create_run_for_pipeline` is directly called
                pipeline_def = pipeline_def.get_pipeline_subset_def(
                    solids_to_execute=solids_to_execute)

        if execution_plan is None:
            execution_plan = create_execution_plan(
                pipeline_def,
                run_config=run_config,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=check.opt_str_param(
                mode, 'mode', default=pipeline_def.get_default_mode_name()),
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()),
            parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(
            ),
        )
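
The method above only consumes an already-resolved solids_to_execute frozenset; solid_selection is passed through untouched, as its comment explains. Below is a minimal usage sketch, assuming dagster's legacy solid/pipeline API and an ephemeral instance; the pipeline and all names are illustrative, not taken from the project above.

from dagster import DagsterInstance, pipeline, solid

@solid
def hello(_):
    return "hello"

@pipeline
def hello_pipeline():
    hello()

# In-memory instance for experimentation; a real deployment would use
# DagsterInstance.get() against configured run storage.
instance = DagsterInstance.ephemeral()

# Omitting execution_plan lets the method build the plan itself via
# create_execution_plan.
run = instance.create_run_for_pipeline(
    pipeline_def=hello_pipeline,
    run_config={},  # this toy pipeline requires no config
)
print(run.run_id, run.status)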
Example #2
def _make_airflow_dag(
    recon_repo,
    pipeline_name,
    run_config=None,
    mode=None,
    instance=None,
    dag_id=None,
    dag_description=None,
    dag_kwargs=None,
    op_kwargs=None,
    operator=DagsterPythonOperator,
):
    check.inst_param(recon_repo, 'recon_repo', ReconstructableRepository)
    check.str_param(pipeline_name, 'pipeline_name')
    run_config = check.opt_dict_param(run_config, 'run_config', key_type=str)
    mode = check.opt_str_param(mode, 'mode')
    # Default to use the (persistent) system temp directory rather than a seven.TemporaryDirectory,
    # which would not be consistent between Airflow task invocations.
    instance = (check.inst_param(instance, 'instance', DagsterInstance)
                if instance else DagsterInstance.get(
                    fallback_storage=seven.get_system_temp_directory()))

    # Only used for Airflow; internally we continue to use pipeline.name
    dag_id = check.opt_str_param(dag_id, 'dag_id',
                                 _rename_for_airflow(pipeline_name))

    dag_description = check.opt_str_param(dag_description, 'dag_description',
                                          _make_dag_description(pipeline_name))
    check.subclass_param(operator, 'operator', BaseOperator)

    dag_kwargs = dict({'default_args': DEFAULT_ARGS},
                      **check.opt_dict_param(dag_kwargs,
                                             'dag_kwargs',
                                             key_type=str))

    op_kwargs = check.opt_dict_param(op_kwargs, 'op_kwargs', key_type=str)

    dag = DAG(dag_id=dag_id, description=dag_description, **dag_kwargs)
    pipeline = recon_repo.get_definition().get_pipeline(pipeline_name)

    if mode is None:
        mode = pipeline.get_default_mode_name()

    execution_plan = create_execution_plan(pipeline, run_config, mode=mode)

    tasks = {}

    coalesced_plan = coalesce_execution_steps(execution_plan)

    for solid_handle, solid_steps in coalesced_plan.items():
        step_keys = [step.key for step in solid_steps]

        operator_parameters = DagsterOperatorParameters(
            recon_repo=recon_repo,
            pipeline_name=pipeline_name,
            run_config=run_config,
            mode=mode,
            task_id=solid_handle,
            step_keys=step_keys,
            dag=dag,
            instance_ref=instance.get_ref(),
            op_kwargs=op_kwargs,
            pipeline_snapshot=pipeline.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan,
                pipeline_snapshot_id=pipeline.get_pipeline_snapshot_id()),
        )
        task = operator(operator_parameters)

        tasks[solid_handle] = task

        for solid_step in solid_steps:
            for step_input in solid_step.step_inputs:
                for key in step_input.dependency_keys:
                    prev_solid_handle = execution_plan.get_step_by_key(
                        key).solid_handle.to_string()
                    if solid_handle != prev_solid_handle:
                        tasks[prev_solid_handle].set_downstream(task)

    return (dag,
            [tasks[solid_handle] for solid_handle in coalesced_plan.keys()])
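
_make_airflow_dag is the private core behind dagster_airflow's public factory functions. A hedged sketch of reaching it through make_airflow_dag, which (in this era of the API) builds the ReconstructableRepository from a module name and forwards the remaining arguments; the module and pipeline names are hypothetical.

from dagster_airflow import make_airflow_dag

dag, tasks = make_airflow_dag(
    module_name="my_company.repo",    # hypothetical module exposing the repository
    pipeline_name="hello_pipeline",   # hypothetical pipeline within it
    run_config={},
    dag_kwargs={"schedule_interval": None},  # merged with the default_args
                                             # mapping, then forwarded to DAG(...)
)

Each entry in tasks is one DagsterPythonOperator covering the coalesced steps of a single solid, wired downstream of the operators that produce its step inputs, exactly as the dependency loop above arranges.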
Example #3

@solid
def nonce_solid(_):
    return


@pipeline
def nonce_pipeline():
    return nonce_solid()


nonce_pipeline_snapshot = nonce_pipeline.get_pipeline_snapshot()

nonce_execution_plan_snapshot = snapshot_from_execution_plan(
    create_execution_plan(nonce_pipeline),
    nonce_pipeline.get_pipeline_snapshot_id())


def test_init_modified_docker_operator(dagster_docker_image):
    with instance_for_test() as instance:
        dagster_operator_parameters = DagsterOperatorParameters(
            task_id="nonce",
            run_config={"intermediate_storage": {
                "filesystem": {}
            }},
            pipeline_name="",
            mode="default",
            op_kwargs={
                "image": dagster_docker_image,
                "api_version": "auto",
Example #4
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        run_config=None,
        mode=None,
        solids_to_execute=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
        solid_selection=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, "pipeline_def", PipelineDefinition)
        check.opt_inst_param(execution_plan, "execution_plan", ExecutionPlan)

        # Note that solids_to_execute is what actually selects the solid subset
        # to run; it is the frozenset successor of the earlier solid_subset.
        # solid_selection is optional and is not converted to solids_to_execute
        # here, i.e. this function does not evaluate solid queries; it only
        # passes the raw user selection further down.
        check.opt_set_param(solids_to_execute,
                            "solids_to_execute",
                            of_type=str)
        check.opt_list_param(solid_selection, "solid_selection", of_type=str)

        if solids_to_execute:
            if isinstance(pipeline_def, PipelineSubsetDefinition):
                # for the case when pipeline_def is created by IPipeline or ExternalPipeline
                check.invariant(
                    solids_to_execute == pipeline_def.solids_to_execute,
                    "Cannot create a PipelineRun from pipeline subset {pipeline_solids_to_execute} "
                    "that conflicts with solids_to_execute arg {solids_to_execute}"
                    .format(
                        pipeline_solids_to_execute=str_format_list(
                            pipeline_def.solids_to_execute),
                        solids_to_execute=str_format_list(solids_to_execute),
                    ),
                )
            else:
                # for cases when `create_run_for_pipeline` is directly called
                pipeline_def = pipeline_def.get_pipeline_subset_def(
                    solids_to_execute=solids_to_execute)

        full_execution_plan = execution_plan or create_execution_plan(
            pipeline_def,
            run_config=run_config,
            mode=mode,
        )
        check.invariant(
            len(full_execution_plan.step_keys_to_execute) == len(
                full_execution_plan.steps))

        if _is_memoized_run(tags):
            if step_keys_to_execute:
                raise DagsterInvariantViolationError(
                    "step_keys_to_execute parameter cannot be used in conjunction with memoized "
                    "pipeline runs.")

            step_keys_to_execute = self.resolve_unmemoized_steps(
                full_execution_plan,
                run_config=run_config,
                mode=mode,
            )  # TODO: tighter integration with existing step_keys_to_execute functionality

        subsetted_execution_plan = (
            full_execution_plan.build_subset_plan(step_keys_to_execute)
            if step_keys_to_execute else full_execution_plan)

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            run_config=run_config,
            mode=check.opt_str_param(
                mode, "mode", default=pipeline_def.get_default_mode_name()),
            solid_selection=solid_selection,
            solids_to_execute=solids_to_execute,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                subsetted_execution_plan,
                pipeline_def.get_pipeline_snapshot_id()),
            parent_pipeline_snapshot=pipeline_def.get_parent_pipeline_snapshot(
            ),
        )
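
Compared with Example #1, this revision always builds the full plan, asserts it is unsubsetted, and only then carves out a subset plan, deriving step_keys_to_execute itself for memoized runs. For context, a sketch of the tag check it relies on: _is_memoized_run is dagster's own helper, but the body below is an assumption based on the MEMOIZED_RUN_TAG convention, not copied from the source.

from dagster.core.storage.tags import MEMOIZED_RUN_TAG  # "dagster/is_memoized_run"

def _is_memoized_run(tags):
    # Assumed behavior: a run opts into memoization via an explicit tag value.
    return tags is not None and tags.get(MEMOIZED_RUN_TAG) == "true"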
Example #5
    def create_run_for_pipeline(
        self,
        pipeline_def,
        execution_plan=None,
        run_id=None,
        environment_dict=None,
        mode=None,
        solid_subset=None,
        step_keys_to_execute=None,
        status=None,
        tags=None,
        root_run_id=None,
        parent_run_id=None,
    ):
        from dagster.core.execution.api import create_execution_plan
        from dagster.core.execution.plan.plan import ExecutionPlan
        from dagster.core.snap import snapshot_from_execution_plan

        check.inst_param(pipeline_def, 'pipeline_def', PipelineDefinition)
        check.opt_inst_param(execution_plan, 'execution_plan', ExecutionPlan)

        if solid_subset:
            if isinstance(pipeline_def, PipelineSubsetForExecution):
                check.invariant(
                    len(solid_subset) == len(pipeline_def.solid_subset)
                    and set(solid_subset) == set(pipeline_def.solid_subset),
                    'Cannot create a PipelineRun from pipeline subset {pipeline_solid_subset} that '
                    'conflicts with solid_subset arg {solid_subset}'.format(
                        pipeline_solid_subset=str_format_list(
                            pipeline_def.solid_subset),
                        solid_subset=str_format_list(solid_subset),
                    ),
                )
            else:
                pipeline_def = pipeline_def.subset_for_execution(
                    solid_subset=solid_subset)

        if execution_plan is None:
            execution_plan = create_execution_plan(
                pipeline_def,
                environment_dict=environment_dict,
                mode=mode,
                step_keys_to_execute=step_keys_to_execute,
            )

        return self.create_run(
            pipeline_name=pipeline_def.name,
            run_id=run_id,
            environment_dict=environment_dict,
            mode=check.opt_str_param(
                mode, 'mode', default=pipeline_def.get_default_mode_name()),
            solid_subset=solid_subset,
            step_keys_to_execute=step_keys_to_execute,
            status=status,
            tags=tags,
            root_run_id=root_run_id,
            parent_run_id=parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        )
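
This is the same method as Example #1, one API generation earlier: environment_dict later became run_config, and the list-valued solid_subset was split into solid_selection plus the frozenset solids_to_execute. A hedged call sketch against this older signature, reusing the same toy pipeline (all names illustrative):

from dagster import DagsterInstance, pipeline, solid

@solid
def hello(_):
    return "hello"

@pipeline
def hello_pipeline():
    hello()

instance = DagsterInstance.ephemeral()
run = instance.create_run_for_pipeline(
    pipeline_def=hello_pipeline,
    environment_dict={},     # pre-rename spelling of run_config
    solid_subset=["hello"],  # plain list of solid names, not yet a frozenset
)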
Example #6
def _start_pipeline_execution(graphene_info,
                              execution_params,
                              is_reexecuted=False):
    check.inst_param(graphene_info, 'graphene_info', ResolveInfo)
    check.inst_param(execution_params, 'execution_params', ExecutionParams)

    if is_reexecuted:
        # required fields for re-execution
        execution_metadata = check.inst_param(
            execution_params.execution_metadata, 'execution_metadata',
            ExecutionMetadata)
        check.str_param(execution_metadata.root_run_id, 'root_run_id')
        check.str_param(execution_metadata.parent_run_id, 'parent_run_id')

    instance = graphene_info.context.instance
    execution_manager_settings = instance.dagit_settings.get(
        'execution_manager')
    if execution_manager_settings and execution_manager_settings.get(
            'disabled'):
        return graphene_info.schema.type_named(
            'StartPipelineRunDisabledError')()

    pipeline_def = get_pipeline_def_from_selector(graphene_info,
                                                  execution_params.selector)

    get_validated_config(
        pipeline_def,
        environment_dict=execution_params.environment_dict,
        mode=execution_params.mode,
    )

    execution_plan = create_execution_plan(
        pipeline_def,
        execution_params.environment_dict,
        mode=execution_params.mode,
    )

    _check_start_pipeline_execution_errors(graphene_info, execution_params,
                                           execution_plan)

    try:
        pipeline_run = instance.create_run(
            pipeline_name=pipeline_def.name,
            run_id=execution_params.execution_metadata.run_id if
            execution_params.execution_metadata.run_id else make_new_run_id(),
            solid_subset=execution_params.selector.solid_subset
            if execution_params.selector else None,
            environment_dict=execution_params.environment_dict,
            mode=execution_params.mode,
            step_keys_to_execute=(get_step_keys_to_execute(
                instance, pipeline_def, execution_params)
                                  or execution_params.step_keys),
            tags=merge_dicts(pipeline_def.tags,
                             execution_params.execution_metadata.tags),
            status=PipelineRunStatus.NOT_STARTED,
            root_run_id=execution_params.execution_metadata.root_run_id,
            parent_run_id=execution_params.execution_metadata.parent_run_id,
            pipeline_snapshot=pipeline_def.get_pipeline_snapshot(),
            execution_plan_snapshot=snapshot_from_execution_plan(
                execution_plan, pipeline_def.get_pipeline_snapshot_id()),
        )
    except DagsterRunConflict as exc:
        return graphene_info.schema.type_named('PipelineRunConflict')(exc)

    graphene_info.context.execution_manager.execute_pipeline(
        graphene_info.context.get_handle(),
        pipeline_def,
        pipeline_run,
        instance=instance,
    )

    return graphene_info.schema.type_named('StartPipelineRunSuccess')(
        run=graphene_info.schema.type_named('PipelineRun')(pipeline_run))
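
For context, a hedged sketch of the kind of mutation that lands in this resolver, using dagster-graphql's test helper; the field and type names follow that era's schema and should be treated as assumptions rather than a verbatim copy.

from dagster_graphql.test.utils import execute_dagster_graphql

START_PIPELINE_EXECUTION = """
mutation ($executionParams: ExecutionParams!) {
  startPipelineExecution(executionParams: $executionParams) {
    __typename
    ... on StartPipelineRunSuccess {
      run { runId status }
    }
  }
}
"""

# A real call needs a dagster-graphql context wired to a repository and
# instance; sketched here rather than constructed:
# result = execute_dagster_graphql(
#     context,
#     START_PIPELINE_EXECUTION,
#     variables={"executionParams": {"selector": {"name": "hello_pipeline"},
#                                    "mode": "default"}},
# )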