Code example #1
def test_edge_equality():
    t1 = Task()
    t2 = Task()

    assert Edge(t1, t2) == Edge(t1, t2)
    assert Edge(t1, t2, "key") == Edge(t1, t2, "key")
    assert Edge(t1, t2, "key", True) == Edge(t1, t2, "key", True)

    assert Edge(t1, t2) != Edge(t1, t1)
    assert Edge(t1, t2, "key") != Edge(t1, t2, "other_key")
    assert Edge(t1, t2, "key", True) != Edge(t1, t2, "key", False)
Code example #2
File: flow_runner.py Project: samlex20/prefect
    def get_flow_run_state(
        self,
        state: State,
        task_states: Dict[Task, State],
        task_contexts: Dict[Task, Dict[str, Any]],
        return_tasks: Set[Task],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.base.Executor",
    ) -> State:
        """
        Runs the flow.

        Args:
            - state (State): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be provided to each task
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - task_runner_state_handlers (Iterable[Callable]): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor): executor to use when performing
                computation; defaults to the executor provided in your prefect configuration

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """

        if not state.is_running():
            self.logger.info("Flow is not in a Running state.")
            raise ENDRUN(state)

        if return_tasks is None:
            return_tasks = set()
        if set(return_tasks).difference(self.flow.tasks):
            raise ValueError(
                "Some tasks in return_tasks were not found in the flow.")

        # -- process each task in order

        with executor.start():

            for task in self.flow.sorted_tasks():

                task_state = task_states.get(task)
                if task_state is None and isinstance(
                        task, prefect.tasks.core.constants.Constant):
                    task_states[task] = task_state = Success(result=task.value)

                # if the state is finished, don't run the task, just use the provided state
                if (isinstance(task_state, State) and task_state.is_finished()
                        and not task_state.is_cached()
                        and not task_state.is_mapped()):
                    continue

                upstream_states = {}  # type: Dict[Edge, Union[State, Iterable]]

                # -- process each edge to the task
                for edge in self.flow.edges_to(task):
                    upstream_states[edge] = task_states.get(
                        edge.upstream_task,
                        Pending(message="Task state not available."))

                # augment edges with upstream constants
                for key, val in self.flow.constants[task].items():
                    edge = Edge(
                        upstream_task=prefect.tasks.core.constants.Constant(
                            val),
                        downstream_task=task,
                        key=key,
                    )
                    upstream_states[edge] = Success(
                        "Auto-generated constant value",
                        result=Result(
                            val, result_handler=ConstantResultHandler(val)),
                    )

                # -- run the task

                with prefect.context(task_full_name=task.name,
                                     task_tags=task.tags):
                    task_states[task] = executor.submit(
                        self.run_task,
                        task=task,
                        state=task_state,
                        upstream_states=upstream_states,
                        context=dict(prefect.context,
                                     **task_contexts.get(task, {})),
                        task_runner_state_handlers=task_runner_state_handlers,
                        executor=executor,
                    )

            # ---------------------------------------------
            # Collect results
            # ---------------------------------------------

            # terminal tasks determine if the flow is finished
            terminal_tasks = self.flow.terminal_tasks()

            # reference tasks determine flow state
            reference_tasks = self.flow.reference_tasks()

            # wait until all terminal tasks are finished
            final_tasks = terminal_tasks.union(reference_tasks).union(
                return_tasks)
            final_states = executor.wait({
                t:
                task_states.get(t,
                                Pending("Task not evaluated by FlowRunner."))
                for t in final_tasks
            })

            # also wait for any children of Mapped tasks to finish, and add them
            # to the dictionary to determine flow state
            all_final_states = final_states.copy()
            for t, s in list(final_states.items()):
                if s.is_mapped():
                    s.map_states = executor.wait(s.map_states)
                    s.result = [ms.result for ms in s.map_states]
                    all_final_states[t] = s.map_states

            assert isinstance(final_states, dict)

        key_states = set(
            flatten_seq([all_final_states[t] for t in reference_tasks]))
        terminal_states = set(
            flatten_seq([all_final_states[t] for t in terminal_tasks]))
        return_states = {t: final_states[t] for t in return_tasks}

        state = self.determine_final_state(
            state=state,
            key_states=key_states,
            return_states=return_states,
            terminal_states=terminal_states,
        )

        return state
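To see what `get_flow_run_state` walks over, here is a minimal sketch that inspects the same graph structure through the Flow methods used in the code above (`sorted_tasks`, `edges_to`, `terminal_tasks`, `reference_tasks`). It assumes a Prefect 1.x-style `Flow` API and is an orientation aid, not part of the runner.

# Minimal sketch, assuming a Prefect 1.x-style Flow API; the graph helpers used
# here are the ones the runner calls in the code above.
from prefect import Flow, Task

t1, t2, t3 = Task(name="extract"), Task(name="transform"), Task(name="load")
with Flow(name="sketch") as flow:
    t1.set_downstream(t2)
    t2.set_downstream(t3)

# Tasks come back in topological order, which is how the runner iterates them.
for task in flow.sorted_tasks():
    upstream = [e.upstream_task.name for e in flow.edges_to(task)]
    print(task.name, "upstream:", upstream)

# Terminal tasks decide whether the flow is finished; reference tasks decide its state.
print({t.name for t in flow.terminal_tasks()})
print({t.name for t in flow.reference_tasks()})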
Code example #3
def test_edge_has_tasks_property():
    t1 = Task()
    t2 = TaskWithKey()
    t3 = Task()
    edge = Edge(t1, t2, key="a_key")
    assert edge.tasks == {t1, t2}
Code example #4
def test_serialize_edge():
    assert isinstance(EdgeSchema().dump(Edge(Task(), Task())), dict)
Code example #5
def test_object_inequality():
    assert Edge(Task(), Task()) != 1
Code example #6
File: test_task.py Project: sebastianbertoli/prefect
 def test_set_upstream_context(self):
     with Flow(name="test") as f:
         t1 = Task()
         t2 = Task()
         t2.set_upstream(t1)
         assert Edge(t1, t2) in f.edges
Code example #7
 def generate(init, setup, task):
     return {
         Edge(Task(), Task(), key="mgr"): init,
         Edge(Task(), Task(), key="resource"): setup,
         Edge(Task(), Task()): task,
     }
Code example #8
 def test_unmapped_annotation_takes_precedence(self):
     e = Edge(edges.unmapped(Task()), Task(), mapped=True)
     assert e.mapped is False
Code example #9
 def test_flat(self):
     e = Edge(edges.flatten(Task()), Task())
     assert e.flattened is True
Code example #10
 def test_mapped_kwarg(self):
     e = Edge(Task(), Task(), mapped=True)
     assert e.mapped is True
Code example #11
 def test_unmapped(self):
     e = Edge(edges.unmapped(Task()), Task())
     assert e.mapped is False
Code example #12
 def test_mapped_annotation_takes_precedance_over_kwarg(self):
     e = Edge(edges.mapped(Task()), Task(), mapped=False)
     assert e.mapped is True
Code example #13
 def test_mapped(self):
     e = Edge(edges.mapped(Task()), Task())
     assert e.mapped is True
Code example #14
 def test_none(self):
     e = Edge(Task(), Task())
     assert e.mapped is False
     assert e.flattened is False
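Taken together, the annotation tests above show that the `edges.mapped`, `edges.unmapped`, and `edges.flatten` wrappers set the corresponding Edge attributes and win over the constructor keyword arguments. A condensed restatement, assuming the test modules' `edges` refers to `prefect.utilities.edges`:

# Condensed restatement of the tests above. Assumes the annotation helpers live
# in prefect.utilities.edges, as the test modules' `edges` name suggests.
from prefect.core import Edge, Task
from prefect.utilities import edges

assert Edge(Task(), Task()).mapped is False                                # default
assert Edge(Task(), Task(), mapped=True).mapped is True                    # kwarg
assert Edge(edges.unmapped(Task()), Task(), mapped=True).mapped is False   # annotation wins
assert Edge(edges.mapped(Task()), Task(), mapped=False).mapped is True     # annotation wins
assert Edge(edges.flatten(Task()), Task()).flattened is True               # flatten annotation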
Code example #15
 def test_or(self):
     with Flow(name="test") as f:
         t1 = Task()
         t2 = Task()
         t1 | t2
     assert Edge(t1, t2) in f.edges
Code example #16
 def test_flat_kwarg(self):
     e = Edge(Task(), Task(), flattened=True)
     assert e.flattened is True
Code example #17
File: flow_runner.py Project: manesioz/prefect
    def get_flow_run_state(
        self,
        state: State,
        task_states: Dict[Task, State],
        task_contexts: Dict[Task, Dict[str, Any]],
        return_tasks: Set[Task],
        task_runner_state_handlers: Iterable[Callable],
        executor: "prefect.engine.executors.base.Executor",
    ) -> State:
        """
        Runs the flow.

        Args:
            - state (State): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - task_contexts (Dict[Task, Dict[str, Any]]): contexts that will be provided to each task
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - task_runner_state_handlers (Iterable[Callable]): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor): executor to use when performing
                computation; defaults to the executor provided in your prefect configuration

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """
        # this dictionary is used for tracking the states of "children" mapped tasks;
        # when running on Dask, we want to avoid serializing futures, so instead
        # of storing child task states in the `map_states` attribute we instead store
        # in this dictionary and only after they are resolved do we attach them to the Mapped state
        mapped_children = dict()  # type: Dict[Task, list]

        if not state.is_running():
            self.logger.info("Flow is not in a Running state.")
            raise ENDRUN(state)

        if return_tasks is None:
            return_tasks = set()
        if set(return_tasks).difference(self.flow.tasks):
            raise ValueError(
                "Some tasks in return_tasks were not found in the flow.")

        def extra_context(task: Task, task_index: int = None) -> dict:
            return {
                "task_name": task.name,
                "task_tags": task.tags,
                "task_index": task_index,
            }

        # -- process each task in order

        with executor.start():

            for task in self.flow.sorted_tasks():
                task_state = task_states.get(task)

                # if a task is a constant task, we already know its return value
                # no need to use up resources by running it through a task runner
                if task_state is None and isinstance(
                        task, prefect.tasks.core.constants.Constant):
                    task_states[task] = task_state = Success(result=task.value)

                # if the state is finished, don't run the task, just use the provided state
                # if the state is cached / mapped, we still want to run the task runner pipeline steps
                # to either ensure the cache is still valid / or to recreate the mapped pipeline for
                # possible retries
                if (isinstance(task_state, State) and task_state.is_finished()
                        and not task_state.is_cached()
                        and not task_state.is_mapped()):
                    continue

                upstream_states = {}  # type: Dict[Edge, State]

                # this dictionary is used exclusively for "reduce" tasks
                # in particular we store the states / futures corresponding to
                # the upstream children, and if running on Dask, let Dask resolve them at the appropriate time
                upstream_mapped_states = {}  # type: Dict[Edge, list]

                # -- process each edge to the task
                for edge in self.flow.edges_to(task):
                    upstream_states[edge] = task_states.get(
                        edge.upstream_task,
                        Pending(message="Task state not available."))

                    # this checks whether the task is a "reduce" task for a mapped pipeline
                    # and if so, collects the appropriate upstream children
                    if not edge.mapped and isinstance(upstream_states[edge],
                                                      Mapped):
                        upstream_mapped_states[edge] = mapped_children.get(
                            edge.upstream_task, [])

                # augment edges with upstream constants
                for key, val in self.flow.constants[task].items():
                    edge = Edge(
                        upstream_task=prefect.tasks.core.constants.Constant(
                            val),
                        downstream_task=task,
                        key=key,
                    )
                    upstream_states[edge] = Success(
                        "Auto-generated constant value",
                        result=ConstantResult(value=val),
                    )

                # handle mapped tasks
                if any([edge.mapped for edge in upstream_states.keys()]):

                    ## wait on upstream states to determine the width of the pipeline
                    ## this is the key to depth-first execution
                    upstream_states = executor.wait(
                        {e: state
                         for e, state in upstream_states.items()})
                    ## we submit the task to the task runner to determine if
                    ## we can proceed with mapping - if the new task state is not a Mapped
                    ## state then we don't proceed
                    task_states[task] = executor.wait(
                        executor.submit(
                            run_task,
                            task=task,
                            state=task_state,  # original state
                            upstream_states=upstream_states,
                            context=dict(prefect.context,
                                         **task_contexts.get(task, {})),
                            flow_result=self.flow.result,
                            task_runner_cls=self.task_runner_cls,
                            task_runner_state_handlers=
                            task_runner_state_handlers,
                            upstream_mapped_states=upstream_mapped_states,
                            is_mapped_parent=True,
                            extra_context=extra_context(task),
                        ))

                    ## either way, we should now have enough resolved states to restructure
                    ## the upstream states into a list of upstream state dictionaries to iterate over
                    list_of_upstream_states = prepare_upstream_states_for_mapping(
                        task_states[task], upstream_states, mapped_children)

                    submitted_states = []

                    for idx, states in enumerate(list_of_upstream_states):
                        ## if we are on a future rerun of a partially complete flow run,
                        ## there might be mapped children in a retrying state; this check
                        ## looks into the current task state's map_states for such info
                        if (isinstance(task_state, Mapped)
                                and len(task_state.map_states) >= idx + 1):
                            current_state = task_state.map_states[
                                idx]  # type: Optional[State]
                        elif isinstance(task_state, Mapped):
                            current_state = None
                        else:
                            current_state = task_state

                        ## this is where each child is submitted for actual work
                        submitted_states.append(
                            executor.submit(
                                run_task,
                                task=task,
                                state=current_state,
                                upstream_states=states,
                                context=dict(
                                    prefect.context,
                                    **task_contexts.get(task, {}),
                                    map_index=idx,
                                ),
                                flow_result=self.flow.result,
                                task_runner_cls=self.task_runner_cls,
                                task_runner_state_handlers=
                                task_runner_state_handlers,
                                upstream_mapped_states=upstream_mapped_states,
                                extra_context=extra_context(task,
                                                            task_index=idx),
                            ))
                    if isinstance(task_states.get(task), Mapped):
                        mapped_children[
                            task] = submitted_states  # type: ignore

                else:
                    task_states[task] = executor.submit(
                        run_task,
                        task=task,
                        state=task_state,
                        upstream_states=upstream_states,
                        context=dict(prefect.context,
                                     **task_contexts.get(task, {})),
                        flow_result=self.flow.result,
                        task_runner_cls=self.task_runner_cls,
                        task_runner_state_handlers=task_runner_state_handlers,
                        upstream_mapped_states=upstream_mapped_states,
                        extra_context=extra_context(task),
                    )

            # ---------------------------------------------
            # Collect results
            # ---------------------------------------------

            # terminal tasks determine if the flow is finished
            terminal_tasks = self.flow.terminal_tasks()

            # reference tasks determine flow state
            reference_tasks = self.flow.reference_tasks()

            # wait until all terminal tasks are finished
            final_tasks = terminal_tasks.union(reference_tasks).union(
                return_tasks)
            final_states = executor.wait({
                t:
                task_states.get(t,
                                Pending("Task not evaluated by FlowRunner."))
                for t in final_tasks
            })

            # also wait for any children of Mapped tasks to finish, and add them
            # to the dictionary to determine flow state
            all_final_states = final_states.copy()
            for t, s in list(final_states.items()):
                if s.is_mapped():
                    # ensure we wait for any mapped children to complete
                    if t in mapped_children:
                        s.map_states = executor.wait(mapped_children[t])
                    s.result = [ms.result for ms in s.map_states]
                    all_final_states[t] = s.map_states

            assert isinstance(final_states, dict)

        key_states = set(
            flatten_seq([all_final_states[t] for t in reference_tasks]))
        terminal_states = set(
            flatten_seq([all_final_states[t] for t in terminal_tasks]))
        return_states = {t: final_states[t] for t in return_tasks}

        state = self.determine_final_state(
            state=state,
            key_states=key_states,
            return_states=return_states,
            terminal_states=terminal_states,
        )

        return state
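The mapped branch above relies on `prepare_upstream_states_for_mapping` to turn the resolved upstream states into one dictionary per map index: mapped edges contribute their i-th element, unmapped edges are shared by every child. The helper below is not Prefect's implementation, just a self-contained sketch of that restructuring step with plain lists and dicts.

# Self-contained sketch of the "one upstream dict per child" restructuring the
# comments above describe. This is not Prefect's prepare_upstream_states_for_mapping;
# the names and the min() width rule are illustrative assumptions.
def sketch_prepare_for_mapping(upstream):
    """upstream maps a key to (is_mapped, value); mapped values are lists."""
    width = min(len(v) for is_mapped, v in upstream.values() if is_mapped)
    return [
        {k: (v[i] if is_mapped else v) for k, (is_mapped, v) in upstream.items()}
        for i in range(width)
    ]


children = sketch_prepare_for_mapping({
    "x": (True, [1, 2, 3]),      # mapped upstream: one element per child
    "cfg": (False, {"n": 10}),   # unmapped upstream: shared by every child
})
assert children == [
    {"x": 1, "cfg": {"n": 10}},
    {"x": 2, "cfg": {"n": 10}},
    {"x": 3, "cfg": {"n": 10}},
]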
Code example #18
 def test_unmapped_annotation_takes_precedence(self):
     e = Edge(edges.flatten(Task()), Task(), flattened=False)
     assert e.flattened is True
Code example #19
File: test_task.py Project: sebastianbertoli/prefect
 def test_set_upstream_with_properties(self, props):
     with Flow(name="test") as f:
         t1 = Task()
         t2 = Task()
         t2.set_upstream(t1, **props)
         assert Edge(t1, t2, **props) in f.edges
Code example #20
 def test_nested_annotation(self):
     e = Edge(edges.flatten(edges.mapped(Task())), Task())
     assert e.flattened is True
     assert e.mapped is True
Code example #21
File: test_task.py Project: znicholasbrown/prefect
 def test_set_downstream(self):
     f = Flow(name="test")
     t1 = Task()
     t2 = Task()
     t1.set_downstream(t2, flow=f)
     assert Edge(t1, t2) in f.edges
Code example #22
                        children = mapped_children.get(edge.upstream_task, [])

                        # if the edge is flattened, then we need to wait for the mapped children
                        # to complete and then flatten them
                        if edge.flattened:
                            children = executors.flatten_mapped_children(
                                mapped_children=children, executor=executor
                            )

                        upstream_mapped_states[edge] = children

                # augment edges with upstream constants
                for key, val in self.flow.constants[task].items():
                    edge = Edge(
                        upstream_task=prefect.tasks.core.constants.Constant(val),
                        downstream_task=task,
                        key=key,
                    )
                    upstream_states[edge] = Success(
                        "Auto-generated constant value",
                        result=ConstantResult(value=val),
                    )

                # handle mapped tasks
                if any(edge.mapped for edge in upstream_states.keys()):

                    # wait on upstream states to determine the width of the pipeline