Example #1
def test_available_dates_sensor_retries(monkeypatch, postgres_test_db):
    """
    Test that the available_dates_sensor flow re-runs workflows that failed on
    the previous attempt, and does not re-run them again once they have succeeded.
    """
    # Mock flowclient
    flowclient_available_dates = {
        "dummy_cdr_type": ["2016-01-01", "2016-01-02", "2016-01-03"]
    }
    monkeypatch.setattr("flowclient.get_available_dates",
                        lambda connection: flowclient_available_dates)
    monkeypatch.setattr("flowclient.connect", Mock())
    monkeypatch.setenv("FLOWAPI_TOKEN", "DUMMY_TOKEN")

    # Mock workflows
    dummy_workflow = Mock()
    dummy_workflow.name = "DUMMY_WORKFLOW"
    dummy_workflow.run.side_effect = [Failed(), Success(), Success()]
    workflow_storage = Memory()
    workflow_storage.add_flow(dummy_workflow)

    workflow_configs = [WorkflowConfig(workflow_name="DUMMY_WORKFLOW")]

    # Run available dates sensor
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that sensor flow ended in a 'failed' state, and dummy_workflow.run() was called 3 times
    assert flow_state.is_failed()
    dummy_workflow.run.assert_has_calls([
        call(
            parameters=dict(reference_date=d,
                            date_ranges=[(d, d.add(days=1))]),
            run_on_schedule=False,
        ) for d in pendulum.period(pendulum.date(2016, 1, 1),
                                   pendulum.date(2016, 1, 3))
    ])

    # Reset workflow mock
    dummy_workflow.reset_mock()
    dummy_workflow.run.side_effect = None
    dummy_workflow.run.return_value = Success()

    # Run available dates sensor again
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that sensor flow was successful, and dummy_workflow only re-ran for the date for which it previously failed
    assert flow_state.is_successful()
    dummy_workflow.run.assert_called_once_with(
        parameters=dict(
            reference_date=pendulum.date(2016, 1, 1),
            date_ranges=[(pendulum.date(2016, 1, 1), pendulum.date(2016, 1, 2))],
        ),
        run_on_schedule=False,
    )

    # Reset workflow mock again
    dummy_workflow.reset_mock()

    # Run available dates sensor once more
    with set_temporary_config({
            "flowapi_url": "DUMMY_URL",
            "db_uri": postgres_test_db.url()
    }):
        flow_state = available_dates_sensor.run(
            workflow_configs=workflow_configs,
            workflow_storage=workflow_storage)

    # Check that dummy_workflow did not run again, now that it has run successfully
    assert flow_state.is_successful()
    dummy_workflow.run.assert_not_called()
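
A minimal standalone sketch (not part of the test above) of the `side_effect` sequencing that drives the fail-once-then-succeed behaviour mocked here; `unittest.mock` consumes one item per call:

from unittest.mock import Mock

# each call to the mock returns the next item in the side_effect list
runs = Mock(side_effect=["failed", "success", "success"])
assert [runs(), runs(), runs()] == ["failed", "success", "success"]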
Example #2
class TestTaskRunStates:
    async def test_set_task_run_state(self, task_run_id):
        result = await api.states.set_task_run_state(task_run_id=task_run_id,
                                                     state=Failed())

        assert result.task_run_id == task_run_id

        query = await models.TaskRun.where(id=task_run_id).first(
            {"version", "state", "serialized_state"})

        assert query.version == 2
        assert query.state == "Failed"
        assert query.serialized_state["type"] == "Failed"

    @pytest.mark.parametrize("state", [Failed(), Success()])
    async def test_set_task_run_state_fails_with_wrong_task_run_id(
            self, state):
        with pytest.raises(ValueError, match="State update failed"):
            await api.states.set_task_run_state(task_run_id=str(uuid.uuid4()),
                                                state=state)

    @pytest.mark.parametrize(
        "state", [s() for s in State.children() if not s().is_running()])
    async def test_state_does_not_set_heartbeat_unless_running(
            self, state, task_run_id):
        task_run = await models.TaskRun.where(id=task_run_id
                                              ).first({"heartbeat"})
        assert task_run.heartbeat is None

        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=state)

        task_run = await models.TaskRun.where(id=task_run_id
                                              ).first({"heartbeat"})
        assert task_run.heartbeat is None

    async def test_running_state_sets_heartbeat(self, task_run_id,
                                                running_flow_run_id):
        task_run = await models.TaskRun.where(id=task_run_id
                                              ).first({"heartbeat"})
        assert task_run.heartbeat is None

        dt = pendulum.now("UTC")
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=Running())

        task_run = await models.TaskRun.where(id=task_run_id
                                              ).first({"heartbeat"})
        assert task_run.heartbeat > dt

    async def test_trigger_failed_state_does_not_set_end_time(
            self, task_run_id):
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=TriggerFailed())
        task_run_info = await models.TaskRun.where(id=task_run_id).first(
            {"id", "start_time", "end_time"})
        assert not task_run_info.start_time
        assert not task_run_info.end_time

    @pytest.mark.parametrize(
        "state",
        [s() for s in State.children() if s not in _MetaState.children()],
        ids=[
            s.__name__ for s in State.children()
            if s not in _MetaState.children()
        ],
    )
    async def test_setting_a_task_run_state_pulls_cached_inputs_if_possible(
            self, task_run_id, state, running_flow_run_id):

        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Failed(cached_inputs=complex_result)
        await models.TaskRun.where(id=task_run_id).update(set=dict(
            serialized_state=cached_state.serialize()))

        # attempt to move the task run into the parametrized state
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=state)

        task_run = await models.TaskRun.where(id=task_run_id
                                              ).first({"serialized_state"})

        # ensure the state change took place
        assert task_run.serialized_state["type"] == type(state).__name__
        assert task_run.serialized_state["cached_inputs"]["x"]["value"] == 1
        assert task_run.serialized_state["cached_inputs"]["y"]["value"] == {
            "z": 2
        }

    @pytest.mark.parametrize(
        "state",
        [
            s(cached_inputs=None)
            for s in State.children() if s not in _MetaState.children()
        ],
        ids=[
            s.__name__ for s in State.children()
            if s not in _MetaState.children()
        ],
    )
    async def test_task_runs_with_null_cached_inputs_do_not_overwrite_cache(
            self, state, task_run_id, running_flow_run_id):

        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=state)
        # set up a Retrying state with non-null cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Retrying(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=cached_state)
        run = await models.TaskRun.where(id=task_run_id
                                         ).first({"serialized_state"})

        assert run.serialized_state["cached_inputs"]["x"]["value"] == 1
        assert run.serialized_state["cached_inputs"]["y"]["value"] == {"z": 2}

    @pytest.mark.parametrize(
        "state_cls",
        [s for s in State.children() if s not in _MetaState.children()])
    async def test_task_runs_cached_inputs_give_preference_to_new_cached_inputs(
            self, state_cls, task_run_id, running_flow_run_id):

        # set up an initial state with its own cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"a": 2}, result_handler=JSONResultHandler())
        complex_result = {"b": res1, "c": res2}
        cached_state = state_cls(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=cached_state)
        # set up a Retrying state with non-null cached inputs
        res1 = SafeResult(1, result_handler=JSONResultHandler())
        res2 = SafeResult({"z": 2}, result_handler=JSONResultHandler())
        complex_result = {"x": res1, "y": res2}
        cached_state = Retrying(cached_inputs=complex_result)
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=cached_state)
        run = Box(await models.TaskRun.where(id=task_run_id
                                             ).first({"serialized_state"}))

        # verify that we have cached inputs, and that preference has been given to the new
        # cached inputs
        assert run.serialized_state.cached_inputs
        assert run.serialized_state.cached_inputs.x.value == 1
        assert run.serialized_state.cached_inputs.y.value == {"z": 2}

    @pytest.mark.parametrize(
        "flow_run_state",
        [Pending(), Running(), Failed(),
         Success()])
    async def test_running_states_can_not_be_set_if_flow_run_is_not_running(
            self, flow_run_id, task_run_id, flow_run_state):

        await api.states.set_flow_run_state(flow_run_id=flow_run_id,
                                            state=flow_run_state)

        set_running_coroutine = api.states.set_task_run_state(
            task_run_id=task_run_id, state=Running())

        if flow_run_state.is_running():
            assert await set_running_coroutine
            run = await models.TaskRun.where(id=task_run_id).first({"state"})
            assert run.state == "Running"
        else:
            with pytest.raises(ValueError, match="is not in a running state"):
                await set_running_coroutine
            run = await models.TaskRun.where(id=task_run_id).first({"state"})
            assert run.state != "Running"
Example #3
class TestFlowVisualize:
    def test_visualize_raises_informative_importerror_without_graphviz(
            self, monkeypatch):
        f = Flow(name="test")
        f.add_task(Task())

        with monkeypatch.context() as m:
            m.setattr(sys, "path", "")
            with pytest.raises(ImportError) as exc:
                f.visualize()

        assert "pip install prefect[viz]" in repr(exc.value)

    def test_viz_returns_graph_object_if_in_ipython(self):
        import graphviz

        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        with patch.dict("sys.modules", IPython=ipython):
            f = Flow(name="test")
            f.add_task(Task(name="a_nice_task"))
            graph = f.visualize()
        assert "label=a_nice_task" in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping(self):
        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                res = AddTask(name="a_nice_task").map(
                    x=Task(name="a_list_task"), y=8)
            graph = f.visualize()
        assert 'label="a_nice_task <map>" shape=box' in graph.source
        assert "label=a_list_task shape=ellipse" in graph.source
        assert "label=x style=dashed" in graph.source
        assert "label=y style=dashed" in graph.source

    @pytest.mark.parametrize("state", [Success(), Failed(), Skipped()])
    def test_viz_if_flow_state_provided(self, state):
        import graphviz

        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        with patch.dict("sys.modules", IPython=ipython):
            t = Task(name="a_nice_task")
            f = Flow(name="test")
            f.add_task(t)
            graph = f.visualize(flow_state=Success(result={t: state}))
        assert "label=a_nice_task" in graph.source
        assert 'color="' + state.color + '80"' in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping_if_flow_state_provided(self):
        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state = Mapped(map_states=[Success(), Failed()])
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                res = add.map(x=list_task, y=8)
            graph = f.visualize(flow_state=Success(result={
                res: map_state,
                list_task: Success()
            }))

        # one colored node for each mapped result
        assert 'label="a_nice_task <map>" color="#00800080"' in graph.source
        assert 'label="a_nice_task <map>" color="#FF000080"' in graph.source
        assert 'label=a_list_task color="#00800080"' in graph.source
        assert 'label=8 color="#00000080"' in graph.source

        # two edges for each input to add()
        for var in ["x", "y"]:
            for index in [0, 1]:
                assert "{0} [label={1} style=dashed]".format(
                    index, var) in graph.source

    def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
        ipython = MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
        map_state2 = Mapped(map_states=[Success(), Failed()])

        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                first_res = add.map(x=list_task, y=8)
                with pytest.warns(UserWarning):
                    # making a copy of a task with dependencies
                    res = first_res.map(x=first_res, y=9)
            graph = f.visualize(flow_state=Success(
                result={
                    res: map_state1,
                    list_task: Success(),
                    first_res: map_state2,
                }))

        assert "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + "0", second=str(id(res)) + "0")
        assert "{first} -> {second} [label=x style=dashed]".format(
            first=str(id(first_res)) + "1", second=str(id(res)) + "1")

    @pytest.mark.parametrize(
        "error",
        [
            ImportError("abc"),
            ValueError("abc"),
            TypeError("abc"),
            NameError("abc"),
            AttributeError("abc"),
        ],
    )
    def test_viz_renders_if_ipython_isnt_installed_or_errors(self, error):
        graphviz = MagicMock()
        ipython = MagicMock(get_ipython=MagicMock(side_effect=error))
        with patch.dict("sys.modules", graphviz=graphviz, IPython=ipython):
            with Flow(name="test") as f:
                res = AddTask(name="a_nice_task").map(
                    x=Task(name="a_list_task"), y=8)
            f.visualize()
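
A standalone sketch of the module-patching idiom these tests rely on: `patch.dict` temporarily installs an entry in `sys.modules`, so an `import IPython` inside the block resolves to the mock:

from unittest.mock import MagicMock, patch

fake_ipython = MagicMock()
with patch.dict("sys.modules", IPython=fake_ipython):
    import IPython  # resolved from sys.modules, so this is the mock
    assert IPython is fake_ipython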
Example #4
    def manage_jobs(self) -> None:
        """
        This function checks if jobs are `Failed` or `Succeeded` and if they are then the jobs are
        deleted from the namespace. If one of the job's pods happen to run into image pulling errors
        then the flow run is failed and the job is still deleted.
        """
        self.logger.debug(
            "Retrieving information for jobs currently in the cluster..."
        )

        more = True
        _continue = ""
        while more:
            try:
                jobs = self.batch_client.list_namespaced_job(
                    namespace=self.namespace,
                    label_selector="prefect.io/identifier",
                    limit=20,
                    _continue=_continue,
                )
                _continue = jobs.metadata._continue
                more = bool(_continue)

                for job in jobs.items:
                    delete_job = job.status.failed or job.status.succeeded
                    job_name = job.metadata.name
                    flow_run_id = job.metadata.labels.get(
                        "prefect.io/flow_run_id")

                    if not flow_run_id:
                        # Do not attempt to process a job without a flow run id; we need
                        # the id to manage the flow run state
                        self.logger.warning(
                            f"Cannot manage job {job_name!r}, it is missing a "
                            "'prefect.io/flow_run_id' label.")
                        continue

                    try:
                        # Do not attempt to process a job with an invalid flow run id
                        flow_run_state = self.client.get_flow_run_state(
                            flow_run_id)
                    except ObjectNotFoundError:
                        self.logger.warning(
                            f"Job {job_name!r} is for flow run {flow_run_id!r} "
                            "which does not exist. It will be ignored.")
                        continue

                    # Check for pods that are stuck with image pull errors
                    if not delete_job:
                        pods = self.core_client.list_namespaced_pod(
                            namespace=self.namespace,
                            label_selector="prefect.io/identifier={}".format(
                                job.metadata.labels.get(
                                    "prefect.io/identifier")),
                        )

                        for pod in pods.items:
                            pod_name = pod.metadata.name
                            if pod.status.container_statuses:
                                for container_status in pod.status.container_statuses:
                                    waiting = container_status.state.waiting
                                    if waiting and waiting.reason in ("ErrImagePull", "ImagePullBackOff"):
                                        self.logger.debug(
                                            f"Failing flow run {flow_run_id} due to pod {waiting.reason}"
                                        )
                                        try:
                                            self.client.set_flow_run_state(
                                                flow_run_id=flow_run_id,
                                                state=Failed(
                                                    message="Kubernetes Error: {}".format(waiting.message)
                                                ),
                                            )
                                        except ClientError as exc:
                                            self.logger.error(
                                                "Error attempting to set flow run state for "
                                                f"{flow_run_id}: {exc}")

                                        delete_job = True
                                        break

                            # Report recent events for pending pods to flow run logs
                            if pod.status.phase == "Pending":
                                pod_events = self.core_client.list_namespaced_event(
                                    namespace=self.namespace,
                                    field_selector="involvedObject.name={}".format(pod_name),
                                    timeout_seconds=30,
                                )

                                for event in sorted(
                                    pod_events.items,
                                    # Some events are missing timestamp attrs and `None` is
                                    # not sortable vs datetimes, so we default to 'now'
                                    key=lambda e: getattr(e, "last_timestamp", None) or pendulum.now(),
                                ):

                                    # Skip events without timestamps
                                    if not getattr(event, "last_timestamp", None):
                                        self.logger.debug(
                                            f"Encountered K8s event on pod {pod_name!r}"
                                            f" with no timestamp: {event!r}")
                                        continue

                                    # Skip old events
                                    last_seen = self.job_pod_event_timestamps[job_name][pod_name]
                                    if event.last_timestamp < last_seen:
                                        continue

                                    self.job_pod_event_timestamps[job_name][pod_name] = (
                                        event.last_timestamp
                                    )

                                    log_msg = (
                                        f"Event: {event.reason!r} on pod {pod_name!r}\n"
                                        f"\tMessage: {event.message}")

                                    # Send pod failure information to flow run logs
                                    self.client.write_run_logs([
                                        dict(
                                            flow_run_id=flow_run_id,
                                            name="k8s-infra",
                                            message=log_msg,
                                            level="DEBUG",
                                            timestamp=event.last_timestamp.isoformat(),
                                        )
                                    ])

                    # Report failed pods
                    if job.status.failed:
                        pods = self.core_client.list_namespaced_pod(
                            namespace=self.namespace,
                            label_selector="prefect.io/identifier={}".format(
                                job.metadata.labels.get(
                                    "prefect.io/identifier")),
                        )

                        failed_pods = []
                        for pod in pods.items:
                            if pod.status.phase != "Failed":
                                continue

                            # Format pod failure error message
                            failed_pods.append(pod.metadata.name)
                            pod_status_logs = [
                                f"Pod {pod.metadata.name} failed."
                            ]
                            if not pod.status.container_statuses:
                                pod_status_logs.append(
                                    "\tNo container statuses found for pod")
                            else:
                                for status in pod.status.container_statuses:
                                    state = ("running"
                                             if status.state.running else
                                             "waiting" if status.state.waiting
                                             else "terminated" if status.state.
                                             terminated else "Not Found")
                                    pod_status_logs.append(
                                        f"\tContainer '{status.name}' state: {state}"
                                    )

                                    if status.state.terminated:
                                        pod_status_logs.append(
                                            f"\t\tExit Code:: {status.state.terminated.exit_code}"
                                        )
                                        if status.state.terminated.message:
                                            pod_status_logs.append(
                                                f"\t\tMessage: {status.state.terminated.message}"
                                            )
                                        if status.state.terminated.reason:
                                            pod_status_logs.append(
                                                f"\t\tReason: {status.state.terminated.reason}"
                                            )
                                        if status.state.terminated.signal:
                                            pod_status_logs.append(
                                                f"\t\tSignal: {status.state.terminated.signal}"
                                            )

                            # Send pod failure information to flow run logs
                            self.client.write_run_logs([
                                dict(
                                    flow_run_id=flow_run_id,
                                    name="k8s-infra",
                                    message="\n".join(pod_status_logs),
                                    level="ERROR",
                                )
                            ])

                        # If there are failed pods and the run is not finished, fail the run
                        if failed_pods and not flow_run_state.is_finished():
                            self.logger.debug(
                                f"Failing flow run {flow_run_id} due to the failed pods {failed_pods}"
                            )
                            try:
                                self.client.set_flow_run_state(
                                    flow_run_id=flow_run_id,
                                    state=Failed(
                                        message="Kubernetes Error: pods {} failed for this job".format(
                                            failed_pods
                                        )
                                    ),
                                )
                            except ClientError as exc:
                                self.logger.error(
                                    f"Error attempting to set flow run state for {flow_run_id}: "
                                    f"{exc}")

                    # Delete job if it is successful or failed
                    if delete_job and self.delete_finished_jobs:
                        self.logger.debug(f"Deleting job {job_name}")
                        try:
                            self.job_pod_event_timestamps.pop(job_name, None)
                            self.batch_client.delete_namespaced_job(
                                name=job_name,
                                namespace=self.namespace,
                                body=kubernetes.client.V1DeleteOptions(
                                    propagation_policy="Foreground"),
                            )
                        except kubernetes.client.rest.ApiException as exc:
                            if exc.status != 404:
                                self.logger.error(
                                    f"{exc.status} error attempting to delete job {job_name}"
                                )
            except kubernetes.client.rest.ApiException as exc:
                if exc.status == 410:
                    self.logger.debug("Refreshing job listing token...")
                    _continue = ""
                    continue
                else:
                    self.logger.debug(exc)
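
A hedged sketch of the pagination pattern at the top of `manage_jobs`; `fetch_page` is a hypothetical stand-in for `batch_client.list_namespaced_job`, returning pages with an `items` list and a continuation token that is empty once the listing is complete:

from types import SimpleNamespace

def drain_paginated(fetch_page):
    items, token, more = [], "", True
    while more:
        page = fetch_page(_continue=token)
        items.extend(page.items)
        token = page.metadata._continue
        more = bool(token)  # an empty token means this was the final page
    return items

# tiny fake standing in for the Kubernetes client: two pages, then no token
pages = iter([
    SimpleNamespace(items=[1, 2], metadata=SimpleNamespace(_continue="t1")),
    SimpleNamespace(items=[3], metadata=SimpleNamespace(_continue="")),
])
assert drain_paginated(lambda _continue: next(pages)) == [1, 2, 3]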
Example #5
    def determine_final_state(self, *args, **kwargs):
        return Failed("Very specific error message")
Example #6
    def call_runner_target_handlers(self, old_state: State, new_state: State) -> State:
        """
        A special state handler that the FlowRunner uses to call its flow's state handlers.
        This method is called as part of the base Runner's `handle_state_change()` method.

        Args:
            - old_state (State): the old (previous) state
            - new_state (State): the new (current) state

        Returns:
            - State: the new state
        """
        raise_on_exception = prefect.context.get("raise_on_exception", False)

        try:
            new_state = super().call_runner_target_handlers(
                old_state=old_state, new_state=new_state
            )
        except Exception as exc:
            msg = "Exception raised while calling state handlers: {}".format(repr(exc))
            self.logger.debug(msg)
            if raise_on_exception:
                raise exc
            new_state = Failed(msg, result=exc)

        flow_run_id = prefect.context.get("flow_run_id", None)
        version = prefect.context.get("flow_run_version")

        try:
            cloud_state = new_state
            state = self.client.set_flow_run_state(
                flow_run_id=flow_run_id,
                version=version if cloud_state.is_running() else None,
                state=cloud_state,
            )
        except VersionLockError:
            state = self.client.get_flow_run_state(flow_run_id=flow_run_id)

            if state.is_running():
                self.logger.debug(
                    "Version lock encountered and flow is already in a running state."
                )
                raise ENDRUN(state=state)

            self.logger.debug(
                "Version lock encountered, proceeding with state {}...".format(
                    type(state).__name__
                )
            )
            new_state = state
        except Exception as exc:
            self.logger.debug(
                "Failed to set flow state with error: {}".format(repr(exc))
            )
            raise ENDRUN(state=new_state)

        if state.is_queued():
            state.state = old_state  # type: ignore
            raise ENDRUN(state=state)

        prefect.context.update(flow_run_version=(version or 0) + 1)

        return new_state
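
A brief hedged illustration (assuming Prefect 1.x internals) of the ENDRUN signal raised above: it short-circuits the runner pipeline while carrying the state the run should end in:

from prefect.engine.runner import ENDRUN
from prefect.engine.state import Failed

try:
    raise ENDRUN(state=Failed("stop here"))
except ENDRUN as exc:
    assert exc.state.is_failed()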
Example #7
    def manage_jobs(self) -> None:
        """
        This function checks if jobs are `Failed` or `Succeeded` and if they are then the jobs are
        deleted from the namespace. If one of the job's pods happen to run into image pulling errors
        then the flow run is failed and the job is still deleted.
        """
        self.logger.debug(
            "Retrieving information for jobs currently in the cluster..."
        )

        more = True
        _continue = ""
        while more:
            try:
                jobs = self.batch_client.list_namespaced_job(
                    namespace=self.namespace,
                    label_selector="prefect.io/identifier",
                    limit=20,
                    _continue=_continue,
                )
                _continue = jobs.metadata._continue
                more = bool(_continue)

                for job in jobs.items:
                    delete_job = job.status.failed or job.status.succeeded
                    job_name = job.metadata.name
                    flow_run_id = job.metadata.labels.get("prefect.io/flow_run_id")

                    # Check for pods that are stuck with image pull errors
                    if not delete_job:
                        pods = self.core_client.list_namespaced_pod(
                            namespace=self.namespace,
                            label_selector="prefect.io/identifier={}".format(
                                job.metadata.labels.get("prefect.io/identifier")
                            ),
                        )

                        for pod in pods.items:
                            if pod.status.container_statuses:
                                for container_status in pod.status.container_statuses:
                                    waiting = container_status.state.waiting
                                    if waiting and (
                                        waiting.reason == "ErrImagePull"
                                        or waiting.reason == "ImagePullBackOff"
                                    ):
                                        self.logger.debug(
                                            f"Failing flow run {flow_run_id} due to pod {waiting.reason}"
                                        )
                                        self.client.set_flow_run_state(
                                            flow_run_id=flow_run_id,
                                            state=Failed(
                                                message="Kubernetes Error: {}".format(
                                                    container_status.state.waiting.message
                                                )
                                            ),
                                        )

                                        delete_job = True
                                        break

                    # Report failed pods
                    if job.status.failed:
                        pods = self.core_client.list_namespaced_pod(
                            namespace=self.namespace,
                            label_selector="prefect.io/identifier={}".format(
                                job.metadata.labels.get("prefect.io/identifier")
                            ),
                        )

                        failed_pods = []
                        for pod in pods.items:
                            if pod.status.phase != "Failed":
                                continue

                            # Format pod failure error message
                            failed_pods.append(pod.metadata.name)
                            pod_status_logs = [f"Pod {pod.metadata.name} failed."]
                            for status in pod.status.container_statuses:
                                state = (
                                    "running"
                                    if status.state.running
                                    else "waiting"
                                    if status.state.waiting
                                    else "terminated"
                                    if status.state.terminated
                                    else "Not Found"
                                )
                                pod_status_logs.append(
                                    f"\tContainer '{status.name}' state: {state}"
                                )

                                if status.state.terminated:
                                    pod_status_logs.append(
                                        f"\t\tExit Code:: {status.state.terminated.exit_code}"
                                    )
                                    if status.state.terminated.message:
                                        pod_status_logs.append(
                                            f"\t\tMessage: {status.state.terminated.message}"
                                        )
                                    if status.state.terminated.reason:
                                        pod_status_logs.append(
                                            f"\t\tReason: {status.state.terminated.reason}"
                                        )
                                    if status.state.terminated.signal:
                                        pod_status_logs.append(
                                            f"\t\tSignal: {status.state.terminated.signal}"
                                        )

                            # Send pod failure information to flow run logs
                            self.client.write_run_logs(
                                [
                                    dict(
                                        flow_run_id=flow_run_id,
                                        name=self.name,
                                        message="\n".join(pod_status_logs),
                                        level="ERROR",
                                    )
                                ]
                            )

                        # If there are failed pods and the run is not finished, fail the run
                        if (
                            failed_pods
                            and not self.client.get_flow_run_state(
                                flow_run_id
                            ).is_finished()
                        ):
                            self.logger.debug(
                                f"Failing flow run {flow_run_id} due to the failed pods {failed_pods}"
                            )
                            self.client.set_flow_run_state(
                                flow_run_id=flow_run_id,
                                state=Failed(
                                    message="Kubernetes Error: pods {} failed for this job".format(
                                        failed_pods
                                    )
                                ),
                            )

                    # Delete job if it is successful or failed
                    if delete_job:
                        self.logger.debug(f"Deleting job {job_name}")
                        try:
                            self.batch_client.delete_namespaced_job(
                                name=job_name,
                                namespace=self.namespace,
                                body=self.k8s_client.V1DeleteOptions(
                                    propagation_policy="Foreground"
                                ),
                            )
                        except self.k8s_client.rest.ApiException as exc:
                            if exc.status != 404:
                                self.logger.error(
                                    f"{exc.status} error attempting to delete job {job_name}"
                                )
            except self.k8s_client.rest.ApiException as exc:
                if exc.status == 410:
                    self.logger.debug("Refreshing job listing token...")
                    _continue = ""
                    continue
                else:
                    self.logger.debug(exc)
Example #8
    def call_runner_target_handlers(self, old_state: State, new_state: State) -> State:
        """
        A special state handler that the TaskRunner uses to call its task's state handlers.
        This method is called as part of the base Runner's `handle_state_change()` method.

        Args:
            - old_state (State): the old (previous) state
            - new_state (State): the new (current) state

        Returns:
            - State: the new state
        """
        raise_on_exception = prefect.context.get("raise_on_exception", False)

        try:
            new_state = super().call_runner_target_handlers(
                old_state=old_state, new_state=new_state
            )

        # PrefectStateSignals are trapped and turned into States
        except prefect.engine.signals.PrefectStateSignal as exc:
            self.logger.info(
                "{name} signal raised: {rep}".format(
                    name=type(exc).__name__, rep=repr(exc)
                )
            )
            if raise_on_exception:
                raise exc
            new_state = exc.state

        except Exception as exc:
            msg = "Exception raised while calling state handlers: {}".format(repr(exc))
            self.logger.exception(msg)
            if raise_on_exception:
                raise exc
            new_state = Failed(msg, result=exc)

        task_run_id = prefect.context.get("task_run_id")
        version = prefect.context.get("task_run_version")

        try:
            cloud_state = new_state
            state = self.client.set_task_run_state(
                task_run_id=task_run_id,
                version=version if cloud_state.is_running() else None,
                state=cloud_state,
                cache_for=self.task.cache_for,
            )
        except VersionLockMismatchSignal as exc:
            state = self.client.get_task_run_state(task_run_id=task_run_id)

            if state.is_running():
                self.logger.debug(
                    "Version lock encountered and task {} is already in a running state.".format(
                        self.task.name
                    )
                )
                raise ENDRUN(state=state) from exc

            self.logger.debug(
                "Version lock encountered for task {}, proceeding with state {}...".format(
                    self.task.name, type(state).__name__
                )
            )

            try:
                new_state = state.load_result(self.result)
            except Exception as exc_inner:
                self.logger.debug(
                    "Error encountered attempting to load result for state of {} task...".format(
                        self.task.name
                    )
                )
                self.logger.error(repr(exc_inner))
                raise ENDRUN(state=state) from exc_inner
        except Exception as exc:
            self.logger.exception(
                "Failed to set task state with error: {}".format(repr(exc))
            )
            raise ENDRUN(state=ClientFailed(state=new_state)) from exc

        if state.is_queued():
            state.state = old_state  # type: ignore
            raise ENDRUN(state=state)

        prefect.context.update(task_run_version=(version or 0) + 1)

        return new_state
Example #9
    def check_task_ready_to_map(
        self, state: State, upstream_states: Dict[Edge, State]
    ) -> State:
        """
        Checks if the parent task is ready to proceed with mapping.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states

        Raises:
            - ENDRUN: either way, we don't continue past this point
        """
        if state.is_mapped():
            # this indicates we are executing a re-run of a mapped pipeline;
            # in this case, we populate both `map_states` and `cached_inputs`
            # to ensure the flow runner can properly regenerate the child tasks,
            # regardless of whether we mapped over an exchanged piece of data
            # or a non-data-exchanging upstream dependency
            if len(state.map_states) == 0 and state.n_map_states > 0:  # type: ignore
                state.map_states = [None] * state.n_map_states  # type: ignore
            state.cached_inputs = {
                edge.key: state._result  # type: ignore
                for edge, state in upstream_states.items()
                if edge.key
            }
            raise ENDRUN(state)

        # we can't map if there are no success states with iterables upstream
        if upstream_states and not any(
            [
                edge.mapped and state.is_successful()
                for edge, state in upstream_states.items()
            ]
        ):
            new_state = Failed("No upstream states can be mapped over.")  # type: State
            raise ENDRUN(new_state)
        elif not all(
            [
                hasattr(state.result, "__getitem__")
                for edge, state in upstream_states.items()
                if state.is_successful() and not state.is_mapped() and edge.mapped
            ]
        ):
            new_state = Failed("At least one upstream state has an unmappable result.")
            raise ENDRUN(new_state)
        else:
            # compute and set n_map_states
            n_map_states = min(
                [
                    len(s.result)
                    for e, s in upstream_states.items()
                    if e.mapped and s.is_successful() and not s.is_mapped()
                ]
                + [
                    s.n_map_states  # type: ignore
                    for e, s in upstream_states.items()
                    if e.mapped and s.is_mapped()
                ],
                default=0,
            )
            new_state = Mapped(
                "Ready to proceed with mapping.", n_map_states=n_map_states
            )
            raise ENDRUN(new_state)
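
A worked illustration of the `n_map_states` arithmetic above: the child-task count is the length of the shortest mappable upstream, and `default=0` covers the case with no mappable upstreams at all:

# mapping over upstreams of length 3 and 5 yields 3 children
assert min([3, 5], default=0) == 3
# no mappable upstreams -> zero children
assert min([], default=0) == 0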
Example #10
    states=[],
    parameters={"param": "value"},
    context={"foo": "bar"},
    labels=["label"],
    updated_at=pendulum.now(),
    run_config=UniversalRun(),
)
# Make `get_latest` return the same flow run view
SUCCESS_FLOW_RUN_VIEW.get_latest = MagicMock(
    return_value=SUCCESS_FLOW_RUN_VIEW)

FAILED_FLOW_RUN_VIEW = FlowRunView(
    flow_run_id="flow-run-id",
    name="flow-run-name",
    flow_id="flow-id",
    state=Failed(message="state-1"),
    states=[],
    parameters={"param": "value"},
    context={"foo": "bar"},
    labels=["label"],
    updated_at=pendulum.now(),
    run_config=UniversalRun(),
)
# Make `get_latest` return the same flow run view
FAILED_FLOW_RUN_VIEW.get_latest = MagicMock(return_value=FAILED_FLOW_RUN_VIEW)

SUBMITTED_FLOW_RUN_VIEW = FlowRunView(
    flow_run_id="flow-run-id",
    name="flow-run-name",
    flow_id="flow-id",
    state=Submitted(message="state-1"),
Example #11
class FlowRunner(Runner):
    """
    FlowRunners handle the execution of Flows and determine the State of a Flow
    before, during and after the Flow is run.

    In particular, through the FlowRunner you can specify which tasks should be
    the first tasks to run, which tasks should be returned after the Flow is finished,
    and what states each task should be initialized with.

    Args:
        - flow (Flow): the `Flow` to be run
        - task_runner_cls (TaskRunner, optional): The class used for running
            individual Tasks. Defaults to [TaskRunner](task_runner.html)
        - state_handlers (Iterable[Callable], optional): A list of state change handlers
            that will be called whenever the flow changes state, providing an
            opportunity to inspect or modify the new state. The handler
            will be passed the flow runner instance, the old (prior) state, and the new
            (current) state, with the following signature:
            `state_handler(fr: FlowRunner, old_state: State, new_state: State) -> Optional[State]`
            If multiple functions are passed, then the `new_state` argument will be the
            result of the previous handler.

    Note: new FlowRunners are initialized within the call to `Flow.run()` and in general,
    this is the endpoint through which FlowRunners will be interacted with most frequently.

    Example:
    ```python
    @task
    def say_hello():
        print('hello')

    with Flow("My Flow") as f:
        say_hello()

    fr = FlowRunner(flow=f)
    flow_state = fr.run()
    ```
    """

    def __init__(
        self,
        flow: Flow,
        task_runner_cls: type = None,
        state_handlers: Iterable[Callable] = None,
    ):
        self.flow = flow
        if task_runner_cls is None:
            task_runner_cls = prefect.engine.get_default_task_runner_class()
        self.task_runner_cls = task_runner_cls
        super().__init__(state_handlers=state_handlers)

    def __repr__(self) -> str:
        return "<{}: {}>".format(type(self).__name__, self.flow.name)

    def call_runner_target_handlers(self, old_state: State, new_state: State) -> State:
        """
        A special state handler that the FlowRunner uses to call its flow's state handlers.
        This method is called as part of the base Runner's `handle_state_change()` method.

        Args:
            - old_state (State): the old (previous) state
            - new_state (State): the new (current) state

        Returns:
            - State: the new state
        """
        self.logger.debug(
            "Flow '{name}': Handling state change from {old} to {new}".format(
                name=self.flow.name,
                old=type(old_state).__name__,
                new=type(new_state).__name__,
            )
        )
        for handler in self.flow.state_handlers:
            new_state = handler(self.flow, old_state, new_state) or new_state

        return new_state

    def initialize_run(  # type: ignore
        self,
        state: Optional[State],
        task_states: Dict[Task, State],
        context: Dict[str, Any],
        task_contexts: Dict[Task, Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> FlowRunnerInitializeResult:
        """
        Initializes the Task run by initializing state and context appropriately.

        If the provided state is a Submitted state, the state it wraps is extracted.

        Args:
            - state (Optional[State]): the initial state of the run
            - task_states (Dict[Task, State]): a dictionary of any initial task states
            - context (Dict[str, Any], optional): prefect.Context to use for execution
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task
            - parameters (dict): the parameter values for the run

        Returns:
            - NamedTuple: a tuple of initialized objects:
                `(state, task_states, context, task_contexts)`
        """

        # overwrite context parameters one-by-one
        context_params = context.setdefault("parameters", {})
        for p in self.flow.parameters():
            if not p.required:
                context_params.setdefault(p.name, p.default)
        for param, value in (parameters or {}).items():
            context_params[param] = value

        context.update(flow_name=self.flow.name)
        context.setdefault("scheduled_start_time", pendulum.now("utc"))

        # add various formatted dates to context
        now = pendulum.now("utc")
        dates = {
            "date": now,
            "today": now.strftime("%Y-%m-%d"),
            "yesterday": now.add(days=-1).strftime("%Y-%m-%d"),
            "tomorrow": now.add(days=1).strftime("%Y-%m-%d"),
            "today_nodash": now.strftime("%Y%m%d"),
            "yesterday_nodash": now.add(days=-1).strftime("%Y%m%d"),
            "tomorrow_nodash": now.add(days=1).strftime("%Y%m%d"),
        }
        for key, val in dates.items():
            context.setdefault(key, val)

        for task in self.flow.tasks:
            task_contexts.setdefault(task, {}).update(
                task_name=task.name, task_slug=self.flow.slugs[task]
            )

        state, context = super().initialize_run(state=state, context=context)
        return FlowRunnerInitializeResult(
            state=state,
            task_states=task_states,
            context=context,
            task_contexts=task_contexts,
        )

    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task
                changes state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for execution
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """
        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        task_states = dict(task_states or {})
        context = dict(context or {})
        task_contexts = dict(task_contexts or {})
        parameters = dict(parameters or {})
        if executor is None:
            # Use the executor on the flow, if configured
            executor = getattr(self.flow, "executor", None)
            if executor is None:
                executor = prefect.engine.get_default_executor_class()()

        self.logger.debug("Using executor type %s", type(executor).__name__)

        try:
            state, task_states, context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        except ENDRUN as exc:
            state = exc.state

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc))
            )
            if prefect.context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(repr(exc)),
                result=exc,
            )
            state = self.handle_state_change(state or Pending(), new_state)

        return state
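
A hedged usage sketch extending the class docstring above (assumes Prefect 1.x; the flow, task, and parameter names are hypothetical):

from prefect import Flow, Parameter, task

@task
def add_one(n):
    return n + 1

with Flow("param-flow") as f:
    result = add_one(Parameter("n"))

# pass parameter values in and ask for the task's state back in the final result
final_state = FlowRunner(flow=f).run(parameters={"n": 3}, return_tasks=[result])
assert final_state.is_successful()
assert final_state.result[result].result == 4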
Example #12
 def test_state_type_methods_with_failed_state(self):
     state = Failed(message="")
     assert not state.is_cached()
     assert not state.is_pending()
     assert not state.is_retrying()
     assert not state.is_running()
     assert state.is_finished()
     assert not state.is_skipped()
     assert not state.is_scheduled()
     assert not state.is_successful()
     assert state.is_failed()
     assert not state.is_mapped()
     assert not state.is_meta_state()
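For contrast, a sketch of the same type-check helpers against a `Success` state (a hedged counterpart to the test above, not taken from the source):

def test_state_type_methods_with_success_state():
    state = Success(message="")
    # Success is finished and successful, and nothing else
    assert state.is_finished()
    assert state.is_successful()
    assert not state.is_failed()
    assert not state.is_pending()
    assert not state.is_running()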
Example #13
    async def run_once(self) -> int:
        """
        The Lazarus process revives any flow runs that are submitted or running but have no tasks in
        a running or scheduled state. The heartbeat must be stale in order to avoid race conditions
        with transitioning tasks.

        Returns:
            - int: the number of flow runs that were scheduled
        """
        time = pendulum.now("utc").subtract(minutes=10)

        flow_runs = await models.FlowRun.where({
            # get runs that are currently running or submitted
            "state": {
                "_in": ["Running", "Submitted"]
            },
            # that were last updated some time ago
            "heartbeat": {
                "_lte": str(time)
            },
            # but have no task runs in a near-running state, and whose flow
            # group does not have heartbeats or lazarus disabled (both
            # exclusions live under a single "_not" key, since a dict
            # literal cannot hold two "_not" entries)
            "_not": {
                "_or": [
                    {
                        "task_runs": {
                            "state": {
                                "_in": LAZARUS_EXCLUDE
                            }
                        }
                    },
                    {
                        "flow": {
                            "flow_group": {
                                "_or": [
                                    {
                                        "settings": {
                                            "_contains": {
                                                "heartbeat_enabled": False
                                            }
                                        }
                                    },
                                    {
                                        "settings": {
                                            "_contains": {
                                                "lazarus_enabled": False
                                            }
                                        }
                                    },
                                ]
                            }
                        }
                    },
                ]
            },
        }).get(
            selection_set={"id", "version", "tenant_id", "times_resurrected"},
            order_by={"heartbeat": EnumValue("asc")},
        )
        self.logger.info(
            f"Found {len(flow_runs)} flow runs to reschedule with a Lazarus process"
        )

        if not flow_runs:
            return 0

        run_count = 0

        for fr in flow_runs:
            # check how many times it's been resurrected, otherwise it will repeat ad infinitum
            if (fr.times_resurrected <
                    config.services.lazarus.resurrection_attempt_limit):
                try:
                    # Set flow run state to scheduled
                    await prefect.api.states.set_flow_run_state(
                        flow_run_id=fr.id,
                        state=Scheduled(
                            message="Rescheduled by a Lazarus process."),
                    )

                    # increment the times_resurrected value for the flow run
                    await models.FlowRun.where(id=fr.id).update(set=dict(
                        times_resurrected=fr.times_resurrected + 1))
                    # log flow run state change
                    await prefect.api.logs.create_logs([
                        dict(
                            tenant_id=fr.tenant_id,
                            flow_run_id=fr.id,
                            name=f"{self.logger.name}.FlowRun",
                            message=(
                                "Rescheduled by a Lazarus process. "
                                f"This is attempt {fr.times_resurrected + 1}."
                            ),
                            level="INFO",
                        )
                    ])

                    run_count += 1

                except ValueError as exc:
                    # if the error contains "Update failed", it was a version-lock situation
                    # and we don't need to interrupt execution on its account. If it was
                    # anything else, raise an error.
                    if "Update failed" in str(exc):
                        self.logger.error(exc)
                    else:
                        raise
            else:
                message = (
                    "A Lazarus process attempted to reschedule this run "
                    f"{config.services.lazarus.resurrection_attempt_limit} times "
                    "without success. Marking as failed.")
                # Set flow run state to failed
                await prefect.api.states.set_flow_run_state(
                    flow_run_id=fr.id,
                    state=Failed(message=message),
                )
                # log flow run state change
                await prefect.api.logs.create_logs([
                    dict(
                        tenant_id=fr.tenant_id,
                        flow_run_id=fr.id,
                        name=f"{self.logger.name}.FlowRun",
                        message=message,
                        level="ERROR",
                    )
                ])

        self.logger.info(f"Lazarus process rescheduled {run_count} flow runs.")
        return run_count
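`run_once` is presumably driven by a periodic service loop; a hedged sketch of such a loop (the function name and the ten-minute interval are assumptions, not taken from the source):

import asyncio

async def run_lazarus_forever(lazarus, interval_seconds: int = 600) -> None:
    # call run_once on a fixed interval, surviving individual failures
    while True:
        try:
            await lazarus.run_once()
        except Exception as exc:
            lazarus.logger.error(f"Lazarus loop error: {exc!r}")
        await asyncio.sleep(interval_seconds)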
Example #14
 async def test_set_flow_run_state_with_version_fails_if_version_doesnt_match(
         self, flow_run_id):
     with pytest.raises(ValueError, match="State update failed"):
         await api.states.set_flow_run_state(flow_run_id=flow_run_id,
                                             state=Failed(),
                                             version=10)
Example #15
    def manage_jobs(self) -> None:
        """
        This method checks whether jobs are `Failed` or `Succeeded`; if so, the jobs are
        deleted from the namespace. If one of a job's pods happens to run into an image-pull
        error, the flow run is marked as failed and the job is still deleted.
        """
        self.logger.debug(
            "Retrieving information of jobs that are currently in the cluster..."
        )

        more = True
        _continue = ""
        while more:
            try:
                jobs = self.batch_client.list_namespaced_job(
                    namespace=self.namespace,
                    label_selector="prefect.io/identifier",
                    limit=20,
                    _continue=_continue,
                )
                _continue = jobs.metadata._continue
                more = bool(_continue)

                for job in jobs.items:
                    delete_job = job.status.failed or job.status.succeeded
                    job_name = job.metadata.name
                    flow_run_id = job.metadata.labels.get(
                        "prefect.io/flow_run_id")

                    if not delete_job:
                        pods = self.core_client.list_namespaced_pod(
                            namespace=self.namespace,
                            label_selector="prefect.io/identifier={}".format(
                                job.metadata.labels.get(
                                    "prefect.io/identifier")),
                        )

                        for pod in pods.items:
                            if pod.status.container_statuses:
                                for container_status in pod.status.container_statuses:
                                    waiting = container_status.state.waiting
                                    if waiting and waiting.reason in (
                                            "ErrImagePull",
                                            "ImagePullBackOff",
                                    ):
                                        self.logger.debug(
                                            f"Failing flow run {flow_run_id} due to pod {waiting.reason}"
                                        )
                                        self.client.set_flow_run_state(
                                            flow_run_id=flow_run_id,
                                            state=Failed(
                                                message="Kubernetes Error: {}".
                                                format(container_status.state.
                                                       waiting.message)),
                                        )

                                        delete_job = True
                                        break

                    if delete_job:
                        self.logger.debug(f"Deleting job {job_name}")
                        try:
                            self.batch_client.delete_namespaced_job(
                                name=job_name,
                                namespace=self.namespace,
                                body=self.k8s_client.V1DeleteOptions(
                                    propagation_policy="Foreground"),
                            )
                        except self.k8s_client.rest.ApiException as exc:
                            if exc.status != 404:
                                self.logger.error(
                                    f"{exc.status} error attempting to delete job {job_name}"
                                )
            except self.k8s_client.rest.ApiException as exc:
                if exc.status == 410:
                    self.logger.debug("Refreshing job listing token...")
                    _continue = ""
                    continue
                else:
                    self.logger.debug(exc)
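The outer loop above relies on the Kubernetes list API's continue-token pagination; a stripped-down sketch of just that pattern (assuming a configured `kubernetes.client.BatchV1Api` instance):

def iter_jobs(batch_client, namespace, label_selector, page_size=20):
    # yield jobs page by page until the server stops returning a continue token
    _continue = ""
    while True:
        resp = batch_client.list_namespaced_job(
            namespace=namespace,
            label_selector=label_selector,
            limit=page_size,
            _continue=_continue,
        )
        yield from resp.items
        _continue = resp.metadata._continue
        if not _continue:
            break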
Example #16
    async def reap_zombie_task_runs(
        self, heartbeat_cutoff: datetime.datetime = None
    ) -> int:
        """
        Zombie tasks are tasks that claim to be Running, but haven't updated their heartbeat.

        This method either retries them or marks them as failed.

        Returns:
            - int: the number of zombie task runs that were handled
        """
        zombies = 0
        heartbeat_cutoff = heartbeat_cutoff or pendulum.now("utc").subtract(minutes=10)

        where_clause = await self.get_task_runs_where_clause(
            heartbeat_cutoff=heartbeat_cutoff
        )

        task_runs = await models.TaskRun.where(where_clause).get(
            selection_set={
                "id": True,
                "flow_run_id": True,
                "tenant_id": True,
                # Information about the current flow run state
                "flow_run": {"state"},
                # get information about retries from task
                "task": {"max_retries", "retry_delay"},
                # count the number of retrying states for this task run
                with_args(
                    "retry_count: states_aggregate",
                    {"where": {"state": {"_eq": "Retrying"}}},
                ): {"aggregate": {"count"}},
            },
            limit=5000,
            order_by={"updated": EnumValue("desc")},
            apply_schema=False,
        )

        if task_runs:
            self.logger.info(f"Zombie killer found {len(task_runs)} task runs.")

        # Set task run states to failed
        for tr in task_runs:
            try:
                # if the flow run is running and retries are available, mark as retrying
                if (
                    tr.flow_run.state == "Running"
                    and tr.retry_count.aggregate.count < (tr.task.max_retries or 0)
                ):
                    message = (
                        "No heartbeat detected from the remote task; retrying the run."
                        f"This will be retry {tr.retry_count.aggregate.count + 1} of {tr.task.max_retries}."
                    )
                    retry_delay = orm._as_timedelta(tr.task.retry_delay or "0")
                    await prefect.api.states.set_task_run_state(
                        task_run_id=tr.id,
                        state=Retrying(
                            message=message,
                            run_count=tr.retry_count.aggregate.count + 1,
                            start_time=pendulum.now("UTC") + retry_delay,
                        ),
                    )

                # mark failed
                else:
                    message = "No heartbeat detected from the remote task; marking the run as failed."
                    await prefect.api.states.set_task_run_state(
                        task_run_id=tr.id,
                        state=Failed(message=message),
                    )

                # log the state change to the task run
                await prefect.api.logs.create_logs(
                    [
                        dict(
                            tenant_id=tr.tenant_id,
                            flow_run_id=tr.flow_run_id,
                            task_run_id=tr.id,
                            name=f"{self.logger.name}.TaskRun",
                            message=message,
                            level="ERROR",
                        )
                    ]
                )

                zombies += 1

            except ValueError as exc:
                self.logger.error(exc)

        if zombies:
            self.logger.info(f"Addressed {zombies} zombie task runs.")

        return zombies
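The retry-versus-fail branch above reduces to a small predicate; a sketch under the same assumptions (the flow run must still be Running and the task must have retries left):

def should_retry_zombie(flow_run_state: str, retry_count: int,
                        max_retries: int) -> bool:
    # retry a zombie task run only while its flow run is Running and the
    # task has retry attempts remaining; otherwise it is marked Failed
    return flow_run_state == "Running" and retry_count < (max_retries or 0)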
Example #17
    def get_task_run_state(self, state: State,
                           inputs: Dict[str, Result]) -> State:
        """
        Runs the task and traps any signals or errors it raises.
        Also checkpoints the result of a successful task, if `task.checkpoint` is `True`.

        Args:
            - state (State): the current state of this task
            - inputs (Dict[str, Result], optional): a dictionary of inputs whose keys correspond
                to the task's `run()` arguments.

        Returns:
            - State: the state of the task after running the check

        Raises:
            - signals.PAUSE: if the task raises PAUSE
            - ENDRUN: if the task is not ready to run
        """
        task_name = prefect.context.get("task_full_name", self.task.name)

        if not state.is_running():
            self.logger.debug(
                f"Task {task_name!r}: Can't run task because it's not in a Running "
                "state; ending run.")

            raise ENDRUN(state)

        value = None
        raw_inputs = {k: r.value for k, r in inputs.items()}
        new_state = None
        try:
            self.logger.debug(
                f"Task {task_name!r}: Calling task.run() method...")

            # Create a stdout redirect if the task has log_stdout enabled
            log_context = (
                redirect_stdout(
                    prefect.utilities.logging.RedirectToLog(self.logger))
                if getattr(self.task, "log_stdout", False) else nullcontext()
            )  # type: AbstractContextManager

            with log_context:
                value = prefect.utilities.executors.run_task_with_timeout(
                    task=self.task,
                    args=(),
                    kwargs=raw_inputs,
                    logger=self.logger,
                )

        except TaskTimeoutSignal as exc:  # Convert timeouts to a `TimedOut` state
            if prefect.context.get("raise_on_exception"):
                raise exc
            state = TimedOut("Task timed out during execution.", result=exc)
            return state

        except signals.LOOP as exc:  # Convert loop signals to a `Looped` state
            new_state = exc.state
            assert isinstance(new_state, Looped)
            value = new_state.result
            new_state.message = exc.state.message or "Task is looping ({})".format(
                new_state.loop_count)

        except signals.SUCCESS as exc:
            # Success signals can be treated like a normal result
            new_state = exc.state
            assert isinstance(new_state, Success)
            value = new_state.result

        except Exception as exc:  # Handle exceptions in the task
            if prefect.context.get("raise_on_exception"):
                raise
            self.logger.error(
                f"Task {task_name!r}: Exception encountered during task execution!",
                exc_info=True,
            )
            state = Failed(f"Error during execution of task: {exc!r}",
                           result=exc)
            return state

        # checkpoint tasks if a result is present, except for when the user has opted out by
        # disabling checkpointing
        if (prefect.context.get("checkpointing") is True
                and self.task.checkpoint is not False and value is not None):
            try:
                formatting_kwargs = {
                    **prefect.context.get("parameters", {}).copy(),
                    **prefect.context,
                    **raw_inputs,
                }
                result = self.result.write(value, **formatting_kwargs)
            except ResultNotImplementedError:
                result = self.result.from_value(value=value)
        else:
            result = self.result.from_value(value=value)

        if new_state is not None:
            new_state.result = result
            return new_state

        state = Success(result=result, message="Task run succeeded.")
        return state
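A hedged usage sketch for the method above: the incoming state must already be `Running`, and `inputs` maps the task's `run()` argument names to `Result` objects (hypothetical task; Prefect 1.x-style imports assumed, and note that calling this pipeline step directly skips the checks `run()` normally performs):

from prefect import task
from prefect.engine import TaskRunner
from prefect.engine.result import Result
from prefect.engine.state import Running

@task
def add(x, y):
    return x + y

runner = TaskRunner(task=add)
state = runner.get_task_run_state(
    state=Running(),
    inputs={"x": Result(1), "y": Result(2)},
)
assert state.is_successful()
assert state.result == 3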
Example #18
class TestRunFlowStep:
    def test_running_state_finishes(self):
        flow = Flow(name="test", tasks=[Task()])
        new_state = FlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_successful()

    @pytest.mark.parametrize(
        "state",
        [Pending(), Retrying(), Finished(), Success(), Failed(), Skipped()])
    def test_other_states_raise_endrun(self, state):
        flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=flow).get_flow_run_state(
                state=state,
                task_states={},
                task_contexts={},
                return_tasks=set(),
                task_runner_state_handlers=[],
                executor=Executor(),
            )

    def test_determine_final_state_has_final_say(self):
        class MyFlowRunner(FlowRunner):
            def determine_final_state(self, *args, **kwargs):
                return Failed("Very specific error message")

        flow = Flow(name="test", tasks=[Task()])
        new_state = MyFlowRunner(flow=flow).get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state.is_failed()
        assert new_state.message == "Very specific error message"

    def test_determine_final_state_preserves_running_states_when_tasks_still_running(
            self):
        task = Task()
        flow = Flow(name="test", tasks=[task])
        old_state = Running()
        new_state = FlowRunner(flow=flow).get_flow_run_state(
            state=old_state,
            task_states={
                task: Retrying(start_time=pendulum.now("utc").add(days=1))
            },
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert new_state is old_state
Example #19
    def run(
        self,
        state: State = None,
        task_states: Dict[Task, State] = None,
        return_tasks: Iterable[Task] = None,
        parameters: Dict[str, Any] = None,
        task_runner_state_handlers: Iterable[Callable] = None,
        executor: "prefect.engine.executors.Executor" = None,
        context: Dict[str, Any] = None,
        task_contexts: Dict[Task, Dict[str, Any]] = None,
    ) -> State:
        """
        The main endpoint for FlowRunners.  Calling this method will perform all
        computations contained within the Flow and return the final state of the Flow.

        Args:
            - state (State, optional): starting state for the Flow. Defaults to
                `Pending`
            - task_states (dict, optional): dictionary of task states to begin
                computation with, with keys being Tasks and values their corresponding state
            - return_tasks ([Task], optional): list of Tasks to include in the
                final returned Flow state. Defaults to `None`
            - parameters (dict, optional): dictionary of any needed Parameter
                values, with keys being strings representing Parameter names and values being
                their corresponding values
            - task_runner_state_handlers (Iterable[Callable], optional): A list of state change
                handlers that will be provided to the task_runner, and called whenever a task changes
                state.
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration
            - context (Dict[str, Any], optional): prefect.Context to use for
                the execution of each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task

        Returns:
            - State: `State` representing the final post-run state of the `Flow`.

        """

        self.logger.info("Beginning Flow run for '{}'".format(self.flow.name))

        # make copies to avoid modifying user inputs
        task_states = dict(task_states or {})
        context = dict(context or {})
        task_contexts = dict(task_contexts or {})
        parameters = dict(parameters or {})
        if executor is None:
            executor = prefect.engine.get_default_executor_class()()

        try:
            state, task_states, context, task_contexts = self.initialize_run(
                state=state,
                task_states=task_states,
                context=context,
                task_contexts=task_contexts,
                parameters=parameters,
            )

            with prefect.context(context):
                state = self.check_flow_is_pending_or_running(state)
                state = self.check_flow_reached_start_time(state)
                state = self.set_flow_to_running(state)
                state = self.get_flow_run_state(
                    state,
                    task_states=task_states,
                    task_contexts=task_contexts,
                    return_tasks=return_tasks,
                    task_runner_state_handlers=task_runner_state_handlers,
                    executor=executor,
                )

        except ENDRUN as exc:
            state = exc.state

        except KeyboardInterrupt:
            self.logger.debug("Interrupt signal raised, cancelling Flow run.")
            state = Cancelled(
                message="Interrupt signal raised, cancelling flow run.")

        # All other exceptions are trapped and turned into Failed states
        except Exception as exc:
            self.logger.exception(
                "Unexpected error while running flow: {}".format(repr(exc)))
            if prefect.context.get("raise_on_exception"):
                raise exc
            new_state = Failed(
                message="Unexpected error while running flow: {}".format(
                    repr(exc)),
                result=exc,
            )
            state = self.handle_state_change(state or Pending(), new_state)

        return state
Example #20
    def initialize_run(  # type: ignore
        self,
        state: Optional[State],
        task_states: Dict[Task, State],
        context: Dict[str, Any],
        task_contexts: Dict[Task, Dict[str, Any]],
        parameters: Dict[str, Any],
    ) -> FlowRunnerInitializeResult:
        """
        Initializes the flow run by initializing state and context appropriately.

        If the provided state is a Submitted state, the state it wraps is extracted.

        Args:
            - state (Optional[State]): the initial state of the run
            - task_states (Dict[Task, State]): a dictionary of any initial task states
            - context (Dict[str, Any], optional): prefect.Context to use for
                the execution of each Task run
            - task_contexts (Dict[Task, Dict[str, Any]], optional): contexts that will be
                provided to each task
            - parameters(dict): the parameter values for the run

        Returns:
            - NamedTuple: a tuple of initialized objects:
                `(state, task_states, context, task_contexts)`
        """

        # load id from context
        flow_run_id = prefect.context.get("flow_run_id")

        try:
            flow_run_info = self.client.get_flow_run_info(flow_run_id)
        except Exception as exc:
            self.logger.debug(
                "Failed to retrieve flow state with error: {}".format(repr(exc))
            )
            if state is None:
                state = Failed(
                    message="Could not retrieve state from Prefect Cloud", result=exc
                )
            raise ENDRUN(state=state)

        updated_context = context or {}
        updated_context.update(flow_run_info.context or {})
        updated_context.update(
            flow_id=flow_run_info.flow_id,
            flow_run_id=flow_run_info.id,
            flow_run_version=flow_run_info.version,
            flow_run_name=flow_run_info.name,
            scheduled_start_time=flow_run_info.scheduled_start_time,
        )

        tasks = {slug: t for t, slug in self.flow.slugs.items()}
        # update task states and contexts
        for task_run in flow_run_info.task_runs:
            try:
                task = tasks[task_run.task_slug]
            except KeyError:
                msg = (
                    f"Task slug {task_run.task_slug} not found in the current Flow; "
                    f"this is usually caused by changing the Flow without reregistering "
                    f"it with the Prefect API."
                )
                raise KeyError(msg)
            task_states.setdefault(task, task_run.state)
            task_contexts.setdefault(task, {}).update(
                task_id=task_run.task_id,
                task_run_id=task_run.id,
                task_run_version=task_run.version,
            )

        # if state is set, keep it; otherwise load from Cloud
        state = state or flow_run_info.state  # type: ignore

        # update parameters, prioritizing kwarg-provided params
        updated_parameters = flow_run_info.parameters or {}  # type: ignore
        updated_parameters.update(parameters)

        return super().initialize_run(
            state=state,
            task_states=task_states,
            context=updated_context,
            task_contexts=task_contexts,
            parameters=updated_parameters,
        )
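The docstring notes that a `Submitted` state is unwrapped before the run begins; a minimal sketch of that behaviour (assuming Prefect 1.x meta-state semantics, where the wrapped state is exposed as `.state`):

from prefect.engine.state import Pending, Submitted

state = Submitted(state=Pending(message="queued"))
# initialize_run effectively performs this unwrapping:
if isinstance(state, Submitted):
    state = state.state
assert state.is_pending()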
Example #21
    async def reschedule_flow_runs(
        self, heartbeat_cutoff: datetime.datetime = None
    ) -> int:
        heartbeat_cutoff = heartbeat_cutoff or pendulum.now("utc").subtract(minutes=10)
        run_count = 0

        where_clause = await self.get_flow_runs_where_clause(
            heartbeat_cutoff=heartbeat_cutoff
        )
        flow_runs = await models.FlowRun.where(where_clause).get(
            selection_set={"id", "version", "tenant_id", "times_resurrected"},
            order_by={"updated": EnumValue("asc")},
            limit=5000,
        )

        if flow_runs:
            self.logger.info(
                f"Found {len(flow_runs)} flow runs to reschedule with a Lazarus process"
            )

        for fr in flow_runs:
            # check how many times it's been resurrected, otherwise it will repeat ad infinitum
            if (
                fr.times_resurrected
                < config.services.lazarus.resurrection_attempt_limit
            ):
                try:
                    # Set flow run state to scheduled
                    await prefect.api.states.set_flow_run_state(
                        flow_run_id=fr.id,
                        state=Scheduled(message="Rescheduled by a Lazarus process."),
                    )

                    # increment the times_resurrected value for the flow run
                    await models.FlowRun.where(id=fr.id).update(
                        set=dict(times_resurrected=fr.times_resurrected + 1)
                    )
                    # log flow run state change
                    await prefect.api.logs.create_logs(
                        [
                            dict(
                                tenant_id=fr.tenant_id,
                                flow_run_id=fr.id,
                                name=f"{self.logger.name}.FlowRun",
                                message=(
                                    "Rescheduled by a Lazarus process. "
                                    f"This is attempt {fr.times_resurrected + 1}."
                                ),
                                level="INFO",
                            )
                        ],
                        defer_db_write=False,
                    )

                    run_count += 1

                except ValueError as exc:
                    # if the error contains "Update failed", it was a version-lock situation
                    # and we don't need to interrupt execution on its account. If it was
                    # anything else, raise an error.
                    if "Update failed" in str(exc):
                        self.logger.error(exc)
                    else:
                        raise
            else:
                message = (
                    "A Lazarus process attempted to reschedule this run "
                    f"{config.services.lazarus.resurrection_attempt_limit} times "
                    "without success. Marking as failed."
                )
                # Set flow run state to failed
                await prefect.api.states.set_flow_run_state(
                    flow_run_id=fr.id,
                    state=Failed(message=message),
                )
                # log flow run state change
                await prefect.api.logs.create_logs(
                    [
                        dict(
                            tenant_id=fr.tenant_id,
                            flow_run_id=fr.id,
                            name=f"{self.logger.name}.FlowRun",
                            message=message,
                            level="ERROR",
                        )
                    ],
                    defer_db_write=False,
                )

        if run_count:
            self.logger.info(f"Lazarus process rescheduled {run_count} flow runs.")
        return run_count
Example #22
def test_flow_run_respects_state_kwarg():
    f = Flow(name="test")
    state = f.run(state=Failed("Unique."))
    assert state.is_failed()
    assert state.message == "Unique."
Example #23
    async def test_resume_from_reduce(self, flow, agent):
        """
        This test sets the "sum" task to Failed before running the flow.

        It then sets it to Scheduled and reruns from that point. This tests whether "sum"
        properly loads its upstream mapped states.
        """
        flow_run_id = await api.runs.create_flow_run(flow_id=flow.server_id)

        # ----------------------------------------------------------
        # first run - start with numbers 1

        # set sum to failed so it won't run
        await api.states.set_task_run_state(
            task_run_id=await api.runs.get_or_create_task_run(
                flow_run_id=flow_run_id, task_id=flow.sum.id),
            state=Failed(),
        )

        await agent.run_scheduled(flow_id=flow.server_id)
        # wait for states to be written to the database
        await asyncio.sleep(1.0)
        fr = await models.FlowRun.where(id=flow_run_id).first(
            {
                "serialized_state": True,
                "task_runs": {
                    "task": {"slug"},
                    "serialized_state": True,
                    "map_index": True,
                },
            })

        task_states = {
            (tr.task.slug, tr.map_index): state_schema.load(tr.serialized_state)
            for tr in fr.task_runs
        }

        assert fr.serialized_state["type"] == "Failed"

        assert len(task_states) == 6
        # numbers task
        assert task_states[(flow.numbers.slug, -1)].is_successful()
        # add parent task
        assert task_states[(flow.add.slug, -1)].is_mapped()
        # add children tasks
        for i in range(3):
            assert task_states[(flow.add.slug, i)].is_successful()
        # sum task
        assert task_states[(flow.sum.slug, -1)].is_failed()

        # ----------------------------------------------------------
        # second run - make sum scheduled and set flow to running

        await api.states.set_task_run_state(
            task_run_id=await api.runs.get_or_create_task_run(
                flow_run_id=flow_run_id, task_id=flow.sum.id),
            state=Scheduled(),
        )
        await api.states.set_flow_run_state(flow_run_id, state=Running())

        from prefect.utilities.debug import raise_on_exception

        with raise_on_exception():
            await agent.run_scheduled(flow_id=flow.server_id)
            # wait for states to be written to the database
            await asyncio.sleep(1.0)
        fr = await models.FlowRun.where(id=flow_run_id).first(
            {
                "serialized_state": True,
                "task_runs": {
                    "task": {"slug"},
                    "serialized_state": True,
                    "map_index": True,
                },
            })

        task_states = {
            (tr.task.slug, tr.map_index): state_schema.load(tr.serialized_state)
            for tr in fr.task_runs
        }

        assert fr.serialized_state["type"] == "Success"
        # numbers task
        assert task_states[(flow.numbers.slug, -1)].is_successful()
        # add parent task
        assert task_states[(flow.add.slug, -1)].is_mapped()
        # add children tasks
        for i in range(3):
            assert task_states[(flow.add.slug, i)].is_successful()
        # sum task
        assert task_states[(flow.sum.slug, -1)].is_successful()
        assert prefect.RESULTS[(flow.sum.slug, -1)] == 9
Example #24
    def run(
        self,
        state: State = None,
        upstream_states: Dict[Edge, State] = None,
        context: Dict[str, Any] = None,
        executor: "prefect.engine.executors.Executor" = None,
    ) -> State:
        """
        The main endpoint for TaskRunners.  Calling this method will conditionally execute
        `self.task.run` with any provided inputs, assuming the upstream dependencies are in a
        state which allow this Task to run.

        Args:
            - state (State, optional): initial `State` to begin task run from;
                defaults to `Pending()`
            - upstream_states (Dict[Edge, State]): a dictionary
                representing the states of any tasks upstream of this one. The keys of the
                dictionary should correspond to the edges leading to the task.
            - context (dict, optional): prefect Context to use for execution
            - executor (Executor, optional): executor to use when performing
                computation; defaults to the executor specified in your prefect configuration

        Returns:
            - `State` object representing the final post-run state of the Task
        """
        upstream_states = upstream_states or {}
        context = context or {}
        map_index = context.setdefault("map_index", None)
        context["task_full_name"] = "{name}{index}".format(
            name=self.task.name,
            index=("" if map_index is None else "[{}]".format(map_index)),
        )

        if executor is None:
            executor = prefect.engine.get_default_executor_class()()

        # if mapped is true, this task run is going to generate a Mapped state. It won't
        # actually run, but rather spawn children tasks to map over its inputs. We
        # detect this case by checking for:
        #   - upstream edges that are `mapped`
        #   - no `map_index` (which indicates that this is the child task, not the parent)
        mapped = any([e.mapped for e in upstream_states]) and map_index is None
        task_inputs = {}  # type: Dict[str, Any]

        try:
            # initialize the run
            state, context = self.initialize_run(state, context)

            # run state transformation pipeline
            with prefect.context(context):

                if prefect.context.get("task_loop_count") is None:
                    self.logger.info(
                        "Task '{name}': Starting task run...".format(
                            name=context["task_full_name"]))

                # check to make sure the task is in a pending state
                state = self.check_task_is_ready(state)

                # check if the task has reached its scheduled time
                state = self.check_task_reached_start_time(state)

                # Tasks never run if the upstream tasks haven't finished
                state = self.check_upstream_finished(
                    state, upstream_states=upstream_states)

                # check if any upstream tasks skipped (and if we need to skip)
                state = self.check_upstream_skipped(
                    state, upstream_states=upstream_states)

                # populate / hydrate all result objects
                state, upstream_states = self.load_results(
                    state=state, upstream_states=upstream_states)

                # if the task is mapped, process the mapped children and exit
                if mapped:
                    state = self.run_mapped_task(
                        state=state,
                        upstream_states=upstream_states,
                        context=context,
                        executor=executor,
                    )

                    state = self.wait_for_mapped_task(state=state,
                                                      executor=executor)

                    self.logger.debug(
                        "Task '{name}': task has been mapped; ending run.".
                        format(name=context["task_full_name"]))
                    raise ENDRUN(state)

                # retrieve task inputs from upstream and also explicitly passed inputs
                task_inputs = self.get_task_inputs(
                    state=state, upstream_states=upstream_states)

                if self.task.target:
                    # check to see if there is a Result at the task's target
                    state = self.check_target(state, inputs=task_inputs)
                else:
                    # check to see if the task has a cached result
                    state = self.check_task_is_cached(state,
                                                      inputs=task_inputs)

                # check if the task's trigger passes
                # triggers can raise Pauses, which require task_inputs to be available for caching
                # so we run this after the previous step
                state = self.check_task_trigger(
                    state, upstream_states=upstream_states)

                # set the task state to running
                state = self.set_task_to_running(state, inputs=task_inputs)

                # run the task
                state = self.get_task_run_state(
                    state,
                    inputs=task_inputs,
                    timeout_handler=executor.timeout_handler)

                # cache the output, if appropriate
                state = self.cache_result(state, inputs=task_inputs)

                # check if the task needs to be retried
                state = self.check_for_retry(state, inputs=task_inputs)

                state = self.check_task_is_looping(
                    state,
                    inputs=task_inputs,
                    upstream_states=upstream_states,
                    context=context,
                    executor=executor,
                )

        # for pending signals, including retries and pauses we need to make sure the
        # task_inputs are set
        except (ENDRUN, signals.PrefectStateSignal) as exc:
            exc.state.cached_inputs = task_inputs or {}
            state = exc.state
        except RecursiveCall as exc:
            raise exc

        except Exception as exc:
            msg = "Task '{name}': unexpected error while running task: {exc}".format(
                name=context["task_full_name"], exc=repr(exc))
            self.logger.exception(msg)
            state = Failed(message=msg, result=exc, cached_inputs=task_inputs)
            if prefect.context.get("raise_on_exception"):
                raise exc

        # to prevent excessive repetition of this log
        # since looping relies on recursively calling self.run
        # TODO: figure out a way to only log this one single time instead of twice
        if prefect.context.get("task_loop_count") is None:
            # wrapping this final log in prefect.context(context) ensures
            # that any run-context, including task-run-ids, are respected
            with prefect.context(context):
                self.logger.info(
                    "Task '{name}': finished task run for task with final state: '{state}'"
                    .format(name=context["task_full_name"],
                            state=type(state).__name__))

        return state
Example #25
    def run(
        self,
        state: State = None,
        upstream_states: Dict[Edge, State] = None,
        context: Dict[str, Any] = None,
        is_mapped_parent: bool = False,
    ) -> State:
        """
        The main endpoint for TaskRunners.  Calling this method will conditionally execute
        `self.task.run` with any provided inputs, assuming the upstream dependencies are in a
        state which allow this Task to run.

        Args:
            - state (State, optional): initial `State` to begin task run from;
                defaults to `Pending()`
            - upstream_states (Dict[Edge, State]): a dictionary
                representing the states of any tasks upstream of this one. The keys of the
                dictionary should correspond to the edges leading to the task.
            - context (dict, optional): prefect Context to use for execution
            - is_mapped_parent (bool): a boolean indicating whether this task run is the run of
                a parent mapped task

        Returns:
            - `State` object representing the final post-run state of the Task
        """
        upstream_states = upstream_states or {}
        context = context or prefect.context.to_dict()
        map_index = context.setdefault("map_index", None)
        context["task_full_name"] = "{name}{index}".format(
            name=self.task.name,
            index=("" if map_index is None else "[{}]".format(map_index)),
        )

        task_inputs = {}  # type: Dict[str, Any]

        try:
            # initialize the run
            state, context = self.initialize_run(state, context)

            # run state transformation pipeline
            with prefect.context(context):

                if prefect.context.get("task_loop_count") is None:
                    self.logger.info(
                        "Task '{name}': Starting task run...".format(
                            name=context["task_full_name"]))

                # check to make sure the task is in a pending state
                state = self.check_task_is_ready(state)

                # check if the task has reached its scheduled time
                state = self.check_task_reached_start_time(state)

                # Tasks never run if the upstream tasks haven't finished
                state = self.check_upstream_finished(
                    state, upstream_states=upstream_states)

                # check if any upstream tasks skipped (and if we need to skip)
                state = self.check_upstream_skipped(
                    state, upstream_states=upstream_states)

                # populate / hydrate all result objects
                state, upstream_states = self.load_results(
                    state=state, upstream_states=upstream_states)

                # retrieve task inputs from upstream and also explicitly passed inputs
                task_inputs = self.get_task_inputs(
                    state=state, upstream_states=upstream_states)

                if is_mapped_parent:
                    state = self.check_task_ready_to_map(
                        state, upstream_states=upstream_states)

                # dynamically set task run name
                self.set_task_run_name(task_inputs=task_inputs)

                if self.task.target:
                    # check to see if there is a Result at the task's target
                    state = self.check_target(state, inputs=task_inputs)
                else:
                    # check to see if the task has a cached result
                    state = self.check_task_is_cached(state,
                                                      inputs=task_inputs)

                # check if the task's trigger passes
                # triggers can raise Pauses, which require task_inputs to be available for caching
                # so we run this after the previous step
                state = self.check_task_trigger(
                    state, upstream_states=upstream_states)

                # set the task state to running
                state = self.set_task_to_running(state, inputs=task_inputs)

                # run the task
                state = self.get_task_run_state(state, inputs=task_inputs)

                # cache the output, if appropriate
                state = self.cache_result(state, inputs=task_inputs)

                # check if the task needs to be retried
                state = self.check_for_retry(state, inputs=task_inputs)

                state = self.check_task_is_looping(
                    state,
                    inputs=task_inputs,
                    upstream_states=upstream_states,
                    context=context,
                )

        # for pending signals, including retries and pauses we need to make sure the
        # task_inputs are set
        except (ENDRUN, signals.PrefectStateSignal) as exc:
            state = exc.state
        except RecursiveCall as exc:
            raise exc

        except Exception as exc:
            msg = "Task '{name}': Unexpected error while running task: {exc}".format(
                name=context["task_full_name"], exc=repr(exc))
            self.logger.exception(msg)
            state = Failed(message=msg, result=exc)
            if prefect.context.get("raise_on_exception"):
                raise exc

        # to prevent excessive repetition of this log
        # since looping relies on recursively calling self.run
        # TODO: figure out a way to only log this one single time instead of twice
        if prefect.context.get("task_loop_count") is None:
            # wrapping this final log in prefect.context(context) ensures
            # that any run-context, including task-run-ids, are respected
            with prefect.context(context):
                self.logger.info(
                    "Task '{name}': Finished task run for task with final state: "
                    "'{state}'".format(name=context["task_full_name"],
                                       state=type(state).__name__))

        return state
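A hedged usage sketch for the endpoint above, feeding one finished upstream state into a downstream task through an `Edge` keyed to its `run()` argument (hypothetical tasks; Prefect 1.x-style imports assumed):

from prefect import Task, task
from prefect.core import Edge
from prefect.engine import TaskRunner
from prefect.engine.state import Success

@task
def double(x):
    return 2 * x

# "x" is supplied by an upstream task that already finished with result 10
edge = Edge(Task(name="upstream"), double, key="x")
state = TaskRunner(task=double).run(
    upstream_states={edge: Success(result=10)},
)
assert state.is_successful()
assert state.result == 20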
Example #26
    def test_validation_failed_is_failed(self):
        assert issubclass(ValidationFailed, Failed)

    def test_paused_is_scheduled(self):
        assert issubclass(Paused, Scheduled)


@pytest.mark.parametrize(
    "state_check",
    [
        dict(state=Cancelled(), assert_true={"is_finished"}),
        dict(state=Cached(),
             assert_true={"is_cached", "is_finished", "is_successful"}),
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(),
             assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Retrying(),
             assert_true={"is_pending", "is_scheduled", "is_retrying"}),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Skipped(),
             assert_true={"is_finished", "is_successful", "is_skipped"}),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
Example #27
class TestTaskRunStates:
    async def test_set_task_run_state(self, task_run_id):
        result = await api.states.set_task_run_state(task_run_id=task_run_id,
                                                     state=Failed())

        assert result.task_run_id == task_run_id

        query = await models.TaskRun.where(id=task_run_id).first(
            {"version", "state", "serialized_state"})

        assert query.version == 2
        assert query.state == "Failed"
        assert query.serialized_state["type"] == "Failed"

    @pytest.mark.parametrize("state", [Failed(), Success()])
    async def test_set_task_run_state_fails_with_wrong_task_run_id(
            self, state):
        with pytest.raises(ValueError, match="State update failed"):
            await api.states.set_task_run_state(task_run_id=str(uuid.uuid4()),
                                                state=state)

    @pytest.mark.parametrize(
        "state", [s() for s in State.children() if not s().is_running()])
    async def test_state_does_not_set_heartbeat_unless_running(
            self, state, task_run_id):
        task_run = await models.TaskRun.where(id=task_run_id).first(
            {"heartbeat"})
        assert task_run.heartbeat is None

        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=state)

        task_run = await models.TaskRun.where(id=task_run_id).first(
            {"heartbeat"})
        assert task_run.heartbeat is None

    async def test_running_state_sets_heartbeat(self, task_run_id,
                                                running_flow_run_id):
        task_run = await models.TaskRun.where(id=task_run_id).first(
            {"heartbeat"})
        assert task_run.heartbeat is None

        dt = pendulum.now("UTC")
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=Running())

        task_run = await models.TaskRun.where(id=task_run_id).first(
            {"heartbeat"})
        assert task_run.heartbeat > dt

    async def test_trigger_failed_state_does_not_set_end_time(
            self, task_run_id):
        await api.states.set_task_run_state(task_run_id=task_run_id,
                                            state=TriggerFailed())
        task_run_info = await models.TaskRun.where(id=task_run_id).first(
            {"id", "start_time", "end_time"})
        assert not task_run_info.start_time
        assert not task_run_info.end_time

    @pytest.mark.parametrize(
        "flow_run_state", [Pending(), Running(), Failed(), Success()])
    async def test_running_states_can_not_be_set_if_flow_run_is_not_running(
            self, flow_run_id, task_run_id, flow_run_state):

        await api.states.set_flow_run_state(flow_run_id=flow_run_id,
                                            state=flow_run_state)

        set_running_coroutine = api.states.set_task_run_state(
            task_run_id=task_run_id, state=Running())

        if flow_run_state.is_running():
            assert await set_running_coroutine
            assert (await models.TaskRun.where(id=task_run_id).first(
                {"state"})).state == "Running"
        else:

            with pytest.raises(ValueError, match="is not in a running state"):
                await set_running_coroutine
            assert (await models.TaskRun.where(id=task_run_id).first(
                {"state"})).state != "Running"