Example #1
def test_deploy_flow_run_sleeps_until_start_time(monkeypatch, cloud_api):
    gql_return = MagicMock(return_value=MagicMock(data=MagicMock(
        write_run_logs=MagicMock(success=True))))
    client = MagicMock()
    client.return_value.write_run_logs = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client",
                        MagicMock(return_value=client))
    sleep = MagicMock()
    monkeypatch.setattr("time.sleep", sleep)

    dt = pendulum.now()
    agent = Agent()
    agent.deploy_flow = MagicMock()
    agent._deploy_flow_run(flow_run=GraphQLResult({
        "id":
        "id",
        "serialized_state":
        Scheduled().serialize(),
        "scheduled_start_time":
        str(dt.add(seconds=10)),
        "version":
        1,
        "task_runs": [
            GraphQLResult({
                "id": "id",
                "version": 1,
                "serialized_state": Scheduled().serialize(),
            })
        ],
    }))

    sleep_time = sleep.call_args[0][0]
    assert 10 >= sleep_time > 9
    agent.deploy_flow.assert_called_once()
Example #2
def test_mark_flow_as_submitted(monkeypatch, cloud_api, with_task_runs):
    agent = Agent()
    agent.client = MagicMock()
    agent._mark_flow_as_submitted(flow_run=GraphQLResult({
        "id":
        "id",
        "serialized_state":
        Scheduled().serialize(),
        "version":
        1,
        "task_runs": ([
            GraphQLResult({
                "id": "task-id",
                "version": 1,
                "serialized_state": Scheduled().serialize(),
            })
        ] if with_task_runs else []),
    }))

    agent.client.set_flow_run_state.assert_called_once_with(
        flow_run_id="id",
        version=1,
        state=Submitted(message="Submitted for execution"))

    if with_task_runs:
        agent.client.set_task_run_state.assert_called_once_with(
            task_run_id="task-id",
            version=1,
            state=Submitted(message="Submitted for execution"),
        )
    else:
        agent.client.set_task_run_state.assert_not_called()
Example #3
class TestRunModels:
    @pytest.mark.parametrize(
        "state",
        [
            Running(message="running", result=1),
            Scheduled(message="scheduled", result=1, start_time=pendulum.now()),
        ],
    )
    async def test_flow_run_fields_from_state(self, state):
        dt = pendulum.now()
        info = models.FlowRunState.fields_from_state(state)

        assert info["state"] == type(state).__name__
        assert info["timestamp"] > dt
        assert info["message"] == state.message
        assert info["result"] == state.result
        assert info["serialized_state"] == state.serialize()

    @pytest.mark.parametrize(
        "state",
        [
            Running(message="running", result=1),
            Scheduled(message="scheduled", result=1, start_time=pendulum.now()),
        ],
    )
    async def test_task_run_fields_from_state(self, state):
        dt = pendulum.now()
        info = models.TaskRunState.fields_from_state(state)

        assert info["state"] == type(state).__name__
        assert info["timestamp"] > dt
        assert info["message"] == state.message
        assert info["result"] == state.result
        assert info["serialized_state"] == state.serialize()
Example #4
def test_agent_logs_flow_run_exceptions(monkeypatch, runner_token, caplog,
                                        cloud_api):
    gql_return = MagicMock(return_value=MagicMock(data=MagicMock(
        write_run_logs=MagicMock(success=True))))
    client = MagicMock()
    client.return_value.write_run_logs = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client",
                        MagicMock(return_value=client))

    agent = Agent()
    agent.deploy_flow = MagicMock(side_effect=Exception("Error Here"))
    agent.deploy_and_update_flow_run(flow_run=GraphQLResult({
        "id":
        "id",
        "serialized_state":
        Scheduled().serialize(),
        "version":
        1,
        "task_runs": [
            GraphQLResult({
                "id": "id",
                "version": 1,
                "serialized_state": Scheduled().serialize(),
            })
        ],
    }))

    assert client.write_run_logs.called
    client.write_run_logs.assert_called_with([
        dict(flow_run_id="id",
             level="ERROR",
             message="Error Here",
             name="agent")
    ])
    assert "Logging platform error for flow run" in caplog.text
Example #5
def test_get_flow_run_scheduled_start_time_from_state_time(cloud_mocks):
    start_time = pendulum.now("utc")
    states = [
        Scheduled(start_time=start_time.add(seconds=10)).serialize(),
        Scheduled(start_time=start_time).serialize(),
        Scheduled().serialize(),
    ]

    # Attach db "created" times to the states; the second one is the newest
    states[0]["created"] = pendulum.now().subtract(seconds=10).isoformat()
    states[1]["created"] = pendulum.now().isoformat()

    # The last state will have an empty start time and no created time to test handling
    # of malformed data
    states[2]["start_time"] = None

    cloud_mocks.Client().graphql.return_value = GraphQLResult({
        "data": {
            "flow_run": [{
                "scheduled_start_time":
                (start_time.subtract(seconds=10).isoformat()),
                "states":
                states,
            }]
        }
    })

    result = _get_flow_run_scheduled_start_time("flow-run-id")
    assert result == start_time
Example #6
def test_update_states_passes_task_runs(monkeypatch, runner_token):
    gql_return = MagicMock(return_value=MagicMock(
        data=MagicMock(set_flow_run_state=None, set_task_run_state=None)))
    client = MagicMock()
    client.return_value.graphql = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client", client)

    agent = Agent()
    assert not agent.update_states(flow_runs=[
        GraphQLResult({
            "id":
            "id",
            "serialized_state":
            Scheduled().serialize(),
            "version":
            1,
            "task_runs": [
                GraphQLResult({
                    "id": "id",
                    "version": 1,
                    "serialized_state": Scheduled().serialize(),
                })
            ],
        })
    ])
Example #7
def test_agent_process(monkeypatch, runner_token):
    gql_return = MagicMock(return_value=MagicMock(data=MagicMock(
        set_flow_run_state=None,
        set_task_run_state=None,
        getRunsInQueue=MagicMock(flow_run_ids=["id"]),
        flow_run=[
            GraphQLResult({
                "id":
                "id",
                "serialized_state":
                Scheduled().serialize(),
                "version":
                1,
                "task_runs": [
                    GraphQLResult({
                        "id": "id",
                        "version": 1,
                        "serialized_state": Scheduled().serialize(),
                    })
                ],
            })
        ],
    )))
    client = MagicMock()
    client.return_value.graphql = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client", client)

    # Don't inspect the full return value; just assert it's truthy and that all functions are called properly
    agent = Agent()
    assert agent.agent_process("id")
Example #8
def test_agent_logs_flow_run_exceptions(monkeypatch, runner_token):
    gql_return = MagicMock(
        return_value=MagicMock(data=MagicMock(writeRunLogs=MagicMock(success=True)))
    )
    client = MagicMock()
    client.return_value.write_run_logs = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client", MagicMock(return_value=client))

    agent = Agent()
    agent._log_flow_run_exceptions(
        flow_runs=[
            GraphQLResult(
                {
                    "id": "id",
                    "serialized_state": Scheduled().serialize(),
                    "version": 1,
                    "task_runs": [
                        GraphQLResult(
                            {
                                "id": "id",
                                "version": 1,
                                "serialized_state": Scheduled().serialize(),
                            }
                        )
                    ],
                }
            )
        ],
        exc=ValueError("Error Here"),
    )

    assert client.write_run_logs.called
    client.write_run_logs.assert_called_with(
        [dict(flowRunId="id", level="ERROR", message="Error Here", name="agent")]
    )
Example #9
def test_deploy_flow_run_logs_flow_run_exceptions(monkeypatch, caplog, cloud_api):
    gql_return = MagicMock(
        return_value=MagicMock(data=MagicMock(write_run_logs=MagicMock(success=True)))
    )
    client = MagicMock()
    client.return_value.write_run_logs = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client", MagicMock(return_value=client))

    agent = Agent()
    agent.deploy_flow = MagicMock(side_effect=Exception("Error Here"))
    agent._deploy_flow_run(
        flow_run=GraphQLResult(
            {
                "id": "id",
                "serialized_state": Scheduled().serialize(),
                "scheduled_start_time": str(pendulum.now()),
                "version": 1,
                "task_runs": [
                    GraphQLResult(
                        {
                            "id": "id",
                            "version": 1,
                            "serialized_state": Scheduled().serialize(),
                        }
                    )
                ],
            }
        )
    )

    assert client.write_run_logs.called
    client.write_run_logs.assert_called_with(
        [dict(flow_run_id="id", level="ERROR", message="Error Here", name="agent")]
    )
    assert "Exception encountered while deploying flow run id" in caplog.text
Example #10
def test_agent_process(monkeypatch, runner_token, cloud_api):
    gql_return = MagicMock(return_value=MagicMock(data=MagicMock(
        set_flow_run_state=None,
        set_task_run_state=None,
        get_runs_in_queue=MagicMock(flow_run_ids=["id"]),
        flow_run=[
            GraphQLResult({
                "id":
                "id",
                "serialized_state":
                Scheduled().serialize(),
                "version":
                1,
                "task_runs": [
                    GraphQLResult({
                        "id": "id",
                        "version": 1,
                        "serialized_state": Scheduled().serialize(),
                    })
                ],
            })
        ],
    )))
    client = MagicMock()
    client.return_value.graphql = gql_return
    monkeypatch.setattr("prefect.agent.agent.Client", client)

    executor = MagicMock()
    future_mock = MagicMock()
    executor.submit = MagicMock(return_value=future_mock)

    agent = Agent()
    assert agent.agent_process(executor)
    assert executor.submit.called
    assert future_mock.add_done_callback.called
Example #11
    async def test_get_flow_run_in_queue_filters_labels_on_task_runs_correctly(
        self,
        flow_run_id,
        labeled_flow_run_id,
        labeled_task_run_id,
        task_run_id,
    ):

        await states.set_task_run_state(
            task_run_id=labeled_task_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )
        await states.set_task_run_state(
            task_run_id=task_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        super_flow_runs = await runs.get_runs_in_queue(
            labels=["foo", "bar", "chris"])
        random_flow_runs = await runs.get_runs_in_queue(labels=["dev"])
        mixed_flow_runs = await runs.get_runs_in_queue(
            labels=["foo", "staging"])
        assert labeled_flow_run_id in super_flow_runs
        assert flow_run_id not in super_flow_runs

        assert labeled_flow_run_id not in random_flow_runs
        assert flow_run_id not in random_flow_runs

        assert labeled_flow_run_id not in mixed_flow_runs
        assert flow_run_id not in mixed_flow_runs
Example #12
    async def test_start_with_one_root_then_other_succeeds(
            self, tenant_id, flow, agent):
        flow_run_id = await api.runs.create_flow_run(flow_id=flow.server_id)
        await api.states.set_flow_run_state(flow_run_id, state=Running())

        # ----------------------------------------------------------
        # first run - start with numbers 1

        # schedule numbers1 task to run
        await api.states.set_task_run_state(
            task_run_id=await api.runs.get_or_create_task_run(
                flow_run_id=flow_run_id, task_id=flow.numbers1.id
            ),
            state=Scheduled(),
        )

        await agent.run_scheduled(flow_id=flow.server_id)
        # wait for states to be written to the db
        await asyncio.sleep(1.0)

        fr, task_states = await await_flow_run_state(flow_run_id, "Running", 1)

        assert fr.serialized_state["type"] == "Running"
        assert len(task_states) == 3
        # numbers1 task
        assert task_states[(flow.numbers1.slug, -1)].is_successful()
        # numbers2 task
        assert task_states[(flow.numbers2.slug, -1)].is_pending()
        # add parent task
        assert task_states[(flow.add.slug, -1)].is_pending()

        # ----------------------------------------------------------
        # second run - start with numbers 2

        # schedule numbers 2 task to run
        await api.states.set_task_run_state(
            task_run_id=await api.runs.get_or_create_task_run(
                flow_run_id=flow_run_id, task_id=flow.numbers2.id
            ),
            state=Scheduled(),
        )

        await agent.run_scheduled(flow_id=flow.server_id)
        # wait for states to be written to the db
        await asyncio.sleep(1.0)

        fr, task_states = await await_flow_run_state(flow_run_id, "Success")

        assert fr.serialized_state["type"] == "Success"
        assert len(task_states) == 6
        # numbers1 task
        assert task_states[(flow.numbers1.slug, -1)].is_successful()
        # numbers2 task
        assert task_states[(flow.numbers2.slug, -1)].is_successful()
        # add parent task
        assert task_states[(flow.add.slug, -1)].is_mapped()

        # add child tasks
        for i in range(3):
            assert task_states[(flow.add.slug, i)].is_successful()
Example #13
    def test_scheduled_states_with_future_start_time(self):
        state = Scheduled(
            start_time=pendulum.now("utc") + datetime.timedelta(minutes=10)
        )
        with pytest.raises(ENDRUN) as exc:
            FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(
                state=state
            )
        assert exc.value.state is state
Example #14
    def _from_task_run_data(cls, task_run: dict) -> "TaskRunView":
        """
        Instantiate a `TaskRunView` from serialized data

        This method deserializes objects into their Prefect types.

        Args:
            - task_run: The serialized task run data

        Returns:
            A populated `TaskRunView` instance
        """
        task_run = task_run.copy()  # Create a copy to avoid mutation
        task_run_id = task_run.pop("id")
        task_data = task_run.pop("task")

        # The serialized state _could_ be null if the backend has not
        # created it yet; this is typically seen with mapped tasks
        serialized_state = task_run.pop(
            "serialized_state") or Scheduled().serialize()

        return cls(
            task_run_id=task_run_id,
            state=State.deserialize(serialized_state),
            task_id=task_data["id"],
            task_slug=task_data["slug"],
            **task_run,
        )
Example #15
    async def test_future_flow_runs_are_not_retrieved(self, flow_run_id, tenant_id):
        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").add(days=1)),
        )

        assert not await api.runs.get_runs_in_queue(tenant_id=tenant_id)
Example #16
    async def test_get_flow_run_in_queue_uses_labels_on_task_runs(
        self, flow_run_id, labeled_flow_run_id, labeled_task_run_id, task_run_id,
    ):

        await states.set_task_run_state(
            task_run_id=labeled_task_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )
        await states.set_task_run_state(
            task_run_id=task_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        flow_runs = await runs.get_runs_in_queue(labels=["foo", "bar"])
        assert labeled_flow_run_id in flow_runs
        assert flow_run_id not in flow_runs
Example #17
    def test_scheduled_states_without_start_time(self):
        state = Scheduled(start_time=None)
        assert (
            FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(
                state=state
            )
            is state
        )
Example #18
    async def test_get_flow_run_in_queue_uses_labels(self, tenant_id,
                                                     flow_run_id,
                                                     labeled_flow_run_id):

        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )
        await api.states.set_flow_run_state(
            flow_run_id=labeled_flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        flow_runs = await api.runs.get_runs_in_queue(tenant_id=tenant_id,
                                                     labels=["foo", "bar"])
        assert labeled_flow_run_id in flow_runs
        assert flow_run_id not in flow_runs
Example #19
    async def test_get_flow_run_in_queue(self, flow_run_id, tenant_id):

        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        flow_runs = await api.runs.get_runs_in_queue(tenant_id=tenant_id)
        assert flow_run_id in flow_runs
Example #20
    def test_scheduled_states_with_past_start_time(self):
        state = Scheduled(
            start_time=pendulum.now("utc") - datetime.timedelta(minutes=1)
        )
        assert (
            FlowRunner(flow=Flow(name="test")).check_flow_reached_start_time(
                state=state
            )
            is state
        )
Example #21
    def test_mapped_task_can_be_scheduled(self, executor):

        with Flow(name="test") as flow:
            res = ReturnTask().map([0, 0])

        state = FlowRunner(flow=flow).run(
            return_tasks=[res],
            executor=executor,
            task_states={res: Scheduled(start_time=pendulum.now().subtract(minutes=1))},
        )
        assert state.is_successful()
Example #22
    async def test_future_flow_runs_are_not_retrieved(
        self, flow_run_id,
    ):

        await models.FlowRun.where({"id": {"_neq": flow_run_id}}).delete()
        await states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").add(days=1)),
        )

        assert not await runs.get_runs_in_queue()
Example #23
    def test_mapped_task_can_be_scheduled_for_future(self, executor):

        with Flow(name="test") as flow:
            res = ReturnTask().map([0, 0])

        state = FlowRunner(flow=flow).run(
            return_tasks=[res],
            executor=executor,
            task_states={res: Scheduled(start_time=pendulum.now().add(hours=1))},
        )
        assert state.is_running()
        assert isinstance(state.result[res], Scheduled)
Example #24
class TestCheckFlowPendingOrRunning:
    @pytest.mark.parametrize("state", [Pending(), Running(), Retrying(), Scheduled()])
    def test_pending_or_running_are_ok(self, state):
        flow = Flow(name="test", tasks=[Task()])
        new_state = FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)
        assert new_state is state

    @pytest.mark.parametrize("state", [Finished(), Success(), Failed(), Skipped()])
    def test_not_pending_or_running_raise_endrun(self, state):
        flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=flow).check_flow_is_pending_or_running(state=state)
Example #25
    def test_creates_subprocess_correctly(self, cloud_mocks, mocks,
                                          include_local_env):
        # Return a scheduled flow run to start
        cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
        # Return a finished flow run after the first iteration
        cloud_mocks.FlowRunView().get_latest().state = Success()

        execute_flow_run_in_subprocess("flow-run-id",
                                       include_local_env=include_local_env)

        # Should pass the correct flow run id to wait for
        mocks.wait_for_flow_run_start_time.assert_called_once_with(
            "flow-run-id")

        # Merge the starting env and the env generated for a flow run
        base_env = os.environ.copy() if include_local_env else {}
        generated_env = {
            "PREFECT__CLOUD__SEND_FLOW_RUN_LOGS":
            "True",
            "PREFECT__LOGGING__LEVEL":
            "INFO",
            "PREFECT__LOGGING__FORMAT":
            "[%(asctime)s] %(levelname)s - %(name)s | %(message)s",
            "PREFECT__LOGGING__DATEFMT":
            "%Y-%m-%d %H:%M:%S%z",
            "PREFECT__BACKEND":
            "cloud",
            "PREFECT__CLOUD__API":
            "https://api.prefect.io",
            "PREFECT__CLOUD__TENANT_ID":
            "",
            "PREFECT__CLOUD__API_KEY":
            cloud_mocks.Client().api_key,
            "PREFECT__CONTEXT__FLOW_RUN_ID":
            "flow-run-id",
            "PREFECT__CONTEXT__FLOW_ID":
            cloud_mocks.FlowRunView.from_flow_run_id().flow_id,
            "PREFECT__ENGINE__FLOW_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudFlowRunner",
            "PREFECT__ENGINE__TASK_RUNNER__DEFAULT_CLASS":
            "prefect.engine.cloud.CloudTaskRunner",
        }
        expected_env = {**base_env, **generated_env}

        # Calls the correct command w/ environment variables
        mocks.subprocess.run.assert_called_once_with(
            [sys.executable, "-m", "prefect", "execute", "flow-run"],
            env=expected_env,
        )

        # Return code is checked
        mocks.subprocess.run().check_returncode.assert_called_once()
Example #26
    async def test_get_runs_in_queue(
        self,
        run_query,
        flow_run_id,
    ):
        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        result = await run_query(query=self.mutation,
                                 variables=dict(input=dict()))
        assert flow_run_id in result.data.get_runs_in_queue.flow_run_ids
Example #27
    def test_loops_until_flow_run_is_finished(self, cloud_mocks, mocks):
        cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
        cloud_mocks.FlowRunView.from_flow_run_id().get_latest.side_effect = [
            MagicMock(state=Running()),
            MagicMock(state=Running()),
            MagicMock(state=Success()),
        ]

        execute_flow_run_in_subprocess("flow-run-id")

        # Ran the subprocess twice
        assert mocks.subprocess.run.call_count == 2
        # Waited each time
        assert mocks.wait_for_flow_run_start_time.call_count == 2
Example #28
    def test_handles_bad_subprocess_result(self, cloud_mocks, mocks):
        cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
        mocks.subprocess.run.return_value.check_returncode.side_effect = (
            CalledProcessError(cmd="foo", returncode=1))

        # Re-raised as `RuntimeError`
        with pytest.raises(RuntimeError, match="flow run process failed"):
            execute_flow_run_in_subprocess("flow-run-id")

        # Only tried to run once
        mocks.subprocess.run.assert_called_once()

        # Flow run is not failed at this time -- left to the FlowRunner
        mocks.fail_flow_run.assert_not_called()
Example #29
    def test_handles_signal_interrupt(self, cloud_mocks, mocks):
        cloud_mocks.FlowRunView.from_flow_run_id().state = Scheduled()
        mocks.subprocess.run.side_effect = KeyboardInterrupt()

        # Keyboard interrupt should be re-raised
        with pytest.raises(KeyboardInterrupt):
            execute_flow_run_in_subprocess("flow-run-id")

        # Only tried to run once
        mocks.subprocess.run.assert_called_once()

        # Flow run is failed with the proper message
        mocks.fail_flow_run.assert_called_once_with(
            flow_run_id="flow-run-id",
            message="Flow run received an interrupt signal.")
Example #30
    async def test_getting_a_flow_run_from_queue_doesnt_dequeue_it(
            self, flow_run_id, tenant_id):

        await api.states.set_flow_run_state(
            flow_run_id=flow_run_id,
            state=Scheduled(start_time=pendulum.now("utc").subtract(days=1)),
        )

        # retrieve the runs in the queue multiple times
        flow_runs1 = await api.runs.get_runs_in_queue(tenant_id=tenant_id)
        flow_runs2 = await api.runs.get_runs_in_queue(tenant_id=tenant_id)
        flow_runs3 = await api.runs.get_runs_in_queue(tenant_id=tenant_id)

        for frs in [flow_runs1, flow_runs2, flow_runs3]:
            assert flow_run_id in frs