Example #1
0
def test_simple_two_task_flow_with_final_task_already_running(monkeypatch, executor):
    """A flow whose final task is already ``Running`` in the backend stays running.

    The first task run is registered without a state and is expected to end
    up successful at version 2. The second is seeded as ``Running`` at
    version 1 and is expected to keep both its state and its version.
    """
    run_id = str(uuid.uuid4())
    first_tr_id = str(uuid.uuid4())
    second_tr_id = str(uuid.uuid4())

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t2.set_upstream(t1)

    flow_runs = [FlowRun(id=run_id)]
    fresh_run = TaskRun(id=first_tr_id, task_slug=flow.slugs[t1], flow_run_id=run_id)
    running_run = TaskRun(
        id=second_tr_id,
        task_slug=flow.slugs[t2],
        version=1,
        flow_run_id=run_id,
        state=Running(),
    )
    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=[fresh_run, running_run],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    # The flow as a whole cannot finish while its last task is still running.
    assert state.is_running()
    assert client.flow_runs[run_id].state.is_running()

    # First task: ran to completion.
    assert client.task_runs[first_tr_id].state.is_successful()
    assert client.task_runs[first_tr_id].version == 2

    # Second task: untouched by this runner.
    assert client.task_runs[second_tr_id].state.is_running()
    assert client.task_runs[second_tr_id].version == 1
Example #2
0
def test_simple_three_task_flow_with_one_failing_task(monkeypatch, executor):
    """A three-task chain whose last task raises ends in a failed flow run.

    ``t1`` and ``t2`` are plain tasks that are expected to succeed; ``t3``
    divides by zero, so both its task run and the flow run are expected to
    be reported as failed by the mocked cloud client.
    """

    @prefect.task
    def error():
        1 / 0

    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())
    task_run_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t3 = error()
        t2.set_upstream(t1)
        t3.set_upstream(t2)

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[
            TaskRun(id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_2, task_id=t2.id, flow_run_id=flow_run_id),
            TaskRun(id=task_run_id_3, task_id=t3.id, flow_run_id=flow_run_id),
        ],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(return_tasks=flow.tasks,
                                               executor=executor)

    assert state.is_failed()
    assert client.flow_runs[flow_run_id].state.is_failed()
    assert client.task_runs[task_run_id_1].state.is_successful()
    assert client.task_runs[task_run_id_1].version == 2
    assert client.task_runs[task_run_id_2].state.is_successful()
    assert client.task_runs[task_run_id_2].version == 2
    assert client.task_runs[task_run_id_3].state.is_failed()
    # This used to re-check task_run_id_2 (copy/paste slip), which left the
    # failing task run's version unverified.
    # NOTE(review): version 2 assumes the failed run sees the same two state
    # updates (running, then final state) as the successful runs above.
    assert client.task_runs[task_run_id_3].version == 2
def test_flow_runner_heartbeat_sets_command(monkeypatch, setting_available):
    """``_heartbeat`` returns True and records the heartbeat CLI invocation.

    The flow settings returned by the mocked GraphQL call either contain
    ``heartbeat_enabled=True`` or are empty, depending on
    ``setting_available``; the same outcome is asserted in both cases.
    """
    client = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    if setting_available:
        flow_settings = dict(heartbeat_enabled=True)
    else:
        flow_settings = {}
    client.graphql.return_value.data.flow_run_by_pk.flow.settings = flow_settings

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    with prefect.context(flow_run_id="foo"):
        res = runner._heartbeat()

    expected_cmd = [sys.executable, "-m", "prefect", "heartbeat", "flow-run", "-i", "foo"]
    assert res is True
    assert runner.heartbeat_cmd == expected_cmd
def test_flow_runner_calls_client_the_appropriate_number_of_times(client):
    """Running an empty flow performs the expected number of client calls.

    Expects two ``get_flow_run_info`` calls and two ``set_flow_run_state``
    calls, the latter carrying ``Running()`` then ``Success(result={})``.
    """
    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    runner.run()

    # initial state & cancel check
    assert client.get_flow_run_info.call_count == 2
    # Pending -> Running -> Success
    assert client.set_flow_run_state.call_count == 2

    reported_states = []
    for recorded_call in client.set_flow_run_state.call_args_list:
        reported_states.append(recorded_call[1]["state"])
    assert reported_states == [Running(), Success(result={})]
Example #5
0
def test_task_failure_caches_inputs_automatically(client):
    """A task that fails into ``Retrying`` carries its inputs as cached inputs.

    The cached input only compares equal to the expected ``Result`` once
    ``store_safe_value()`` has been called on the expected result. The
    last state sent to the client must be a ``Retrying`` state with the same
    cached input.
    """

    @prefect.task(max_retries=2, retry_delay=timedelta(seconds=100))
    def is_p_three(p):
        if p == 3:
            raise ValueError("No thank you.")

    with prefect.Flow("test") as f:
        p = prefect.Parameter("p")
        res = is_p_three(p)

    runner = CloudFlowRunner(flow=f)
    state = runner.run(return_tasks=[res], parameters=dict(p=3))

    assert state.is_running()
    task_state = state.result[res]
    assert isinstance(task_state, Retrying)

    exp_res = Result(3, result_handler=JSONResultHandler())
    # Unequal before the safe value is stored, equal afterwards.
    assert not task_state.cached_inputs["p"] == exp_res
    exp_res.store_safe_value()
    assert task_state.cached_inputs["p"] == exp_res

    final_call = client.set_task_run_state.call_args_list[-1]
    last_state = final_call[-1]["state"]
    assert isinstance(last_state, Retrying)
    assert last_state.cached_inputs["p"] == exp_res
    def do_mocked_run(self,
                      client,
                      monkeypatch,
                      n_attempts=None,
                      n_queries=None,
                      query_end_state=None):
        """Drive a ``CloudFlowRunner`` against a mocked, initially queued run.

        ``FlowRunner.run`` and ``time_sleep`` in the cloud flow runner module
        are replaced with mocks, as is ``client.get_flow_run_info``. The
        mocked run reports ``Success`` once it has been called ``n_attempts``
        times; the mocked info query reports ``Queued`` until it has been
        called ``n_queries`` times and ``query_end_state`` afterwards.

        Returns the tuple ``(state, mock_sleep, mock_run)``.
        """
        mock_sleep = MagicMock()

        def fake_run(*args, **kwargs):
            # `mock_run` is bound further down; it is only looked up at call time.
            if n_attempts is not None and mock_run.call_count >= n_attempts:
                return Success()
            info = fake_flow_run_info()
            if not info.state.is_queued():
                return info.state
            start = pendulum.now("UTC").add(seconds=self.queue_time)
            return Queued(start_time=start)

        mock_run = MagicMock(side_effect=fake_run)

        def fake_flow_run_info(*args, **kwargs):
            still_queued = (n_queries is None
                            or mock_get_flow_run_info.call_count < n_queries)
            state = Queued() if still_queued else query_end_state
            return MagicMock(version=mock_get_flow_run_info.call_count,
                             state=state)

        mock_get_flow_run_info = MagicMock(side_effect=fake_flow_run_info)
        client.get_flow_run_info = mock_get_flow_run_info

        runner_module = "prefect.engine.cloud.flow_runner"
        monkeypatch.setattr(runner_module + ".FlowRunner.run", mock_run)
        monkeypatch.setattr(runner_module + ".time_sleep", mock_sleep)

        @prefect.task
        def return_one():
            return 1

        with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
            return_one()

        overrides = {
            "cloud.check_cancellation_interval":
            self.check_cancellation_interval
        }
        with set_temporary_config(overrides):
            state = CloudFlowRunner(flow=flow).run()
        return state, mock_sleep, mock_run
Example #7
0
def test_starting_at_arbitrary_loop_index_from_cloud_context(client):
    """A looping task picks up ``task_loop_count`` supplied via cloud context.

    The mocked flow run info carries ``task_loop_count=20`` in its context,
    so ``looper`` is expected to return on its first execution: its result
    is 10 and the downstream square is 100.
    """

    @prefect.task
    def looper(x):
        running_total = prefect.context.get("task_loop_result", 0) + x
        if prefect.context.get("task_loop_count", 1) < 20:
            raise LOOP(result=running_total)
        return running_total

    @prefect.task
    def downstream(l):
        return l**2

    with prefect.Flow(name="looping", result_handler=JSONResultHandler()) as f:
        inter = looper(10)
        final = downstream(inter)

    run_info = MagicMock(context={"task_loop_count": 20})
    client.get_flow_run_info = MagicMock(return_value=run_info)

    runner = CloudFlowRunner(flow=f)
    flow_state = runner.run(return_tasks=[inter, final])

    assert flow_state.is_successful()
    assert flow_state.result[inter].result == 10
    assert flow_state.result[final].result == 100
Example #8
0
def test_flow_runner_retries_forever_on_queued_state(client, monkeypatch,
                                                     num_attempts):
    """The cloud runner re-runs the flow until it stops coming back ``Queued``.

    The underlying ``FlowRunner.run`` is mocked to return ``Queued`` for the
    first ``num_attempts - 1`` calls and ``Success`` on the last one.
    """
    sleep_mock = MagicMock()
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.time.sleep",
                        sleep_mock)

    # One Queued outcome per failed attempt, then a final Success.
    outcomes = []
    for offset in range(num_attempts - 1):
        outcomes.append(
            Queued(start_time=pendulum.now("UTC").add(seconds=offset)))
    outcomes.append(Success())
    run_mock = MagicMock(side_effect=outcomes)

    info_results = [MagicMock(version=i) for i in range(num_attempts)]
    client.get_flow_run_info = MagicMock(side_effect=info_results)

    # Mock out the actual flow execution
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.FlowRunner.run",
                        run_mock)

    @prefect.task
    def return_one():
        return 1

    with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
        return_one()

    # NOTE(review): the comment that used to sit here referred to real
    # (un-mocked) sleep calls guarding against flakiness in the full suite,
    # but this test makes no such calls.
    final_state = CloudFlowRunner(flow=flow).run()
    assert final_state.is_successful()

    assert run_mock.call_count == num_attempts
    # Not called on the initial run attempt
    assert client.get_flow_run_info.call_count == num_attempts - 1
Example #9
0
def test_scheduled_start_time_is_in_context(monkeypatch, executor):
    """The ``whats_the_time`` task (defined elsewhere) yields a datetime.

    Runs a single-task flow against the mocked cloud client and checks that
    the flow run and task run succeed and that the task's result is a
    ``datetime.datetime`` instance.
    """
    run_id = str(uuid.uuid4())
    tr_id = str(uuid.uuid4())

    flow = prefect.Flow(name="test", tasks=[whats_the_time])

    flow_runs = [FlowRun(id=run_id)]
    task_runs = [
        TaskRun(id=tr_id, task_id=whats_the_time.id, flow_run_id=run_id),
    ]
    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=executor)

    assert state.is_successful()
    assert client.flow_runs[run_id].state.is_successful()
    assert client.task_runs[tr_id].state.is_successful()

    reported_time = state.result[whats_the_time].result
    assert isinstance(reported_time, datetime.datetime)
Example #10
0
def test_simple_map(monkeypatch):
    """Mapping ``plus_one`` over three inputs yields one parent and three children.

    The pre-registered task run for the mapped task should end up ``Mapped``
    and the mocked client should hold four task runs sharing its slug.
    """
    run_id = str(uuid.uuid4())
    mapped_tr_id = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([0, 1, 2])

    flow_runs = [FlowRun(id=run_id)]
    task_runs = [
        TaskRun(id=mapped_tr_id, task_slug=flow.slugs[t1], flow_run_id=run_id)
    ]
    # Every other task in the flow also needs a task run of its own.
    for t in flow.tasks:
        if t is t1:
            continue
        task_runs.append(
            TaskRun(id=str(uuid.uuid4()), task_slug=flow.slugs[t], flow_run_id=run_id)
        )

    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=run_id):
        runner = CloudFlowRunner(flow=flow)
        state = runner.run(return_tasks=flow.tasks, executor=LocalExecutor())

    assert state.is_successful()
    assert client.flow_runs[run_id].state.is_successful()
    assert client.task_runs[mapped_tr_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to the mapped task
    runs_for_t1 = [
        tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1]
    ]
    assert len(runs_for_t1) == 4
Example #11
0
def test_flow_runner_loads_context_from_cloud(monkeypatch):
    """``initialize_run`` exposes context keys returned by ``get_flow_run_info``."""
    flow = prefect.Flow(name="test")

    run_info = MagicMock(context={"a": 1})
    client = MagicMock(
        get_flow_run_info=MagicMock(return_value=run_info),
        set_flow_run_state=MagicMock(),
    )
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    runner = CloudFlowRunner(flow=flow)
    res = runner.initialize_run(
        state=Pending(),
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert res.context["a"] == 1
Example #12
0
def test_flow_runner_respects_the_db_state(monkeypatch, state):
    """With no explicit state, the runner returns the state reported by the client.

    ``state`` is a state class; an instance of it is served by the mocked
    ``get_flow_run_info`` and is expected back unchanged, with no state
    updates sent.
    """
    flow = prefect.Flow(name="test")
    db_state = state("already", result=10)

    info_mock = MagicMock(return_value=MagicMock(state=db_state))
    set_state_mock = MagicMock()
    client = MagicMock(get_flow_run_info=info_mock,
                       set_flow_run_state=set_state_mock)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client",
                        MagicMock(return_value=client))

    runner = CloudFlowRunner(flow=flow)
    res = runner.run()

    ## assertions
    # one time to pull latest state
    assert info_mock.call_count == 1
    # never needs to update state
    assert set_state_mock.call_count == 0
    assert res == db_state
Example #13
0
def test_flow_runner_raises_endrun_with_correct_state_if_client_cant_retrieve_state(
    monkeypatch, ):
    """If ``get_flow_run_info`` raises, ``run`` hands back the state it was given."""
    flow = prefect.Flow(name="test")

    failing_info = MagicMock(side_effect=SyntaxError)
    client = MagicMock(get_flow_run_info=failing_info,
                       set_flow_run_state=MagicMock())
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client",
                        MagicMock(return_value=client))

    ## if ENDRUN is raised, res will be last state seen
    initial_state = Pending("unique message", result=22)
    runner = CloudFlowRunner(flow=flow)
    res = runner.run(state=initial_state)

    assert failing_info.called
    # Identity, not just equality: the very same state object comes back.
    assert res is initial_state
Example #14
0
def test_flow_runner_prioritizes_user_context_over_default_context(monkeypatch):
    """A ``today`` value supplied by the cloud run info lands in the run context."""
    flow = prefect.Flow(name="test")

    run_info = MagicMock(context={"today": "is a day"})
    client = MagicMock(
        get_flow_run_info=MagicMock(return_value=run_info),
        set_flow_run_state=MagicMock(),
    )
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    runner = CloudFlowRunner(flow=flow)
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert "today" in res.context
    assert res.context["today"] == "is a day"
def test_flow_runner_initializes_context_from_cloud(monkeypatch):
    """``initialize_run`` fills the context from a full ``FlowRunInfoResult``.

    Checks the flow, flow-run and project identifiers, the scheduled start
    time, and how explicitly passed parameters and context are merged with
    the values coming from the mocked cloud response.
    """
    from prefect.client.client import FlowRunInfoResult, ProjectInfo

    flow = prefect.Flow(name="test")
    scheduled_start_time = pendulum.parse("19860920")

    cloud_info = FlowRunInfoResult(
        id="my-flow-run-id",
        name="my-flow-run-name",
        flow_id="my-flow-id",
        version=1,
        task_runs=[],
        state=Pending(),
        scheduled_start_time=scheduled_start_time,
        project=ProjectInfo(id="my-project-id", name="my-project-name"),
        parameters={"p1": 1, "p2": 2},
        context={"c1": 1, "c2": 2},
    )
    client = MagicMock(
        get_flow_run_info=MagicMock(return_value=cloud_info),
        set_flow_run_state=MagicMock(),
    )
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.Client", MagicMock(return_value=client)
    )

    runner = CloudFlowRunner(flow=flow)
    res = runner.initialize_run(
        state=Pending(),
        task_states={},
        context={"c2": "two", "c3": 3},
        task_contexts={},
        parameters={"p2": "two", "p3": 3},
    )
    ctx = res.context

    assert ctx["flow_id"] == "my-flow-id"
    assert ctx["flow_run_id"] == "my-flow-run-id"
    assert ctx["flow_run_version"] == 1
    assert ctx["flow_run_name"] == "my-flow-run-name"
    assert ctx["scheduled_start_time"] == scheduled_start_time
    assert ctx["project_name"] == "my-project-name"
    assert ctx["project_id"] == "my-project-id"

    # Explicitly provided parameters override those in cloud
    assert ctx["parameters"] == {"p1": 1, "p2": "two", "p3": 3}
    # Explicitly provided context overridden by cloud
    assert ctx["c1"] == 1
    assert ctx["c2"] == 2
    assert ctx["c3"] == 3
Example #16
0
def test_flow_runner_prioritizes_kwarg_states_over_db_states(
        monkeypatch, state):
    """An explicitly passed ``Pending`` state wins over the state held by the client.

    Even though the mocked client reports an instance of the ``state``
    class, the run is expected to send ``Running()`` and then
    ``Success(result={})``.
    """
    flow = prefect.Flow(name="test")
    db_state = state("already", result=10)

    info_mock = MagicMock(return_value=MagicMock(state=db_state))
    set_state_mock = MagicMock()
    client = MagicMock(get_flow_run_info=info_mock,
                       set_flow_run_state=set_state_mock)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client",
                        MagicMock(return_value=client))

    runner = CloudFlowRunner(flow=flow)
    runner.run(state=Pending("let's do this"))

    ## assertions
    # one time to pull latest state
    assert info_mock.call_count == 1
    # Pending -> Running -> Success
    assert set_state_mock.call_count == 2

    sent_states = []
    for recorded_call in set_state_mock.call_args_list:
        sent_states.append(recorded_call[1]["state"])
    assert sent_states == [Running(), Success(result={})]
Example #17
0
def test_flow_runner_puts_scheduled_start_time_in_context(monkeypatch):
    """``initialize_run`` copies the run's scheduled start time into the context."""
    flow = prefect.Flow(name="test")
    date = pendulum.parse("19860920")

    run_info = MagicMock(context={}, scheduled_start_time=date)
    client = MagicMock(
        get_flow_run_info=MagicMock(return_value=run_info),
        set_flow_run_state=MagicMock(),
    )
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    runner = CloudFlowRunner(flow=flow)
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert "scheduled_start_time" in res.context
    start_time = res.context["scheduled_start_time"]
    assert isinstance(start_time, datetime.datetime)
    assert start_time.strftime("%Y-%m-%d") == "1986-09-20"
Example #18
0
def test_flow_runner_puts_flow_run_name_in_context(monkeypatch):
    """``initialize_run`` exposes the flow run's name as ``flow_run_name``."""
    flow = prefect.Flow(name="test")

    # we can't pass a `name` argument to a mock
    # https://docs.python.org/3/library/unittest.mock.html#mock-names-and-the-name-attribute
    run_info = MagicMock(context={})
    run_info.name = "flow run name"

    client = MagicMock(
        get_flow_run_info=MagicMock(return_value=run_info),
        set_flow_run_state=MagicMock(),
    )
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    runner = CloudFlowRunner(flow=flow)
    res = runner.initialize_run(
        state=None,
        task_states={},
        context={},
        task_contexts={},
        parameters={},
    )

    assert res.context["flow_run_name"] == "flow run name"
    def test_check_interrupt_loop_robust_to_api_errors(self, client,
                                                       monkeypatch):
        """Cancellation is still detected when some cancellation checks raise.

        ``client.get_flow_run_info`` is replaced with a function that raises
        ``ValueError`` on odd-numbered calls made from a function named
        ``interrupt_if_cancelling`` and otherwise reports ``Running`` until
        the task sets ``trigger``, after which it reports ``Cancelling``.
        The run must end ``Cancelled``, at least one error must have been
        raised, and the task must not have finished its 10 second sleep.
        """
        # Set by the task once it starts; flips the reported state to Cancelling.
        trigger = threading.Event()

        error_was_raised = False

        # `_call_count` is a default argument, so a single counter is shared
        # across every invocation of this function.
        def get_flow_run_info(*args, _call_count=itertools.count(), **kwargs):
            call_count = next(_call_count)
            import inspect

            # Name of the function that called us, read from the parent frame.
            caller_name = inspect.currentframe().f_back.f_code.co_name
            # Only fail for that caller, and only on odd-numbered calls.
            if caller_name == "interrupt_if_cancelling" and call_count % 2:
                nonlocal error_was_raised
                error_was_raised = True
                raise ValueError("Woops!")
            state = Cancelling() if trigger.is_set() else Running()
            return MagicMock(version=call_count, state=state)

        client.get_flow_run_info = get_flow_run_info

        ran_longer_than_expected = False

        @prefect.task
        def set_trigger(x):
            trigger.set()
            # Long enough that the run should be interrupted before this returns.
            time.sleep(10)
            nonlocal ran_longer_than_expected
            ran_longer_than_expected = True
            return x + 1

        with prefect.Flow("test") as flow:
            set_trigger(1)

        # Short interval so the cancellation check fires many times during the sleep.
        with set_temporary_config({"cloud.check_cancellation_interval": 0.1}):
            res = CloudFlowRunner(flow=flow).run()

        assert isinstance(res, Cancelled)
        assert error_was_raised
        assert not ran_longer_than_expected
Example #20
0
def test_db_cancelled_states_interrupt_flow_run(client, monkeypatch):
    """A ``Cancelled`` state reported through ``client.graphql`` interrupts the run.

    The stubbed ``graphql`` call reports ``Running`` for its first three
    invocations and ``Cancelled`` from then on, while the flow's only task
    sleeps for three seconds.
    """
    running_reports = 0

    def heartbeat_counter(*args, **kwargs):
        nonlocal running_reports
        if running_reports != 3:
            running_reports += 1
            reported = "Running"
        else:
            reported = "Cancelled"
        return Box(dict(data=dict(flow_run_by_pk=dict(state=reported))))

    client.graphql = heartbeat_counter

    @prefect.task
    def sleeper():
        time.sleep(3)

    f = prefect.Flow("test", tasks=[sleeper])

    with set_temporary_config({"cloud.heartbeat_interval": 0.025}):
        runner = CloudFlowRunner(flow=f)
        state = runner.run(return_tasks=[sleeper])

    assert isinstance(state, Cancelled)
    assert "interrupt" in state.message.lower()
Example #21
0
def test_task_failure_with_upstream_secrets_doesnt_store_secret_value_and_recompute_if_necessary(
    client, ):
    """Cached secret inputs are stored safely and re-read from context on retry.

    The first run (secret ``p=3``) leaves the task ``Retrying`` with a
    cached input that only matches the expected ``Result`` after
    ``store_safe_value()``. The second run (secret ``p=4``) is fed
    ``SafeResult`` placeholders and is expected to succeed with result 4.
    """

    @prefect.task(max_retries=2, retry_delay=timedelta(seconds=100))
    def is_p_three(p):
        if p == 3:
            raise ValueError("No thank you.")
        return p

    with prefect.Flow("test", result_handler=JSONResultHandler()) as f:
        p = prefect.tasks.secrets.Secret("p")
        res = is_p_three(p)

    with prefect.context(secrets=dict(p=3)):
        first_runner = CloudFlowRunner(flow=f)
        state = first_runner.run(return_tasks=[res])

    assert state.is_running()
    retrying = state.result[res]
    assert isinstance(retrying, Retrying)

    exp_res = Result(3, result_handler=SecretResultHandler(p))
    assert not retrying.cached_inputs["p"] == exp_res
    exp_res.store_safe_value()
    assert retrying.cached_inputs["p"] == exp_res

    ## here we set the result of the secret to a saferesult, ensuring
    ## it will get converted to a "true" result;
    ## we expect that the upstream value will actually get recomputed from context
    ## through the SecretResultHandler
    safe = SafeResult("p", result_handler=SecretResultHandler(p))
    state.result[p] = Success(result=safe)
    retrying.start_time = pendulum.now("utc")
    retrying.cached_inputs = dict(p=safe)

    with prefect.context(secrets=dict(p=4)):
        second_runner = CloudFlowRunner(flow=f)
        new_state = second_runner.run(return_tasks=[res],
                                      task_states=state.result)

    assert new_state.is_successful()
    assert new_state.result[res].result == 4
Example #22
0
def test_non_keyed_states_are_hydrated_correctly_with_retries(
        monkeypatch, tmpdir):
    """
    Checks that retries with a delay longer than 10 minutes correctly "hydrate"
    upstream states so mapped tasks retry properly. For mapped tasks even
    non-data dependencies can change how many children get spawned.
    """

    @prefect.task
    def return_list():
        return [1, 2, 3]

    @prefect.task(max_retries=1, retry_delay=datetime.timedelta(minutes=20))
    def fail_once():
        if prefect.context.get("task_run_count", 0) < 2:
            raise SyntaxError("bad")
        return 100

    run_id = str(uuid.uuid4())
    mapped_tr_id = str(uuid.uuid4())
    list_tr_id = str(uuid.uuid4())

    with prefect.Flow(name="test-retries",
                      result=LocalResult(dir=tmpdir)) as flow:
        t1 = fail_once.map(upstream_tasks=[return_list])

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    flow_runs = [FlowRun(id=run_id)]
    task_runs = [
        TaskRun(id=mapped_tr_id, task_slug=flow.slugs[t1], flow_run_id=run_id),
        TaskRun(id=list_tr_id,
                task_slug=flow.slugs[return_list],
                flow_run_id=run_id),
    ]
    # Any remaining tasks in the flow get a task run with a random id.
    for t in flow.tasks:
        if t not in [t1, return_list]:
            task_runs.append(
                TaskRun(id=str(uuid.uuid4()),
                        task_slug=flow.slugs[t],
                        flow_run_id=run_id))

    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_for_t1():
        """Collect every task run currently registered under t1's slug."""
        return [
            tr for tr in client.task_runs.values()
            if tr.task_slug == flow.slugs[t1]
        ]

    with prefect.context(flow_run_id=run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert client.flow_runs[run_id].state.is_running()
    assert client.task_runs[mapped_tr_id].state.is_mapped()
    assert client.task_runs[list_tr_id].state.is_successful()

    # there should be a total of 4 task runs corresponding to each mapped task
    assert len(runs_for_t1()) == 4

    # every mapped child of t1 should be retrying
    children = [tr for tr in runs_for_t1() if tr.map_index != -1]
    assert all([isinstance(tr.state, Retrying) for tr in children])

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    for tr in client.task_runs.values():
        if tr.task_slug == flow.slugs[t1] and tr.map_index != -1:
            tr.state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    with prefect.context(flow_run_id=run_id):
        CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    assert len(runs_for_t1()) == 4
    assert all(tr.state.is_successful() for tr in client.task_runs.values())
Example #23
0
def test_deep_map_with_a_retry(monkeypatch):
    """
    Builds a three-level mapped flow in which the middle level hits a one-time
    error. The first run should leave that child retrying and its downstream
    child pending; running the flow again should make both succeed.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on second run
    """
    run_id = str(uuid.uuid4())
    tr_id_1 = str(uuid.uuid4())
    tr_id_2 = str(uuid.uuid4())
    tr_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    # Allow exactly one immediate retry of the middle layer.
    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    flow_runs = [FlowRun(id=run_id)]
    task_runs = [
        TaskRun(id=tr_id_1, task_id=t1.id, flow_run_id=run_id),
        TaskRun(id=tr_id_2, task_id=t2.id, flow_run_id=run_id),
        TaskRun(id=tr_id_3, task_id=t3.id, flow_run_id=run_id),
    ]
    for t in flow.tasks:
        if t not in [t1, t2, t3]:
            task_runs.append(TaskRun(id=t.id, task_id=t.id, flow_run_id=run_id))

    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def first_child(task):
        """Return the task run for ``task`` whose map index is 0."""
        return next(tr for tr in client.task_runs.values()
                    if tr.task_id == task.id and tr.map_index == 0)

    with prefect.context(flow_run_id=run_id):
        CloudFlowRunner(flow=flow).run()

    assert client.flow_runs[run_id].state.is_running()
    for parent_id in (tr_id_1, tr_id_2, tr_id_3):
        assert client.task_runs[parent_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for t in [t1, t2, t3]:
        matching = [
            tr for tr in client.task_runs.values() if tr.task_id == t.id
        ]
        assert len(matching) == 4

    # t2's first child task should be retrying
    assert isinstance(first_child(t2).state, Retrying)

    # t3's first child task should be pending
    assert first_child(t3).state.is_pending()

    # RUN A SECOND TIME
    with prefect.context(flow_run_id=run_id):
        CloudFlowRunner(flow=flow).run()

    # t2's first child task should be successful
    assert first_child(t2).state.is_successful()

    # t3's first child task should be successful
    assert first_child(t3).state.is_successful()
Example #24
0
def test_deep_map_with_a_failure(monkeypatch, executor):
    """A failure in the middle of a three-level mapped flow fails the flow run.

    All three parent task runs should be ``Mapped`` with four task runs per
    slug, while the first children of ``t2`` and ``t3`` should be failed.
    """
    run_id = str(uuid.uuid4())
    tr_id_1 = str(uuid.uuid4())
    tr_id_2 = str(uuid.uuid4())
    tr_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    flow_runs = [FlowRun(id=run_id)]
    task_runs = [
        TaskRun(id=tr_id_1, task_slug=flow.slugs[t1], flow_run_id=run_id),
        TaskRun(id=tr_id_2, task_slug=flow.slugs[t2], flow_run_id=run_id),
        TaskRun(id=tr_id_3, task_slug=flow.slugs[t3], flow_run_id=run_id),
    ]
    # Any remaining tasks in the flow get a task run with a random id.
    for t in flow.tasks:
        if t not in [t1, t2, t3]:
            task_runs.append(
                TaskRun(
                    id=str(uuid.uuid4()),
                    task_slug=flow.slugs[t],
                    flow_run_id=run_id,
                )
            )

    client = MockedCloudClient(
        flow_runs=flow_runs,
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def first_child(task):
        """Return the task run for ``task``'s slug whose map index is 0."""
        return next(
            tr
            for tr in client.task_runs.values()
            if tr.task_slug == flow.slugs[task] and tr.map_index == 0
        )

    with prefect.context(flow_run_id=run_id):
        state = CloudFlowRunner(flow=flow).run(return_tasks=flow.tasks)

    assert state.is_failed()
    assert client.flow_runs[run_id].state.is_failed()
    for parent_id in (tr_id_1, tr_id_2, tr_id_3):
        assert client.task_runs[parent_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for t in [t1, t2, t3]:
        matching = [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t]
        ]
        assert len(matching) == 4

    # t2's first child task should have failed
    assert first_child(t2).state.is_failed()

    # t3's first child task should have failed
    assert first_child(t3).state.is_failed()
Example #25
0
def test_deep_map_with_a_retry(monkeypatch):
    """
    Runs a three-layer mapped flow twice against a mocked Cloud client.

    The middle layer is allowed one retry. After the first run its map_index 0
    child is expected to be Retrying and the matching child of the last layer
    still pending; after the second run both are expected to be successful.

    DOES NOT WORK WITH DASK EXECUTORS because of the need for shared state on second run
    """

    flow_id = str(uuid.uuid4())
    parent_ids = [str(uuid.uuid4()) for _ in range(3)]

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        layer_1 = plus_one.map([-1, 0, 1])
        layer_2 = invert_fail_once.map(layer_1)
        layer_3 = plus_one.map(layer_2)

    layers = (layer_1, layer_2, layer_3)

    # only the middle layer may retry, and only once
    layer_2.max_retries = 1
    layer_2.retry_delay = datetime.timedelta(seconds=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # one parent task run per mapped task, plus a run for every other task in the flow
    parent_runs = [
        TaskRun(id=parent_id, task_slug=task.slug, flow_run_id=flow_id)
        for parent_id, task in zip(parent_ids, layers)
    ]
    other_runs = [
        TaskRun(id=str(uuid.uuid4()), task_slug=task.slug, flow_run_id=flow_id)
        for task in flow.tasks
        if task not in layers
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_id)],
        task_runs=parent_runs + other_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # execute the flow once inside the flow-run context
        with prefect.context(flow_run_id=flow_id):
            CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    def first_child(task):
        # the task run currently stored for `task` with map_index 0
        return next(
            run
            for run in client.task_runs.values()
            if run.task_slug == task.slug and run.map_index == 0
        )

    run_flow()

    assert client.flow_runs[flow_id].state.is_running()
    for parent_id in parent_ids:
        assert client.task_runs[parent_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in layers:
        matching = [
            run for run in client.task_runs.values() if run.task_slug == task.slug
        ]
        assert len(matching) == 4

    # the middle layer's first child should be retrying ...
    assert isinstance(first_child(layer_2).state, Retrying)

    # ... which leaves the last layer's first child pending
    assert first_child(layer_3).state.is_pending()

    # RUN A SECOND TIME with an artificially updated start time
    retrying_ids = [
        run_id
        for run_id, run in client.task_runs.items()
        if run.task_slug == layer_2.slug and run.map_index == 0
    ]
    retrying_id = retrying_ids.pop()
    client.task_runs[retrying_id].state.start_time = pendulum.now("UTC")

    run_flow()

    # both first children should now be successful
    assert first_child(layer_2).state.is_successful()
    assert first_child(layer_3).state.is_successful()
Example #26
0
def test_task_runner_cls_is_cloud_task_runner():
    """A CloudFlowRunner's task_runner_cls attribute should be CloudTaskRunner."""
    empty_flow = prefect.Flow(name="test")
    runner = CloudFlowRunner(flow=empty_flow)
    assert runner.task_runner_cls is CloudTaskRunner
Example #27
0
def test_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Ensures that retries longer than 10 minutes properly "hydrate" upstream states
    so that mapped tasks retry correctly.

    The flow is run twice against a mocked Cloud client. Between the two runs
    every stored task run has its in-memory result value cleared, so the second
    run cannot rely on values left over from the first.
    """

    flow_id = str(uuid.uuid4())
    parent_ids = [str(uuid.uuid4()) for _ in range(2)]

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        upstream = plus_one.map([-1, 0, 1])
        downstream = invert_fail_once.map(upstream)

    mapped_tasks = (upstream, downstream)

    # the mapped downstream task may retry once, after a long delay
    downstream.max_retries = 1
    downstream.retry_delay = datetime.timedelta(minutes=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # one parent task run per mapped task, plus a run for every other task in the flow
    parent_runs = [
        TaskRun(id=parent_id, task_slug=flow.slugs[task], flow_run_id=flow_id)
        for parent_id, task in zip(parent_ids, mapped_tasks)
    ]
    other_runs = [
        TaskRun(id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_id)
        for task in flow.tasks
        if task not in mapped_tasks
    ]
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_id)],
        task_runs=parent_runs + other_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # execute the flow once inside the flow-run context
        with prefect.context(flow_run_id=flow_id):
            CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    def first_child(task):
        # the task run currently stored for `task` with map_index 0
        return next(
            run
            for run in client.task_runs.values()
            if run.task_slug == flow.slugs[task] and run.map_index == 0
        )

    run_flow()

    assert client.flow_runs[flow_id].state.is_running()
    for parent_id in parent_ids:
        assert client.task_runs[parent_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        matching = [
            run
            for run in client.task_runs.values()
            if run.task_slug == flow.slugs[task]
        ]
        assert len(matching) == 4

    # the downstream task's first child should be retrying
    assert isinstance(first_child(downstream).state, Retrying)

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    retrying_ids = [
        run_id
        for run_id, run in client.task_runs.items()
        if run.task_slug == flow.slugs[downstream] and run.map_index == 0
    ]
    retrying_id = retrying_ids.pop()
    client.task_runs[retrying_id].state.start_time = pendulum.now("UTC")

    for run in client.task_runs.values():
        run.state._result.value = None

    run_flow()

    # the downstream task's first child should be successful
    assert first_child(downstream).state.is_successful()