Beispiel #1
0
def test_starting_at_arbitrary_loop_index_from_cloud_context(client):
    """Check that a loop count supplied through the flow run context is used.

    The mocked flow run info carries ``task_loop_count=20`` in its context,
    which is exactly the threshold below which ``looper`` raises ``LOOP``.
    """

    @prefect.task
    def looper(x):
        running_total = prefect.context.get("task_loop_result", 0) + x
        if prefect.context.get("task_loop_count", 1) >= 20:
            return running_total
        raise LOOP(result=running_total)

    @prefect.task
    def downstream(l):
        return l ** 2

    with prefect.Flow(name="looping", result=PrefectResult()) as f:
        inter = looper(10)
        final = downstream(inter)

    # The flow run info returned by the client carries the loop count.
    flow_run_info = MagicMock(context={"task_loop_count": 20})
    client.get_flow_run_info = MagicMock(return_value=flow_run_info)

    flow_state = CloudFlowRunner(flow=f).run(return_tasks=[inter, final])

    assert flow_state.is_successful()
    for task, expected in ((inter, 10), (final, 100)):
        assert flow_state.result[task].result == expected
Beispiel #2
0
def test_simple_two_task_flow_with_final_task_already_running(
    monkeypatch, executor
):
    """Run a two-task flow whose downstream task run starts out ``Running``.

    Expects the flow run to be running afterwards, the upstream task run to
    be successful at version 2, and the downstream task run to still be
    running at version 1.
    """
    flow_run_id, task_run_id_1, task_run_id_2 = (
        str(uuid.uuid4()) for _ in range(3)
    )

    with prefect.Flow(name="test") as flow:
        t1 = prefect.Task()
        t2 = prefect.Task()
        t2.set_upstream(t1)

    upstream_run = TaskRun(
        id=task_run_id_1, task_id=t1.id, flow_run_id=flow_run_id
    )
    # The final task's run is registered as already running.
    downstream_run = TaskRun(
        id=task_run_id_2,
        task_id=t2.id,
        version=1,
        flow_run_id=flow_run_id,
        state=Running(),
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[upstream_run, downstream_run],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=executor
        )

    assert state.is_running()
    assert client.flow_runs[flow_run_id].state.is_running()

    first_run = client.task_runs[task_run_id_1]
    assert first_run.state.is_successful()
    assert first_run.version == 2

    second_run = client.task_runs[task_run_id_2]
    assert second_run.state.is_running()
    assert second_run.version == 1
Beispiel #3
0
def test_flow_runner_retries_forever_on_queued_state(
    client, monkeypatch, num_attempts
):
    """Check that the runner keeps re-running while the run comes back ``Queued``.

    ``FlowRunner.run`` is mocked to produce ``num_attempts - 1`` ``Queued``
    states followed by a ``Success``, and
    ``prefect.engine.cloud.flow_runner.time.sleep`` is replaced with a mock.
    """
    mock_sleep = MagicMock()
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.time.sleep", mock_sleep
    )

    # Every attempt except the last one reports a Queued state.
    run_states = []
    for offset in range(num_attempts - 1):
        run_states.append(
            Queued(start_time=pendulum.now("UTC").add(seconds=offset))
        )
    run_states.append(Success())
    mock_run = MagicMock(side_effect=run_states)

    flow_run_infos = [MagicMock(version=i) for i in range(num_attempts)]
    client.get_flow_run_info = MagicMock(side_effect=flow_run_infos)

    # Replace the real flow execution with the canned sequence of states.
    monkeypatch.setattr(
        "prefect.engine.cloud.flow_runner.FlowRunner.run", mock_run
    )

    @prefect.task
    def return_one():
        return 1

    with prefect.Flow("test-cloud-flow-runner-with-queues") as flow:
        return_one()

    final_state = CloudFlowRunner(flow=flow).run()

    assert final_state.is_successful()
    assert mock_run.call_count == num_attempts
    # get_flow_run_info is not called on the initial run attempt
    assert client.get_flow_run_info.call_count == num_attempts - 1
Beispiel #4
0
def test_scheduled_start_time_is_in_context(monkeypatch, executor):
    """Run a flow containing only ``whats_the_time`` and check it yields a datetime."""
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())

    flow = prefect.Flow(name="test", tasks=[whats_the_time])

    only_task_run = TaskRun(
        id=task_run_id_1, task_id=whats_the_time.id, flow_run_id=flow_run_id
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=[only_task_run],
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=executor
        )

    # The returned state and both recorded states must all be successful.
    assert state.is_successful()
    recorded_flow_state = client.flow_runs[flow_run_id].state
    assert recorded_flow_state.is_successful()
    recorded_task_state = client.task_runs[task_run_id_1].state
    assert recorded_task_state.is_successful()

    task_result = state.result[whats_the_time].result
    assert isinstance(task_result, datetime.datetime)
def test_flow_runner_heartbeat_sets_command(monkeypatch, setting_available):
    """Check that ``_heartbeat`` returns True and stores the heartbeat command.

    ``setting_available`` selects whether the mocked flow settings contain
    ``heartbeat_enabled=True`` or are an empty dict.
    """
    client = MagicMock()
    client_factory = MagicMock(return_value=client)
    monkeypatch.setattr("prefect.engine.cloud.flow_runner.Client", client_factory)

    if setting_available:
        flow_settings = {"heartbeat_enabled": True}
    else:
        flow_settings = {}
    client.graphql.return_value.data.flow_run_by_pk.flow.settings = flow_settings

    runner = CloudFlowRunner(flow=prefect.Flow(name="test"))
    with prefect.context(flow_run_id="foo"):
        res = runner._heartbeat()

    expected_cmd = [
        sys.executable,
        "-m",
        "prefect",
        "heartbeat",
        "flow-run",
        "-i",
        "foo",
    ]
    assert res is True
    assert runner.heartbeat_cmd == expected_cmd
Beispiel #6
0
def test_simple_map(monkeypatch):
    """Map ``plus_one`` over three inputs and check the recorded task runs."""
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([0, 1, 2])

    mapped_slug = flow.slugs[t1]

    # The mapped task gets a known run id; every other task gets a random one.
    task_runs = [
        TaskRun(id=task_run_id_1, task_slug=mapped_slug, flow_run_id=flow_run_id)
    ]
    for other in flow.tasks:
        if other is t1:
            continue
        task_runs.append(
            TaskRun(
                id=str(uuid.uuid4()),
                task_slug=flow.slugs[other],
                flow_run_id=flow_run_id,
            )
        )

    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    with prefect.context(flow_run_id=flow_run_id):
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=LocalExecutor()
        )

    assert state.is_successful()
    assert client.flow_runs[flow_run_id].state.is_successful()
    assert client.task_runs[task_run_id_1].state.is_mapped()

    # there should be a total of 4 task runs corresponding to the mapped task
    runs_for_mapped_task = [
        tr for tr in client.task_runs.values() if tr.task_slug == mapped_slug
    ]
    assert len(runs_for_mapped_task) == 4
Beispiel #7
0
def test_task_runner_cls_is_cloud_task_runner():
    """A ``CloudFlowRunner`` should expose ``CloudTaskRunner`` as its task runner class."""
    empty_flow = prefect.Flow(name="test")
    runner = CloudFlowRunner(flow=empty_flow)
    assert runner.task_runner_cls is CloudTaskRunner
Beispiel #8
0
def test_deep_map_with_a_retry(monkeypatch):
    """
    Exercise a three-level mapped flow whose middle level hits a one-time error.

    After the first run the failing child should be retrying and its
    downstream child pending; running the flow a second time should resolve
    the error.

    DOES NOT WORK WITH DASK EXECUTORS because the second run needs shared state.
    """
    flow_run_id = str(uuid.uuid4())
    parent_run_ids = [str(uuid.uuid4()) for _ in range(3)]

    with prefect.Flow(name="test") as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=0)

    mapped_tasks = [t1, t2, t3]

    # Runs for the mapped tasks get known ids; any other task in the flow
    # reuses its task id as the run id.
    task_runs = [
        TaskRun(id=run_id, task_id=task.id, flow_run_id=flow_run_id)
        for run_id, task in zip(parent_run_ids, mapped_tasks)
    ]
    task_runs.extend(
        TaskRun(id=task.id, task_id=task.id, flow_run_id=flow_run_id)
        for task in flow.tasks
        if task not in mapped_tasks
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # Execute the flow once under the flow run's context.
        with prefect.context(flow_run_id=flow_run_id):
            CloudFlowRunner(flow=flow).run()

    def runs_for(task):
        # All recorded task runs that belong to ``task``.
        return [tr for tr in client.task_runs.values() if tr.task_id == task.id]

    def first_child(task):
        # The recorded run of ``task`` with map index 0.
        return next(tr for tr in runs_for(task) if tr.map_index == 0)

    run_flow()

    assert client.flow_runs[flow_run_id].state.is_running()
    for run_id in parent_run_ids:
        assert client.task_runs[run_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        assert len(runs_for(task)) == 4

    # t2's first child task should be retrying, t3's should still be pending
    assert isinstance(first_child(t2).state, Retrying)
    assert first_child(t3).state.is_pending()

    # RUN A SECOND TIME
    run_flow()

    # both first children should now be successful
    assert first_child(t2).state.is_successful()
    assert first_child(t3).state.is_successful()
Beispiel #9
0
def test_deep_map_with_a_retry(monkeypatch):
    """
    Exercise a three-level mapped flow whose middle level hits a one-time error.

    After the first run the failing child should be retrying and its
    downstream child pending. The retrying state's start time is then reset
    to now and the flow is run again, which should resolve the error.

    DOES NOT WORK WITH DASK EXECUTORS because the second run needs shared state.
    """
    flow_run_id = str(uuid.uuid4())
    parent_run_ids = [str(uuid.uuid4()) for _ in range(3)]

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(seconds=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    mapped_tasks = [t1, t2, t3]

    # Runs for the mapped tasks get known ids; any other task gets a random one.
    task_runs = [
        TaskRun(id=run_id, task_slug=task.slug, flow_run_id=flow_run_id)
        for run_id, task in zip(parent_run_ids, mapped_tasks)
    ]
    task_runs.extend(
        TaskRun(id=str(uuid.uuid4()), task_slug=task.slug, flow_run_id=flow_run_id)
        for task in flow.tasks
        if task not in mapped_tasks
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # Execute the flow once under the flow run's context.
        with prefect.context(flow_run_id=flow_run_id):
            CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    def runs_for(task):
        # All recorded task runs that belong to ``task``.
        return [tr for tr in client.task_runs.values() if tr.task_slug == task.slug]

    def first_child(task):
        # The recorded run of ``task`` with map index 0.
        return next(tr for tr in runs_for(task) if tr.map_index == 0)

    run_flow()

    assert client.flow_runs[flow_run_id].state.is_running()
    for run_id in parent_run_ids:
        assert client.task_runs[run_id].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        assert len(runs_for(task)) == 4

    # t2's first child task should be retrying, t3's should still be pending
    assert isinstance(first_child(t2).state, Retrying)
    assert first_child(t3).state.is_pending()

    # RUN A SECOND TIME with an artificially updated start time
    matching_ids = [
        run_id
        for run_id, tr in client.task_runs.items()
        if tr.task_slug == t2.slug and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    run_flow()

    # both first children should now be successful
    assert first_child(t2).state.is_successful()
    assert first_child(t3).state.is_successful()
def test_non_keyed_states_are_hydrated_correctly_with_retries(
    monkeypatch, tmpdir
):
    """
    Ensure that retries longer than 10 minutes properly "hydrate" upstream
    states so that mapped tasks retry correctly. For mapped tasks even
    non-data dependencies can affect the number of children spawned; here
    the mapped task depends on ``return_list`` only through ``upstream_tasks``.
    """

    @prefect.task
    def return_list():
        return [1, 2, 3]

    @prefect.task(max_retries=1, retry_delay=datetime.timedelta(minutes=20))
    def fail_once():
        if prefect.context.get("task_run_count", 0) >= 2:
            return 100
        raise SyntaxError("bad")

    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = fail_once.map(upstream_tasks=[return_list])

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # The two tasks of interest get known run ids; any other gets a random one.
    task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(
            id=task_run_id_2,
            task_slug=flow.slugs[return_list],
            flow_run_id=flow_run_id,
        ),
    ]
    task_runs.extend(
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in [t1, return_list]
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # Execute the flow once under the flow run's context.
        with prefect.context(flow_run_id=flow_run_id):
            CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    def runs_of_t1():
        # All recorded task runs that belong to the mapped task.
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[t1]
        ]

    def children_of_t1():
        # Runs of the mapped task whose map index is not -1.
        return [tr for tr in runs_of_t1() if tr.map_index != -1]

    run_flow()

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_successful()

    # there should be a total of 4 task runs corresponding to the mapped task
    assert len(runs_of_t1()) == 4

    # every child of t1 should be retrying
    assert all(isinstance(tr.state, Retrying) for tr in children_of_t1())

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    for child in children_of_t1():
        child.state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    run_flow()

    assert len(runs_of_t1()) == 4
    assert all(tr.state.is_successful() for tr in client.task_runs.values())
def test_states_are_hydrated_correctly_with_retries(monkeypatch, tmpdir):
    """
    Ensure that retries longer than 10 minutes properly "hydrate" upstream
    states so that mapped tasks retry correctly.

    After the first run, the retrying child's start time is reset to now and
    every recorded state's in-memory result value is cleared before the flow
    is run a second time.
    """
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())

    with prefect.Flow(name="test-retries", result=LocalResult(dir=tmpdir)) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)

    t2.max_retries = 1
    t2.retry_delay = datetime.timedelta(minutes=100)

    monkeypatch.setattr("requests.Session", MagicMock())
    monkeypatch.setattr("requests.post", MagicMock())

    # The two mapped tasks get known run ids; any other gets a random one.
    task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_slug=flow.slugs[t2], flow_run_id=flow_run_id),
    ]
    task_runs.extend(
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in [t1, t2]
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def run_flow():
        # Execute the flow once under the flow run's context.
        with prefect.context(flow_run_id=flow_run_id):
            CloudFlowRunner(flow=flow).run(executor=LocalExecutor())

    def runs_for(task):
        # All recorded task runs that belong to ``task``.
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[task]
        ]

    def first_child(task):
        # The recorded run of ``task`` with map index 0.
        return next(tr for tr in runs_for(task) if tr.map_index == 0)

    run_flow()

    assert client.flow_runs[flow_run_id].state.is_running()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in [t1, t2]:
        assert len(runs_for(task)) == 4

    # t2's first child task should be retrying
    assert isinstance(first_child(t2).state, Retrying)

    # RUN A SECOND TIME with an artificially updated start time
    # and remove all in-memory data
    matching_ids = [
        run_id
        for run_id, tr in client.task_runs.items()
        if tr.task_slug == flow.slugs[t2] and tr.map_index == 0
    ]
    failed_id = matching_ids[-1]
    client.task_runs[failed_id].state.start_time = pendulum.now("UTC")

    for tr in client.task_runs.values():
        tr.state._result.value = None

    run_flow()

    # t2's first child task should be successful
    assert first_child(t2).state.is_successful()
Beispiel #12
0
def test_deep_map_with_a_failure(monkeypatch, executor):
    """Run a three-level mapped flow whose middle level fails, with no retries.

    Args:
        monkeypatch: pytest fixture handed to ``MockedCloudClient``.
        executor: executor fixture; it is forwarded to ``CloudFlowRunner.run``
            so the flow actually executes on the executor under test.

    Expects the flow run to end up failed, the three mapped parent runs to be
    in a mapped state, four task runs to exist per mapped task, and the first
    children of ``t2`` and ``t3`` to be failed.
    """
    flow_run_id = str(uuid.uuid4())
    task_run_id_1 = str(uuid.uuid4())
    task_run_id_2 = str(uuid.uuid4())
    task_run_id_3 = str(uuid.uuid4())

    with prefect.Flow(name="test", result_handler=JSONResultHandler()) as flow:
        t1 = plus_one.map([-1, 0, 1])
        t2 = invert_fail_once.map(t1)
        t3 = plus_one.map(t2)

    mapped_tasks = [t1, t2, t3]

    # Runs for the mapped tasks get known ids; any other task gets a random one.
    task_runs = [
        TaskRun(id=task_run_id_1, task_slug=flow.slugs[t1], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_2, task_slug=flow.slugs[t2], flow_run_id=flow_run_id),
        TaskRun(id=task_run_id_3, task_slug=flow.slugs[t3], flow_run_id=flow_run_id),
    ]
    task_runs.extend(
        TaskRun(
            id=str(uuid.uuid4()), task_slug=flow.slugs[task], flow_run_id=flow_run_id
        )
        for task in flow.tasks
        if task not in mapped_tasks
    )
    client = MockedCloudClient(
        flow_runs=[FlowRun(id=flow_run_id)],
        task_runs=task_runs,
        monkeypatch=monkeypatch,
    )

    def runs_for(task):
        # All recorded task runs that belong to ``task``.
        return [
            tr for tr in client.task_runs.values() if tr.task_slug == flow.slugs[task]
        ]

    def first_child(task):
        # The recorded run of ``task`` with map index 0.
        return next(tr for tr in runs_for(task) if tr.map_index == 0)

    with prefect.context(flow_run_id=flow_run_id):
        # Pass the fixture through: previously ``executor`` was requested but
        # never used, so the run always fell back to the default executor.
        state = CloudFlowRunner(flow=flow).run(
            return_tasks=flow.tasks, executor=executor
        )

    assert state.is_failed()
    assert client.flow_runs[flow_run_id].state.is_failed()
    assert client.task_runs[task_run_id_1].state.is_mapped()
    assert client.task_runs[task_run_id_2].state.is_mapped()
    assert client.task_runs[task_run_id_3].state.is_mapped()

    # there should be a total of 4 task runs corresponding to each mapped task
    for task in mapped_tasks:
        assert len(runs_for(task)) == 4

    # t2's first child task should have failed
    assert first_child(t2).state.is_failed()

    # t3's first child task should have failed
    assert first_child(t3).state.is_failed()