Ejemplo n.º 1
0
def test_flow_runner_runs_flow_with_2_dependent_tasks_and_first_task_fails_and_second_has_trigger():
    """Check a failing task followed by an ``all_failed``-triggered task.

    The test asserts that the upstream task ends ``Failed``, the downstream
    task ends ``Success``, and the flow as a whole ends ``Success``.
    """
    pipeline = Flow(name="test")
    failing = ErrorTask()
    triggered = SuccessTask(trigger=prefect.triggers.all_failed)
    pipeline.add_edge(failing, triggered)

    runner = FlowRunner(flow=pipeline)
    outcome = runner.run(return_tasks=[failing, triggered])

    # The flow state is determined by terminal states, so the flow is
    # expected to succeed even though its first task failed.
    assert isinstance(outcome, Success)

    per_task = outcome.result
    assert isinstance(per_task[failing], Failed)
    assert isinstance(per_task[triggered], Success)
Ejemplo n.º 2
0
def test_flow_runner_runs_basic_flow_with_2_dependent_tasks_and_first_task_fails_with_FAIL():
    """Check a ``RaiseFailTask`` followed by a default-trigger task.

    The test asserts that the flow ends ``Failed``, that the upstream task is
    ``Failed`` but not ``TriggerFailed``, and that the downstream task is
    ``TriggerFailed``.
    """
    pipeline = Flow(name="test")
    upstream = RaiseFailTask()
    downstream = SuccessTask()
    pipeline.add_edge(upstream, downstream)

    runner = FlowRunner(flow=pipeline)
    outcome = runner.run(return_tasks=[upstream, downstream])
    assert isinstance(outcome, Failed)

    upstream_state = outcome.result[upstream]
    # The upstream failure must be a plain failure, not a trigger failure.
    assert isinstance(upstream_state, Failed)
    assert not isinstance(upstream_state, TriggerFailed)

    assert isinstance(outcome.result[downstream], TriggerFailed)
Ejemplo n.º 3
0
def test_all_pipeline_method_steps_are_called():
    """Check that ``FlowRunner.run`` calls each listed pipeline step exactly once."""
    step_names = (
        "initialize_run",
        "check_flow_is_pending_or_running",
        "set_flow_to_running",
        "get_flow_run_state",
    )

    runner = FlowRunner(Flow(name="test"))

    # Swap every step for a mock so its calls can be counted afterwards.
    for step in step_names:
        setattr(runner, step, MagicMock())

    # The value returned by initialize_run is unpacked, which a bare MagicMock
    # doesn't support, so that step returns a FlowRunnerInitializeResult.
    init_result = FlowRunnerInitializeResult(
        MagicMock(), MagicMock(), MagicMock(), MagicMock()
    )
    runner.initialize_run = MagicMock(return_value=init_result)

    runner.run()

    observed = {step: getattr(runner, step).call_count for step in step_names}
    assert observed == dict.fromkeys(step_names, 1)
Ejemplo n.º 4
0
def test_parameters_are_placed_into_context_including_defaults():
    """Check that a task sees both passed and defaulted parameters in context.

    ``y`` is supplied explicitly (42) while ``z`` is left to its default (19);
    the task returning ``prefect.context.parameters`` must report both.
    """

    @prefect.task
    def whats_in_ctx():
        return prefect.context.parameters

    param_y = prefect.Parameter("y", default=99)
    param_z = prefect.Parameter("z", default=19)
    flow = Flow(name="test", tasks=[param_y, param_z, whats_in_ctx])

    runner = FlowRunner(flow=flow)
    final_state = runner.run(return_tasks=[whats_in_ctx], parameters={"y": 42})
    assert isinstance(final_state, Success)

    seen_parameters = final_state.result[whats_in_ctx].result
    assert seen_parameters == {"y": 42, "z": 19}
Ejemplo n.º 5
0
    def test_initialize_sets_task_contexts(self):
        """Check that ``initialize_run`` builds a context entry for every task.

        Each task in the flow is expected to map to a dict holding its
        ``task_name`` and ``task_slug``.
        """
        plain_task = Task(name="t1")
        param_task = Parameter(name="x")
        flow = Flow(name="test", tasks=[plain_task, param_task])

        runner = FlowRunner(flow)
        initialized = runner.initialize_run(
            state=Pending(),
            task_states={},
            context={},
            task_contexts={},
            parameters={},
        )

        expected_contexts = {}
        for task in flow.tasks:
            expected_contexts[task] = {
                "task_name": task.name,
                "task_slug": flow.slugs[task],
            }
        assert initialized.task_contexts == expected_contexts
Ejemplo n.º 6
0
    def test_manual_only_trigger_caches_inputs(self, executor):
        """Check that a resumed ``manual_only`` task runs with its cached inputs.

        The first run uses ``x=11`` and leaves the flow running. The task is
        then marked ``Resume`` with the inputs cached on its state, and the
        second run is given ``x=1``; the asserted result of 12 shows the
        second run did not use ``x=1``.
        """
        with Flow(name="test") as flow:
            param_x = Parameter("x")
            upstream = SuccessTask()
            adder = AddTask(trigger=manual_only)
            total = adder(param_x, upstream)

        initial_run = FlowRunner(flow=flow).run(
            executor=executor, parameters={"x": 11}, return_tasks=flow.tasks
        )
        assert initial_run.is_running()

        # Replace the task's state with a Resume state carrying the inputs
        # that were cached during the first run.
        cached = initial_run.result[total].cached_inputs
        initial_run.result.update({total: Resume(cached_inputs=cached)})

        resumed_run = FlowRunner(flow=flow).run(
            executor=executor,
            parameters={"x": 1},
            return_tasks=[total],
            task_states=initial_run.result,
        )
        assert isinstance(resumed_run, Success)
        assert resumed_run.result[total].result == 12
Ejemplo n.º 7
0
    def test_retries_ignore_cached_inputs_if_upstream_results_are_available(
        self, executor
    ):
        """Check that a retry uses the upstream result rather than cached inputs.

        After the first run the upstream state's result is overwritten with
        100 and the downstream state's cached input is set to ``Result(2)``.
        The second run is asserted to succeed with a result of ``1 / 99``.
        """
        with Flow(name="test") as flow:
            counter = CountTask()
            returner = ReturnTask(max_retries=1, retry_delay=datetime.timedelta(0))
            counted = counter()
            returned = returner(counted)

        initial_run = FlowRunner(flow=flow).run(
            executor=executor, return_tasks=flow.tasks
        )
        assert initial_run.is_running()

        states = initial_run.result
        # Modify the upstream result ...
        states[counted].result = 100
        # ... and artificially alter the downstream state's cached inputs.
        states[returned].cached_inputs = {"x": Result(2)}

        with raise_on_exception():  # without caching we'd expect a KeyError
            retry_run = FlowRunner(flow=flow).run(
                executor=executor,
                return_tasks=[returned],
                task_states=initial_run.result,
            )
        assert isinstance(retry_run, Success)
        assert retry_run.result[returned].result == 1 / 99
Ejemplo n.º 8
0
    def test_retries_use_cached_inputs(self, executor):
        """Check that a retry uses cached inputs when the upstream result is gone.

        After the first run the upstream state's result is replaced with
        ``NoResult`` and the downstream state's cached input is set to
        ``Result(2)``. The second run is asserted to succeed with a result
        of 1.
        """
        with Flow(name="test") as flow:
            counter = CountTask()
            returner = ReturnTask(max_retries=1, retry_delay=datetime.timedelta(0))
            counted = counter()
            returned = returner(counted)

        initial_run = FlowRunner(flow=flow).run(
            executor=executor, return_tasks=flow.tasks
        )
        assert initial_run.is_running()

        states = initial_run.result
        # Remove the upstream result to see if the cached inputs are picked up.
        states[counted].result = NoResult
        # Artificially alter the downstream state's cached inputs.
        states[returned].cached_inputs = {"x": Result(2)}

        with raise_on_exception():  # without caching we'd expect a KeyError
            retry_run = FlowRunner(flow=flow).run(
                executor=executor,
                return_tasks=[returned],
                task_states=initial_run.result,
            )
        assert isinstance(retry_run, Success)
        assert retry_run.result[returned].result == 1
Ejemplo n.º 9
0
 def test_determine_final_state_preserves_running_states_when_tasks_still_running(
     self,
 ):
     """Check ``get_flow_run_state`` returns the same ``Running`` state object.

     The single task is given a ``Retrying`` state whose start time is one
     day in the future; the returned flow state must be the input state.
     """
     task = Task()
     runner = FlowRunner(flow=Flow(name="test", tasks=[task]))
     running = Running()

     # A retry scheduled one day from now.
     retry_tomorrow = Retrying(start_time=pendulum.now("utc").add(days=1))

     returned = runner.get_flow_run_state(
         state=running,
         task_states={task: retry_tomorrow},
         task_contexts={},
         return_tasks=set(),
         task_runner_state_handlers=[],
         executor=LocalExecutor(),
     )
     assert returned is running
Ejemplo n.º 10
0
    def test_parameter_precedance(self):
        """Check explicit parameters win over same-named context parameters.

        The context supplies ``x=2, y=1`` and the explicit parameters supply
        ``x=1``; the resulting context parameters must be ``x=1, y=1``.
        """
        param_x = Parameter(name="x")
        runner = FlowRunner(Flow(name="test", tasks=[param_x]))

        context_parameters = {"x": 2, "y": 1}
        initialized = runner.initialize_run(
            state=Pending(),
            task_states={},
            context={"parameters": context_parameters},
            task_contexts={},
            parameters={"x": 1},
        )

        merged = initialized.context["parameters"]
        assert merged == {"x": 1, "y": 1}
Ejemplo n.º 11
0
    def test_pause_task_doesnt_pause_sometimes(self):
        """Check a task given a ``Resume`` state succeeds while its downstream pauses.

        Both tasks call ``tasks.pause_task()``. The first is started from a
        ``Resume`` state and is asserted to succeed; the second has no
        provided state and is asserted to end ``Paused``.
        """

        class OneTask(Task):
            def run(self):
                # Always requests a pause before returning.
                tasks.pause_task()
                return 1

        class AddTask(Task):
            def run(self, x, y):
                # Only requests a pause when both inputs are equal.
                inputs_match = x == y
                if inputs_match:
                    tasks.pause_task()
                return x + y

        with Flow(name="test") as flow:
            added = AddTask()(1, 1)
            one = OneTask()(upstream_tasks=[added])

        runner = FlowRunner(flow=flow)
        final_state = runner.run(
            task_states={added: Resume()}, return_tasks=[added, one]
        )

        per_task = final_state.result
        assert per_task[added].is_successful()
        assert isinstance(per_task[one], Paused)
Ejemplo n.º 12
0
def test_secrets_dynamically_pull_from_context():
    """Check a secret task succeeds once the secret is present in context.

    The first run has no ``foo`` secret in context and must leave the task
    retrying with the flow still running. The second run happens inside a
    context that provides ``foo`` and must end successfully.
    """
    flow = Flow(name="test")
    secret_task = PrefectSecret(
        "foo", max_retries=1, retry_delay=datetime.timedelta(0)
    )
    flow.add_task(secret_task)

    first_pass = FlowRunner(flow=flow).run(return_tasks=[secret_task])
    assert first_pass.is_running()
    assert first_pass.result[secret_task].is_retrying()

    with prefect.context(secrets={"foo": 42}):
        time.sleep(1)
        second_pass = FlowRunner(flow=flow).run(task_states=first_pass.result)

    assert second_pass.is_successful()
Ejemplo n.º 13
0
 def test_not_pending_or_running_raise_endrun(self, state):
     """Check ``check_flow_is_pending_or_running`` raises ``ENDRUN`` for ``state``."""
     single_task_flow = Flow(name="test", tasks=[Task()])
     with pytest.raises(ENDRUN):
         runner = FlowRunner(flow=single_task_flow)
         runner.check_flow_is_pending_or_running(state=state)
Ejemplo n.º 14
0
def test_flow_runner_has_logger():
    """Check the runner's logger is named ``prefect.FlowRunner``."""
    runner = FlowRunner(Flow(name="test"))
    logger_name = runner.logger.name
    assert logger_name == "prefect.FlowRunner"
Ejemplo n.º 15
0
 def test_task_runner_handlers_are_called(self):
     """Check a runner-level state handler is recorded twice for an empty flow."""
     runner = FlowRunner(
         flow=Flow(name="test"), state_handlers=[flow_runner_handler]
     )
     runner.run()

     # The flow changed state twice: Pending -> Running -> Success.
     recorded_calls = handler_results["FlowRunner"]
     assert recorded_calls == 2
Ejemplo n.º 16
0
 def test_multiple_flow_handlers_are_called(self):
     """Check that two registered flow state handlers record four calls in total."""
     handlers = [flow_handler, flow_handler]
     flow = Flow(name="test", state_handlers=handlers)

     runner = FlowRunner(flow=flow)
     runner.run()

     # Each handler sees two state changes: Pending -> Running -> Success.
     recorded_calls = handler_results["Flow"]
     assert recorded_calls == 4
Ejemplo n.º 17
0
 def test_flow_on_failure_is_not_called(self):
     """Check the flow's ``on_failure`` callback is not called for this run."""
     failure_callback = MagicMock()
     flow = Flow(name="test", on_failure=failure_callback, tasks=[Task()])

     runner = FlowRunner(flow=flow)
     runner.run()

     assert not failure_callback.called
Ejemplo n.º 18
0
def test_parameters_can_be_set_in_context_if_none_passed():
    """Check a parameter value is taken from context when ``parameters`` is empty."""
    param_x = prefect.Parameter("x")
    runner = FlowRunner(Flow(name="test", tasks=[param_x]))

    final_state = runner.run(
        parameters={},
        context={"parameters": {"x": 5}},
        return_tasks=[param_x],
    )

    param_state = final_state.result[param_x]
    assert param_state.result == 5
Ejemplo n.º 19
0
 def test_running_stays_running(self):
     """Check ``set_flow_to_running`` returns a running state for a ``Running`` input."""
     already_running = Running()
     runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))

     resulting_state = runner.set_flow_to_running(state=already_running)
     assert resulting_state.is_running()
Ejemplo n.º 20
0
 def test_other_states_raise_endrun(self, state):
     """Check ``set_flow_to_running`` raises ``ENDRUN`` for the given ``state``."""
     single_task_flow = Flow(name="test", tasks=[Task()])
     with pytest.raises(ENDRUN):
         runner = FlowRunner(flow=single_task_flow)
         runner.set_flow_to_running(state=state)
Ejemplo n.º 21
0
 def test_scheduled_states_with_past_start_time(self):
     """Check a ``Scheduled`` state whose start time has passed is returned as-is."""
     one_minute_ago = pendulum.now("utc") - datetime.timedelta(minutes=1)
     scheduled = Scheduled(start_time=one_minute_ago)

     runner = FlowRunner(flow=Flow(name="test"))
     returned = runner.check_flow_reached_start_time(state=scheduled)
     assert returned is scheduled
Ejemplo n.º 22
0
 def test_pending_becomes_running(self, state):
     """Check ``set_flow_to_running`` returns a running state for the given ``state``."""
     runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
     resulting_state = runner.set_flow_to_running(state=state)
     assert resulting_state.is_running()
Ejemplo n.º 23
0
 def test_scheduled_states_without_start_time(self):
     """Check a ``Scheduled`` state with no start time is returned as-is."""
     scheduled = Scheduled(start_time=None)

     runner = FlowRunner(flow=Flow(name="test"))
     returned = runner.check_flow_reached_start_time(state=scheduled)
     assert returned is scheduled
Ejemplo n.º 24
0
 def test_initialize_sets_none_to_pending(self):
     """Check ``initialize_run`` yields a pending state when ``state`` is ``None``."""
     runner = FlowRunner(Flow(name="test"))
     init_kwargs = dict(
         state=None,
         task_states={},
         context={},
         task_contexts={},
         parameters={},
     )
     initialized = runner.initialize_run(**init_kwargs)
     assert initialized.state.is_pending()
#  where: 0 and 3 - Lexical Matching
#             1 - Spatial (Relative to screen)
#             2 - Spatial (Relative to other elements)

# Build a flow that prepares the train/dev/test datasets and hands them to the
# LayoutLM trainer task. Each split is prepared inside a `tags(...)` context
# named after the split.
with Flow("Running the Transformers for Pair Classification") as flow1:
    with tags("train"):
        train_input = prepare_rico_task(train_path,
                                        type_instructions=INSTRUCTION_TYPE)
        # Only the "data" entry of the prepared input feeds the dataset task.
        train_dataset = prepare_rico_layout_lm_task(train_input["data"])
    with tags("dev"):
        dev_input = prepare_rico_task(dev_path,
                                      type_instructions=INSTRUCTION_TYPE)
        dev_dataset = prepare_rico_layout_lm_task(dev_input["data"])
    with tags("test"):
        test_input = prepare_rico_task(test_path,
                                       type_instructions=INSTRUCTION_TYPE)
        test_dataset = prepare_rico_layout_lm_task(test_input["data"])
    # The trainer receives all three datasets plus the "mapping" entries of
    # the dev and test inputs (the train mapping is not passed).
    layout_lm_trainer_task(
        train_dataset=train_dataset,
        dev_dataset=dev_dataset,
        test_dataset=test_dataset,
        mapping_dev=dev_input["mapping"],
        mapping_test=test_input["mapping"],
        task_name="layout_lm_pair_rico",
        output_dir="./cache/layout_lm_pair_rico/",
        mode="train",
        eval_fn=pair_evaluation,
    )

# Execute the flow; the returned state is discarded.
FlowRunner(flow=flow1).run()
Ejemplo n.º 26
0
 def test_pending_or_running_are_ok(self, state):
     """Check ``check_flow_is_pending_or_running`` returns the given ``state`` object."""
     runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
     returned = runner.check_flow_is_pending_or_running(state=state)
     assert returned is state
Ejemplo n.º 27
0
 def test_initialize_returns_state_if_provided(self, state):
     """Check ``initialize_run`` returns the very state object it was given."""
     runner = FlowRunner(Flow(name="test"))
     init_kwargs = dict(
         state=state,
         task_states={},
         context={},
         task_contexts={},
         parameters={},
     )
     initialized = runner.initialize_run(**init_kwargs)
     assert initialized.state is state
Ejemplo n.º 28
0
def test_task_map_with_no_upstream_results_and_a_mapped_state(executor):
    """
    Ensure mapped tasks properly generate their children even when the flow is
    run multiple times without upstream results being available. The pipeline
    is started from several different sets of task states, each leaving some
    upstream results missing, and every run must still succeed with a final
    sum of 12.
    """

    @prefect.task
    def numbers():
        return [1, 2, 3]

    @prefect.task
    def plus_one(x):
        return x + 1

    @prefect.task
    def get_sum(x):
        return sum(x)

    with Flow(name="test") as f:
        n = numbers()
        x = plus_one.map(n)
        y = plus_one.map(x)
        s = get_sum(y)

    def run_and_check(starting_states):
        # Run the flow from the given task states and verify the final sum.
        final = FlowRunner(flow=f).run(
            executor=executor,
            task_states=starting_states,
            return_tasks=f.tasks,
        )
        assert final.is_successful()
        assert final.result[s].result == 12

    # first run with a missing result from `n` but map_states for `x`
    run_and_check(
        {
            n: Success(),
            x: Mapped(
                map_states=[
                    Pending(cached_inputs={"x": Result(value)})
                    for value in (1, 2, 3)
                ]
            ),
        }
    )

    # next run with missing results for n and x
    run_and_check(
        {
            n: Success(),
            x: Mapped(map_states=[Success() for _ in range(3)]),
            y: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Retrying(cached_inputs={"x": Result(4)}),
                ]
            ),
        }
    )

    # next run with missing results for n, x, and y
    run_and_check(
        {
            n: Success(),
            x: Mapped(map_states=[Success() for _ in range(3)]),
            y: Mapped(
                map_states=[
                    Success(result=3),
                    Success(result=4),
                    Success(result=5),
                ]
            ),
        }
    )
Ejemplo n.º 29
0
 def test_non_scheduled_states(self, state):
     """Check ``check_flow_reached_start_time`` returns the given ``state`` object."""
     runner = FlowRunner(flow=Flow(name="test"))
     returned = runner.check_flow_reached_start_time(state=state)
     assert returned is state