Exemple #1
0
class TestCheckFlowPendingOrRunning:
    """Tests for ``FlowRunner.check_flow_is_pending_or_running``."""

    @pytest.mark.parametrize("state", [Pending(), Running(), Retrying(), Scheduled()])
    def test_pending_or_running_are_ok(self, state):
        """For each of these states the check returns the very same state object."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        returned = runner.check_flow_is_pending_or_running(state=state)
        assert returned is state

    @pytest.mark.parametrize(
        "state", [Finished(), Success(), Failed(), Skipped(), State()]
    )
    def test_not_pending_or_running_raise_endrun(self, state):
        """For each of these states the check raises ``ENDRUN``."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        # The runner is built inside the context manager so that everything
        # after the flow construction is covered by the ENDRUN expectation.
        with pytest.raises(ENDRUN):
            runner = FlowRunner(flow=single_task_flow)
            runner.check_flow_is_pending_or_running(state=state)
class TestRunFlowStep:
    """Tests for ``FlowRunner.get_flow_run_state`` on a one-task flow."""

    def test_running_state_finishes(self):
        """Starting from ``Running``, the returned state reports success."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        result = runner.get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert result.is_successful()

    @pytest.mark.parametrize(
        "state",
        [
            Pending(),
            Retrying(),
            Finished(),
            Success(),
            Failed(),
            Skipped(),
        ],
    )
    def test_other_states_raise_endrun(self, state):
        """Any of these starting states makes the call raise ``ENDRUN``."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            runner = FlowRunner(flow=single_task_flow)
            runner.get_flow_run_state(
                state=state,
                task_states={},
                task_contexts={},
                return_tasks=set(),
                task_runner_state_handlers=[],
                executor=Executor(),
            )

    def test_determine_final_state_has_final_say(self):
        """The state returned by an overridden ``determine_final_state`` is the result."""

        class MyFlowRunner(FlowRunner):
            # Ignores whatever it is given and always reports this failure.
            def determine_final_state(self, *args, **kwargs):
                return Failed("Very specific error message")

        runner = MyFlowRunner(flow=Flow(name="test", tasks=[Task()]))
        result = runner.get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert result.is_failed()
        assert result.message == "Very specific error message"
Exemple #3
0
def load_tweets(creds, path):
    """
    Process the ID file at `path` with a `FirehoseJob` and return all of the
    resulting chunks combined into one pandas object.

    Progress is printed to stdout: the path, then for each chunk the number
    of chunks so far / the chunk length, and the running total length.

    Args:
        - creds: passed as the first argument to `FirehoseJob`
        - path: the ID file handed to `FirehoseJob.process_id_file`

    Returns:
        - the result of `pd.concat` over every chunk's `to_pandas()` output

    Raises:
        - ENDRUN: with a `Skipped` state when no chunks were produced or the
            concatenated result has length zero
    """
    print(path)
    fh = FirehoseJob(creds,
                     PARQUET_SAMPLE_RATE_TIME_S=30,
                     save_to_neo=prefect.context.get('save_to_neo', False))
    cnt = 0
    frames = []
    for arr in fh.process_id_file(path, job_name="500m_COVID-REHYDRATE"):
        frames.append(arr.to_pandas())
        print('{}/{}'.format(len(frames), len(arr)))
        cnt += len(arr)
        print('TOTAL: ' + str(cnt))

    # pd.concat raises ValueError when given an empty list, which would hide
    # the intended "nothing to do -> Skipped" outcome; bail out before that.
    if not frames:
        raise ENDRUN(state=Skipped())

    data = pd.concat(frames, ignore_index=True, sort=False)
    # Chunks may exist but all be empty; that is still a skip.
    if len(data) == 0:
        raise ENDRUN(state=Skipped())
    return data
Exemple #4
0
class TestSetFlowToRunning:
    """Tests for ``FlowRunner.set_flow_to_running``."""

    @pytest.mark.parametrize("state", [Pending(), Retrying()])
    def test_pending_becomes_running(self, state):
        """These starting states come back as a running state."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        result = runner.set_flow_to_running(state=state)
        assert result.is_running()

    def test_running_stays_running(self):
        """A state that is already ``Running`` still comes back as running."""
        already_running = Running()
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        result = runner.set_flow_to_running(state=already_running)
        assert result.is_running()

    @pytest.mark.parametrize("state", [Finished(), Success(), Failed(), Skipped()])
    def test_other_states_raise_endrun(self, state):
        """These starting states make the call raise ``ENDRUN``."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            runner = FlowRunner(flow=single_task_flow)
            runner.set_flow_to_running(state=state)
    def check_upstream_skipped(
        self, state: State, upstream_states: Dict[Edge, State]
    ) -> State:
        """
        Ends the run as Skipped when an upstream state is skipped and this
        task has `skip_on_upstream_skip` set; otherwise passes `state` through.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states

        Returns:
            - State: the unchanged `state` when the run is not ended

        Raises:
            - ENDRUN: carrying a `Skipped` state, when `skip_on_upstream_skip`
                is truthy and at least one collected upstream state is skipped
        """

        candidates = set()  # type: Set[State]
        for edge, parent_state in upstream_states.items():
            # A Mapped parent is expanded into its children only when the edge
            # itself is not mapped; on a mapped edge each child of this task
            # decides for itself based on its own parent in the mapping.
            expand_children = isinstance(parent_state, Mapped) and not edge.mapped
            if expand_children:
                candidates.update(parent_state.map_states)
            else:
                candidates.add(parent_state)

        if not self.task.skip_on_upstream_skip:
            return state
        if not any(candidate.is_skipped() for candidate in candidates):
            return state

        self.logger.debug(
            "Task '{name}': Upstream states were skipped; ending run.".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        skip_message = (
            "Upstream task was skipped; if this was not the intended "
            "behavior, consider changing `skip_on_upstream_skip=False` "
            "for this task."
        )
        raise ENDRUN(state=Skipped(message=skip_message))
Exemple #6
0
    def check_upstream_skipped(
        self, state: State, upstream_states: Dict[Edge, State]
    ) -> State:
        """
        Looks for skipped upstream states and, if this task is configured to
        skip on upstream skips, ends the run in a Skipped state.

        Args:
            - state (State): the current state of this task
            - upstream_states (Dict[Edge, State]): the upstream states

        Returns:
            - State: `state` itself, when the run is not ended

        Raises:
            - ENDRUN: carrying a `Skipped` state, when `skip_on_upstream_skip`
                is truthy and at least one collected upstream state is skipped
        """

        flattened = set()  # type: Set[State]
        for parent_state in upstream_states.values():
            if not isinstance(parent_state, Mapped):
                flattened.add(parent_state)
                continue
            # a Mapped parent contributes its individual child states
            flattened.update(parent_state.map_states)

        should_skip = self.task.skip_on_upstream_skip and any(
            parent.is_skipped() for parent in flattened
        )
        if not should_skip:
            return state

        self.logger.debug(
            "Task '{name}': Upstream states were skipped; ending run.".format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        skipped_state = Skipped(
            message=(
                "Upstream task was skipped; if this was not the intended "
                "behavior, consider changing `skip_on_upstream_skip=False` "
                "for this task."
            )
        )
        raise ENDRUN(state=skipped_state)
Exemple #7
0
def load_path():
    """
    Find the tweet-ID file for the hour taken from the Prefect context.

    The timestamp is `backfill_timestamp` (parsed with `arrow.get`) when that
    key is in `prefect.context`, otherwise `scheduled_start_time`. The three
    data directories are searched in order for a file whose name ends with
    `<YYYY-MM-DD-HH>.txt`; missing directories are reported with a printed
    warning and skipped.

    Returns:
        - str: the path of the first matching file

    Raises:
        - ENDRUN: with a `Skipped` state when no directory holds a match
    """
    context = prefect.context
    if 'backfill_timestamp' in context:
        timestamp = arrow.get(context['backfill_timestamp'])
    else:
        timestamp = context['scheduled_start_time']
    print('TIMESTAMP = ', timestamp)
    wanted_ending = '{}.txt'.format(timestamp.strftime('%Y-%m-%d-%H'))

    candidate_dirs = (
        'COVID-19-TweetIDs/2020-01',
        'COVID-19-TweetIDs/2020-02',
        'COVID-19-TweetIDs/2020-03',
    )
    for data_dir in candidate_dirs:
        if not os.path.isdir(data_dir):
            print('WARNING: not a dir', data_dir)
            continue
        for path in Path(data_dir).iterdir():
            if path.name.endswith(wanted_ending):
                print(path)
                return str(path)

    # TODO: (wzy) Figure out how to cancel this gracefully
    raise ENDRUN(state=Skipped())
Exemple #8
0
class TestFlowVisualize:
    """Tests for ``Flow.visualize``, mostly by inspecting the returned graph's source."""

    @staticmethod
    def _ipython_kernel_stub():
        """Return a stand-in ``IPython`` module whose ``get_ipython()`` result
        carries a config with ``IPKernelApp`` set."""
        return MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True)))

    def test_visualize_raises_informative_importerror_without_graphviz(
            self, monkeypatch):
        """With ``sys.path`` emptied, ``visualize`` raises an ImportError that
        names the ``prefect[viz]`` extra."""
        f = Flow(name="test")
        f.add_task(Task())

        with monkeypatch.context() as m:
            # presumably this makes the graphviz import inside visualize() fail
            m.setattr(sys, "path", "")
            with pytest.raises(ImportError) as exc:
                f.visualize()

        assert "pip install prefect[viz]" in repr(exc.value)

    def test_viz_returns_graph_object_if_in_ipython(self):
        """With the IPython stub installed, ``visualize`` returns a graph whose
        source describes the task node."""
        # NOTE(review): unused name; looks like a guard that graphviz is
        # installed before the test proceeds -- confirm before removing.
        import graphviz

        ipython = self._ipython_kernel_stub()
        with patch.dict("sys.modules", IPython=ipython):
            f = Flow(name="test")
            f.add_task(Task(name="a_nice_task"))
            graph = f.visualize()
        assert "label=a_nice_task" in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping(self):
        """A mapped task is drawn as a box labelled ``<map>`` with dashed input edges."""
        ipython = self._ipython_kernel_stub()
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                AddTask(name="a_nice_task").map(
                    x=Task(name="a_list_task"), y=8)
            graph = f.visualize()
        assert 'label="a_nice_task <map>" shape=box' in graph.source
        assert "label=a_list_task shape=ellipse" in graph.source
        assert "label=x style=dashed" in graph.source
        assert "label=y style=dashed" in graph.source

    @pytest.mark.parametrize("state", [Success(), Failed(), Skipped()])
    def test_viz_if_flow_state_provided(self, state):
        """When a flow state is supplied, the node is colored with the task
        state's color followed by ``80``."""
        # NOTE(review): unused name; looks like a guard that graphviz is
        # installed before the test proceeds -- confirm before removing.
        import graphviz

        ipython = self._ipython_kernel_stub()
        with patch.dict("sys.modules", IPython=ipython):
            t = Task(name="a_nice_task")
            f = Flow(name="test")
            f.add_task(t)
            graph = f.visualize(flow_state=Success(result={t: state}))
        assert "label=a_nice_task" in graph.source
        assert 'color="' + state.color + '80"' in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping_if_flow_state_provided(self):
        """A ``Mapped`` task state yields one colored node per child state and
        one dashed edge per input per child."""
        ipython = self._ipython_kernel_stub()
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state = Mapped(map_states=[Success(), Failed()])
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                res = add.map(x=list_task, y=8)
            graph = f.visualize(flow_state=Success(result={
                res: map_state,
                list_task: Success()
            }))

        # one colored node for each mapped result
        assert 'label="a_nice_task <map>" color="#00800080"' in graph.source
        assert 'label="a_nice_task <map>" color="#FF000080"' in graph.source
        assert 'label=a_list_task color="#00800080"' in graph.source
        assert 'label=8 color="#00000080"' in graph.source

        # two edges for each input to add()
        for var in ["x", "y"]:
            for index in [0, 1]:
                assert "{0} [label={1} style=dashed]".format(
                    index, var) in graph.source

    def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
        """With two chained mapped tasks, child ``i`` of the first is connected
        to child ``i`` of the second by a dashed ``x`` edge."""
        ipython = self._ipython_kernel_stub()
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")

        map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
        map_state2 = Mapped(map_states=[Success(), Failed()])

        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                first_res = add.map(x=list_task, y=8)
                with pytest.warns(
                        UserWarning
                ):  # making a copy of a task with dependencies
                    res = first_res.map(x=first_res, y=9)
            graph = f.visualize(flow_state=Success(
                result={
                    res: map_state1,
                    list_task: Success(),
                    first_res: map_state2,
                }))

        # The formatted edge text must actually be looked up in the graph
        # source; asserting the bare (non-empty) string would always pass.
        for index in ["0", "1"]:
            expected_edge = "{first} -> {second} [label=x style=dashed]".format(
                first=str(id(first_res)) + index,
                second=str(id(res)) + index)
            assert expected_edge in graph.source

    @pytest.mark.parametrize(
        "error",
        [
            ImportError("abc"),
            ValueError("abc"),
            TypeError("abc"),
            NameError("abc"),
            AttributeError("abc"),
        ],
    )
    def test_viz_renders_if_ipython_isnt_installed_or_errors(self, error):
        """``visualize`` completes without raising even when ``get_ipython()``
        raises any of the parametrized errors."""
        graphviz = MagicMock()
        ipython = MagicMock(get_ipython=MagicMock(side_effect=error))
        with patch.dict("sys.modules", graphviz=graphviz, IPython=ipython):
            with Flow(name="test") as f:
                AddTask(name="a_nice_task").map(
                    x=Task(name="a_list_task"), y=8)
            f.visualize()
Exemple #9
0
             assert_true={"is_cached", "is_finished", "is_successful"}),
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(),
             assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Retrying(),
             assert_true={"is_pending", "is_scheduled", "is_retrying"}),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Skipped(),
             assert_true={"is_finished", "is_successful", "is_skipped"}),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
        dict(state=Success(), assert_true={"is_finished", "is_successful"}),
        dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
        dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}),
    ],
)
def test_state_is_methods(state_check):
    """
    Iterates over all of the "is_*()" methods of the state, asserting that each one is
    False, unless the name of that method is provided as `assert_true`.

    For example, if `state_check == (Pending(), {'is_pending'})`, then this method will
    assert that `state.is_running()` is False, `state.is_successful()` is False, etc. but
    `state.is_pending()` is True.
Exemple #10
0
        dict(state=ClientFailed(), assert_true={"is_meta_state"}),
        dict(state=Failed(), assert_true={"is_finished", "is_failed"}),
        dict(state=Finished(), assert_true={"is_finished"}),
        dict(state=Looped(), assert_true={"is_finished", "is_looped"}),
        dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}),
        dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}),
        dict(state=Pending(), assert_true={"is_pending"}),
        dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}),
        dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}),
        dict(
            state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"}
        ),
        dict(state=Running(), assert_true={"is_running"}),
        dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}),
        dict(
            state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"}
        ),
        dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}),
        dict(state=Success(), assert_true={"is_finished", "is_successful"}),
        dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}),
        dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}),
    ],
)
def test_state_is_methods(state_check):
    """
    Iterates over all of the "is_*()" methods of the state, asserting that each one is
    False, unless the name of that method is provided as `assert_true`.

    For example, if `state_check == (Pending(), {'is_pending'})`, then this method will
    assert that `state.is_running()` is False, `state.is_successful()` is False, etc. but
    `state.is_pending()` is True.
Exemple #11
0
 def test_state_type_methods_with_skipped_state(self):
     """A ``Skipped`` state is finished, skipped and successful; every other
     ``is_*`` check listed here is falsy."""
     skipped = Skipped()
     # (method name, expected truthiness), checked in this order
     expectations = (
         ("is_cached", False),
         ("is_pending", False),
         ("is_retrying", False),
         ("is_running", False),
         ("is_finished", True),
         ("is_skipped", True),
         ("is_scheduled", False),
         ("is_successful", True),
         ("is_failed", False),
         ("is_mapped", False),
         ("is_meta_state", False),
     )
     for method_name, expected in expectations:
         assert bool(getattr(skipped, method_name)()) is expected