class TestCheckFlowPendingOrRunning:
    """Exercises `FlowRunner.check_flow_is_pending_or_running`."""

    @pytest.mark.parametrize(
        "state",
        [Pending(), Running(), Retrying(), Scheduled()],
    )
    def test_pending_or_running_are_ok(self, state):
        """The very same state object is handed back for these states."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        runner = FlowRunner(flow=single_task_flow)
        assert runner.check_flow_is_pending_or_running(state=state) is state

    @pytest.mark.parametrize(
        "state",
        [Finished(), Success(), Failed(), Skipped(), State()],
    )
    def test_not_pending_or_running_raise_endrun(self, state):
        """Any of these states makes the check raise ENDRUN."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=single_task_flow).check_flow_is_pending_or_running(
                state=state
            )
class TestRunFlowStep:
    """Exercises `FlowRunner.get_flow_run_state` for a one-task flow."""

    def test_running_state_finishes(self):
        """Starting from Running, the returned state is successful."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        final_state = runner.get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert final_state.is_successful()

    @pytest.mark.parametrize(
        "state",
        [Pending(), Retrying(), Finished(), Success(), Failed(), Skipped()],
    )
    def test_other_states_raise_endrun(self, state):
        """Every non-Running state listed above makes the step raise ENDRUN."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=single_task_flow).get_flow_run_state(
                state=state,
                task_states={},
                task_contexts={},
                return_tasks=set(),
                task_runner_state_handlers=[],
                executor=Executor(),
            )

    def test_determine_final_state_has_final_say(self):
        """Whatever `determine_final_state` returns is what the step returns."""
        expected_message = "Very specific error message"

        class MyFlowRunner(FlowRunner):
            # Ignore all inputs and always report the same failure.
            def determine_final_state(self, *args, **kwargs):
                return Failed(expected_message)

        runner = MyFlowRunner(flow=Flow(name="test", tasks=[Task()]))
        final_state = runner.get_flow_run_state(
            state=Running(),
            task_states={},
            task_contexts={},
            return_tasks=set(),
            task_runner_state_handlers=[],
            executor=LocalExecutor(),
        )
        assert final_state.is_failed()
        assert final_state.message == expected_message
def load_tweets(creds, path):
    """
    Process a tweet-ID file with a `FirehoseJob` and return all results as
    one DataFrame.

    Args:
        - creds: credentials passed straight through to `FirehoseJob`
        - path: path of the ID file handed to `FirehoseJob.process_id_file`

    Returns:
        - the concatenation (fresh index, unsorted columns) of every chunk
          yielded by `process_id_file`, each converted via `to_pandas()`

    Raises:
        - ENDRUN: with a `Skipped` state when no chunks were yielded at all,
          or when the concatenated frame has no rows
    """
    print(path)
    fh = FirehoseJob(
        creds,
        PARQUET_SAMPLE_RATE_TIME_S=30,
        save_to_neo=prefect.context.get('save_to_neo', False),
    )
    cnt = 0
    frames = []
    for arr in fh.process_id_file(path, job_name="500m_COVID-REHYDRATE"):
        frames.append(arr.to_pandas())
        # progress: number of chunks so far / rows in this chunk
        print('{}/{}'.format(len(frames), len(arr)))
        cnt += len(arr)
        print('TOTAL: ' + str(cnt))

    # pd.concat raises ValueError on an empty list, which would mask the
    # intended "nothing to do" skip below, so handle that case first.
    if not frames:
        raise ENDRUN(state=Skipped())

    data = pd.concat(frames, ignore_index=True, sort=False)
    # Chunks may exist yet all be empty; that is still a skip.
    if len(data) == 0:
        raise ENDRUN(state=Skipped())
    return data
class TestSetFlowToRunning:
    """Exercises `FlowRunner.set_flow_to_running`."""

    @pytest.mark.parametrize("state", [Pending(), Retrying()])
    def test_pending_becomes_running(self, state):
        """Pending and Retrying inputs yield a running state."""
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        assert runner.set_flow_to_running(state=state).is_running()

    def test_running_stays_running(self):
        """A Running input still yields a running state."""
        already_running = Running()
        runner = FlowRunner(flow=Flow(name="test", tasks=[Task()]))
        result = runner.set_flow_to_running(state=already_running)
        assert result.is_running()

    @pytest.mark.parametrize(
        "state",
        [Finished(), Success(), Failed(), Skipped()],
    )
    def test_other_states_raise_endrun(self, state):
        """Finished-type inputs make the step raise ENDRUN."""
        single_task_flow = Flow(name="test", tasks=[Task()])
        with pytest.raises(ENDRUN):
            FlowRunner(flow=single_task_flow).set_flow_to_running(state=state)
def check_upstream_skipped(
    self, state: State, upstream_states: Dict[Edge, State]
) -> State:
    """
    Ends the run with a `Skipped` state when an upstream task was skipped
    and this task has `skip_on_upstream_skip` set.

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): upstream states keyed by the
            edge that connects them to this task

    Returns:
        - State: the unchanged input state when the run should continue

    Raises:
        - ENDRUN: carrying a `Skipped` state when the check trips
    """
    considered = set()  # type: Set[State]
    for edge, parent_state in upstream_states.items():
        # A Mapped upstream reached through a non-mapped edge is expanded
        # into its children; through a mapped edge it is kept as a single
        # entry so each mapped child can make its own skip decision.
        expand_children = isinstance(parent_state, Mapped) and not edge.mapped
        if expand_children:
            considered.update(parent_state.map_states)
        else:
            considered.add(parent_state)

    if not self.task.skip_on_upstream_skip or not any(
        candidate.is_skipped() for candidate in considered
    ):
        return state

    task_label = prefect.context.get("task_full_name", self.task.name)
    self.logger.debug(
        "Task '{name}': Upstream states were skipped; ending run.".format(
            name=task_label
        )
    )
    skip_message = (
        "Upstream task was skipped; if this was not the intended "
        "behavior, consider changing `skip_on_upstream_skip=False` "
        "for this task."
    )
    raise ENDRUN(state=Skipped(message=skip_message))
def check_upstream_skipped(
    self, state: State, upstream_states: Dict[Edge, State]
) -> State:
    """
    Stops the run as `Skipped` if any upstream state is skipped and the
    task opts in through `skip_on_upstream_skip`.

    Args:
        - state (State): the current state of this task
        - upstream_states (Dict[Edge, State]): the upstream states

    Returns:
        - State: the input state, untouched, when no skip is triggered

    Raises:
        - ENDRUN: with a `Skipped` state when the skip is triggered
    """
    flattened = set()  # type: Set[State]
    for upstream_state in upstream_states.values():
        # Mapped states contribute their children rather than themselves.
        if isinstance(upstream_state, Mapped):
            members = upstream_state.map_states
        else:
            members = [upstream_state]
        flattened.update(members)

    should_skip = self.task.skip_on_upstream_skip and any(
        member.is_skipped() for member in flattened
    )
    if should_skip:
        log_line = "Task '{name}': Upstream states were skipped; ending run."
        self.logger.debug(
            log_line.format(
                name=prefect.context.get("task_full_name", self.task.name)
            )
        )
        raise ENDRUN(
            state=Skipped(
                message=(
                    "Upstream task was skipped; if this was not the intended "
                    "behavior, consider changing `skip_on_upstream_skip=False` "
                    "for this task."
                )
            )
        )
    return state
def load_path():
    """
    Find the tweet-ID file whose name ends with the current hour stamp.

    The hour comes from `backfill_timestamp` in the Prefect context when
    present (parsed with `arrow.get`), otherwise from `scheduled_start_time`.
    The known data directories are searched in order and the first file
    whose name ends with `<YYYY-MM-DD-HH>.txt` is returned as a string.

    Raises:
        - ENDRUN: with a `Skipped` state when no directory holds a match
    """
    month_dirs = (
        'COVID-19-TweetIDs/2020-01',
        'COVID-19-TweetIDs/2020-02',
        'COVID-19-TweetIDs/2020-03',
    )

    if 'backfill_timestamp' in prefect.context:
        timestamp = arrow.get(prefect.context['backfill_timestamp'])
    else:
        timestamp = prefect.context['scheduled_start_time']
    print('TIMESTAMP = ', timestamp)

    wanted_ending = '{}.txt'.format(timestamp.strftime('%Y-%m-%d-%H'))

    for data_dir in month_dirs:
        if not os.path.isdir(data_dir):
            print('WARNING: not a dir', data_dir)
            continue
        for candidate in Path(data_dir).iterdir():
            if candidate.name.endswith(wanted_ending):
                print(candidate)
                return str(candidate)

    # TODO: (wzy) Figure out how to cancel this gracefully
    raise ENDRUN(state=Skipped())
class TestFlowVisualize:
    """Tests for `Flow.visualize` and the graph source it produces."""

    @staticmethod
    def _ipython_kernel_module():
        """
        Build a stand-in for the `IPython` module whose `get_ipython()`
        returns an object with `config == {"IPKernelApp": True}`.
        """
        return MagicMock(
            get_ipython=lambda: MagicMock(config=dict(IPKernelApp=True))
        )

    def test_visualize_raises_informative_importerror_without_graphviz(
        self, monkeypatch
    ):
        f = Flow(name="test")
        f.add_task(Task())

        with monkeypatch.context() as m:
            # With an empty search path the graphviz import inside
            # visualize() is expected to fail.
            m.setattr(sys, "path", "")
            with pytest.raises(ImportError) as exc:
                f.visualize()

        assert "pip install prefect[viz]" in repr(exc.value)

    def test_viz_returns_graph_object_if_in_ipython(self):
        # Imported only to confirm graphviz is available for this test.
        import graphviz  # noqa: F401

        ipython = self._ipython_kernel_module()
        with patch.dict("sys.modules", IPython=ipython):
            f = Flow(name="test")
            f.add_task(Task(name="a_nice_task"))
            graph = f.visualize()

        assert "label=a_nice_task" in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping(self):
        ipython = self._ipython_kernel_module()
        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                AddTask(name="a_nice_task").map(x=Task(name="a_list_task"), y=8)
            graph = f.visualize()

        assert 'label="a_nice_task <map>" shape=box' in graph.source
        assert "label=a_list_task shape=ellipse" in graph.source
        assert "label=x style=dashed" in graph.source
        assert "label=y style=dashed" in graph.source

    @pytest.mark.parametrize("state", [Success(), Failed(), Skipped()])
    def test_viz_if_flow_state_provided(self, state):
        # Imported only to confirm graphviz is available for this test.
        import graphviz  # noqa: F401

        ipython = self._ipython_kernel_module()
        with patch.dict("sys.modules", IPython=ipython):
            t = Task(name="a_nice_task")
            f = Flow(name="test")
            f.add_task(t)
            graph = f.visualize(flow_state=Success(result={t: state}))

        assert "label=a_nice_task" in graph.source
        # the node colour is the state's colour with an "80" suffix
        assert 'color="' + state.color + '80"' in graph.source
        assert "shape=ellipse" in graph.source

    def test_viz_reflects_mapping_if_flow_state_provided(self):
        ipython = self._ipython_kernel_module()
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")
        map_state = Mapped(map_states=[Success(), Failed()])

        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                res = add.map(x=list_task, y=8)
            graph = f.visualize(
                flow_state=Success(
                    result={res: map_state, list_task: Success()}
                )
            )

        # one colored node for each mapped result
        assert 'label="a_nice_task <map>" color="#00800080"' in graph.source
        assert 'label="a_nice_task <map>" color="#FF000080"' in graph.source
        assert 'label=a_list_task color="#00800080"' in graph.source
        assert 'label=8 color="#00000080"' in graph.source

        # two edges for each input to add()
        for var in ["x", "y"]:
            for index in [0, 1]:
                assert (
                    "{0} [label={1} style=dashed]".format(index, var)
                    in graph.source
                )

    def test_viz_reflects_multiple_mapping_if_flow_state_provided(self):
        ipython = self._ipython_kernel_module()
        add = AddTask(name="a_nice_task")
        list_task = Task(name="a_list_task")
        map_state1 = Mapped(map_states=[Success(), TriggerFailed()])
        map_state2 = Mapped(map_states=[Success(), Failed()])

        with patch.dict("sys.modules", IPython=ipython):
            with Flow(name="test") as f:
                first_res = add.map(x=list_task, y=8)
                # making a copy of a task with dependencies
                with pytest.warns(UserWarning):
                    res = first_res.map(x=first_res, y=9)
            graph = f.visualize(
                flow_state=Success(
                    result={
                        res: map_state1,
                        list_task: Success(),
                        first_res: map_state2,
                    }
                )
            )

        # Each mapped child of `first_res` must be wired to the child of
        # `res` with the same map index. The membership test against
        # graph.source is essential: asserting the formatted string alone
        # is always truthy and checks nothing.
        for map_index in ["0", "1"]:
            expected_edge = "{first} -> {second} [label=x style=dashed]".format(
                first=str(id(first_res)) + map_index,
                second=str(id(res)) + map_index,
            )
            assert expected_edge in graph.source

    @pytest.mark.parametrize(
        "error",
        [
            ImportError("abc"),
            ValueError("abc"),
            TypeError("abc"),
            NameError("abc"),
            AttributeError("abc"),
        ],
    )
    def test_viz_renders_if_ipython_isnt_installed_or_errors(self, error):
        graphviz = MagicMock()
        # get_ipython() raises the parametrized error when called
        ipython = MagicMock(get_ipython=MagicMock(side_effect=error))
        with patch.dict("sys.modules", graphviz=graphviz, IPython=ipython):
            with Flow(name="test") as f:
                AddTask(name="a_nice_task").map(x=Task(name="a_list_task"), y=8)
            # must complete without propagating the error
            f.visualize()
assert_true={"is_cached", "is_finished", "is_successful"}), dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict(state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"}), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict(state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"}), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}), dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}), ], ) def test_state_is_methods(state_check): """ Iterates over all of the "is_*()" methods of the state, asserting that each one is False, unless the name of that method is provided as `assert_true`. For example, if `state_check == (Pending(), {'is_pending'})`, then this method will assert that `state.is_running()` is False, `state.is_successful()` is False, etc. but `state.is_pending()` is True.
dict(state=ClientFailed(), assert_true={"is_meta_state"}), dict(state=Failed(), assert_true={"is_finished", "is_failed"}), dict(state=Finished(), assert_true={"is_finished"}), dict(state=Looped(), assert_true={"is_finished", "is_looped"}), dict(state=Mapped(), assert_true={"is_finished", "is_mapped", "is_successful"}), dict(state=Paused(), assert_true={"is_pending", "is_scheduled"}), dict(state=Pending(), assert_true={"is_pending"}), dict(state=Queued(), assert_true={"is_meta_state", "is_queued"}), dict(state=Resume(), assert_true={"is_pending", "is_scheduled"}), dict( state=Retrying(), assert_true={"is_pending", "is_scheduled", "is_retrying"} ), dict(state=Running(), assert_true={"is_running"}), dict(state=Scheduled(), assert_true={"is_pending", "is_scheduled"}), dict( state=Skipped(), assert_true={"is_finished", "is_successful", "is_skipped"} ), dict(state=Submitted(), assert_true={"is_meta_state", "is_submitted"}), dict(state=Success(), assert_true={"is_finished", "is_successful"}), dict(state=TimedOut(), assert_true={"is_finished", "is_failed"}), dict(state=TriggerFailed(), assert_true={"is_finished", "is_failed"}), ], ) def test_state_is_methods(state_check): """ Iterates over all of the "is_*()" methods of the state, asserting that each one is False, unless the name of that method is provided as `assert_true`. For example, if `state_check == (Pending(), {'is_pending'})`, then this method will assert that `state.is_running()` is False, `state.is_successful()` is False, etc. but `state.is_pending()` is True.
def test_state_type_methods_with_skipped_state(self):
    """
    A `Skipped` state reports itself as finished, skipped and successful,
    and answers falsy to every other `is_*()` predicate checked here.
    """
    skipped = Skipped()
    expectations = [
        ("is_cached", False),
        ("is_pending", False),
        ("is_retrying", False),
        ("is_running", False),
        ("is_finished", True),
        ("is_skipped", True),
        ("is_scheduled", False),
        ("is_successful", True),
        ("is_failed", False),
        ("is_mapped", False),
        ("is_meta_state", False),
    ]
    for predicate_name, expected in expectations:
        answer = getattr(skipped, predicate_name)()
        # compare truthiness only, matching plain `assert x` / `assert not x`
        assert bool(answer) is expected, predicate_name