def test_flow_run_cancel(monkeypatch):
    client = MagicMock()
    client.cancel_flow_run = MagicMock(return_value=True)
    monkeypatch.setattr(
        "prefect.tasks.prefect.flow_run_cancel.Client", MagicMock(return_value=client)
    )
    flow_cancel_task = CancelFlowRun(flow_run_id="id123")

    # Verify correct initialization
    assert flow_cancel_task.flow_run_id == "id123"

    # Verify client called with arguments
    flow = Flow("TestContext")
    flow.add_task(flow_cancel_task)
    flow.run()
    assert client.cancel_flow_run.called
    assert client.cancel_flow_run.call_args[0][0] == "id123"

def run_pipeline(flow: Flow, data_dir: str, output_dir: str, **kwargs) -> Optional[State]:
    """Run the pipeline.

    Parameters
    ----------
    flow : Flow
        The generated flow.
    data_dir : str
        The directory containing the data.
    output_dir : str
        The output location for the data.
    **kwargs
        Additional parameters.

    Returns
    -------
    State
        The output of ``flow.run``.
    """
    output = flow.run(
        parameters={"data_dir": data_dir, "output_dir": output_dir, **kwargs},
        executor=LocalDaskExecutor(scheduler="processes"),
    )
    return output

def test_unix_step(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    def _on_task_failure(task, state):
        raise Exception(state.message)

    with tmp(Path(SOURCE_DIR) / "test-data/local/prefect_test_case"):
        config = parse_config("config.yml")
        storage = storage_driver_factory(config=config.get("storage"), run_path=".")
        resource = storage.store("unix_test_script.py")
        jobs = [
            {
                "id": "0",
                "name": "test_script",
                "executable": "unix_test_script.py",
                "args": ["vas"],
            }
        ]
        stage_task = UnixStep(
            resources=[resource],
            outputs=["output.out"],
            job_list=jobs,
            iens=1,
            cmd="python3",
            url=url,
            step_id="step_id_0",
            stage_id="stage_id_0",
            ee_id="ee_id_0",
            on_failure=_on_task_failure,
            run_path=config.get("run_path"),
            storage_config=config.get("storage"),
            max_retries=1,
            retry_delay=timedelta(seconds=2),
        )
        flow = Flow("testing")
        flow.add_task(stage_task)
        flow_run = flow.run()

        # Stop the mock evaluator WS server
        with Client(url) as c:
            c.send("stop")
        mock_ws_thread.join()

        task_result = flow_run.result[stage_task]
        assert task_result.is_successful()
        assert flow_run.is_successful()

        assert len(task_result.result["outputs"]) == 1
        expected_path = storage.get_storage_path(1) / "output.out"
        output_path = flow_run.result[stage_task].result["outputs"][0]
        assert expected_path == output_path
        assert output_path.exists()

def run_pipeline(flow: Flow, data_dir: str, output_dir: str, **kwargs) -> Optional[State]:
    """Run a pipeline.

    Parameters
    ----------
    flow : Flow
        The generated flow.
    data_dir : str
        The directory containing the data.
    output_dir : str
        The output location for the data.
    **kwargs
        Parameter values.

    Returns
    -------
    State
        The output of ``flow.run``.
    """
    allparams = {param.name for param in flow.parameters()}
    params = {"data_dir": data_dir, "output_dir": output_dir, **kwargs}
    params = {key: value for key, value in params.items() if key in allparams}
    with prefect.context(data_dir=data_dir, output_dir=output_dir):
        output = flow.run(parameters=params)
    return output

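# A minimal usage sketch (not from the source) for run_pipeline above: a flow
# whose Parameters match the keys being passed in. The task, flow, and variable
# names here are hypothetical.
from prefect import Flow, Parameter, task


@task
def copy_data(data_dir, output_dir):
    print(f"copying {data_dir} -> {output_dir}")


with Flow("demo-pipeline") as demo_flow:
    copy_data(Parameter("data_dir"), Parameter("output_dir"))

# Unknown kwargs (here `verbose`) are filtered out against flow.parameters(),
# so passing extras is safe with this variant.
demo_state = run_pipeline(demo_flow, data_dir="/tmp/in", output_dir="/tmp/out", verbose=True)
assert demo_state.is_successful()
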
def test_jinja_template_can_execute_python_code():
    date = pendulum.parse("1986-09-20")
    task = JinjaTemplate(template='{{ date.strftime("%Y-%d") }} is a date.')
    f = Flow(name="test", tasks=[task])
    res = f.run(context={"date": date})

    assert res.is_successful()
    assert res.result[task].result == "1986-20 is a date."

def test_string_formatter_fails_in_expected_ways():
    t1 = StringFormatter(template="{name} is from {place}")
    t2 = StringFormatter(template="{0} is from {1}")
    f = Flow(name="test", tasks=[t1, t2])
    res = f.run()

    assert res.is_failed()
    assert isinstance(res.result[t1].result, KeyError)
    assert isinstance(res.result[t2].result, IndexError)

def execute(flow: Flow) -> State:
    """
    Returns:
        state: (State) state of league flow
    """
    with raise_on_exception():
        executor = DaskExecutor(address=os.getenv("WORKER_ADDRESS"))
        # Pass the executor explicitly; it was previously created but unused.
        state = flow.run(executor=executor)
    return state

def test_imperative_args_are_added_to_flow_before_mapping(self):
    # Check an edge case when mapping over tasks that haven't been added to the flow yet.
    @tasks.task
    def data():
        return range(3)

    def func(a, flow):
        return inc.copy().bind(a, flow=flow)

    flow = Flow("test")
    res = apply_map(func, data, flow=flow)

    state = flow.run()
    assert state.result[res].result == [1, 2, 3]

def test_merge_imperative_flow_checkpoint_false():
    flow = Flow("test")
    cond = identity.copy().bind(True, flow=flow)
    with case(cond, True):
        a = inc.copy().bind(1, flow=flow)
    with case(cond, False):
        b = inc.copy().bind(2, flow=flow)
    c = merge(a, b, flow=flow, checkpoint=False)

    state = flow.run()
    assert c.checkpoint is False

def test_airflow_task_successfully_runs_a_task(self, airflow_settings):
    task = AirflowTask(
        db_conn=airflow_settings["db_conn"],
        task_id="also_run_this",
        dag_id="example_bash_operator",
        env=airflow_settings,
    )
    flow = Flow(name="test single task", tasks=[task])
    flow_state = flow.run()

    assert flow_state.is_successful()
    assert flow_state.result[task].is_successful()
    assert flow_state.result[task].result is None

def test_apply_map_simple(self, api):
    if api == "functional":

        def func(x, y, z):
            a = add(x, y)
            a.name = "add-a"
            b = add(a, z)
            b.name = "add-b"
            c = add(b, 1)
            c.name = "add-c"
            return inc(c)

        with Flow("test") as flow:
            y = ranged(3)
            z = edges.unmapped(1)
            res = apply_map(func, range(3, 6), y=y, z=z)
    else:

        def func(x, y, z, flow):
            a = add.copy(name="add-a").bind(x, y, flow=flow)
            b = add.copy(name="add-b").bind(a, z, flow=flow)
            c = add.copy(name="add-c").bind(b, 1, flow=flow)
            return inc.copy().bind(c, flow=flow)

        flow = Flow("test")
        y = ranged.copy().bind(3, flow=flow)
        z = edges.unmapped(tasks.as_task(1, flow=flow))
        res = apply_map(func, range(3, 6), y=y, z=z, flow=flow)

    consts = {t.name: c for t, c in flow.constants.items()}
    assert consts == {"ranged": {"n": 3}, "add-b": {"y": 1}, "add-c": {"y": 1}}

    for task in flow.tasks:
        if task.name != "ranged":
            for e in flow.edges_to(task):
                assert e.mapped

    state = flow.run()
    assert state.result[res].result == [6, 8, 10]

def test_merge_imperative_flow():
    flow = Flow("test")
    cond = identity.copy().bind(True, flow=flow)
    with case(cond, True):
        a = inc.copy().bind(1, flow=flow)
    with case(cond, False):
        b = inc.copy().bind(2, flow=flow)
    c = merge(a, b, flow=flow)

    state = flow.run()
    assert state.result[cond].result is True
    assert state.result[a].result == 2
    assert state.result[b].is_skipped()
    assert state.result[c].result == 2

def execute(flow: Flow, year: int, league_id: int, cookies: dict) -> State:
    """
    Flow executor/runner (increases abstraction)

    Args:
        flow: (Flow) flow to be executed
        year: (int) year in which to make requests
        league_id: (int) league id in which to make requests
        cookies: (dict) auth cookies

    Returns:
        league_state: (State) state of league flow
    """
    with raise_on_exception():
        executor = DaskExecutor(address=os.getenv("WORKER_ADDRESS"))
        league_state = flow.run(
            year=year, league_id=league_id, cookies=cookies, executor=executor
        )
    return league_state

def test_case_imperative_api(self, branch):
    flow = Flow("test")
    cond = identity.copy()
    a = identity.copy()
    b = inc.copy()
    c = identity.copy()
    d = inc.copy()

    cond.bind(branch, flow=flow)
    flow.add_task(cond)
    with case(cond, "a"):
        a.bind(1, flow=flow)
        b.bind(a, flow=flow)
        flow.add_task(a)
        flow.add_task(b)
    with case(cond, "b"):
        c.bind(3, flow=flow)
        d.bind(c, flow=flow)
        flow.add_task(c)
        flow.add_task(d)

    state = flow.run()
    if branch == "a":
        assert state.result[a].result == 1
        assert state.result[b].result == 2
        assert state.result[c].is_skipped()
        assert state.result[d].is_skipped()
    elif branch == "b":
        assert state.result[a].is_skipped()
        assert state.result[b].is_skipped()
        assert state.result[c].result == 3
        assert state.result[d].result == 4
    elif branch == "c":
        for t in [a, b, c, d]:
            assert state.result[t].is_skipped()

def test_apply_map_control_flow(self, api):
    if api == "functional":

        def func(x):
            with case(is_even(x), True):
                x2 = add(x, 1)
            return merge(x2, x)

        with Flow("test") as flow:
            res = apply_map(func, range(4))
    else:

        def func(x, flow):
            cond = is_even.copy().bind(x, flow=flow)
            with case(cond, True):
                x2 = add.copy().bind(x, 1, flow=flow)
            return merge(x2, x, flow=flow)

        flow = Flow("test")
        res = apply_map(func, range(4), flow=flow)

    state = flow.run()
    assert state.result[res].result == [1, 1, 3, 3]

class OutputValue(Task):
    def run(self, n, multiple):
        print(n * multiple)


flow = Flow("unmapped-values")

total = Parameter("total")
multiple = Parameter("multiple")

numbers = GetNumbers()
numbers.set_upstream(total, key="total", flow=flow)

output_value = OutputValue()
output_value.bind(mapped=True, n=numbers, multiple=unmapped(multiple), flow=flow)

# with Flow("unmapped-values") as flow:
#     total = Parameter("total")
#     multiple = Parameter("multiple")
#     numbers = get_numbers(total)
#     output_value.map(numbers, multiple=unmapped(multiple))

flow.run(parameters={"total": 5, "multiple": 10})

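# Hypothetical GetNumbers implementation implied by the snippet above (it is
# not shown in the source, and would need to be defined before the wiring).
# Assuming it returns [1, 2, 3, 4, 5] for total=5, the mapped task prints
# 10 through 50.
class GetNumbers(Task):
    def run(self, total):
        return list(range(1, total + 1))
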
    username=username,
    dest_dir=my_project_name,
    metadata_items=metadata_items,
    flow=flow,
)

# Initialize a git project.
flow.add_task(git_init)
git_init.set_upstream(pull_sfdc_code, flow=flow)
git_init.bind(project_dir=my_project_name, flow=flow)

# Add SFDX files to the project.
flow.add_task(git_add)
git_add.set_upstream(git_init, flow=flow)
git_add.bind(project_dir=my_project_name, flow=flow)

# TODO: use an edge to link the return values of one task to the input of another :D
# changed_files = sfdx_commands.copy_changed_files_and_get_tests
# flow.add_task(changed_files)
# flow.add_edge(pull_sfdc_code, changed_files, key="pull_result")

# Push them to remote

if __name__ == "__main__":
    flow.run(
        username="******",
        project_name="SFDX_Project",
        metadata_items=["ApexClass"],
    )

def test_jinja_template_formats_from_context():
    task = JinjaTemplate(template="I am {{ task_name }}", name="foo")
    f = Flow(name="test", tasks=[task])
    res = f.run()

    assert res.is_successful()
    assert res.result[task].result == "I am foo"

class AddValue(Task):
    def run(self, v):
        return v + 10


class PrintValue(Task):
    def run(self, v):
        print(v)


# with Flow("task-results") as flow:
#     v = get_value()
#     v_added = add_value(v)
#     p = print_value(v_added)

flow = Flow("task-results")

get_value = GetValue()
add_value = AddValue()
print_value = PrintValue()

get_value.set_downstream(add_value, key="v", flow=flow)
add_value.set_downstream(print_value, key="v", flow=flow)

state = flow.run()
assert state.result[get_value].result == 10
assert state.result[add_value].result == 20
assert state.result[print_value].result is None

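# Hypothetical GetValue implied by the snippet above (not shown in the
# source): the first assertion requires it to return 10.
class GetValue(Task):
    def run(self):
        return 10
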
import time

import pendulum

from prefect import Flow, task


def timestamper(task, old_state, new_state):
    """
    Task state handler which timestamps new states and logs the duration
    between state changes using the task's logger.
    """
    new_state.timestamp = pendulum.now("utc")
    if hasattr(old_state, "timestamp"):
        duration = (new_state.timestamp - old_state.timestamp).in_seconds()
        task.logger.info(
            "{} seconds passed in between state transitions".format(duration)
        )
    return new_state


@task(state_handlers=[timestamper])
def sleeper():
    time.sleep(2)


f = Flow("log-task-duration", tasks=[sleeper])
f.run()
# INFO - prefect.FlowRunner | Beginning Flow run for 'log-task-duration'
# INFO - prefect.FlowRunner | Starting flow run.
# INFO - prefect.TaskRunner | Task 'sleeper': Starting task run...
# INFO - prefect.Task | 2 seconds passed in between state transitions
# INFO - prefect.TaskRunner | Task 'sleeper': finished task run for task with final state: 'Success'
# INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded

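# A minimal sketch (not from the source): the same handler pattern also works
# at the flow level, assuming Prefect 1.x's Flow(state_handlers=[...]) keyword.
# The handler and flow names are illustrative.
def flow_timestamper(flow, old_state, new_state):
    new_state.timestamp = pendulum.now("utc")
    if hasattr(old_state, "timestamp"):
        duration = (new_state.timestamp - old_state.timestamp).in_seconds()
        print("{} seconds between flow state transitions".format(duration))
    return new_state


g = Flow("log-flow-duration", tasks=[sleeper], state_handlers=[flow_timestamper])
g.run()
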
def run_predefined_flow(self):
    flow = Flow("Run a Prefect Flow in Docker")

    # "order coffee" task
    # task_config_1 = {
    #     'task_type': 'action',
    #     'task_name': '',
    #     'task_id': '',
    #     'coffee order id': '',
    # }
    # task_1 = Scp_Task('order coffee', task_config_1)
    task_config_1 = {
        'task_type': 'action',
        'task_name': 'make coffee',
        'task_id': '',
    }
    task_1 = Scp_Task(name='make coffee', **task_config_1)

    task_config_2 = {
        'task_type': 'event',
        'task_name': 'coffee finished',
        'task_id': '',
    }
    # task_2 = Scp_Task.copy()
    task_2 = Scp_Event_Task(name='coffee finished', **task_config_2)

    task_config_3 = {
        'task_type': 'action',
        'task_name': 'send coffee',
        'task_id': '',
    }
    task_3 = Scp_Task(name='send coffee', **task_config_3)

    # add tasks to the flow
    flow.add_task(Scp_StartEvent_Task())
    flow.add_task(task_1)
    flow.add_task(task_2)
    flow.add_task(task_3)
    flow.add_task(Scp_EndEvent_Task())

    # create non-data dependencies
    task_2.set_upstream(task_1, flow=flow)
    task_3.set_upstream(task_2, flow=flow)
    # task_4.set_upstream(task_3, flow=flow)

    # create data bindings
    task_input_1 = {
        'msg': 'test_task_1',
        'url': 'http://39.106.6.6:8080/SCIDE/SCManager?action=executeContract&contractID=CoffeeFDU&operation=postMakeCoffee&arg=%22%22',
    }
    task_input_3 = {
        'msg': 'test_task_2',
        'url': '',
    }
    task_1.bind(**task_input_1, flow=flow)
    task_3.bind(**task_input_3, flow=flow)

    # start flow
    state = flow.run()

from prefect import Flow

from etl import extract, transform, load

flow = Flow('ETL')
flow.set_dependencies(transform, keyword_tasks=dict(data=extract))
flow.set_dependencies(load, keyword_tasks=dict(data=transform))

flow.run()  # prints "Here's your data: [10, 20, 30]"

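# A hypothetical etl.py matching the imports above; the task bodies are a
# guess (not the source's actual module) that would produce the printed
# output shown in the comment.
from prefect import Task


class Extract(Task):
    def run(self):
        return [1, 2, 3]


class Transform(Task):
    def run(self, data):
        return [i * 10 for i in data]


class Load(Task):
    def run(self, data):
        print(f"Here's your data: {data}")


extract = Extract()
transform = Transform()
load = Load()
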
def execute_plan(self, plan: Flow, **kwargs):
    state = plan.run(**kwargs)
    return state

def test_string_formatter_formats_from_context():
    task = StringFormatter(template="I am {task_name}", name="foo")
    f = Flow(name="test", tasks=[task])
    res = f.run()

    assert res.is_successful()
    assert res.result[task].result == "I am foo"

class CommonFlow:
    def __init__(self, instance_id, **mongodb):
        self.flow = Flow("Run a Prefect Flow in Docker")
        self.instance_id = instance_id
        self.t_app_class = mongodb.get('t_app_class')
        self.t_app_instance = mongodb.get('t_app_instance')
        self.app_class_child_shapes = find_app_class_by_instance_id(
            self.t_app_instance, self.instance_id
        ).get("app_class").get("childShapes")
        self.all_action_count = 0
        self.get_all_action_count()

    def get_all_action_count(self):
        # Collect flow information: count every action/event node
        for child_shape in self.app_class_child_shapes:
            stencil = child_shape.get("stencil").get("id")
            if stencil in [
                "StartNoneEvent", "DefaultEvent", "SocialAction",
                "PhysicalAction", "CyberAction"
            ]:
                self.all_action_count += 1

    def run_flow(self):
        t = Thread(target=self.new_thread)
        t.start()

    def new_thread(self):
        # Fetch the flow information
        self.get_flow(None)
        # Assemble and run the flow
        state = self.flow.run()
        print("Execution finished")

    def get_flow(self, pre_task):
        if pre_task is None:
            # Start from the initial node
            for child_shape in self.app_class_child_shapes:
                stencil = child_shape.get("stencil").get("id")
                if stencil == 'StartNoneEvent':
                    # Build the start task
                    task_config = {
                        'instance_id': self.instance_id,
                        't_app_class': self.t_app_class,
                        't_app_instance': self.t_app_instance,
                        'task_type': 'StartNoneEvent',
                        'task_name': 'StartNoneEvent',
                        'task_id': child_shape.get("resourceId"),
                    }
                    task = Scp_StartEvent_Task(name="StartNoneEvent", **task_config)
                    self.flow.add_task(task)
                    # Configure the initial inputs
                    # task_input = self.app_instance_resource
                    # task.bind(**task_input, flow=self.flow)
                    # Start adding the remaining tasks
                    self.all_action_count -= 1
                    self.get_flow(task)
        else:
            # If every node has been generated, building is done and the flow can run
            if self.all_action_count == 0:
                return "success"
            # Check whether the current node has successor nodes
            child_shape = self.get_child_shape_by_id(pre_task.task_id)
            next_nodes = self.get_next_nodes(child_shape)
            for next_node in next_nodes:
                task = self.get_task(next_node)
                self.flow.add_task(task)
                task.set_upstream(pre_task, flow=self.flow)
                self.all_action_count -= 1
                self.get_flow(task)

    def get_child_shape_by_id(self, target_id):
        for child_shape in self.app_class_child_shapes:
            resource_id = child_shape.get("resourceId")
            if resource_id == target_id:
                return child_shape

    def get_next_nodes(self, child_shape):
        next_nodes = []
        outgoings = child_shape.get("outgoing")
        for outgoing in outgoings:
            resource_id = outgoing.get("resourceId")
            sequence_flow = self.get_child_shape_by_id(resource_id)
            sequence_flow_outgoings = sequence_flow.get("outgoing")
            for sequence_flow_outgoing in sequence_flow_outgoings:
                next_node_id = sequence_flow_outgoing.get("resourceId")
                next_nodes.append(self.get_child_shape_by_id(next_node_id))
        return next_nodes

    def get_task(self, node):
        task_type = node.get("stencil").get("id")
        task_name = node.get("properties").get("name")
        task_id = node.get("resourceId")
        task_executor = node.get("properties").get("activityelement").get("id")
        task_config = {
            'instance_id': self.instance_id,
            't_app_class': self.t_app_class,
            't_app_instance': self.t_app_instance,
            'task_type': task_type,
            'task_name': task_name,
            'task_id': task_id,
            'task_executor': task_executor,
        }
        if task_type == 'DefaultEvent':
            return Scp_Event_Task(name=task_name, **task_config)
        else:
            return Scp_Task(name=task_name, **task_config)

from prefect import Task, Flow


class MyTask(Task):
    def run(self):
        print("This will be logged!")


flow = Flow("log-stdout")
my_task = MyTask(log_stdout=True)
flow.add_task(my_task)

flow.run()

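# An equivalent functional-API sketch: in Prefect 1.x, log_stdout is also
# accepted by the @task decorator. The task and flow names are illustrative.
from prefect import task


@task(log_stdout=True)
def my_stdout_task():
    print("This will be logged too!")


with Flow("log-stdout-functional") as func_flow:
    my_stdout_task()

func_flow.run()
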
def execute_plan(self, plan: prefect.Flow, **kwargs):
    return plan.run(**kwargs)

from prefect import Flow, task
from prefect.engine import signals


@task
def signal_task(msg):
    if msg == 'go!':
        print(msg)
        raise signals.SUCCESS(message='going!')
    elif msg == 'stop!':
        raise signals.FAIL(message='stopping!')
    elif msg == 'skip!':
        raise signals.SKIP(message='skipping!')


with Flow("My first flow") as flow:
    first_result = signal_task('go!')
    second_result = signal_task('stop!')

state = flow.run()


@task
def number_task():
    print('42')
    return 42


f = Flow("example")
f.add_task(number_task)
print(f.tasks)

state = f.run()

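# A short sketch (not from the source) of inspecting the signal-driven states
# from "My first flow" above: SUCCESS marks a task successful, FAIL marks it
# failed, and a failed reference task fails the whole flow run. The
# `signal_state` name is illustrative.
signal_state = flow.run()
assert signal_state.result[first_result].is_successful()
assert signal_state.result[second_result].is_failed()
assert signal_state.is_failed()
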
from random import randrange

from prefect import Task, Flow


# ETL Pipeline Tasks
class Extract(Task):
    def run(self):
        # Extract the data
        return randrange(1, 10)


class Transform(Task):
    def run(self, data):
        # Transform the data
        return data * 10


class Load(Task):
    def run(self, data):
        # Load the data
        print(f"\nHere's your data: {data}")


# Instantiate the tasks
e = Extract()
t = Transform()
l = Load()

flow = Flow('Evolving ETL')

# Set dependency graph
flow.set_dependencies(t, keyword_tasks={'data': e})
flow.set_dependencies(l, keyword_tasks={'data': t})

flow.run()  # Prints the data

class Sum(Task):
    def run(self, numbers):
        print(sum(numbers))


flow = Flow("parallel-execution")

stop = Parameter("stop")

number_1 = RandomNum()
number_2 = RandomNum()
number_3 = RandomNum()

stop.set_downstream(number_1, key="stop", flow=flow)
stop.set_downstream(number_2, key="stop", flow=flow)
stop.set_downstream(number_3, key="stop", flow=flow)

sum_numbers = Sum()
sum_numbers.bind(numbers=[number_1, number_2, number_3], flow=flow)

# with Flow("parallel-execution") as flow:
#     stop = Parameter("stop")
#     number_1 = random_num(stop)
#     number_2 = random_num(stop)
#     number_3 = random_num(stop)
#     sum = sum(numbers=[number_1, number_2, number_3])

# flow.visualize()
flow.run(parameters={"stop": 5}, executor=DaskExecutor())

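# Hypothetical RandomNum implied by the snippet above (not shown in the
# source); the `stop` parameter bounds the random draw.
from random import randrange


class RandomNum(Task):
    def run(self, stop):
        return randrange(stop)
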