def test_ten_independent_tasks():
    """
    x1 x2 ... x10

    Ten identical but independent tasks
    """
    f = Flow(name="test")
    for i in range(1, 11):
        f.add_task(get_task("x{}".format(i)))

    steps = f.generate_local_task_ids(_debug_steps=True)

    # each task generates the same id based on its own characteristics
    assert count_unique_ids(steps[1]) == 1

    # each step generates new ids
    assert steps[1] != steps[2] != steps[3] != steps[4] != steps[5]

    # ...but the ids are not unique
    for i in range(1, 5):
        assert count_unique_ids(steps[i]) == 1

    # disambiguation finally takes place in step 5
    assert count_unique_ids(steps[5]) == 10
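# Note: the id-generation tests in this file rely on two helpers, `get_task`
# and `count_unique_ids`, which are not shown here. A minimal sketch consistent
# with how they are used (an assumption, not the original implementation):
from prefect import Task

TASKS = {}


def get_task(name):
    # Cache one identical task per name, so repeated lookups return the
    # same object (flow_from_chains below depends on this).
    if name not in TASKS:
        TASKS[name] = Task()
    return TASKS[name]


def count_unique_ids(task_ids):
    # `task_ids` is a {task: id} mapping, as produced by each step of
    # Flow.generate_local_task_ids(_debug_steps=True).
    return len(set(task_ids.values()))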
def test_unix_step(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    def _on_task_failure(task, state):
        raise Exception(state.message)

    with tmp(Path(SOURCE_DIR) / "test-data/local/prefect_test_case"):
        config = parse_config("config.yml")
        storage = storage_driver_factory(config=config.get("storage"), run_path=".")
        resource = storage.store("unix_test_script.py")
        jobs = [
            {
                "id": "0",
                "name": "test_script",
                "executable": "unix_test_script.py",
                "args": ["vas"],
            }
        ]
        stage_task = UnixStep(
            resources=[resource],
            outputs=["output.out"],
            job_list=jobs,
            iens=1,
            cmd="python3",
            url=url,
            step_id="step_id_0",
            stage_id="stage_id_0",
            ee_id="ee_id_0",
            on_failure=_on_task_failure,
            run_path=config.get("run_path"),
            storage_config=config.get("storage"),
            max_retries=1,
            retry_delay=timedelta(seconds=2),
        )
        flow = Flow("testing")
        flow.add_task(stage_task)
        flow_run = flow.run()

        # Stop the mock evaluator WS server
        with Client(url) as c:
            c.send("stop")
        mock_ws_thread.join()

        task_result = flow_run.result[stage_task]
        assert task_result.is_successful()
        assert flow_run.is_successful()
        assert len(task_result.result["outputs"]) == 1

        expected_path = storage.get_storage_path(1) / "output.out"
        output_path = flow_run.result[stage_task].result["outputs"][0]
        assert expected_path == output_path
        assert output_path.exists()
def test_available_dates_sensor_schema(tmpdir):
    """
    Test that AvailableDatesSensorSchema can load a valid set of sensor config parameters.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges", "DUMMY_PARAM"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Local(tmpdir)
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(
        schedule="0 0 * * *",
        cdr_types=["calls", "sms", "mds", "topups"],
        workflows=[
            {"workflow_name": "DUMMY_WORKFLOW", "parameters": {"DUMMY_PARAM": 1}},
            {"workflow_name": "DUMMY_WORKFLOW", "parameters": {"DUMMY_PARAM": 2}},
        ],
    )
    sensor_config = AvailableDatesSensorSchema(
        context={"workflow_storage": workflow_storage}
    ).load(input_dict)

    assert isinstance(sensor_config["schedule"], Schedule)
    assert sensor_config["schedule"].clocks[0].cron == "0 0 * * *"
    assert sensor_config["cdr_types"] == ["calls", "sms", "mds", "topups"]
    assert sensor_config["workflows"] == [
        WorkflowConfig(**workflow) for workflow in input_dict["workflows"]
    ]
def test_workflow_config_schema():
    """
    Test that WorkflowConfigSchema loads input data into a WorkflowConfig namedtuple.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges", "DUMMY_PARAM"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Memory()
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(
        workflow_name="DUMMY_WORKFLOW",
        parameters={"DUMMY_PARAM": "DUMMY_VALUE"},
        earliest_date=datetime.date(2016, 1, 1),
        date_stencil=[-1, 0],
    )
    workflow_config = WorkflowConfigSchema(
        context={"workflow_storage": workflow_storage}
    ).load(input_dict)

    assert isinstance(workflow_config, WorkflowConfig)
    assert workflow_config.workflow_name == input_dict["workflow_name"]
    assert workflow_config.parameters == input_dict["parameters"]
    assert workflow_config.earliest_date == input_dict["earliest_date"]
    assert workflow_config.date_stencil == DateStencil(input_dict["date_stencil"])
def test_workflow_config_schema_missing_and_unexpected_parameter_names():
    """
    Test that WorkflowConfigSchema raises a ValidationError if the names of the
    provided parameters are not valid for the named workflow.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges", "DUMMY_PARAM"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Memory()
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(
        workflow_name="DUMMY_WORKFLOW", parameters={"EXTRA_PARAM": "DUMMY_VALUE"}
    )
    with pytest.raises(ValidationError) as exc_info:
        workflow_config = WorkflowConfigSchema(
            context={"workflow_storage": workflow_storage}
        ).load(input_dict)
    assert (
        "Missing required parameters {'DUMMY_PARAM'} for workflow 'DUMMY_WORKFLOW'."
        in exc_info.value.messages["parameters"]
    )
    assert (
        "Unexpected parameters provided for workflow 'DUMMY_WORKFLOW': {'EXTRA_PARAM'}."
        in exc_info.value.messages["parameters"]
    )
def test_case_imperative_errors(self):
    flow = Flow("test")
    flow2 = Flow("test2")
    cond = identity.copy()
    a = identity.copy()

    with pytest.raises(ValueError, match="Multiple flows"):
        with case(cond, True):
            flow.add_task(a)
            flow2.add_task(a)
def test_deserialize_flow(self):
    f = Flow(name="test")
    f.add_task(Task())
    f.add_task(Parameter("x"))

    env = LocalEnvironment()
    serialized = env.serialize_flow_to_bytes(f)
    deserialized = env.deserialize_flow_from_bytes(serialized)

    assert isinstance(deserialized, Flow)
    assert len(deserialized.tasks) == 2
    assert {p.name for p in deserialized.parameters()} == {"x"}
def test_modify_task_changes_hash():
    f = Flow(name="test")
    t = Task()
    f.add_task(t)
    hash1 = f.generate_local_task_ids()

    # this is not an attribute referenced in task.serialize(), so it should not affect the id
    t.new_attribute = "hi"
    hash2 = f.generate_local_task_ids()

    # this is an attribute referenced in task.serialize(), so it should affect the id
    t.slug = "hi"
    hash3 = f.generate_local_task_ids()

    assert hash1 == hash2
    assert hash1 != hash3
def test_flow_run_cancel(monkeypatch):
    client = MagicMock()
    client.cancel_flow_run = MagicMock(return_value=True)
    monkeypatch.setattr(
        "prefect.tasks.prefect.flow_run_cancel.Client", MagicMock(return_value=client)
    )

    flow_cancel_task = CancelFlowRun(flow_run_id="id123")

    # Verify correct initialization
    assert flow_cancel_task.flow_run_id == "id123"

    # Verify client called with arguments
    flow = Flow("TestContext")
    flow.add_task(flow_cancel_task)
    flow.run()
    assert client.cancel_flow_run.called
    assert client.cancel_flow_run.call_args[0][0] == "id123"
def test_one_task():
    """
    x1

    A single task
    """
    f = Flow(name="test")
    f.add_task(get_task("x1"))

    steps = f.generate_local_task_ids(_debug_steps=True)

    # the task is uniquely identified by its own characteristics
    assert count_unique_ids(steps[1]) == 1

    # no further processing
    assert steps[1] == steps[2] == steps[3] == steps[4] == steps[5]
def create_cdc_all_states_flow():
    """Creates a flow that runs the CDC data update on all states."""
    sched = CronSchedule("17 */4 * * *")
    flow = Flow("CDCAllStatesDataUpdate", sched)
    for state in ALL_STATES_PLUS_DC:
        task = StartFlowRun(
            flow_name=CDCCovidDataTracker.__name__,
            project_name="can-scrape",
            wait=True,
            parameters={"state": state.abbr},
        )
        flow.add_task(task)
    return flow
def test_workflow_config_schema_invalid_parameter_names(key):
    """
    Test that WorkflowConfigSchema raises a ValidationError if the 'parameters'
    dict keys contain 'reference_date' or 'date_ranges'.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Memory()
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(workflow_name="DUMMY_WORKFLOW", parameters={key: "DUMMY_VALUE"})
    with pytest.raises(ValidationError) as exc_info:
        workflow_config = WorkflowConfigSchema(
            context={"workflow_storage": workflow_storage}
        ).load(input_dict)
    assert "Invalid input." in exc_info.value.messages["parameters"][key]["key"]
def test_workflow_config_schema_defaults():
    """
    Test that WorkflowConfigSchema loads input data if 'parameters',
    'earliest_date' and 'date_stencil' are not specified.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Memory()
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(workflow_name="DUMMY_WORKFLOW")
    workflow_config = WorkflowConfigSchema(
        context={"workflow_storage": workflow_storage}
    ).load(input_dict)

    assert isinstance(workflow_config, WorkflowConfig)
    assert workflow_config == WorkflowConfig(workflow_name="DUMMY_WORKFLOW")
def test_workflow_config_schema_invalid_earliest_date():
    """
    Test that WorkflowConfigSchema raises a ValidationError if the
    'earliest_date' field is not a date.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Memory()
    workflow_storage.add_flow(dummy_workflow)

    input_dict = dict(workflow_name="DUMMY_WORKFLOW", earliest_date=datetime.time(11))
    with pytest.raises(ValidationError) as exc_info:
        workflow_config = WorkflowConfigSchema(
            context={"workflow_storage": workflow_storage}
        ).load(input_dict)
    assert "Not a valid date." in exc_info.value.messages["earliest_date"]
def test_workflow_config_schema_workflow_not_found(tmpdir):
    """
    Test that WorkflowConfigSchema raises a ValidationError if the named
    workflow does not exist.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in ["reference_date", "date_ranges"]:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Local(tmpdir)
    workflow_storage.add_flow(dummy_workflow)

    with pytest.raises(ValidationError) as exc_info:
        workflow_config = WorkflowConfigSchema(
            context={"workflow_storage": workflow_storage}
        ).load({"workflow_name": "NONEXISTENT_WORKFLOW"})
    assert (
        "Workflow does not exist in this storage."
        in exc_info.value.messages["workflow_name"]
    )
def flow_from_chains(*chains):
    """
    Builds a Flow from chains of task names. To build a flow that runs
    x, then y, then z, and also runs x2 after x:

        flow_from_chains(
            ['x', 'y', 'z'],
            ['x', 'x2']
        )

    The tasks in the returned flow are all completely identical.
    """
    flow = Flow(name="test")
    for chain in chains:
        for name in chain:
            flow.add_task(get_task(name))
        for u_name, d_name in zip(chain, chain[1:]):
            flow.add_edge(get_task(u_name), get_task(d_name), validate=False)
    return flow
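# Hypothetical usage of the helper above: build a diamond-shaped flow.
# Because the `get_task` sketch earlier returns the same object for a given
# name, "x" and "z" are shared between the two chains, giving four tasks.
diamond = flow_from_chains(
    ["x", "y", "z"],
    ["x", "y2", "z"],
)
assert len(diamond.tasks) == 4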
def test_workflow_config_schema_workflow_does_not_accept_automatic_parameters(
    missing_parameter, tmpdir
):
    """
    Test that WorkflowConfigSchema raises a ValidationError if the named
    workflow does not accept parameters 'reference_date' and 'date_ranges'.
    """
    # Create storage object to pass in context
    dummy_workflow = Flow(name="DUMMY_WORKFLOW")
    for parameter in {"reference_date", "date_ranges"} - {missing_parameter}:
        dummy_workflow.add_task(Parameter(parameter))
    workflow_storage = storage.Local(tmpdir)
    workflow_storage.add_flow(dummy_workflow)

    with pytest.raises(ValidationError) as exc_info:
        workflow_config = WorkflowConfigSchema(
            context={"workflow_storage": workflow_storage}
        ).load({"workflow_name": "DUMMY_WORKFLOW"})
    assert (
        f"Workflow does not accept parameters {{'{missing_parameter}'}}."
        in exc_info.value.messages["workflow_name"]
    )
def test_ten_different_tasks():
    """
    x1 x2 ... x10

    Ten non-identical and independent tasks
    """
    f = Flow(name="test")
    for i in range(1, 11):
        f.add_task(Task(name=str(i)))

    steps = f.generate_local_task_ids(_debug_steps=True)

    # tasks are immediately identifiable
    assert count_unique_ids(steps[1]) == 10

    # no further processing
    assert steps[1] == steps[2] == steps[3] == steps[4] == steps[5]
def test_flow_id_does_not_affect_task_ids():
    f = Flow(name="test")
    f.add_task(get_task("x"))

    f2 = Flow(name="test")
    f2.add_task(get_task("x"))

    f3 = Flow(name="foo")
    f3.add_task(get_task("x"))

    assert f.generate_local_task_ids() == f2.generate_local_task_ids()
    assert f.generate_local_task_ids() == f3.generate_local_task_ids()
def test_case_imperative_api(self, branch):
    flow = Flow("test")
    cond = identity.copy()
    a = identity.copy()
    b = inc.copy()
    c = identity.copy()
    d = inc.copy()

    cond.bind(branch, flow=flow)
    flow.add_task(cond)
    with case(cond, "a"):
        a.bind(1, flow=flow)
        b.bind(a, flow=flow)
        flow.add_task(a)
        flow.add_task(b)
    with case(cond, "b"):
        c.bind(3, flow=flow)
        d.bind(c, flow=flow)
        flow.add_task(c)
        flow.add_task(d)

    state = flow.run()
    if branch == "a":
        assert state.result[a].result == 1
        assert state.result[b].result == 2
        assert state.result[c].is_skipped()
        assert state.result[d].is_skipped()
    elif branch == "b":
        assert state.result[a].is_skipped()
        assert state.result[b].is_skipped()
        assert state.result[c].result == 3
        assert state.result[d].result == 4
    elif branch == "c":
        for t in [a, b, c, d]:
            assert state.result[t].is_skipped()
def test_on_task_failure(unused_tcp_port):
    host = "localhost"
    url = f"ws://{host}:{unused_tcp_port}"
    messages = []
    mock_ws_thread = threading.Thread(
        target=partial(_mock_ws, messages=messages), args=(host, unused_tcp_port)
    )
    mock_ws_thread.start()

    with tmp(Path(SOURCE_DIR) / "test-data/local/prefect_test_case", False):
        config = parse_config("config.yml")
        storage = storage_driver_factory(config=config.get("storage"), run_path=".")
        resource = storage.store("unix_test_retry_script.py")
        jobs = [
            {
                "id": "0",
                "name": "test_script",
                "executable": "unix_test_retry_script.py",
                "args": [],
            }
        ]
        stage_task = UnixStep(
            resources=[resource],
            outputs=[],
            job_list=jobs,
            iens=1,
            cmd="python3",
            url=url,
            step_id="step_id_0",
            stage_id="stage_id_0",
            ee_id="ee_id_0",
            on_failure=partial(PrefectEnsemble._on_task_failure, url=url),
            run_path=config.get("run_path"),
            storage_config=config.get("storage"),
            max_retries=3,
            retry_delay=timedelta(seconds=1),
        )
        flow = Flow("testing")
        flow.add_task(stage_task)
        flow_run = flow.run()

        # Stop the mock evaluator WS server
        with Client(url) as c:
            c.send("stop")
        mock_ws_thread.join()

        task_result = flow_run.result[stage_task]
        assert task_result.is_successful()
        assert flow_run.is_successful()

        fail_job_messages = [msg for msg in messages if ids.EVTYPE_FM_JOB_FAILURE in msg]
        fail_step_messages = [msg for msg in messages if ids.EVTYPE_FM_STEP_FAILURE in msg]

        expected_job_failed_messages = 2
        expected_step_failed_messages = 0
        assert expected_job_failed_messages == len(fail_job_messages)
        assert expected_step_failed_messages == len(fail_step_messages)
# This excerpt references SEED, N_LEVELS and MIN_WIDTH, which are assumed to be
# defined earlier in the original script (they are not shown here).
import itertools
import random

from prefect import Flow, Task

MAX_WIDTH = 11
PROB_EDGE = 0.1
VERSION = 6

random.seed(SEED)

flow = Flow(f"{SEED} Seed Flow {VERSION}")

LEVELS = dict()
for level in range(N_LEVELS):
    width = random.randint(MIN_WIDTH, MAX_WIDTH)
    LEVELS[level] = [Task(name=f"Task {level}-{i}") for i in range(width)]
    for task in LEVELS[level]:
        flow.add_task(task)
    if level:
        for a, b in itertools.product(LEVELS[level - 1], LEVELS[level]):
            if random.random() > PROB_EDGE:
                flow.add_edge(a, b)

# flow.storage = Docker(
#     base_image="python:3.8",
#     python_dependencies=[],
#     registry_url="znicholasbrown",
#     image_name=f"random_seed-{VERSION}",
#     image_tag=f"random-seed-flow-{VERSION}",
# )

flow.register(project_name="Community Support Flows")
class CommonFlow:
    def __init__(self, instance_id, **mongodb):
        self.flow = Flow("Run a Prefect Flow in Docker")
        self.instance_id = instance_id
        self.t_app_class = mongodb.get('t_app_class')
        self.t_app_instance = mongodb.get('t_app_instance')
        self.app_class_child_shapes = find_app_class_by_instance_id(
            self.t_app_instance, self.instance_id
        ).get("app_class").get("childShapes")
        self.all_action_count = 0
        self.get_all_action_count()

    def get_all_action_count(self):
        # Gather workflow information
        for child_shape in self.app_class_child_shapes:
            stencil = child_shape.get("stencil").get("id")
            if stencil in [
                "StartNoneEvent", "DefaultEvent", "SocialAction",
                "PhysicalAction", "CyberAction"
            ]:
                self.all_action_count += 1

    def run_flow(self):
        t = Thread(target=self.new_thread)
        t.start()

    def new_thread(self):
        # Build the workflow
        self.get_flow(None)
        # Run the assembled flow
        state = self.flow.run()
        print("Execution finished")

    def get_flow(self, pre_task):
        if pre_task is None:
            # Start from the initial node
            for child_shape in self.app_class_child_shapes:
                stencil = child_shape.get("stencil").get("id")
                if stencil == 'StartNoneEvent':
                    # Build the start task
                    task_config = {
                        'instance_id': self.instance_id,
                        't_app_class': self.t_app_class,
                        't_app_instance': self.t_app_instance,
                        'task_type': 'StartNoneEvent',
                        'task_name': 'StartNoneEvent',
                        'task_id': child_shape.get("resourceId"),
                    }
                    task = Scp_StartEvent_Task(name="StartNoneEvent", **task_config)
                    self.flow.add_task(task)
                    # Configure the initial input store
                    # task_input = self.app_instance_resource
                    # task.bind(**task_input, flow=self.flow)
                    # Start adding the remaining tasks
                    self.all_action_count -= 1
                    self.get_flow(task)
        else:
            # Check whether all nodes have been generated; if so, we are done
            if self.all_action_count == 0:
                return "success"
            # Check whether the current node still has successor nodes
            child_shape = self.get_child_shape_by_id(pre_task.task_id)
            next_nodes = self.get_next_nodes(child_shape)
            for next_node in next_nodes:
                task = self.get_task(next_node)
                self.flow.add_task(task)
                task.set_upstream(pre_task, flow=self.flow)
                self.all_action_count -= 1
                self.get_flow(task)

    def get_child_shape_by_id(self, target_id):
        for child_shape in self.app_class_child_shapes:
            if child_shape.get("resourceId") == target_id:
                return child_shape

    def get_next_nodes(self, child_shape):
        next_nodes = []
        for outgoing in child_shape.get("outgoing"):
            sequence_flow = self.get_child_shape_by_id(outgoing.get("resourceId"))
            for sequence_flow_outgoing in sequence_flow.get("outgoing"):
                next_node_id = sequence_flow_outgoing.get("resourceId")
                next_nodes.append(self.get_child_shape_by_id(next_node_id))
        return next_nodes

    def get_task(self, node):
        task_type = node.get("stencil").get("id")
        task_name = node.get("properties").get("name")
        task_id = node.get("resourceId")
        task_executor = node.get("properties").get("activityelement").get("id")
        task_config = {
            'instance_id': self.instance_id,
            't_app_class': self.t_app_class,
            't_app_instance': self.t_app_instance,
            'task_type': task_type,
            'task_name': task_name,
            'task_id': task_id,
            'task_executor': task_executor,
        }
        if task_type == 'DefaultEvent':
            return Scp_Event_Task(name=task_name, **task_config)
        return Scp_Task(name=task_name, **task_config)
from prefect import Task, Flow


class MyTask(Task):
    def run(self):
        print("This will be logged!")


flow = Flow("log-stdout")
# log_stdout=True redirects the task's stdout (e.g. print output) to Prefect's logger
my_task = MyTask(log_stdout=True)
flow.add_task(my_task)
flow.run()
# Assumed import for this excerpt (Flow is used under the alias PrefectFlow below):
from prefect import Parameter, Flow as PrefectFlow

from gob import git_commands, sfdx_commands

create_sfdx_project = sfdx_commands.create_sfdx_project
pull_sfdc_code = sfdx_commands.pull_sfdc_code
git_init = git_commands.git_init
git_add = git_commands.git_add

username = Parameter("username")
my_project_name = Parameter("project_name")
metadata_items = Parameter("metadata_items")

# Flow Entry Point
flow = PrefectFlow("My SFDC Project Init")

# Create the SFDX Project
flow.add_task(create_sfdx_project)
create_sfdx_project.bind(project_name=my_project_name, flow=flow)

# Pull the sfdx code from the org.
flow.add_task(pull_sfdc_code)
pull_sfdc_code.set_upstream(create_sfdx_project, flow=flow)
pull_sfdc_code.bind(
    username=username,
    dest_dir=my_project_name,
    metadata_items=metadata_items,
    flow=flow,
)

# Initialize a git project.
flow.add_task(git_init)
git_init.set_upstream(pull_sfdc_code, flow=flow)
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 4 09:26:28 2020

@author: felipe
"""
from prefect import Parameter, Flow

flow = Flow("My imperative flow!")

# define some new tasks
# (`add` and `say_hello` are task instances assumed to be defined elsewhere)
name = Parameter("name")
second_add = add.copy()

# add our tasks to the flow
flow.add_task(add)
flow.add_task(second_add)
flow.add_task(say_hello)

# create non-data dependencies so that `say_hello` waits for `second_add` to finish.
say_hello.set_upstream(second_add, flow=flow)

# create data bindings
add.bind(x=1, y=2, flow=flow)
second_add.bind(x=add, y=100, flow=flow)
say_hello.bind(person=name, flow=flow)
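# The imperative flow above assumes `add` and `say_hello` task instances.
# A hypothetical sketch of what they might look like (illustrative only,
# not the author's definitions):
from prefect import task


@task
def add(x, y):
    return x + y


@task
def say_hello(person):
    print(f"Hello, {person}!")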
import random

from prefect import Flow, Task


class ANumber(Task):
    def run(self):
        return random.randint(0, 100)


flow = Flow('Using Operators')

# a = a_number()
# b = a_number()
# add = a + b
# sub = a - b
# lt = a < b

a = ANumber()
b = ANumber()
flow.add_task(a)
flow.add_task(b)

# Task operators like `+` need an active flow context to bind their result into:
with flow:
    add = a + b  # equivalent to a.__add__(b)
from prefect import task, Flow
from prefect.engine import signals


@task
def signal_task(msg):
    if msg == 'go!':
        print(msg)
        raise signals.SUCCESS(message='going!')
    elif msg == 'stop!':
        raise signals.FAIL(message='stopping!')
    elif msg == 'skip!':
        raise signals.SKIP(message='skipping!')


with Flow("My first flow") as flow:
    first_result = signal_task('go!')
    second_result = signal_task('stop!')

state = flow.run()


# A second, separate example: adding a @task-decorated function to a flow imperatively
@task
def number_task():
    print('42')
    return 42


f = Flow("example")
f.add_task(number_task)
print(f.tasks)
state = f.run()
def run_predefined_flow(self):
    flow = Flow("Run a Prefect Flow in Docker")

    # "Order coffee" task
    # task_config_1 = {
    #     'task_type': 'action',
    #     'task_name': '',
    #     'task_id': '',
    #     'coffee order id': '',
    # }
    # task_1 = Scp_Task('Order coffee', task_config_1)
    task_config_1 = {
        'task_type': 'action',
        'task_name': 'make coffee',
        'task_id': '',
    }
    task_1 = Scp_Task(name='make coffee', **task_config_1)

    task_config_2 = {
        'task_type': 'event',
        'task_name': 'coffee finished',
        'task_id': '',
    }
    # task_2 = Scp_Task.copy()
    task_2 = Scp_Event_Task(name='coffee finished', **task_config_2)

    task_config_3 = {
        'task_type': 'action',
        'task_name': 'send coffee',
        'task_id': '',
    }
    task_3 = Scp_Task(name='send coffee', **task_config_3)

    # add tasks to the flow
    flow.add_task(Scp_StartEvent_Task())
    flow.add_task(task_1)
    flow.add_task(task_2)
    flow.add_task(task_3)
    flow.add_task(Scp_EndEvent_Task())

    # create non-data dependencies
    task_2.set_upstream(task_1, flow=flow)
    task_3.set_upstream(task_2, flow=flow)
    # task_4.set_upstream(task_3, flow=flow)

    # create data bindings
    task_input_1 = {
        'msg': 'test_task_1',
        'url': 'http://39.106.6.6:8080/SCIDE/SCManager?action=executeContract&contractID=CoffeeFDU&operation=postMakeCoffee&arg=%22%22',
    }
    task_input_3 = {
        'msg': 'test_task_2',
        'url': '',
    }
    task_1.bind(**task_input_1, flow=flow)
    task_3.bind(**task_input_3, flow=flow)

    # start flow
    state = flow.run()