def test_workflow_copying_eopatches():
    """Check that two branches sharing one upstream node get independent results.

    A patch is created and two ``DATA`` features are initialised on it. Each
    branch then removes a different feature. The test asserts that every
    output still holds the feature that was removed only in the *other* branch.
    """
    shape = (2, 4, 4, 3)
    first_feature = FeatureType.DATA, "data1"
    second_feature = FeatureType.DATA, "data2"

    create_node = EONode(CreateEOPatchTask())
    init_node = EONode(
        InitializeFeatureTask([first_feature, second_feature], shape=shape, init_value=1),
        inputs=[create_node],
    )

    # Both removal branches hang off the same initialisation node.
    remove_node1 = EONode(RemoveFeatureTask([first_feature]), inputs=[init_node])
    remove_node2 = EONode(RemoveFeatureTask([second_feature]), inputs=[init_node])
    output_node1 = EONode(OutputTask(name="out1"), inputs=[remove_node1])
    output_node2 = EONode(OutputTask(name="out2"), inputs=[remove_node2])

    all_nodes = [create_node, init_node, remove_node1, remove_node2, output_node1, output_node2]
    outputs = EOWorkflow(all_nodes).execute().outputs
    patch_without_data1, patch_without_data2 = outputs["out1"], outputs["out2"]

    assert patch_without_data1 == EOPatch(data={"data2": np.ones(shape, dtype=np.uint8)})
    assert patch_without_data2 == EOPatch(data={"data1": np.ones(shape, dtype=np.uint8)})
def test_get_nodes():
    """Check ``EOWorkflow.get_nodes`` and that its nodes can be replayed by hand.

    The workflow is a single chain. The test asserts that ``get_nodes``
    returns the nodes in the order they were supplied, and that executing
    each returned node's task manually (feeding the previous result forward)
    gives the same value as the workflow's ``"out"`` output.
    """
    in_node = EONode(InputTask())
    inc_node0 = EONode(IncTask(), inputs=[in_node])
    inc_node1 = EONode(IncTask(), inputs=[inc_node0])
    inc_node2 = EONode(IncTask(), inputs=[inc_node1])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node2])

    eow = EOWorkflow([in_node, inc_node0, inc_node1, inc_node2, output_node])
    returned_nodes = eow.get_nodes()

    expected_order = [in_node, inc_node0, inc_node1, inc_node2, output_node]
    assert expected_order == returned_nodes, "Returned nodes differ from original nodes"

    arguments_dict = {in_node: {"val": 2}, inc_node0: {"d": 2}}
    workflow_res = eow.execute(arguments_dict)

    # Replay the chain by hand: each task receives the previous task's result
    # (nothing for the first node) plus any per-node keyword arguments.
    manual_res = []
    for node in returned_nodes:
        node_kwargs = arguments_dict.get(node, {})
        manual_res = [node.task.execute(*manual_res, **node_kwargs)]

    assert workflow_res.outputs["out"] == manual_res[0], (
        "Manually running returned nodes produces different results."
    )
def test_exception_handling():
    """Check how a workflow reports a task that raises.

    With default settings ``execute`` is expected to propagate
    ``CustomException``. With ``raise_errors=False`` the test asserts that no
    outputs are produced, the failing node is recorded in ``error_node_uid``,
    and stats exist only for the two nodes up to and including the failure.
    """
    input_node = EONode(InputTask(), name="xyz")
    exception_node = EONode(ExceptionTask(), inputs=[input_node])
    increase_node = EONode(IncTask(), inputs=[exception_node])
    workflow = EOWorkflow([input_node, exception_node, increase_node])

    with pytest.raises(CustomException):
        workflow.execute()

    results = workflow.execute(raise_errors=False)

    assert results.outputs == {}
    assert results.error_node_uid == exception_node.uid
    assert len(results.stats) == 2

    for node in (input_node, exception_node):
        node_stats = results.stats[node.uid]

        assert node_stats.node_uid == node.uid
        assert node_stats.node_name == node.name

        if node is not exception_node:
            # The node that ran cleanly must carry no exception information.
            assert node_stats.exception is None
            assert node_stats.exception_traceback is None
            continue

        assert isinstance(node_stats.exception, CustomException)
        assert node_stats.exception_traceback.startswith("Traceback")
def test_workflow_arguments():
    """Check that per-node execution arguments reach the right tasks.

    The same workflow is executed many times through a process pool and then
    a few times directly, each time with different ``"val"`` arguments for
    the two input nodes. The last run also passes ``"z"`` to the divide node.
    The expected outputs are the literal values asserted below.
    """
    input_node1 = EONode(InputTask())
    input_node2 = EONode(InputTask(), name="some name")
    divide_node = EONode(DivideTask(), inputs=(input_node1, input_node2), name="some name")
    output_node = EONode(OutputTask(name="output"), inputs=[divide_node])
    workflow = EOWorkflow([input_node1, input_node2, divide_node, output_node])

    def input_values(first, second):
        """Build the execution arguments for the two input nodes."""
        return {input_node1: {"val": first}, input_node2: {"val": second}}

    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        k2future = {}
        for k in range(2, 100):
            k2future[k] = executor.submit(workflow.execute, input_values(k**3, k**2))

        executor.shutdown()

        for k in range(2, 100):
            assert k2future[k].result().outputs["output"] == k

    result1 = workflow.execute(input_values(15, 3))
    assert result1.outputs["output"] == 5

    result2 = workflow.execute(input_values(6, 3))
    assert result2.outputs["output"] == 2

    result3 = workflow.execute({**input_values(6, 3), divide_node: {"z": 1}})
    assert result3.outputs[output_node.task.name] == 3
def test_output_task_in_workflow(test_eopatch_path, test_eopatch):
    """Check that only the ``OutputTask`` node contributes to workflow outputs.

    A loaded patch feeds both an output node and a dummy node. The test
    asserts that exactly one output exists and that it equals the fixture
    patch.
    """
    load = EONode(LoadTask(test_eopatch_path))
    output = EONode(OutputTask(name="result-name"), inputs=[load])
    dummy = EONode(DummyTask(), inputs=[load])

    results = EOWorkflow([load, output, dummy]).execute()

    assert len(results.outputs) == 1
    assert results.outputs["result-name"] == test_eopatch
def setUp(self):
    """Create ``self.workflow``: two input tasks feeding one divide task."""
    first_input, second_input = InputTask(), InputTask()
    divider = DivideTask()

    dependencies = [
        Dependency(task=first_input, inputs=[]),
        Dependency(task=second_input, inputs=[]),
        Dependency(task=divider, inputs=[first_input, second_input]),
    ]
    self.workflow = EOWorkflow(dependencies=dependencies)
def setUp(self):
    """Create ``self.workflow``: two independent ``FooTask`` roots feeding a third."""
    root_a, root_b, sink = FooTask(), FooTask(), FooTask()

    dependencies = [
        Dependency(task=root_a, inputs=[]),
        Dependency(task=root_b, inputs=[]),
        Dependency(task=sink, inputs=[root_a, root_b]),
    ]
    self.workflow = EOWorkflow(dependencies=dependencies)
def test_multiedge_workflow():
    """Check a workflow in which one node is listed twice as another's input.

    The divide node receives the same upstream node for both of its inputs.
    The nodes are also handed to ``EOWorkflow`` out of dependency order. The
    test asserts that the ``"out"`` output equals 1.
    """
    in_node = EONode(InputTask())
    inc_node = EONode(IncTask(), inputs=[in_node])
    # The same upstream node is deliberately used for both inputs.
    div_node = EONode(DivideTask(), inputs=[inc_node, inc_node])
    output_node = EONode(OutputTask(name="out"), inputs=[div_node])

    workflow = EOWorkflow([in_node, output_node, inc_node, div_node])
    workflow_res = workflow.execute({in_node: {"val": 2}})

    assert workflow_res.outputs["out"] == 1
def test_resolve_dependencies(self, edges):
    """Check that dependency scheduling respects every edge of the graph.

    For a cyclic graph, scheduling must raise ``CyclicDependencyError``.
    Otherwise every edge ``(u, v)`` must have ``u`` placed before ``v`` in
    the schedule returned by ``EOWorkflow._schedule_dependencies``.

    :param edges: Iterable of ``(u, v)`` vertex pairs. It is iterated again
        after the graph is built, so it must be re-iterable.
    """
    dag = DirectedGraph.from_edges(edges)

    if DirectedGraph._is_cyclic(dag):
        with self.assertRaises(CyclicDependencyError):
            EOWorkflow._schedule_dependencies(dag)
        return

    schedule = EOWorkflow._schedule_dependencies(dag)
    ver2pos = {u: i for i, u in enumerate(schedule)}

    # ``all`` is vacuously true for an empty edge list. A ``functools.reduce``
    # without an initial value raises ``TypeError`` on an empty sequence.
    self.assertTrue(all(ver2pos[u] < ver2pos[v] for u, v in edges))
def process(self, arguments):
    """Run the callback given in ``arguments["process"]`` as an EOWorkflow.

    :param arguments: Process arguments. ``"data"`` and ``"process"`` are
        both required, and ``"data"`` must be an ``xr.DataArray``.
    :return: The entry of the workflow results keyed by the result task
        returned from ``generate_workflow_dependencies``.
    """
    # "data" is validated only for presence and type; it is not used here.
    self.validate_parameter(arguments, "data", required=True, allowed_types=[xr.DataArray])
    process_definition = self.validate_parameter(arguments, "process", required=True)

    dependencies, result_task = self.generate_workflow_dependencies(
        process_definition["callback"], arguments
    )

    all_results = EOWorkflow(dependencies).execute({})
    return all_results[result_task]
def _execute_process_graph(process_graph, job_id, variables):
    """Instantiate one task per process-graph node, run them, return the result.

    :param process_graph: Mapping of node name to node definition. Each
        definition is read for ``'process_id'``, ``'arguments'`` and the
        optional ``'result'`` flag.
    :param job_id: Passed to every task constructor and used in log messages.
    :param variables: Passed unchanged to every task constructor.
    :return: The ``results`` attribute of the task flagged as the result node.
    :raises process.VariableValueMissing: If the node flagged as result has a
        ``process_id`` other than ``'save_result'``.

    NOTE(review): if no node is flagged with ``'result'``, ``result_task``
    stays ``None`` and the final attribute access fails. Confirm whether
    callers guarantee that a result node exists.
    """
    # This is what we are aiming for:
    #
    #   loadco1 = load_collectionEOTask(process_graph["loadco1"]["arguments"])
    #   ndvi1 = ndviEOTask(process_graph["ndvi1"]["arguments"])
    #   reduce1 = reduceEOTask(process_graph["reduce1"]["arguments"])
    #   tasks = [
    #       (loadco1, [], "Node name: load1"),
    #       (ndvi1, [loadco1], "Node name: ndvi1"),
    #       (reduce1, [ndvi1], "Node name: reduce1"),
    #   ]
    #
    #   workflow = EOWorkflow(tasks)
    #   workflow.execute({})

    # Pass 1: create every task and index it by node name, so dependencies
    # can be looked up by name in the second pass.
    process_package = sys.modules[__name__].process
    tasks_by_name = {}
    result_task = None

    for node_name, node_definition in process_graph.items():
        # The task class is found by convention: module `<process_id>`,
        # class `<process_id>EOTask`, e.g. load_collection.load_collectionEOTask.
        process_id = node_definition['process_id']
        task_module_name = '{process_id}'.format(process_id=process_id)
        task_class_name = '{process_id}EOTask'.format(process_id=process_id)
        task_class = getattr(getattr(process_package, task_module_name), task_class_name)

        new_task = task_class(node_definition['arguments'], job_id, logger, variables, node_name)
        tasks_by_name[node_name] = new_task

        if not node_definition.get('result', False):
            continue

        result_task = new_task
        if process_id != 'save_result':
            raise process.VariableValueMissing(
                "No value specified for process graph variable 'save_result'."
            )

    # Pass 2: create a list of tasks for the workflow.
    tasks = [
        (task, [tasks_by_name[dep_name] for dep_name in task.depends_on()], 'Node name: ' + node_name)
        for node_name, task in tasks_by_name.items()
    ]

    workflow = EOWorkflow(tasks)
    logger.debug("[{}]: executing workflow...".format(job_id))
    workflow.execute({})
    logger.debug("[{}]: workflow executed.".format(job_id))

    return result_task.results
def process(self, arguments):
    """Reduce ``data`` along ``dimension``, optionally with a reducer callback.

    Without a reducer the dimension must hold at most one value and is
    squeezed away. With a reducer, the dimension is pushed onto the
    ``"reduce_by"`` attribute of ``arguments["data"]``, the callback is run
    as an EOWorkflow, and the dimension is popped from the result's
    ``"reduce_by"`` attribute again.

    :param arguments: Process arguments. ``"data"`` (``xr.DataArray``) and
        ``"dimension"`` (``str``) are required. ``"reducer"``,
        ``"target_dimension"`` and ``"binary"`` are optional.
    :return: The squeezed array, or the callback's result (concatenated
        along ``target_dimension`` when one is given).
    :raises ProcessArgumentInvalid: If ``dimension`` is not a dimension of
        ``data``, or if no reducer is given and the dimension has more than
        one value.
    """
    data = self.validate_parameter(arguments, "data", required=True, allowed_types=[xr.DataArray])
    dimension = self.validate_parameter(arguments, "dimension", required=True, allowed_types=[str])
    reducer = self.validate_parameter(arguments, "reducer", default=None)
    target_dimension = self.validate_parameter(
        arguments, "target_dimension", default=None, allowed_types=[str, type(None)]
    )
    # "binary" is validated only; its value is not used further down.
    self.validate_parameter(arguments, "binary", default=False, allowed_types=[bool])

    if dimension not in data.dims:
        raise ProcessArgumentInvalid(
            "The argument 'dimension' in process 'reduce' is invalid: Dimension '{}' does not exist in data."
            .format(dimension))

    if reducer is None:
        if data[dimension].size > 1:
            raise ProcessArgumentInvalid(
                "The argument 'dimension' in process 'reduce' is invalid: Dimension '{}' has more than one value, but reducer is not specified."
                .format(dimension))
        return data.squeeze(dimension, drop=True)

    # Record which dimension is being reduced before the callback runs.
    if data.attrs.get("reduce_by"):
        arguments["data"].attrs["reduce_by"].append(dimension)
    else:
        arguments["data"].attrs["reduce_by"] = [dimension]

    dependencies, result_task = self.generate_workflow_dependencies(reducer["callback"], arguments)
    all_results = EOWorkflow(dependencies).execute({})

    result = all_results[result_task]
    result.attrs["reduce_by"].pop()

    if target_dimension:
        result = xr.concat(result, dim=target_dimension)
    return result
def to_workflow(self):
    """Build an ``EOWorkflow`` from a placeholder load task plus ``self.tasks(...)``.

    :return: Workflow whose first entry is the ``"Download bands"`` load
        task, followed by whatever ``self.tasks`` yields for it.
    """
    # Dummy to show correct graph
    input_task = LoadTask(".")

    dependencies = [(input_task, [], "Download bands")]
    dependencies.extend(self.tasks(input_task))
    return EOWorkflow(dependencies)
def test_trivial_workflow(self):
    """Check the mapping-style interface of the results of a one-task workflow.

    The test asserts that the result is a ``WorkflowResults`` with exactly
    one entry, that the entry's key is an ``EOTask`` and its value is 42,
    and that indexing by the dependency also gives 42.
    """
    dummy_task = DummyTask()
    dependency = Dependency(dummy_task, [])
    result = EOWorkflow([dependency]).execute()

    self.assertTrue(isinstance(result, WorkflowResults))

    self.assertEqual(len(result), 1)
    self.assertEqual(len(result.keys()), 1)
    self.assertEqual(len(result.values()), 1)

    items = list(result.items())
    self.assertEqual(len(items), 1)

    only_key, only_value = items[0][0], items[0][1]
    self.assertTrue(isinstance(only_key, EOTask))
    self.assertEqual(only_value, 42)

    self.assertEqual(result[dependency], 42)
def test_workflow_arguments(self):
    """Check that per-task execution arguments reach the right tasks.

    The workflow is executed many times through a process pool and then a
    few times directly, with different ``'val'`` arguments for the two input
    tasks. The last run also passes ``'z'`` to the divide task. The expected
    results are the literal values asserted below.
    """
    input_task1, input_task2 = InputTask(), InputTask()
    divide_task = DivideTask()

    workflow = EOWorkflow(dependencies=[
        Dependency(task=input_task1, inputs=[]),
        Dependency(task=input_task2, inputs=[]),
        Dependency(task=divide_task, inputs=[input_task1, input_task2]),
    ])

    def input_values(first, second):
        """Build the execution arguments for the two input tasks."""
        return {input_task1: {'val': first}, input_task2: {'val': second}}

    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as executor:
        k2future = {}
        for k in range(2, 100):
            k2future[k] = executor.submit(workflow.execute, input_values(k ** 3, k ** 2))

        executor.shutdown()

        for k in range(2, 100):
            self.assertEqual(k2future[k].result()[divide_task], k)

    result1 = workflow.execute(input_values(15, 3))
    self.assertEqual(result1[divide_task], 5)

    result2 = workflow.execute(input_values(6, 3))
    self.assertEqual(result2[divide_task], 2)

    result3 = workflow.execute({**input_values(6, 3), divide_task: {'z': 1}})
    self.assertEqual(result3[divide_task], 3)
class TestGraph(unittest.TestCase):
    """Checks the graph objects exposed by a small three-task workflow."""

    def setUp(self):
        """Create ``self.workflow``: two ``FooTask`` roots feeding a third."""
        root_a, root_b, sink = FooTask(), FooTask(), FooTask()

        dependencies = [
            Dependency(task=root_a, inputs=[]),
            Dependency(task=root_b, inputs=[]),
            Dependency(task=sink, inputs=[root_a, root_b]),
        ]
        self.workflow = EOWorkflow(dependencies=dependencies)

    def test_graph_nodes_and_edges(self):
        """Both ``get_dot`` and ``dependency_graph`` must return a ``Digraph``."""
        dot_graph = self.workflow.get_dot()
        self.assertTrue(isinstance(dot_graph, Digraph))

        dependency_graph = self.workflow.dependency_graph()
        self.assertTrue(isinstance(dependency_graph, Digraph))
def test_keyboard_interrupt(simple_cluster):
    """Check that ``RayExecutor.run`` surfaces a failing task as a Ray error.

    Ten identical executions of a single ``KeyboardExceptionTask`` node are
    submitted. The run is expected to raise either ``TaskCancelledError`` or
    ``RayTaskError``.
    """
    exception_node = EONode(KeyboardExceptionTask())
    workflow = EOWorkflow([exception_node])

    execution_kwargs = [{exception_node: {"arg1": 1}} for _ in range(10)]

    expected_errors = (ray.exceptions.TaskCancelledError, ray.exceptions.RayTaskError)
    with pytest.raises(expected_errors):
        RayExecutor(workflow, execution_kwargs).run()
class TestGraph(unittest.TestCase):
    """Smoke test for the graph accessors of an input/input/divide workflow."""

    def setUp(self):
        """Create ``self.workflow``: two input tasks feeding one divide task."""
        first_input, second_input = InputTask(), InputTask()
        divider = DivideTask()

        dependencies = [
            Dependency(task=first_input, inputs=[]),
            Dependency(task=second_input, inputs=[]),
            Dependency(task=divider, inputs=[first_input, second_input]),
        ]
        self.workflow = EOWorkflow(dependencies=dependencies)

    def test_graph_nodes_and_edges(self):
        """Call the graph accessors and copy the dot source into a buffer.

        NOTE(review): nothing is asserted here; the test only fails if one
        of these calls raises.
        """
        dot_graph = self.workflow.get_dot()

        source_buffer = StringIO()
        source_buffer.write(dot_graph.source)
        source_buffer.seek(0)

        self.workflow.dependency_graph()
def test_keyboard_interrupt():
    """Check that ``EOExecutor.run`` propagates ``KeyboardInterrupt``.

    The same ten executions are run with one worker, with three workers and
    ``multiprocess=True``, and with three workers and ``multiprocess=False``.
    """
    exception_node = EONode(KeyboardExceptionTask())
    workflow = EOWorkflow([exception_node])

    execution_kwargs = [{exception_node: {"arg1": 1}} for _ in range(10)]

    run_configurations = (
        {"workers": 1},
        {"workers": 3, "multiprocess": True},
        {"workers": 3, "multiprocess": False},
    )
    for run_options in run_configurations:
        with pytest.raises(KeyboardInterrupt):
            EOExecutor(workflow, execution_kwargs).run(**run_options)
def setUpClass(cls):
    """Create the shared workflow and the execution arguments used with it.

    The workflow has one ``ExampleTask`` root and a second ``ExampleTask``
    that lists the root twice as its input. The root is declared as a plain
    tuple and the second task as a ``Dependency``.
    """
    root_task = ExampleTask()
    final_dependency = Dependency(task=ExampleTask(), inputs=[root_task, root_task])

    cls.workflow = EOWorkflow([(root_task, []), final_dependency])

    cls.execution_args = [
        {root_task: {'arg1': 1}},
        {},
        {root_task: {'arg1': 3, 'arg3': 10}},
        {root_task: {'arg1': None}},
    ]
def setUpClass(cls):
    """Create the shared tasks, workflow and execution arguments.

    ``cls.task`` is the root and ``cls.final_task`` lists it twice as its
    input. The root is declared as a plain tuple and the final task as a
    ``Dependency``.
    """
    cls.task = ExampleTask()
    cls.final_task = FooTask()

    final_dependency = Dependency(task=cls.final_task, inputs=[cls.task, cls.task])
    cls.workflow = EOWorkflow([(cls.task, []), final_dependency])

    cls.execution_args = [
        {cls.task: {'arg1': 1}},
        {},
        {cls.task: {'arg1': 3, 'arg3': 10}},
        {cls.task: {'arg1': None}},
    ]
def test_bad_structure_exceptions():
    """Check that ``EOWorkflow`` rejects malformed node lists with ``ValueError``.

    A well-formed chain must construct. A duplicated node, a missing
    intermediate node, and a cycle must each raise.
    """
    in_node = EONode(InputTask())
    inc_node0 = EONode(IncTask(), inputs=[in_node])
    inc_node1 = EONode(IncTask(), inputs=[inc_node0])
    inc_node2 = EONode(IncTask(), inputs=[inc_node1])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node2])

    # This one must work
    EOWorkflow([in_node, inc_node0, inc_node1, inc_node2, output_node])

    # Duplicated node
    nodes_with_duplicate = [in_node, inc_node0, inc_node0, inc_node1, inc_node2, output_node]
    with pytest.raises(ValueError):
        EOWorkflow(nodes_with_duplicate)

    # Missing node
    nodes_with_gap = [in_node, inc_node0, inc_node2, output_node]
    with pytest.raises(ValueError):
        EOWorkflow(nodes_with_gap)

    # Create circle (much more difficult now). The attribute is set through
    # the parent class, presumably because EONode itself blocks assignment.
    super(EONode, inc_node0).__setattr__("inputs", (inc_node1,))
    with pytest.raises(ValueError):
        EOWorkflow([in_node, inc_node0, inc_node1, inc_node2, output_node])
def test_workflow_results():
    """Check the structure of ``WorkflowResults`` from a two-node workflow.

    Covers the outputs mapping, ``drop_outputs`` returning a separate object
    with empty outputs, the ordering of the start and end timestamps, and
    the per-node ``NodeStats`` entries.
    """
    input_node = EONode(InputTask())
    output_node = EONode(OutputTask(name="out"), inputs=[input_node])

    results = EOWorkflow([input_node, output_node]).execute({input_node: {"val": 10}})

    assert isinstance(results, WorkflowResults)
    assert results.outputs == {"out": 10}

    stripped_results = results.drop_outputs()
    assert stripped_results.outputs == {}
    # drop_outputs must hand back a different object, not the original.
    assert stripped_results is not results

    assert isinstance(results.start_time, dt.datetime)
    assert isinstance(results.end_time, dt.datetime)
    assert results.start_time < results.end_time < dt.datetime.now()

    assert isinstance(results.stats, dict)
    assert len(results.stats) == 2
    for node in (input_node, output_node):
        assert isinstance(results.stats.get(node.uid), NodeStats)
def test_report_creation(self):
    """Check that ``EOExecutor.make_report`` returns something after a run."""
    example_task = ExampleTask()
    workflow = EOWorkflow(dependencies=[Dependency(task=example_task, inputs=[])])
    execution_args = [{'arg1': 1}]

    # The executor is given a temporary directory so nothing is left behind.
    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()

        self.assertIsNotNone(executor.make_report())
def test_execution_errors(self):
    """Check that a failing execution has an ``'error'`` entry in its stats."""
    failing_task = RaiserErrorTask()
    workflow = EOWorkflow(dependencies=[Dependency(task=failing_task, inputs=[])])
    execution_args = [{'arg1': 1}]

    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()

        first_execution_stats = executor.execution_stats[0]
        self.assertTrue('error' in first_execution_stats)
def test_run_after_interrupt(workflow, execution_kwargs, simple_cluster):
    """Check that a ``RayExecutor`` still works after another executor failed.

    The regular executor is run, then a separate executor whose workflow
    raises is run and expected to fail, then the regular executor is run
    again. The outputs of its two runs must match.
    """
    foo_node = EONode(FooTask())
    exception_node = EONode(KeyboardExceptionTask(), inputs=[foo_node])
    failing_workflow = EOWorkflow([foo_node, exception_node])
    failing_executor = RayExecutor(failing_workflow, [{}])

    # removes args for exception
    executor = RayExecutor(workflow, execution_kwargs[:-1])

    result_preexception = executor.run()

    expected_errors = (ray.exceptions.TaskCancelledError, ray.exceptions.RayTaskError)
    with pytest.raises(expected_errors):
        failing_executor.run()

    result_postexception = executor.run()

    outputs_before = [res.outputs for res in result_preexception]
    outputs_after = [res.outputs for res in result_postexception]
    assert outputs_before == outputs_after
def test_execution_stats(self):
    """Check that the executor records one stats entry per execution."""
    example_task = ExampleTask()
    workflow = EOWorkflow(dependencies=[Dependency(task=example_task, inputs=[])])
    execution_args = [{'arg1': 1}, {'arg1': 2}]

    with tempfile.TemporaryDirectory() as tmpdirname:
        executor = EOExecutor(workflow, execution_args, file_path=tmpdirname)
        executor.run()

        self.assertEqual(len(executor.execution_stats), 2)
def test_linear_workflow(self):
    """Check a workflow built with ``EOWorkflow.make_linear_workflow``.

    Three tasks are chained and each receives its own keyword argument. The
    result for the last task is expected to equal ``(2 + 2)**3``.
    """
    in_task, inc_task, pow_task = InputTask(), Inc(), Pow()
    eow = EOWorkflow.make_linear_workflow(in_task, inc_task, pow_task)

    execution_arguments = {
        in_task: {'val': 2},
        inc_task: {'d': 2},
        pow_task: {'n': 3},
    }
    res = eow.execute(execution_arguments)

    self.assertEqual(res[pow_task], (2 + 2)**3)
def test_get_node_with_uid():
    """Check ``EOWorkflow.get_node_with_uid`` for known and unknown uids.

    Every node must be found through its own uid. An unknown uid gives
    ``None`` by default and raises ``KeyError`` with ``fail_if_missing=True``.
    """
    in_node = EONode(InputTask())
    inc_node = EONode(IncTask(), inputs=[in_node])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node])

    eow = EOWorkflow([in_node, inc_node, output_node])

    for node in (in_node, inc_node, output_node):
        assert node == eow.get_node_with_uid(node.uid)

    assert eow.get_node_with_uid("nonexsitant") is None

    with pytest.raises(KeyError):
        eow.get_node_with_uid("nonexsitant", fail_if_missing=True)
def test_workflows_reusing_nodes():
    """Check that two workflows built from the same nodes give the same output."""
    in_node = EONode(InputTask())
    node1 = EONode(IncTask(), inputs=[in_node])
    node2 = EONode(IncTask(), inputs=[node1])
    out_node = EONode(OutputTask(name="out"), inputs=[node2])
    input_args = {in_node: {"val": 2}, node2: {"d": 2}}

    # Both workflows are built from the very same node objects.
    original = EOWorkflow([in_node, node1, node2, out_node])
    node_reuse = EOWorkflow([in_node, node1, node2, out_node])

    original_output = original.execute(input_args).outputs["out"]
    reused_output = node_reuse.execute(input_args).outputs["out"]
    assert original_output == reused_output