def test_exception_handling():
    """Check how a workflow reports a failing node, with and without re-raising.

    The workflow is input -> exception -> increase. By default ``execute``
    is expected to propagate ``CustomException``; with ``raise_errors=False``
    the failure must be recorded in the returned results instead.
    """
    source = EONode(InputTask(), name="xyz")
    failing = EONode(ExceptionTask(), inputs=[source])
    downstream = EONode(IncTask(), inputs=[failing])
    workflow = EOWorkflow([source, failing, downstream])

    # Default mode: the exception escapes from execute().
    with pytest.raises(CustomException):
        workflow.execute()

    # Non-raising mode: the failure is described by the results object.
    results = workflow.execute(raise_errors=False)
    assert results.outputs == {}
    assert results.error_node_uid == failing.uid
    # Stats are expected only for the input node and the failing node.
    assert len(results.stats) == 2

    for node in (source, failing):
        stats = results.stats[node.uid]
        assert stats.node_uid == node.uid
        assert stats.node_name == node.name

        if node is not failing:
            # A node that succeeded carries no exception information.
            assert stats.exception is None
            assert stats.exception_traceback is None
            continue

        assert isinstance(stats.exception, CustomException)
        assert stats.exception_traceback.startswith("Traceback")
def test_workflow_arguments():
    """Run one workflow with many different per-node argument sets.

    The same ``EOWorkflow`` instance is executed in a process pool with
    different input values, then a few more times in the current process,
    including once with an extra ``z`` argument for the divide node.
    """
    input_node1 = EONode(InputTask())
    input_node2 = EONode(InputTask(), name="some name")
    # The divide node deliberately shares its name with input_node2.
    divide_node = EONode(DivideTask(), inputs=(input_node1, input_node2), name="some name")
    output_node = EONode(OutputTask(name="output"), inputs=[divide_node])
    workflow = EOWorkflow([input_node1, input_node2, divide_node, output_node])

    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as pool:
        futures = {}
        for k in range(2, 100):
            call_args = {input_node1: {"val": k**3}, input_node2: {"val": k**2}}
            futures[k] = pool.submit(workflow.execute, call_args)
        pool.shutdown()

        # Inputs k**3 and k**2 are expected to produce k.
        for k, future in futures.items():
            assert future.result().outputs["output"] == k

    # Sequential runs in this process: (first value, expected output).
    for first_value, expected in ((15, 5), (6, 2)):
        outcome = workflow.execute({input_node1: {"val": first_value}, input_node2: {"val": 3}})
        assert outcome.outputs["output"] == expected

    # Passing an additional argument to the divide node changes the result.
    with_extra = workflow.execute(
        {
            input_node1: {"val": 6},
            input_node2: {"val": 3},
            divide_node: {"z": 1},
        }
    )
    assert with_extra.outputs[output_node.task.name] == 3
def test_workflows_reusing_nodes():
    """Two workflows built from the same node objects must give equal output."""
    in_node = EONode(InputTask())
    node1 = EONode(IncTask(), inputs=[in_node])
    node2 = EONode(IncTask(), inputs=[node1])
    out_node = EONode(OutputTask(name="out"), inputs=[node2])
    chain = (in_node, node1, node2, out_node)

    input_args = {in_node: {"val": 2}, node2: {"d": 2}}

    # Each workflow gets its own list, but the node instances are shared.
    original = EOWorkflow(list(chain))
    node_reuse = EOWorkflow(list(chain))

    original_output = original.execute(input_args).outputs["out"]
    reused_output = node_reuse.execute(input_args).outputs["out"]
    assert original_output == reused_output
def test_workflow_copying_eopatches():
    """Two branches that remove different features must not affect each other.

    Both branches start from the same initialised node; one removes
    ``data1`` and the other removes ``data2``. Each output is expected to
    still contain the feature that the other branch removed.
    """
    shape = (2, 4, 4, 3)
    feature1 = FeatureType.DATA, "data1"
    feature2 = FeatureType.DATA, "data2"

    create_node = EONode(CreateEOPatchTask())
    init_task = InitializeFeatureTask([feature1, feature2], shape=shape, init_value=1)
    init_node = EONode(init_task, inputs=[create_node])

    # Both removal branches hang off the same init_node.
    remove_node1 = EONode(RemoveFeatureTask([feature1]), inputs=[init_node])
    remove_node2 = EONode(RemoveFeatureTask([feature2]), inputs=[init_node])
    output_node1 = EONode(OutputTask(name="out1"), inputs=[remove_node1])
    output_node2 = EONode(OutputTask(name="out2"), inputs=[remove_node2])

    workflow = EOWorkflow(
        [
            create_node,
            init_node,
            remove_node1,
            remove_node2,
            output_node1,
            output_node2,
        ]
    )
    outputs = workflow.execute().outputs
    eop1 = outputs["out1"]
    eop2 = outputs["out2"]

    expected1 = EOPatch(data={"data2": np.ones(shape, dtype=np.uint8)})
    assert eop1 == expected1
    expected2 = EOPatch(data={"data1": np.ones(shape, dtype=np.uint8)})
    assert eop2 == expected2
def test_get_nodes():
    """Check ``EOWorkflow.get_nodes`` and manual execution of the returned nodes.

    The workflow is a linear chain, so the nodes returned by ``get_nodes``
    can be run one after another by hand, feeding each task the previous
    task's result. That manual result must match the workflow output.
    """
    in_node = EONode(InputTask())
    inc_node0 = EONode(IncTask(), inputs=[in_node])
    inc_node1 = EONode(IncTask(), inputs=[inc_node0])
    inc_node2 = EONode(IncTask(), inputs=[inc_node1])
    output_node = EONode(OutputTask(name="out"), inputs=[inc_node2])

    eow = EOWorkflow([in_node, inc_node0, inc_node1, inc_node2, output_node])

    returned_nodes = eow.get_nodes()

    assert [
        in_node,
        inc_node0,
        inc_node1,
        inc_node2,
        output_node,
    ] == returned_nodes, "Returned nodes differ from original nodes"

    arguments_dict = {in_node: {"val": 2}, inc_node0: {"d": 2}}
    workflow_res = eow.execute(arguments_dict)

    # manual_res holds the previous task's result as a one-element list so it
    # can be unpacked as positional input; it is empty for the first node.
    manual_res = []
    for node in returned_nodes:
        manual_res = [node.task.execute(*manual_res, **arguments_dict.get(node, {}))]

    assert workflow_res.outputs["out"] == manual_res[0], "Manually running returned nodes produces different results."
def test_workflow_arguments(self):
    """Run one Dependency-based workflow with many per-task argument sets.

    The workflow is executed in a process pool with different input values
    and then a few more times in the current process, including once with
    an extra ``z`` argument for the divide task. Results are looked up by
    the divide task.
    """
    input_task1 = InputTask()
    input_task2 = InputTask()
    divide_task = DivideTask()

    dependencies = [
        Dependency(task=input_task1, inputs=[]),
        Dependency(task=input_task2, inputs=[]),
        Dependency(task=divide_task, inputs=[input_task1, input_task2]),
    ]
    workflow = EOWorkflow(dependencies=dependencies)

    with concurrent.futures.ProcessPoolExecutor(max_workers=5) as pool:
        futures = {}
        for k in range(2, 100):
            call_args = {input_task1: {'val': k ** 3}, input_task2: {'val': k ** 2}}
            futures[k] = pool.submit(workflow.execute, call_args)
        pool.shutdown()

        # Inputs k**3 and k**2 are expected to produce k.
        for k, future in futures.items():
            self.assertEqual(future.result()[divide_task], k)

    # Sequential runs in this process: (first value, expected result).
    for first_value, expected in ((15, 5), (6, 2)):
        outcome = workflow.execute({input_task1: {'val': first_value}, input_task2: {'val': 3}})
        self.assertEqual(outcome[divide_task], expected)

    # Passing an additional argument to the divide task changes the result.
    with_extra = workflow.execute(
        {
            input_task1: {'val': 6},
            input_task2: {'val': 3},
            divide_task: {'z': 1},
        }
    )
    self.assertEqual(with_extra[divide_task], 3)
def test_output_task_in_workflow(test_eopatch_path, test_eopatch):
    """Only the ``OutputTask`` node should contribute to ``results.outputs``.

    A second branch running ``DummyTask`` is attached to the same load node;
    the outputs must contain the single named result.
    """
    load = EONode(LoadTask(test_eopatch_path))
    output = EONode(OutputTask(name="result-name"), inputs=[load])
    side_branch = EONode(DummyTask(), inputs=[load])

    workflow = EOWorkflow([load, output, side_branch])
    outputs = workflow.execute().outputs

    assert len(outputs) == 1
    assert outputs["result-name"] == test_eopatch
def test_multiedge_workflow():
    """A node may appear more than once among another node's inputs.

    The divide node receives the same increment node as both of its inputs,
    and the workflow is expected to output 1.
    """
    in_node = EONode(InputTask())
    inc_node = EONode(IncTask(), inputs=[in_node])
    # Same upstream node passed twice: two edges between the same pair.
    div_node = EONode(DivideTask(), inputs=[inc_node, inc_node])
    output_node = EONode(OutputTask(name="out"), inputs=[div_node])

    # Nodes are handed over in a non-dependency order on purpose.
    workflow = EOWorkflow([in_node, output_node, inc_node, div_node])

    outcome = workflow.execute({in_node: {"val": 2}})
    assert outcome.outputs["out"] == 1
def _execute_process_graph(process_graph, job_id, variables):
    """Build an EOWorkflow from a process graph, run it, and return the result.

    Each graph node is turned into a task whose class is looked up by the
    node's ``process_id`` (module ``<process_id>``, class
    ``<process_id>EOTask``) under this module's ``process`` attribute.
    Conceptually the goal is equivalent to writing by hand:

        loadco1 = load_collectionEOTask(process_graph["loadco1"]["arguments"])
        ndvi1 = ndviEOTask(process_graph["ndvi1"]["arguments"])
        reduce1 = reduceEOTask(process_graph["reduce1"]["arguments"])
        tasks = [
            (loadco1, [], "Node name: load1"),
            (ndvi1, [loadco1], "Node name: ndvi1"),
            (reduce1, [ndvi1], "Node name: reduce1"),
        ]
        workflow = EOWorkflow(tasks)
        workflow.execute({})

    :param process_graph: mapping of node name to node definition; each
        definition is read for 'process_id', 'arguments' and optional 'result'
    :param job_id: passed to every task and used in log messages
    :param variables: passed through to every task
    :return: the ``results`` attribute of the task flagged as the result node
    :raises process.VariableValueMissing: if the node flagged with 'result'
        is not a 'save_result' process
    """
    # First pass: instantiate every task and index it by node name, so that
    # dependencies can be resolved by name in the second pass.
    tasks_by_name = {}
    result_task = None
    process_package = sys.modules[__name__].process
    for node_name, node_definition in process_graph.items():
        process_id = node_definition['process_id']
        module_name = '{process_id}'.format(process_id=process_id)
        class_name = '{process_id}EOTask'.format(process_id=process_id)
        task_class = getattr(getattr(process_package, module_name), class_name)

        task = task_class(node_definition['arguments'], job_id, logger, variables, node_name)
        tasks_by_name[node_name] = task

        if not node_definition.get('result', False):
            continue

        result_task = task
        if process_id != 'save_result':
            raise process.VariableValueMissing(
                "No value specified for process graph variable 'save_result'."
            )

    # Second pass: describe each task together with the tasks it depends on.
    tasks = [
        (task, [tasks_by_name[dep_name] for dep_name in task.depends_on()], 'Node name: ' + node_name)
        for node_name, task in tasks_by_name.items()
    ]

    workflow = EOWorkflow(tasks)
    logger.debug("[{}]: executing workflow...".format(job_id))
    # The return value of execute() is not used; the result is read from the
    # result task itself below.
    workflow.execute({})
    logger.debug("[{}]: workflow executed.".format(job_id))

    # NOTE(review): if no node carries a truthy 'result' flag, result_task is
    # still None here and this attribute access raises AttributeError.
    return result_task.results
def process(self, arguments):
    """Run the callback given in ``arguments["process"]`` as a sub-workflow.

    :param arguments: process arguments; "data" and "process" are required
    :return: the sub-workflow result stored under the callback's result task
    """
    # "data" is validated here but not used directly in this method; the
    # whole ``arguments`` mapping is handed to the dependency generator.
    self.validate_parameter(arguments, "data", required=True, allowed_types=[xr.DataArray])
    callback_spec = self.validate_parameter(arguments, "process", required=True)

    dependencies, result_task = self.generate_workflow_dependencies(callback_spec["callback"], arguments)

    return EOWorkflow(dependencies).execute({})[result_task]
def process(self, arguments):
    """Reduce ``data`` along ``dimension``, optionally with a reducer callback.

    Without a reducer the dimension is squeezed out, which is only allowed
    when it holds a single value. With a reducer, the callback is run as a
    sub-workflow; the dimension is recorded in the data's ``reduce_by``
    attribute for the duration of that run.

    :param arguments: process arguments; reads "data", "dimension",
        "reducer", "target_dimension" and "binary"
    :return: the squeezed data, or the result of the reducer sub-workflow
    :raises ProcessArgumentInvalid: if ``dimension`` is not a dimension of
        the data, or if it has more than one value and no reducer is given
    """
    data = self.validate_parameter(arguments, "data", required=True, allowed_types=[xr.DataArray])
    dimension = self.validate_parameter(arguments, "dimension", required=True, allowed_types=[str])
    reducer = self.validate_parameter(arguments, "reducer", default=None)
    target_dimension = self.validate_parameter(
        arguments, "target_dimension", default=None, allowed_types=[str, type(None)]
    )
    # "binary" is validated, but its value is not used in this method.
    self.validate_parameter(arguments, "binary", default=False, allowed_types=[bool])

    if dimension not in data.dims:
        raise ProcessArgumentInvalid(
            (
                "The argument 'dimension' in process 'reduce' is invalid: "
                "Dimension '{}' does not exist in data."
            ).format(dimension)
        )

    if reducer is None:
        # No reducer: the dimension can only be dropped if it is trivial.
        if data[dimension].size > 1:
            raise ProcessArgumentInvalid(
                (
                    "The argument 'dimension' in process 'reduce' is invalid: "
                    "Dimension '{}' has more than one value, but reducer is not specified."
                ).format(dimension)
            )
        return data.squeeze(dimension, drop=True)

    # Record the dimension being reduced on the input data's attributes.
    if data.attrs.get("reduce_by"):
        arguments["data"].attrs["reduce_by"].append(dimension)
    else:
        arguments["data"].attrs["reduce_by"] = [dimension]

    dependencies, result_task = self.generate_workflow_dependencies(reducer["callback"], arguments)
    reduced = EOWorkflow(dependencies).execute({})[result_task]

    # Drop the last recorded dimension from the result's attributes.
    reduced.attrs["reduce_by"].pop()

    if target_dimension:
        reduced = xr.concat(reduced, dim=target_dimension)

    return reduced
def test_trivial_workflow(self):
    """A workflow with one dependency-free task yields a one-entry result.

    The result is checked through the container interface (``len``,
    ``keys``, ``values``, ``items``) and by indexing with the dependency.
    """
    task = DummyTask()
    dep = Dependency(task, [])
    workflow = EOWorkflow([dep])

    result = workflow.execute()

    # assertIsInstance reports the actual type on failure, unlike
    # assertTrue(isinstance(...)), which only reports "False is not true".
    self.assertIsInstance(result, WorkflowResults)
    self.assertEqual(len(result), 1)
    self.assertEqual(len(result.keys()), 1)
    self.assertEqual(len(result.values()), 1)

    items = list(result.items())
    self.assertEqual(len(items), 1)
    # The key of the single item is expected to be a task; the value is 42.
    self.assertIsInstance(items[0][0], EOTask)
    self.assertEqual(items[0][1], 42)

    # Results are indexed here with the Dependency object itself.
    self.assertEqual(result[dep], 42)
def test_workflow_results():
    """Inspect the ``WorkflowResults`` returned by a two-node workflow.

    Covers the outputs mapping, ``drop_outputs``, the start/end timestamps
    and the per-node stats.
    """
    input_node = EONode(InputTask())
    output_node = EONode(OutputTask(name="out"), inputs=[input_node])
    workflow = EOWorkflow([input_node, output_node])

    results = workflow.execute({input_node: {"val": 10}})

    assert isinstance(results, WorkflowResults)
    assert results.outputs == {"out": 10}

    # drop_outputs() must return a different object with empty outputs.
    stripped = results.drop_outputs()
    assert stripped.outputs == {}
    assert stripped is not results

    assert isinstance(results.start_time, dt.datetime)
    assert isinstance(results.end_time, dt.datetime)
    assert results.start_time < results.end_time < dt.datetime.now()

    assert isinstance(results.stats, dict)
    assert len(results.stats) == 2
    # Stats are keyed by node uid.
    for node in (input_node, output_node):
        assert isinstance(results.stats.get(node.uid), NodeStats)