def test_process_graph_not_found(self):
    """ Checks if an error is thrown when a process graph file cannot be found. """
    pg_filepath = os.path.join(self.pg_dirpath, "does_not_exist.json")
    try:
        translate_process_graph(pg_filepath)
        assert False, "Expected a FileNotFoundError"  # fail if no error was raised
    except FileNotFoundError:
        assert True
def test_process_graph_not_found(self):
    """ Checks if an error is thrown when a process graph file cannot be found. """
    try:
        translate_process_graph(self.non_existing_filepath)
        assert False, "Expected a FileNotFoundError"  # fail if no error was raised
    except FileNotFoundError:
        assert True
def test_get_parent_process(self):
    """ Tests retrieving the parent process of an embedded process graph. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    lsr_node = graph['linear_scale_range_1']
    apply_node = graph['apply_0']
    assert lsr_node.parent_process == apply_node
def run_job_id(endPoint: str = 'https://jeodpp.jrc.ec.europa.eu/openeo/jobs',
               job_id: UUID = None, user: str = None, path: str = None,
               tileindex: int = None, tiletotal: int = None):
    try:
        httpGet = endPoint + '/' + str(job_id)
        sendRequest = requests.get(httpGet)
        sendRequest.raise_for_status()
        job_metadata = sendRequest.json()
        process_graph = job_metadata["process"]
        # process_graph = get_job_process_graph(db_session, job_id)
        # todo: cannot use db here, as long as we are not in the docker-compose scope
        # update_job_status(db_session, job_id, models.job.ProcessStatus.running)
        jeodpp = BackEnd('jeodpp', user=user, path=path)
        graph = translate_process_graph(process_graph)
        jeodpp.processGraph(graph.sort(), tileindex, tiletotal)
        # todo: cannot use db here, as long as we are not in the docker-compose scope
        # update_job_status(db_session, job_id, models.job.ProcessStatus.finished)
        return "finished"
    except Exception:
        print("Error: job {} did not process, error returned".format(job_id))
        # todo: cannot use db here, as long as we are not in the docker-compose scope
        # update_job_status(db_session, job_id, models.job.ProcessStatus.error)
        return "error"
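# Usage sketch (hypothetical values): the UUID and output path below are
# placeholders, and the endpoint default is taken from the signature above.
#
#   from uuid import UUID
#   status = run_job_id(job_id=UUID('00000000-0000-0000-0000-000000000000'),
#                       user='testuser', path='/tmp/results')
#   print(status)  # "finished" or "error"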
def test_from_global_parameter(self):
    """ Tests parsing of a globally defined parameter. """
    global_parameter_filepath = os.path.join(self.pg_dirpath, "s2_max_ndvi_global_parameter.json")
    parameters = {'test_from_parameter': 3}
    graph = translate_process_graph(global_parameter_filepath, parameters=parameters)
    assert graph['ndvi_6'].arguments['y'] == 3
def test_from_local_parameter(self):
    """ Tests parsing of a locally defined parameter. """
    pg_filepath = os.path.join(self.pg_dirpath, "s2_max_ndvi_local_parameter.json")
    graph = translate_process_graph(pg_filepath)
    assert graph['ndvi_6'].arguments['y'] == 3
def test_lc_from_global_parameters(self):
    """ Tests parsing of globally defined parameters given in the process graph itself. """
    pg_filepath = os.path.join(self.pg_dirpath, "lc_global_parameter.json")
    graph = translate_process_graph(pg_filepath)
    assert graph['dc_0'].arguments['bands'] == ['B08', 'B04', 'B02']
    assert graph['dc_0'].arguments['id'] == 'COPERNICUS/S2'
def test_job():
    """ Creates an xarray/opendatacube job based on an openEO process graph. """
    # Set input parameters
    tests_folder = os.path.dirname(os.path.abspath(__file__))
    process_graph_json = os.path.join(tests_folder, "process_graphs/evi.json")
    with open(os.path.join(tests_folder, 'backend_processes.json')) as f:
        process_defs = json.load(f)['processes']
    odc_env = 'default'
    odc_url = 'tcp://xx.yyy.zz.kk:8786'
    graph = translate_process_graph(process_graph_json, process_defs).sort(by='result')

    # Check if the process graph is valid
    validate_processes(graph, process_defs)
    nodes = map_to_odc(graph, odc_env, odc_url)

    # Write to disk
    f_name = "evi_odc"
    with open(f_name + ".py", "w") as f:
        for node in nodes:
            f.write(nodes[node])

    # Check that it matches the reference file
    with open(f_name + ".py") as f:
        this_file = f.readlines()
    with open(os.path.join(tests_folder, f"ref_jobs/{f_name}_ref.py")) as f:
        ref_file = f.readlines()
    assert this_file == ref_file

    # Clean up
    os.remove(f_name + ".py")
def test_get_dimension(self):
    """ Tests dimension retrieval. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    apply_node = graph['apply_0']
    assert apply_node.dimension is None
    reduce_node = graph['reduce_time_7']
    assert reduce_node.dimension == 't'
def test_is_reducer(self):
    """ Tests reducer identification. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    apply_node = graph['apply_0']
    assert not apply_node.is_reducer
    reduce_node = graph['reduce_time_7']
    assert reduce_node.is_reducer
def test_lc_sub_processes(self):
    """ Tests correct linkage of sub-processes in the load collection process. """
    pg_filepath = os.path.join(self.pg_dirpath, "lc_sub_processes.json")
    graph = translate_process_graph(pg_filepath)
    assert len(graph) == 3
    assert len(graph['loadco1_0'].result_processes) == 2
    assert list(graph['cc_1'].input_data_processes.ids)[0] == 'loadco1_0'
    assert list(graph['pf_2'].input_data_processes.ids)[0] == 'loadco1_0'
    assert list(graph['cc_1'].output_data_processes.ids)[0] == 'loadco1_0'
    assert list(graph['pf_2'].output_data_processes.ids)[0] == 'loadco1_0'
def validate_process_graph(pg_filepath, collections_src, processes_src=None, parameters=None):
    """
    Validates the input process graph with respect to:
        - processes
        - collections
        - node names

    Parameters
    ----------
    pg_filepath : str or dict
        File path to the process graph (JSON file) or the parsed file as a dictionary.
    collections_src : dict or str or list
        It can be:
            - a dictionary of loaded collection definitions (keys are the collection IDs)
            - a directory path to collections (.json)
            - the URL of a remote collection endpoint (e.g., "https://earthengine.openeo.org/v1.0/collections")
            - a list of loaded collection definitions
    processes_src : dict or str or list, optional
        It can be:
            - a dictionary of loaded process definitions (keys are the process IDs)
            - a directory path to processes (.json)
            - the URL of a remote process endpoint (e.g., "https://earthengine.openeo.org/v1.0/processes")
            - a list of loaded process definitions
        The default value points to the "processes" repository of the parser.
    parameters : dict, optional
        Globally defined parameters, which can be used in 'from_parameter'.

    Returns
    -------
    valid : bool
        If True, the given process graph is valid.
    err_msgs : list
        List of strings containing error or user information messages if `valid` is False.
    """
    # define the source of the process definitions
    process_defs = os.path.join(os.path.dirname(__file__), "processes") \
        if processes_src is None else processes_src

    process_graph = translate_process_graph(pg_filepath, process_defs=process_defs, parameters=parameters)
    proc_valid, proc_err_msgs = validate_processes(process_graph, process_defs)
    coll_valid, coll_err_msgs = validate_collections(process_graph, collections_src)

    pg_err_msgs = proc_err_msgs + coll_err_msgs
    pg_valid = proc_valid & coll_valid

    return pg_valid, pg_err_msgs
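# Usage sketch (hypothetical paths): validate a process graph against locally
# stored collection definitions, with a globally defined parameter.
#
#   valid, err_msgs = validate_process_graph("process_graphs/evi.json",
#                                            collections_src="collections/",
#                                            parameters={'test_from_parameter': 3})
#   if not valid:
#       print("\n".join(err_msgs))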
def run_job_graph(process_graph: dict, user: str = None, path: str = None,
                  tileindex: int = None, tiletotal: int = None):
    try:
        jeodpp = BackEnd('jeodpp', user=user, path=path)
        translated_graph = translate_process_graph(process_graph)
        jeodpp.processGraph(translated_graph.sort(), tileindex, tiletotal)
        return "finished"
    except Exception:
        return "error"
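# Usage sketch (the minimal graph below is illustrative only, not a complete
# openEO process graph; the user and path are placeholders):
#
#   pg = {"dc": {"process_id": "load_collection",
#                "arguments": {"id": "COPERNICUS/S2"},
#                "result": True}}
#   status = run_job_graph(pg, user='testuser', path='/tmp/results')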
def test_sort_process_graph(self):
    """ Tests sorting of a process graph. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    assert list(graph.ids) == [
        "apply_0", "linear_scale_range_1", "load_collection_2", "reduce_bands_3",
        "red_4", "nir_5", "ndvi_6", "reduce_time_7", "max_8", "save_9"
    ]

    sorted_graph = graph.sort(by='dependency')
    assert list(sorted_graph.ids) == [
        "load_collection_2", "reduce_bands_3", "red_4", "nir_5", "ndvi_6",
        "reduce_time_7", "max_8", "apply_0", "linear_scale_range_1", "save_9"
    ]
def openeo_to_eodatareaders(process_graph_json_in: Union[dict, str], job_data: str,
                            process_defs: Union[dict, list, str], vrt_only: bool = False,
                            existing_node_ids: List[Tuple] = None) \
        -> Tuple[List[Tuple[str, List[str], Optional[str], List[str], str]], Graph]:
    """
    Translates an openEO process graph into a sequence of calls to EODataProcessor,
    one for each node of the process graph. Each openEO process is wrapped into an
    apply/reduce call using EODataProcessor methods.
    """
    # Translate the openEO process graph into a traversable object
    if isinstance(process_graph_json_in, dict):
        process_graph_json = deepcopy(process_graph_json_in)
    else:
        process_graph_json = process_graph_json_in
    graph = translate_process_graph(process_graph_json, process_defs=process_defs).sort(by='dependency')

    # Get wrapper processes -> TODO: is this really needed?
    wrapper_processes = get_wrapper_processes()

    nodes = []
    N_nodes = len(graph.ids)
    last_node = False
    for k, node_id in enumerate(graph.ids):
        cur_node = graph[node_id]
        wrapper_name = None
        wrapper_dimension = None
        node_dependencies = None

        if k + 1 == N_nodes:
            last_node = True

        if cur_node.is_reducer:
            # The current process is classified as a "reducer" in its process definition
            if cur_node.parent_process:
                # The current process has a parent, so it must be an embedded process graph
                wrapper_name = cur_node.parent_process.process_id
                wrapper_dimension = cur_node.parent_process.dimension
            else:
                # The current process is of type "reducer" but has no parent, so it must be
                # one of these processes: "reduce_dimension", "reduce_dimension_binary"
                wrapper_name = cur_node.process_id
                wrapper_dimension = cur_node.dimension
        else:
            wrapper_name = cur_node.process_id
            # for clarity; this will be needed once 'apply_dimension' is also supported by EODataProcessor
            reducer_dimension = None

        # Workaround for the process "array_element" until it has the category "reducer" set
        # TODO: remove when the process definition is updated
        if (not cur_node.is_reducer) and cur_node.parent_process:
            # The current process has a parent, so it must be an embedded process graph
            wrapper_name = cur_node.parent_process.process_id
            wrapper_dimension = cur_node.parent_process.dimension

        # NB: find a better solution
        if wrapper_dimension:
            wrapper_dimension = check_dim_name(wrapper_dimension)

        if cur_node.content['process_id'] == 'run_udf':
            operator = "UdfExec"
            params = map_udf(cur_node.content, job_data, cur_node.id)
        else:
            operator = "EODataProcessor"
            params = map_process(
                cur_node.content, cur_node.id, cur_node.is_result, job_data,
                wrapper_name=wrapper_name, wrapper_dimension=wrapper_dimension,
                vrt_only=vrt_only, last_node=last_node
            )

        # Get dependencies
        if cur_node.result_process and (cur_node.process_id in wrapper_processes):
            # The current process is a wrapper process, which embeds a process graph.
            # Its only dependency is the node in the embedded process graph with 'result' set to True.
            node_dependencies = [cur_node.result_process.id]
        else:
            node_dependencies = list(cur_node.dependencies.ids)

        # Add to the list of nodes
        nodes.append((cur_node.id, params, node_dependencies, operator))

    return nodes, graph
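# Usage sketch (hypothetical file and directory names): translate a process
# graph into EODataProcessor calls and inspect the resulting node tuples.
#
#   nodes, graph = openeo_to_eodatareaders("process_graphs/evi.json",
#                                          job_data="/tmp/job_data",
#                                          process_defs="processes/")
#   for node_id, params, dependencies, operator in nodes:
#       print(node_id, operator, dependencies)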
def test_has_descendant_process(self):
    """ Tests if a node has a descendant process. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    dc_node = graph['load_collection_2']
    assert dc_node.has_descendant_process(graph, 'save_result')
def test_get_node_by_name(self):
    """ Tests node access in a graph by node name. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    apply_node = graph['apply']
    assert apply_node.id == 'apply_0'
def test_to_igraph(self):
    """ Tests conversion of the internal graph to an iGraph object. """
    graph = translate_process_graph(self.max_ndvi_pg_filepath)
    graph.to_igraph(edge_name="process")
    assert True
def test_translate_process_graph(self):
    """ Translates a process graph from openEO syntax to a Python traversable object. """
    graph = translate_process_graph(self.uc1_polarization_pg_filepath)
    print(graph)
    assert True
def test_translate_process_graph_none_params(self):
    """ Translates a minimal process graph with all allowed values set to None. """
    pg_file = os.path.join(self.pg_dirpath, "none.json")
    graph = translate_process_graph(pg_file)
    print(graph)
    assert True
def test_translate_process_graph(self):
    """ Translates a process graph from openEO syntax to a Python traversable object. """
    pg_filepath = os.path.join(self.pg_dirpath, "s1_uc1_polarization.json")
    graph = translate_process_graph(pg_filepath)
    print(graph)
    assert True
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("-process_graph", "--process_graph", help="process graph",
                        dest="process_graph", required=False, type=str)
    parser.add_argument("-job_id", "--job_id", help="job id (UUID)",
                        dest="job_id", required=False, type=UUID)
    parser.add_argument("-user", "--user", help="user id",
                        dest="user", required=False, type=str, default=None)
    parser.add_argument("-path", "--path", help="path to save the output",
                        dest="path", required=False, type=str, default=None)
    parser.add_argument("-tileindex", "--tileindex", help="tile index to split the input",
                        dest="tileindex", required=False, type=int, default=None)
    parser.add_argument("-tiletotal", "--tiletotal", help="total number of tiles to split the input",
                        dest="tiletotal", required=False, type=int, default=None)
    args = parser.parse_args()

    if args.path is not None:
        path = os.path.join(args.path, str(args.job_id))
        try:
            os.mkdir(path)
        except OSError:
            print("The creation of the directory {} has failed".format(path))
    else:
        path = None

    if args.process_graph is not None:
        run_job_graph(args.process_graph, args.user, path, args.tileindex, args.tiletotal)
    elif args.job_id is not None:
        run_job_id(job_id=args.job_id, user=args.user, path=path,
                   tileindex=args.tileindex, tiletotal=args.tiletotal)
    else:
        print("Error: either process_graph or a db_session with job_id must be provided")
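# Typical entry-point guard, assuming this module is executed directly as a script.
# Example invocations (script name, paths, and the UUID are placeholders):
#
#   python run_job.py --process_graph process_graphs/evi.json --user testuser --path /tmp/results
#   python run_job.py --job_id 00000000-0000-0000-0000-000000000000 --user testuser
if __name__ == "__main__":
    main()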