def test_parse_eli_geometry(in_geojson: Dict[str, Any], expected_wkt: str):
    """Test conversion of "ELI" geometries to GeoJSON compatible geometries"""
    assert parse_eli_geometry(in_geojson).wkt == expected_wkt  # type: ignore
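# Illustrative sketch (not from the repository): test_parse_eli_geometry above expects pytest
# to inject (in_geojson, expected_wkt) pairs. One plausible parametrization is shown below.
# The sample GeoJSON and the expected WKT string are assumptions for illustration only; they
# presume parse_eli_geometry returns an unmodified shapely polygon for a simple valid ring,
# which may not hold for ELI-specific geometry fixes.
import pytest

_EXAMPLE_CASES = [
    (
        {"type": "Polygon", "coordinates": [[[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 0.0]]]},
        "POLYGON ((0 0, 1 0, 1 1, 0 0))",
    ),
]


@pytest.mark.parametrize("in_geojson, expected_wkt", _EXAMPLE_CASES)
def test_parse_eli_geometry_example(in_geojson, expected_wkt):
    # Mirrors the assertion of the test above, using the assumed sample data.
    assert parse_eli_geometry(in_geojson).wkt == expected_wkt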
def check_wms(source, info_msgs, warning_msgs, error_msgs):
    """Check WMS source

    Parameters
    ----------
    source : dict
        Source dictionary
    info_msgs : list
        Good messages
    warning_msgs : list
        Warning messages
    error_msgs : list
        Error messages
    """
    wms_url = source["properties"]["url"]
    source_headers = get_http_headers(source)

    params = ["{proj}", "{bbox}", "{width}", "{height}"]
    missingparams = [p for p in params if p not in wms_url]
    if len(missingparams) > 0:
        error_msgs.append(
            f"The following values are missing in the URL: {','.join(missingparams)}"
        )

    wms_args = {}
    u = urlparse(wms_url)
    url_parts = list(u)
    for k, v in parse_qsl(u.query, keep_blank_values=True):
        wms_args[k.lower()] = v

    def validate_wms_getmap_url():
        """Layers and styles can contain whitespace. Ignore them here; they are
        checked against GetCapabilities later.
        """
        url_parts_without_layers = "&".join([
            f"{key}={value}" for key, value in wms_args.items()
            if key not in {"layers", "styles"}
        ])
        parts = url_parts.copy()
        parts[4] = url_parts_without_layers
        url = urlunparse(parts).replace("{", "").replace("}", "")
        return validators.url(url)

    if not validate_wms_getmap_url():
        error_msgs.append(f"URL validation error: {wms_url}")

    # Check mandatory WMS GetMap parameters (Table 8, Section 7.3.2, WMS 1.3.0 specification)
    missing_request_parameters = set()
    is_esri = "request" not in wms_args
    if is_esri:
        required_parameters = ["f", "bbox", "size", "imageSR", "bboxSR", "format"]
    else:
        required_parameters = [
            "version",
            "request",
            "layers",
            "bbox",
            "width",
            "height",
            "format",
        ]
    for request_parameter in required_parameters:
        if request_parameter.lower() not in wms_args:
            missing_request_parameters.add(request_parameter)

    # Nothing more to do for the ESRI REST API
    if is_esri:
        return

    if "version" in wms_args and wms_args["version"] == "1.3.0":
        if "crs" not in wms_args:
            missing_request_parameters.add("crs")
        if "srs" in wms_args:
            error_msgs.append(
                f"WMS {wms_args['version']} urls should not contain SRS parameter."
            )
    elif "version" in wms_args and not wms_args["version"] == "1.3.0":
        if "srs" not in wms_args:
            missing_request_parameters.add("srs")
        if "crs" in wms_args:
            error_msgs.append(
                f"WMS {wms_args['version']} urls should not contain CRS parameter."
            )
    if len(missing_request_parameters) > 0:
        missing_request_parameters_str = ",".join(missing_request_parameters)
        error_msgs.append(
            f"Parameter '{missing_request_parameters_str}' is missing in url."
        )
        return

    # Styles is mandatory according to the WMS specification, but some WMS servers seem not to care
    if "styles" not in wms_args:
        warning_msgs.append(
            "Parameter 'styles' is missing in url. 'STYLES=' can be used to request the default style."
        )

    # We first send a service=WMS&request=GetCapabilities request to the server.
    # According to the WMS specification Section 6.2 (Version numbering and negotiation), the server
    # should return the GetCapabilities XML with the highest version the server supports.
    # If this fails, we try to explicitly specify a WMS version.
    exceptions = []
    wms = None
    for wmsversion in [None, "1.3.0", "1.1.1", "1.1.0", "1.0.0"]:
        if wmsversion is None:
            wmsversion_str = "-"
        else:
            wmsversion_str = wmsversion
        try:
            wms_getcapabilities_url = wmshelper.get_getcapabilities_url(wms_url, wmsversion)
            r = requests.get(wms_getcapabilities_url, headers=source_headers)
            xml = r.text
            wms = wmshelper.parse_wms(xml)
            if wms is not None:
                break
        except Exception as e:
            exceptions.append(f"WMS {wmsversion_str}: Error: {e}")
            continue

    if wms is None:
        for msg in exceptions:
            error_msgs.append(msg)
        return

    for access_constraint in wms["AccessConstraints"]:
        info_msgs.append(f"AccessConstraints: {access_constraint}")
    for fee in wms["Fees"]:
        info_msgs.append(f"Fee: {fee}")

    if source["geometry"] is None:
        geom = None
    else:
        geom = eliutils.parse_eli_geometry(source["geometry"])

    # Check layers
    if "layers" in wms_args:
        layer_arg = wms_args["layers"]
        layers = layer_arg.split(",")
        not_found_layers = []
        for layer_name in layer_arg.split(","):
            if layer_name not in wms["layers"]:
                not_found_layers.append(layer_name)
        if len(not_found_layers) > 0:
            error_msgs.append(
                f"Layers '{','.join(not_found_layers)}' not advertised by WMS GetCapabilities request."
            )

        # Check source geometry against the layer bounding box.
        # Regardless of its projection, each layer should advertise an approximated bounding box in lon/lat.
        # See WMS 1.3.0 Specification Section 7.2.4.6.6 EX_GeographicBoundingBox
        if geom is not None and geom.is_valid:
            max_outside = 0.0
            for layer_name in layers:
                if layer_name in wms["layers"]:
                    bbox = wms["layers"][layer_name]["BBOX"]
                    geom_bbox = box(*bbox)
                    geom_outside_bbox = geom.difference(geom_bbox)
                    area_outside_bbox = geom_outside_bbox.area / geom.area * 100.0
                    max_outside = max(max_outside, area_outside_bbox)

            # 5% is an arbitrarily chosen value and should be adapted as needed
            if max_outside > 5.0:
                error_msgs.append(
                    f"{round(max_outside, 2)}% of geometry is outside of the layers bounding box. "
                    "Geometry should be checked"
                )

        # Check styles
        if "styles" in wms_args:
            style = wms_args["styles"]
            # The default style does not need to be advertised by the server
            if not (style == "default" or style == "" or style == "," * len(layers)):
                styles = wms_args["styles"].split(",")
                if not len(styles) == len(layers):
                    error_msgs.append("Not the same number of styles and layers.")
                else:
                    for layer_name, style in zip(layers, styles):
                        if (len(style) > 0 and not style == "default"
                                and layer_name in wms["layers"]
                                and style not in wms["layers"][layer_name]["Styles"]):
                            error_msgs.append(
                                f"Layer '{layer_name}' does not support style '{style}'"
                            )

        # Check CRS
        crs_should_included_if_available = {"EPSG:4326", "EPSG:3857", "CRS:84"}
        if "available_projections" not in source["properties"]:
            error_msgs.append("Source is missing 'available_projections' element.")
        else:
            for layer_name in layers:
                if layer_name in wms["layers"]:
                    not_supported_crs = set()
                    for crs in source["properties"]["available_projections"]:
                        if crs.upper() not in wms["layers"][layer_name]["CRS"]:
                            not_supported_crs.add(crs)

                    if len(not_supported_crs) > 0:
                        supported_crs_str = ",".join(wms["layers"][layer_name]["CRS"])
                        not_supported_crs_str = ",".join(not_supported_crs)
                        warning_msgs.append(
                            f"Layer '{layer_name}': CRS '{not_supported_crs_str}' not in: {supported_crs_str}. "
                            "Some servers support CRS which are not advertised."
                        )

                    supported_but_not_included = set()
                    for crs in crs_should_included_if_available:
                        if (crs not in source["properties"]["available_projections"]
                                and crs in wms["layers"][layer_name]["CRS"]):
                            supported_but_not_included.add(crs)

                    if len(supported_but_not_included) > 0:
                        supported_but_not_included_str = ",".join(supported_but_not_included)
                        warning_msgs.append(
                            f"Layer '{layer_name}': CRS '{supported_but_not_included_str}' not included in "
                            "available_projections but supported by server."
                        )

    if wms_args["version"] < wms["version"]:
        warning_msgs.append(
            f"Query requests WMS version '{wms_args['version']}', server supports '{wms['version']}'"
        )

    # Check formats
    imagery_format = wms_args["format"]
    imagery_formats_str = "', '".join(wms["formats"])
    if imagery_format not in wms["formats"]:
        error_msgs.append(f"Format '{imagery_format}' not in '{imagery_formats_str}'.")

    if "category" in source["properties"] and "photo" in source["properties"]["category"]:
        if "jpeg" not in imagery_format and "jpeg" in imagery_formats_str:
            warning_msgs.append(
                f"Server supports JPEG, but '{imagery_format}' is used. "
                "JPEG is typically preferred for photo sources, but might not always be "
                "the best choice. "
                f"(Server supports: '{imagery_formats_str}')"
            )
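# Illustrative driver (assumption, not part of the original script): the legacy check_wms above
# reports its findings by appending to three caller-provided lists. A caller might use it roughly
# as sketched below; "load_source" and "run_wms_check" are hypothetical helper names.
import json


def load_source(path: str) -> dict:
    # Hypothetical helper: ELI sources are stored as GeoJSON feature files.
    with open(path, encoding="utf-8") as f:
        return json.load(f)


def run_wms_check(path: str) -> None:
    source = load_source(path)
    info_msgs, warning_msgs, error_msgs = [], [], []
    check_wms(source, info_msgs, warning_msgs, error_msgs)
    for prefix, msgs in (("INFO", info_msgs), ("WARNING", warning_msgs), ("ERROR", error_msgs)):
        for msg in msgs:
            print(f"{path}: {prefix}: {msg}")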
async def process_source(filename, session: ClientSession):
    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # Check if it is ESRI REST and not WMS
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        processed_sources.add(filename)

        category = source["properties"].get("category", None)

        if source["geometry"] is None:
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"], test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"], test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages = []
        status, image_hash = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if not status == ImageHashStatus.SUCCESS or image_hash is None:
            ignored_sources[filename] = "Not possible to download reference image"
            # We are finished if it was not possible to get the image
            return

        if max_count(str(image_hash)) == 16:
            if "category" in source["properties"] and "photo" in source["properties"]["category"]:
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename}: has category {category} but image hash is {image_hash}:\n\t{msgs}"
                )

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"{filename}: Image hash {image_hash} not useful ({category}): \n\t{error_msgs}"
            )
            ignored_sources[filename] = f"Image hash {image_hash} not useful ({category})"
            return

        # Update wms
        wms_messages = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(f"{filename}: Not possible to update wms url:\n\t{error_msgs}")
            ignored_sources[filename] = "Not possible to update wms url"
            return
        new_url = result["url"]
        new_projections = result["available_projections"]
        del result

        # Download image for updated url
        new_img_messages = []
        new_status, new_image_hash = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )
        if not new_status == ImageHashStatus.SUCCESS or new_image_hash is None:
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"{filename}: Could not download image with updated url: {new_status}\n\t{error_msgs}"
            )
            ignored_sources[filename] = "Could not download image with updated url"
            return

        # Only sources are updated where the new query returns the same image
        if not image_similar(image_hash, new_image_hash, test_zoom_level):
            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"{filename}: ImageHash not the same for: {filename}: {image_hash} - {new_image_hash}: "
                f"{image_hash - new_image_hash}\n\t{error_original_img_messages}\n\t{error_new_img_messages}"
            )
            ignored_sources[filename] = (
                f"ImageHash for reference image and image with updated url differs: "
                f"{image_hash} - {new_image_hash}: {image_hash - new_image_hash}"
            )
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in new_projections:
                epsg_check_messages = []
                epsg_image_status, epsg_image_hash = await get_image(
                    url=new_url,
                    available_projections=[EPSG],
                    lon=pt.x,
                    lat=pt.y,
                    zoom=test_zoom_level,
                    session=session,
                    messages=epsg_check_messages,
                )
                epsg_check_messages_str = "\n\t".join(epsg_check_messages)
                logging.info(
                    f"{filename}: Test if projection {EPSG} works despite not advertised:\n\t{epsg_check_messages_str}"
                )

                if epsg_image_status == ImageHashStatus.NETWORK_ERROR:
                    if EPSG in old_projections and EPSG not in new_projections:
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Network error, but projection was previously included."].append(EPSG)
                elif epsg_image_status == ImageHashStatus.SUCCESS:
                    # Relax similarity constraint to account for differences due to reprojection
                    hash_diff = image_hash - epsg_image_hash
                    if image_similar(image_hash, epsg_image_hash, test_zoom_level):
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Projection returns similar image despite not advertised."].append(EPSG)
                        logging.info(
                            f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}"
                        )
                    elif epsg_image_hash is not None:
                        logging.info(
                            f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}"
                        )
                    else:
                        logging.info(f"{filename}: Do not add {EPSG} No image returned.")

        # Servers might support projections that are not used in the area covered by a source.
        # Keep only EPSG codes that are used in the area covered by the source's geometry.
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in new_projections:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    logging.exception(
                        f"{filename}: Could not check area of use for projection {epsg}: {e}"
                    )
                    continue
            if len(new_projections) == len(epsg_outside_area_of_use):
                logging.error(f"{filename}: epsg_outside_area_of_use filter removes all EPSG")
            if len(epsg_outside_area_of_use) > 0:
                if len(epsg_outside_area_of_use) <= 10:
                    removed_projections[filename]["EPSG outside area of use"].extend(
                        list(epsg_outside_area_of_use))
                else:
                    removed_projections[filename]["EPSG outside area of use"].extend(
                        list(epsg_outside_area_of_use)[:10]
                        + ["...", f"+ {len(epsg_outside_area_of_use) - 10} more"])
            new_projections -= epsg_outside_area_of_use

        # Servers that report a lot of projections may be configured wrongly.
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised.
        if len(new_projections) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in new_projections:
            included_alias_projections = new_projections.intersection(wmshelper.epsg_3857_alias)
            if len(included_alias_projections) > 0:
                removed_projections[filename]["Alias projections"].extend(
                    list(included_alias_projections))
                new_projections -= included_alias_projections
        else:
            # If EPSG:3857 is not present but an alias is, keep only the alias with the highest number to be consistent
            result_epsg_3857_alias = new_projections & wmshelper.epsg_3857_alias
            result_epsg_3857_alias_sorted = list(
                sorted(
                    result_epsg_3857_alias,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                    reverse=True,
                ))
            if len(result_epsg_3857_alias_sorted) > 1:
                removed_projections[filename]["Alias projections"].extend(
                    list(result_epsg_3857_alias_sorted[1:]))
                new_projections -= set(result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        if len(new_projections - wmshelper.valid_epsgs) > 0:
            removed_projections[filename]["Deprecated projections"].extend(
                list(new_projections - wmshelper.valid_epsgs))
            new_projections.intersection_update(wmshelper.valid_epsgs)

        # Check if projections are supported by the server
        not_supported_projections = set()
        image_hashes = {}
        for proj in new_projections:
            proj_messages = []
            proj_status, proj_image_hash = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,
                lat=pt.y,
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )
            image_hashes[proj] = {
                "status": proj_status,
                "hash": proj_image_hash,
                "logs": proj_messages,
            }

            msgs = "\n\t".join(proj_messages)
            logging.info(f"{filename} Projection check: {proj}: {proj_status}:\n\t{msgs}")

            if proj_status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                removed_projections[filename][
                    "Projection check: does not return an image"].append(proj)
            elif proj_status == ImageHashStatus.NETWORK_ERROR:
                # On an unsuccessful status, do not add the projection if it was not previously included
                if proj not in old_projections:
                    removed_projections[filename][
                        "Projection check: network error and previously not included"].append(proj)
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes
                and image_hashes["EPSG:3857"]["status"] == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"]["status"] == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"]["hash"]
            img_hash_4326 = image_hashes["EPSG:4326"]["hash"]
            if not image_similar(img_hash_3857, img_hash_4326, test_zoom_level):
                msgs = "\n\t".join(image_hashes["EPSG:3857"]["logs"]
                                   + image_hashes["EPSG:4326"]["logs"])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similar: "
                    f"{img_hash_3857} - {img_hash_4326}: {img_hash_3857 - img_hash_4326}:\n\t{msgs}"
                )

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"], new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            source["properties"]["available_projections"] = list(
                sorted(
                    new_projections,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source, out, indent=4, sort_keys=False, ensure_ascii=False)
                out.write("\n")

    except Exception as e:
        logging.exception(f"{filename}: Error occurred while processing source: {e}")
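# Illustrative driver (assumption): process_source is an asyncio coroutine that takes an aiohttp
# ClientSession. One plausible way to fan it out over many source files is sketched below; the
# glob pattern, the concurrency limit, and the helper names are assumptions, not values taken
# from the original script.
import asyncio
import glob

from aiohttp import ClientSession


async def process_all(pattern: str = "sources/**/*.geojson", limit: int = 10) -> None:
    # Bound the number of concurrent sources to avoid overloading WMS servers.
    semaphore = asyncio.Semaphore(limit)

    async def bounded(filename: str, session: ClientSession) -> None:
        async with semaphore:
            await process_source(filename, session)

    async with ClientSession() as session:
        await asyncio.gather(
            *(bounded(filename, session) for filename in glob.glob(pattern, recursive=True)))


if __name__ == "__main__":
    asyncio.run(process_all())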
async def process_source(filename: str, session: ClientSession):
    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # Check if it is ESRI REST and not WMS
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        processed_sources.add(filename)

        category = source["properties"].get("category", None)

        if source["geometry"] is None:
            geom: MultiPolygon | Polygon = box(-180, -90, 180, 90)
            pt: Point = Point(7.44, 46.56)
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()  # type: ignore

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"], test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"], test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages: List[str] = []
        original_image_result = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,  # type: ignore
            lat=pt.y,  # type: ignore
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if (not original_image_result.status == ImageHashStatus.SUCCESS
                or original_image_result.image_hash is None):  # type: ignore
            ignored_sources[filename] = "Not possible to download reference image"
            # We are finished if it was not possible to get the image
            return

        if max_count(str(original_image_result.image_hash)) == 16:  # type: ignore
            if "category" in source["properties"] and "photo" in source["properties"]["category"]:
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename}: has category {category} but image hash is {original_image_result.image_hash}:\n\t{msgs}"  # type: ignore
                )

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"{filename}: Image hash {original_image_result.image_hash} not useful ({category}): \n\t{error_msgs}"  # type: ignore
            )
            ignored_sources[filename] = (
                f"Image hash {original_image_result.image_hash} not useful ({category})")  # type: ignore
            return

        # Update wms
        wms_messages: List[str] = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(f"{filename}: Not possible to update wms url:\n\t{error_msgs}")
            ignored_sources[filename] = "Not possible to update wms url"
            return
        new_url, new_projections = result
        del result

        # Servers that report a lot of projections may be configured wrongly.
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised.
        if len(new_projections) > 15:
            filtered_projs: Set[str] = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Download image for updated url
        new_img_messages: List[str] = []
        updated_image_result = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,  # type: ignore
            lat=pt.y,  # type: ignore
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )
        if (not updated_image_result.status == ImageHashStatus.SUCCESS
                or updated_image_result.image_hash is None):  # type: ignore
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"{filename}: Could not download image with updated url: {updated_image_result.status}\n\t{error_msgs}"
            )
            ignored_sources[filename] = "Could not download image with updated url"
            return

        # Only sources are updated where the new query returns the same image
        if not image_similar(original_image_result.image_hash,
                             updated_image_result.image_hash,
                             test_zoom_level):  # type: ignore
            original_hash = original_image_result.image_hash  # type: ignore
            new_hash = updated_image_result.image_hash  # type: ignore
            hash_diff = original_hash - new_hash  # type: ignore
            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"{filename}: ImageHash not the same for: {filename}: {original_hash} - {new_hash}: "
                f"{hash_diff}\n\t{error_original_img_messages}\n\t{error_new_img_messages}"
            )
            ignored_sources[filename] = (
                f"ImageHash for reference image and image with updated url differs: "
                f"{original_hash} - {new_hash}: {hash_diff}")
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in new_projections:
                epsg_check_messages: List[str] = []
                epsg_image_result = await get_image(
                    url=new_url,
                    available_projections=[EPSG],
                    lon=pt.x,  # type: ignore
                    lat=pt.y,  # type: ignore
                    zoom=test_zoom_level,
                    session=session,
                    messages=epsg_check_messages,
                )
                epsg_check_messages_str = "\n\t".join(epsg_check_messages)
                logging.info(
                    f"{filename}: Test if projection {EPSG} works despite not advertised:\n\t{epsg_check_messages_str}"
                )

                if epsg_image_result.status == ImageHashStatus.NETWORK_ERROR:
                    if EPSG in old_projections and EPSG not in new_projections:
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Network error, but projection was previously included."].append(EPSG)
                elif epsg_image_result.status == ImageHashStatus.SUCCESS:
                    epsg_image_hash = epsg_image_result.image_hash  # type: ignore
                    original_image_hash = original_image_result.image_hash  # type: ignore
                    # Relax similarity constraint to account for differences due to loss of quality caused by re-projection
                    hash_diff = original_image_result.image_hash - epsg_image_result.image_hash  # type: ignore
                    if image_similar(original_image_result.image_hash,
                                     epsg_image_result.image_hash,
                                     test_zoom_level):  # type: ignore
                        new_projections.add(EPSG)
                        added_projections[filename][
                            "Projection returns similar image despite not advertised."].append(EPSG)
                        logging.info(
                            f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {original_image_hash}: {hash_diff}"
                        )
                    elif epsg_image_hash is not None:
                        logging.info(
                            f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {original_image_hash}: {hash_diff}"
                        )
                    else:
                        logging.info(f"{filename}: Do not add {EPSG} No image returned.")

        # Check if projections are supported by the server
        not_supported_projections: Set[str] = set()
        image_hashes: Dict[str, Tuple[ImageResult, List[str]]] = {}
        for proj in new_projections:
            proj_messages: List[str] = []
            epsg_image_result = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,  # type: ignore
                lat=pt.y,  # type: ignore
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )
            image_hashes[proj] = (epsg_image_result, proj_messages)

            msgs = "\n\t".join(proj_messages)
            logging.info(
                f"{filename} Projection check: {proj}: {epsg_image_result.status}:\n\t{msgs}"
            )

            if epsg_image_result.status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                removed_projections[filename][
                    "Projection check: does not return an image"].append(proj)
            elif epsg_image_result.status == ImageHashStatus.NETWORK_ERROR:
                # On an unsuccessful status, do not add the projection if it was not previously included
                if proj not in old_projections:
                    removed_projections[filename][
                        "Projection check: network error and previously not included"].append(proj)
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes
                and image_hashes["EPSG:3857"][0].status == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"][0].status == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"][0].image_hash  # type: ignore
            img_hash_4326 = image_hashes["EPSG:4326"][0].image_hash  # type: ignore
            diff_hash = img_hash_3857 - img_hash_4326  # type: ignore
            if not image_similar(img_hash_3857, img_hash_4326, test_zoom_level):
                msgs = "\n\t".join(image_hashes["EPSG:3857"][1] + image_hashes["EPSG:4326"][1])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similar: "
                    f"{img_hash_3857} - {img_hash_4326}: {diff_hash}:\n\t{msgs}"
                )

        # Check projections again to filter out EPSG:3857 aliases
        new_projections = eliutils.clean_projections(new_projections)

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"], new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            source["properties"]["available_projections"] = list(
                sorted(
                    new_projections,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source, out, indent=4, sort_keys=False, ensure_ascii=False)
                out.write("\n")

    except Exception as e:
        logging.exception(f"{filename}: Error occurred while processing source: {e}")
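# Sketch of the result container assumed by the typed process_source above: the code accesses
# "status" and "image_hash" attributes on what get_image returns. A minimal compatible shape could
# be a NamedTuple like the one below; the actual definitions live elsewhere in the repository and
# may differ. ImageHashStatus already exists in the script and is repeated here only to make the
# sketch self-contained.
from enum import Enum
from typing import NamedTuple, Optional

from imagehash import ImageHash


class ImageHashStatus(Enum):
    SUCCESS = 1
    IMAGE_ERROR = 2
    NETWORK_ERROR = 3


class ImageResult(NamedTuple):
    status: ImageHashStatus
    image_hash: Optional[ImageHash]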
def check_tms(source: Dict[str, Any], messages: List[Message]) -> None:
    """Check TMS source

    Parameters
    ----------
    source : Dict[str, Any]
        The source
    messages : List[Message]
        The list to add messages to
    """
    try:
        url = source["properties"]["url"]
        source_headers = get_http_headers(source)

        if source["geometry"] is None:
            geom = None
        else:
            geom = eliutils.parse_eli_geometry(source["geometry"])

        # Validate URL
        try:
            _url = re.sub(r"switch:?([^}]*)", "switch", url).replace("{", "").replace("}", "")
            validators.url(_url)  # type: ignore
        except validators.utils.ValidationFailure as e:
            messages.append(
                Message(level=MessageLevel.ERROR,
                        message=f"URL validation error {e} / {url}"))

        # Check URL parameters
        parameters = {}

        # {z} instead of {zoom}
        if "{z}" in source["properties"]["url"]:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=f"Parameter {{z}} is used instead of {{zoom}} in tile url: {url}",
                ))
            return

        # We can't test sources that require an apikey that is unknown to ELI
        if "{apikey}" in url:
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=f"Not possible to check URL, apikey is required: {url}",
                ))
            return

        # If the URL contains a {switch:a,b,c} parameter, use the first option for tests
        match = re.search(r"switch:?([^}]*)", url)
        if match is not None:
            switches = match.group(1).split(",")
            url = url.replace(match.group(0), "switch")
            parameters["switch"] = switches[0]

        # Check zoom levels
        min_zoom = 0
        max_zoom = 22
        if "min_zoom" in source["properties"]:
            min_zoom = int(source["properties"]["min_zoom"])
        if "max_zoom" in source["properties"]:
            max_zoom = int(source["properties"]["max_zoom"])

        # Check if we find a TileMap Resource to check zoom levels against.
        # While there is a typical location for the metadata, there is no requirement
        # that the metadata needs to be located there.
        tms_url = tmshelper.TMSURL(url=url)
        tilemap_resource_url = tms_url.get_tilemap_resource_url()

        if tilemap_resource_url is not None:
            for tilemap_url in [
                    tilemap_resource_url,
                    tilemap_resource_url + "/tilemapresource.xml",
            ]:
                try:
                    r, xml = get_text_encoded(tilemap_url.format(**parameters), headers=headers)
                    if r.status_code == 200 and xml is not None:
                        try:
                            tilemap_resource = tmshelper.TileMapResource(xml)
                        except Exception:
                            # Not all TMS servers provide TileMap resources.
                            continue
                        if tilemap_resource.tile_map is None:
                            continue

                        # Check zoom levels against TileMapResource
                        tilemap_minzoom, tilemap_maxzoom = tilemap_resource.get_min_max_zoom_level()
                        if not min_zoom == tilemap_minzoom:
                            messages.append(
                                Message(
                                    level=MessageLevel.WARNING,
                                    message=f"min_zoom level '{min_zoom}' not the same as specified in TileMap: "
                                    f"'{tilemap_minzoom}': {tilemap_url}. "
                                    "Caution: this might be intentional as some servers time out for low zoom levels.",
                                ))
                        if not max_zoom == tilemap_maxzoom:
                            messages.append(
                                Message(
                                    level=MessageLevel.WARNING,
                                    message=f"max_zoom level '{max_zoom}' not the same as specified in TileMap: "
                                    f"'{tilemap_maxzoom}': {tilemap_url}",
                                ))

                        # Check geometry within bbox
                        if geom is not None and tilemap_resource.tile_map.bbox84 is not None:
                            max_area_outside = max_area_outside_bbox(
                                geom, tilemap_resource.tile_map.bbox84)
                            # 5% is an arbitrarily chosen value and should be adapted as needed
                            if max_area_outside > 5.0:
                                messages.append(
                                    Message(
                                        level=MessageLevel.ERROR,
                                        message=f"{round(max_area_outside, 2)}% of geometry is outside of the "
                                        "layers bounding box. Geometry should be checked",
                                    ))
                        break
                except Exception as e:
                    print(f"Error fetching TMS: {e}: {url}")

        # Test zoom levels by accessing tiles for a point within the geometry
        if geom is not None:
            centroid: Point = geom.representative_point()  # type: ignore
        else:
            centroid = Point(6.1, 49.6)
        centroid_x: float = centroid.x  # type: ignore
        centroid_y: float = centroid.y  # type: ignore

        zoom_failures: List[Tuple[int, str, int, Optional[str]]] = []
        zoom_success: List[int] = []
        tested_zooms: Set[int] = set()

        def test_zoom(zoom: int) -> None:
            tested_zooms.add(zoom)
            tile: mercantile.Tile = mercantile.tile(centroid_x, centroid_y, zoom)  # type: ignore
            tile_x: int = tile.x  # type: ignore
            tile_y: int = tile.y  # type: ignore

            query_url = url
            if "{-y}" in url:
                y = 2**zoom - 1 - tile_y
                query_url = query_url.replace("{-y}", str(y))
            elif "{!y}" in url:
                y = 2**(zoom - 1) - 1 - tile_y
                query_url = query_url.replace("{!y}", str(y))
            else:
                query_url = query_url.replace("{y}", str(tile_y))
            parameters["x"] = tile_x
            parameters["zoom"] = zoom
            query_url = query_url.format(**parameters)

            url_is_good, http_code, mime = test_image(query_url, source_headers)
            if url_is_good:
                zoom_success.append(zoom)
            else:
                zoom_failures.append((zoom, query_url, http_code, mime))

        # Test zoom levels
        for zoom in range(min_zoom, max_zoom + 1):
            test_zoom(zoom)

        tested_str = ",".join(list(map(str, sorted(tested_zooms))))
        sorted_failures = sorted(zoom_failures, key=lambda x: x[0])

        if len(zoom_failures) == 0 and len(zoom_success) > 0:
            messages.append(
                Message(level=MessageLevel.INFO,
                        message=f"Zoom levels reachable. (Tested: {tested_str})"))
        elif len(zoom_failures) > 0 and len(zoom_success) > 0:
            not_found_str = ",".join(list(map(str, [level for level, _, _, _ in sorted_failures])))
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=f"Zoom level {not_found_str} not reachable. (Tested: {tested_str}) "
                    f"Tiles might not be present at tested location: {centroid_x},{centroid_y}",
                ))
            for level, url, http_code, mime_type in sorted_failures:
                messages.append(
                    Message(
                        level=MessageLevel.WARNING,
                        message=f"URL for zoom level {level} returned HTTP Code {http_code}: {url} MIME type: {mime_type}",
                    ))
        else:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=f"No zoom level reachable. (Tested: {tested_str}) "
                    f"Tiles might not be present at tested location: {centroid_x},{centroid_y}",
                ))
            for level, url, http_code, mime_type in sorted_failures:
                messages.append(
                    Message(
                        level=MessageLevel.WARNING,
                        message=f"URL for zoom level {level} returned HTTP Code {http_code}: {url} MIME type: {mime_type}",
                    ))

    except Exception as e:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=f"Failed testing TMS source: Exception: {e}",
            ))
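# Sketch of the message types used by check_tms above and check_wms below: the checks append
# Message objects carrying a MessageLevel. The concrete definitions live elsewhere in the
# repository; this minimal, assumed shape only illustrates how the messages are produced and
# consumed, and the enum values are placeholders.
from dataclasses import dataclass
from enum import Enum


class MessageLevel(Enum):
    INFO = "info"
    WARNING = "warning"
    ERROR = "error"


@dataclass
class Message:
    level: MessageLevel
    message: str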
def check_wms(source: Dict[str, Any], messages: List[Message]) -> None:
    """Check WMS source

    Parameters
    ----------
    source : Dict[str, Any]
        The source
    messages : List[Message]
        The list to add messages to
    """
    url = source["properties"]["url"]
    wms_url = wmshelper.WMSURL(url)
    source_headers = get_http_headers(source)

    params = ["{proj}", "{bbox}", "{width}", "{height}"]
    missingparams = [p for p in params if p not in url]
    if len(missingparams) > 0:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=f"The following values are missing in the URL: {','.join(missingparams)}",
            ))

    try:
        wms_url.is_valid_getmap_url()
    except validators.utils.ValidationFailure as e:
        messages.append(
            Message(level=MessageLevel.ERROR,
                    message=f"URL validation error {e} for {url}"))

    # Check mandatory WMS GetMap parameters (Table 8, Section 7.3.2, WMS 1.3.0 specification)
    # Normalize parameter names to lower case
    wms_args = {key.lower(): value for key, value in wms_url.get_parameters()}

    # Check if it is actually an ESRI REST url and not a WMS url
    is_esri = "request" not in wms_args

    # Check if required parameters are missing
    missing_request_parameters: Set[str] = set()
    if is_esri:
        required_parameters = ["f", "bbox", "size", "imageSR", "bboxSR", "format"]
    else:
        required_parameters = [
            "version",
            "request",
            "layers",
            "bbox",
            "width",
            "height",
            "format",
        ]
    for request_parameter in required_parameters:
        if request_parameter.lower() not in wms_args:
            missing_request_parameters.add(request_parameter)

    if not is_esri:
        if "version" in wms_args and wms_args["version"] == "1.3.0":
            if "crs" not in wms_args:
                missing_request_parameters.add("crs")
            if "srs" in wms_args:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=f"WMS {wms_args['version']} URLs should not contain SRS parameter: {url}",
                    ))
        elif "version" in wms_args and not wms_args["version"] == "1.3.0":
            if "srs" not in wms_args:
                missing_request_parameters.add("srs")
            if "crs" in wms_args:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=f"WMS {wms_args['version']} URLs should not contain CRS parameter: {url}",
                    ))
    if len(missing_request_parameters) > 0:
        missing_request_parameters_str = ",".join(missing_request_parameters)
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=f"Parameter '{missing_request_parameters_str}' is missing in URL: {url}.",
            ))
        return

    # Nothing more to do for the ESRI REST API
    if is_esri:
        return

    # Styles is mandatory according to the WMS specification, but some WMS servers seem not to care
    if "styles" not in wms_args:
        messages.append(
            Message(
                level=MessageLevel.WARNING,
                message=f"Parameter 'styles' is missing in url. 'STYLES=' can be used to request the default style.: {url}",
            ))

    # We first send a service=WMS&request=GetCapabilities request to the server.
    # According to the WMS specification Section 6.2 (Version numbering and negotiation), the server
    # should return the GetCapabilities XML with the highest version the server supports.
    # If this fails, we try to explicitly specify a WMS version.
    exceptions: List[str] = []
    wms = None
    for wms_version in [None, "1.3.0", "1.1.1", "1.1.0", "1.0.0"]:
        if wms_version is None:
            wms_version_str = "-"
        else:
            wms_version_str = wms_version

        wms_getcapabilities_url = None
        try:
            wms_getcapabilities_url = wms_url.get_capabilities_url(wms_version=wms_version)
            _, xml = get_text_encoded(wms_getcapabilities_url, headers=source_headers)
            if xml is not None:
                wms = wmshelper.WMSCapabilities(xml)
                break
        except Exception as e:
            exceptions.append(f"WMS {wms_version_str}: Error: {e} {wms_getcapabilities_url}")
            continue

    # Check if it was possible to parse the WMS GetCapabilities response.
    # If not, there is nothing left to check.
    if wms is None:
        for msg in exceptions:
            messages.append(Message(level=MessageLevel.ERROR, message=msg))
        return

    # Log access constraints and fees metadata
    for access_constraint in wms.access_constraints:
        messages.append(
            Message(level=MessageLevel.INFO,
                    message=f"AccessConstraints: {access_constraint}"))
    for fee in wms.fees:
        messages.append(Message(level=MessageLevel.INFO, message=f"Fee: {fee}"))

    if source["geometry"] is None:
        geom = None
    else:
        geom = eliutils.parse_eli_geometry(source["geometry"])

    # Check layers
    if "layers" in wms_args:
        layers = wms_args["layers"].split(",")

        # Check if the layers in the WMS GetMap URL are advertised by the WMS server.
        not_found_layers = [
            layer_name for layer_name in layers if layer_name not in wms.layers
        ]
        if len(not_found_layers) > 0:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message=f"Layers '{','.join(not_found_layers)}' not advertised by WMS GetCapabilities request "
                    f"(some servers do not advertise layers, but they are very rare): {url}",
                ))

        # Check source geometry against the layer bounding box.
        # Regardless of its projection, each layer should advertise an approximated bounding box in lon/lat.
        # See WMS 1.3.0 Specification Section 7.2.4.6.6 EX_GeographicBoundingBox
        if geom is not None and geom.is_valid:  # type: ignore
            bboxs = [
                wms.layers[layer_name].bbox for layer_name in layers
                if layer_name in wms.layers and wms.layers[layer_name].bbox
            ]
            bboxs = [bbox for bbox in bboxs if bbox is not None]
            max_area_outside = max_area_outside_bbox(geom, bboxs)

            # 5% is an arbitrarily chosen value and should be adapted as needed
            if max_area_outside > 5.0:
                messages.append(
                    Message(
                        level=MessageLevel.ERROR,
                        message=f"{round(max_area_outside, 2)}% of geometry is outside of the layers bounding box. "
                        "Geometry should be checked",
                    ))

        # Check styles
        if "styles" in wms_args:
            style_parameter = wms_args["styles"]
            # The default style does not need to be advertised by the server
            if not (style_parameter == "default" or style_parameter == ""
                    or style_parameter == "," * len(layers)):
                styles = style_parameter.split(",")
                if not len(styles) == len(layers):
                    messages.append(
                        Message(
                            level=MessageLevel.ERROR,
                            message=f"Not the same number of styles and layers. {len(styles)} vs {len(layers)}",
                        ))
                else:
                    for layer_name, style_name in zip(layers, styles):
                        if (len(style_name) > 0 and not style_name == "default"
                                and layer_name in wms.layers
                                and style_name not in wms.layers[layer_name].styles):
                            messages.append(
                                Message(
                                    level=MessageLevel.ERROR,
                                    message=f"Layer '{layer_name}' does not support style '{style_name}'",
                                ))

        # Check CRS
        if "available_projections" not in source["properties"]:
            messages.append(
                Message(
                    level=MessageLevel.ERROR,
                    message="Sources of type wms must include the 'available_projections' element.",
                ))
        else:
            # A WMS server can include many CRS. Some of them are frequently used by editors.
            # We require them to be included if they are supported by the WMS server.
            crs_should_included_if_available = {"EPSG:4326", "EPSG:3857", "CRS:84"}

            for layer_name in layers:
                if layer_name in wms.layers:
                    # Check for CRS in available_projections that are not advertised by the WMS server
                    not_supported_crs: Set[str] = set()
                    available_projections: List[str] = source["properties"]["available_projections"]
                    for crs in available_projections:
                        if crs.upper() not in wms.layers[layer_name].crs:
                            not_supported_crs.add(crs)

                    if len(not_supported_crs) > 0:
                        supported_crs_str = ",".join(wms.layers[layer_name].crs)
                        not_supported_crs_str = ",".join(not_supported_crs)
                        messages.append(
                            Message(
                                level=MessageLevel.WARNING,
                                message=f"Layer '{layer_name}': CRS '{not_supported_crs_str}' not in: {supported_crs_str}. "
                                "Some servers support CRS which are not advertised.",
                            ))

                    # Check for CRS supported by the WMS server but not in available_projections
                    supported_but_not_included: Set[str] = set()
                    for crs in crs_should_included_if_available:
                        if crs not in available_projections and crs in wms.layers[layer_name].crs:
                            supported_but_not_included.add(crs)

                    if len(supported_but_not_included) > 0:
                        supported_but_not_included_str = ",".join(supported_but_not_included)
                        messages.append(
                            Message(
                                level=MessageLevel.WARNING,
                                message=f"Layer '{layer_name}': CRS '{supported_but_not_included_str}' not included "
                                "in available_projections but supported by server.",
                            ))

    # Check if the server supports a newer WMS version than the one in the url
    if wms_args["version"] < wms.version:
        messages.append(
            Message(
                level=MessageLevel.WARNING,
                message=f"Query requests WMS version '{wms_args['version']}', server supports '{wms.version}'",
            ))

    # Check image formats
    request_imagery_format = wms_args["format"]
    wms_advertised_formats_str = "', '".join(wms.formats)
    if request_imagery_format not in wms.formats:
        messages.append(
            Message(
                level=MessageLevel.ERROR,
                message=f"Format '{request_imagery_format}' not in '{wms_advertised_formats_str}': {url}.",
            ))

    # For photo sources it is recommended to use the JPEG format, if it is available
    if "category" in source["properties"] and "photo" in source["properties"]["category"]:
        if "jpeg" not in request_imagery_format and "jpeg" in wms.formats:
            messages.append(
                Message(
                    level=MessageLevel.WARNING,
                    message=f"Server supports JPEG, but '{request_imagery_format}' is used. "
                    "JPEG is typically preferred for photo sources, but might not always be "
                    f"the best choice. (Server supports: '{wms_advertised_formats_str}')",
                ))
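# Illustrative usage (assumption): running the Message-based check_wms on a parsed source and
# failing if any ERROR-level message was produced. Loading the source file, the helper name
# "check_source_file", and the exit behaviour are assumptions, not taken from the original script.
import json
import sys
from typing import Any, Dict, List


def check_source_file(path: str) -> List[Message]:
    with open(path, encoding="utf-8") as f:
        source: Dict[str, Any] = json.load(f)
    messages: List[Message] = []
    if source["properties"]["type"] == "wms":
        check_wms(source, messages)
    return messages


if __name__ == "__main__":
    msgs = check_source_file(sys.argv[1])
    for m in msgs:
        print(f"{m.level.name}: {m.message}")
    if any(m.level == MessageLevel.ERROR for m in msgs):
        sys.exit(1)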