async def get_wms_image(tile, source, session):
    """Build the WMS GetMap URL for one tile of an imagery source.

    Despite its name the function downloads nothing: it fills the ``proj``,
    ``width``, ``height`` and ``bbox`` placeholders of the source url for a
    256x256 request and returns the resulting string.

    Parameters
    ----------
    tile : object
        Tile handed to ``mercantile.bounds`` to obtain the request extent.
    source : dict
        Source description; ``source["properties"]`` must contain ``url`` and
        is expected to contain ``available_projections``.
    session : object
        Not used by this function; kept for interface compatibility.

    Returns
    -------
    str or None
        The formatted URL, or None when the source lists no projections, no
        projection could be selected, or ``_get_bbox`` returned None.
    """
    properties = source["properties"]
    # Cheap guard first: nothing else is needed when no projection is listed.
    if "available_projections" not in properties:
        return None
    available_projections = properties["available_projections"]
    url = properties["url"]

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Take the first projection pyproj can parse. If none parses, the last
        # candidate is deliberately left in ``proj``: ``_get_bbox`` can still
        # handle "CRS:84" without pyproj and returns None for anything else.
        for proj in sorted(available_projections):
            try:
                CRS.from_string(proj)
            except Exception:
                continue
            break
    if proj is None:
        return None

    wms_version = wms_version_from_url(url)
    bounds = list(mercantile.bounds(tile))
    bbox = _get_bbox(proj, bounds, wms_version)
    if bbox is None:
        return None

    return url.format(proj=proj, width=256, height=256, bbox=bbox)
def _get_bbox(proj, bounds, wms_version): """ Build wms bbox parameter for GetMap request""" if proj in {"EPSG:4326", "CRS:84"}: if proj == "EPSG:4326" and wms_version == "1.3.0": bbox = ",".join( map(str, [bounds[1], bounds[0], bounds[3], bounds[2]])) else: bbox = ",".join(map(str, bounds)) else: try: crs_from = CRS.from_string("epsg:4326") crs_to = CRS.from_string(proj) transformer = get_transformer(crs_from, crs_to) bounds = list(transformer.transform(bounds[0], bounds[1])) + list( transformer.transform(bounds[2], bounds[3])) except: return None # WMS < 1.3.0 assumes x,y coordinate ordering. # WMS 1.3.0 expects coordinate ordering defined in CRS. # if crs_to.axis_info[0].direction == "north" and wms_version == "1.3.0": bbox = ",".join( map(str, [bounds[1], bounds[0], bounds[3], bounds[2]])) else: bbox = ",".join(map(str, bounds)) return bbox
def test_proj(proj):
    """Return True if *proj* is a projection identifier that can be used.

    ``CRS:84`` is always accepted, anything containing ``AUTO`` is rejected,
    and identifiers containing ``EPSG`` are accepted only if
    ``CRS.from_string`` can parse them. Everything else is rejected.

    Parameters
    ----------
    proj : str
        Projection identifier such as ``"EPSG:3857"``.

    Returns
    -------
    bool
    """
    if proj == 'CRS:84':
        return True
    if 'AUTO' in proj:
        return False
    if 'EPSG' not in proj:
        return False
    try:
        CRS.from_string(proj)
    except Exception:
        # Unparsable code. Narrower than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return False
    return True


# Despite the ``test_`` prefix this is a helper, not a test case: keep pytest
# from collecting it when the name is imported into a test module.
test_proj.__test__ = False
def test_proj(proj):
    """Return True if *proj* is a projection identifier that can be used.

    ``CRS:84`` is always accepted. Identifiers containing ``AUTO`` and the two
    codes that only exist under the ESRI authority are rejected. Remaining
    identifiers containing ``EPSG`` are accepted only if ``CRS.from_string``
    can parse them; everything else is rejected.

    Parameters
    ----------
    proj : str
        Projection identifier such as ``"EPSG:3857"``.

    Returns
    -------
    bool
    """
    # Not valid EPSG codes, they should be ESRI:102067 / ESRI:102066:
    # https://epsg.io/102067 and https://epsg.io/102066
    esri_only_codes = {"EPSG:102067", "EPSG:102066"}

    if proj == "CRS:84":
        return True
    if "AUTO" in proj or proj in esri_only_codes:
        return False
    if "EPSG" not in proj:
        return False
    try:
        CRS.from_string(proj)
    except Exception:
        # Unparsable code. Narrower than a bare ``except`` so that
        # KeyboardInterrupt/SystemExit are not swallowed.
        return False
    return True


# Despite the ``test_`` prefix this is a helper, not a test case: keep pytest
# from collecting it when the name is imported into a test module.
test_proj.__test__ = False
def create_hist(result_dir, map_raster, alert):
    """Reproject a map raster to ESRI:54009 and return its pixel histogram.

    The reprojected copy is written to ``result_dir`` as
    ``<map_raster stem>_proj.tif``; the histogram is computed from that copy
    by ``pixel_count`` and receives an extra ``'area'`` entry computed by
    ``to_hectar`` from the pixel counts and the output resolution.

    Parameters
    ----------
    result_dir : path-like
        Directory receiving the reprojected raster (joined with ``/``).
    map_raster : path-like
        Input raster; must provide ``is_file()`` and ``stem``.
    alert : object
        Not used by this function.

    Returns
    -------
    The object returned by ``pixel_count`` with the ``'area'`` entry added.

    Raises
    ------
    Exception
        If ``map_raster`` is not an existing file.
    """
    if not map_raster.is_file():
        raise Exception('No gfc map')

    # project raster in world mollweide
    with rio.open(map_raster) as src:
        mollweide = CRS.from_string('ESRI:54009')

        # GDAL older than 3.0.0 gets the WKT1_GDAL flavour of the definition
        old_gdal = LooseVersion(rio.__gdal_version__) < LooseVersion("3.0.0")
        wkt = mollweide.to_wkt(WktVersion.WKT1_GDAL) if old_gdal else mollweide.to_wkt()
        rio_crs = RioCRS.from_wkt(wkt)

        transform, width, height = calculate_default_transform(
            src.crs, rio_crs, src.width, src.height, *src.bounds
        )

        profile = src.meta.copy()
        profile.update(crs=rio_crs, transform=transform, width=width, height=height)

        map_raster_proj = result_dir / f'{map_raster.stem}_proj.tif'

        with rio.open(map_raster_proj, 'w', **profile) as dst:
            for band_index in range(1, src.count + 1):
                reproject(
                    source=rio.band(src, band_index),
                    destination=rio.band(dst, band_index),
                    src_transform=src.transform,
                    src_crs=src.crs,
                    dst_transform=transform,
                    dst_crs=rio_crs,
                    resampling=Resampling.nearest,
                )
            resx, resy = dst.res

    # realize a primary hist
    hist = pixel_count(map_raster_proj)

    # convert to hectars
    hist['area'] = to_hectar(hist['pixels'], abs(resx), abs(resy))

    return hist
def subset_shape(
    ds: Union[xarray.DataArray, xarray.Dataset],
    shape: Union[str, Path, gpd.GeoDataFrame],
    vectorize: bool = True,
    raster_crs: Optional[Union[str, int]] = None,
    shape_crs: Optional[Union[str, int]] = None,
    buffer: Optional[Union[int, float]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> Union[xarray.DataArray, xarray.Dataset]:
    """Subset a DataArray or Dataset with a vector shape and an optional date range.

    Grid points outside the shape are masked, coordinates that end up fully
    masked are dropped, and a ``crs`` variable / attribute describing the
    raster CRS is attached to the result.

    Parameters
    ----------
    ds : Union[xarray.DataArray, xarray.Dataset]
        Input values. Must expose ``lon`` and ``lat``.
    shape : Union[str, Path, gpd.GeoDataFrame]
        A GeoDataFrame (copied before use), or a path read with
        ``gpd.GeoDataFrame.from_file``.
    vectorize : bool
        Build the mask with ``create_mask_vectorize`` (True) or with the
        spatial-join based ``create_mask`` (False).
    raster_crs : Optional[Union[str, int]]
        CRS of the raster, passed to ``CRS.from_user_input``. When omitted it
        is read from ``ds.crs`` or assumed to be WGS84.
    shape_crs : Optional[Union[str, int]]
        CRS of the shape, passed to ``CRS.from_user_input``. When omitted the
        CRS of the shape itself is used.
    buffer : Optional[Union[int, float]]
        Distance by which the shape geometry is buffered before subsetting, in
        the units of the shape.
    start_date : Optional[str]
        Start of the time subset, forwarded to ``subset_time``.
    end_date : Optional[str]
        End of the time subset, forwarded to ``subset_time``.

    Returns
    -------
    Union[xarray.DataArray, xarray.Dataset]
        A subset of `ds`, of the same kind as the input.

    Raises
    ------
    ValueError
        If no grid cell falls inside the shape bounding box or the shape
        itself, or if the shape bounds exceed the WGS84 domain while no
        `raster_crs` was given.

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> from xclim.subset import subset_shape  # doctest: +SKIP
    >>> pr = xr.open_dataset(path_to_pr_file).pr  # doctest: +SKIP
    ...
    # Subset data array by shape
    >>> prSub = subset_shape(pr, shape=path_to_shape_file)  # doctest: +SKIP
    ...
    # Subset data array by shape and single year
    >>> prSub = subset_shape(pr, shape=path_to_shape_file, start_date='1990-01-01', end_date='1990-12-31')  # doctest: +SKIP
    ...
    # Subset multiple variables in a single dataset
    >>> ds = xr.open_mfdataset([path_to_tasmin_file, path_to_tasmax_file])  # doctest: +SKIP
    >>> dsSub = subset_shape(ds, shape=path_to_shape_file)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)
    # WGS84 variant carrying a lon_wrap=180 parameter; selected below when the
    # grid longitudes lie within [0, 360].
    wgs84_wrapped = CRS.from_string(
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs lon_wrap=180"
    )

    is_data_array = isinstance(ds, xarray.DataArray)
    ds_copy = ds._to_temp_dataset() if is_data_array else ds.copy()

    poly = (
        shape.copy()
        if isinstance(shape, gpd.GeoDataFrame)
        else gpd.GeoDataFrame.from_file(shape)
    )
    if buffer is not None:
        poly.geometry = poly.buffer(buffer)

    # Bounding box of the (possibly buffered) shape.
    minx, miny, maxx, maxy = poly.total_bounds
    lon_bnds, lat_bnds = (minx, maxx), (miny, maxy)

    # Cut down to the bounding box first to reduce processing time. The one
    # case subset_bbox does not implement (bounds crossing the 0 deg meridian
    # on a grid with only positive lons) is simply skipped.
    try:
        ds_copy = subset_bbox(ds_copy, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
    except NotImplementedError:
        pass

    if ds_copy.lon.size == 0 or ds_copy.lat.size == 0:
        raise ValueError(
            "No grid cell centroids found within provided polygon bounding box. "
            'Try using the "buffer" option to create an expanded area.'
        )

    if start_date or end_date:
        ds_copy = subset_time(ds_copy, start_date=start_date, end_date=end_date)

    # Resolve the CRS of the shape and of the raster.
    shape_crs = (
        CRS.from_user_input(shape_crs) if shape_crs is not None else CRS(poly.crs)
    )

    wrap_lons = False
    if raster_crs is not None:
        raster_crs = CRS.from_user_input(raster_crs)
    else:
        if np.min(lat_bnds) < -90 or np.max(lat_bnds) > 90:
            raise ValueError("Latitudes exceed domain of WGS84 coordinate system.")
        if np.min(lon_bnds) < -180 or np.max(lon_bnds) > 180:
            raise ValueError("Longitudes exceed domain of WGS84 coordinate system.")

        try:
            # Extract CF-compliant CRS_WKT from crs variable.
            raster_crs = CRS.from_cf(ds_copy.crs.attrs)
        except AttributeError:
            lons_within_0_360 = (
                np.min(ds_copy.lon) >= 0 and np.max(ds_copy.lon) <= 360
            )
            if lons_within_0_360:
                wrap_lons = True
                raster_crs = wgs84_wrapped
            else:
                raster_crs = wgs84

    _check_crs_compatibility(shape_crs=shape_crs, raster_crs=raster_crs)

    # Build the mask with the requested backend.
    build_mask = create_mask_vectorize if vectorize else create_mask
    mask_2d = build_mask(
        x_dim=ds_copy.lon, y_dim=ds_copy.lat, poly=poly, wrap_lons=wrap_lons
    )

    if np.all(mask_2d.isnull()):
        raise ValueError(
            f"No grid cell centroids found within provided polygon bounds ({poly.bounds}). "
            'Try using the "buffer" option to create an expanded areas or verify polygon.'
        )

    # Mask every variable that carries all of the mask's dimensions.
    for v in ds_copy.data_vars:
        if set(mask_2d.dims) <= set(ds_copy[v].dims):
            ds_copy[v] = ds_copy[v].where(mask_2d.notnull())

    # Remove coordinates where all values are outside of region mask
    for dim in mask_2d.dims:
        mask_2d = mask_2d.dropna(dim, how="all")
    ds_copy = ds_copy.sel({dim: mask_2d[dim] for dim in mask_2d.dims})

    # Record the CRS both as a global attribute and as a CF grid-mapping variable.
    ds_copy.attrs["crs"] = raster_crs.to_string()
    ds_copy["crs"] = 1
    ds_copy["crs"].attrs.update(raster_crs.to_cf())
    for v in ds_copy.variables:
        if {"lat", "lon"} <= set(ds_copy[v].dims):
            ds_copy[v].attrs["grid_mapping"] = "crs"

    if is_data_array:
        return ds._from_temp_dataset(ds_copy)
    return ds_copy
def create_mask(
    *,
    x_dim: xarray.DataArray = None,
    y_dim: xarray.DataArray = None,
    poly: gpd.GeoDataFrame = None,
    wrap_lons: bool = False,
    check_overlap: bool = False,
):
    """Create a mask with values corresponding to the features in a GeoDataFrame using spatial join methods.

    The returned mask's points have the value of the first geometry of `poly` they fall in.

    Parameters
    ----------
    x_dim : xarray.DataArray
      X or longitudinal dimension of xarray object.
    y_dim : xarray.DataArray
      Y or latitudinal dimension of xarray object.
    poly : gpd.GeoDataFrame
      GeoDataFrame used to create the xarray.DataArray mask.
    wrap_lons : bool
      Reference the grid points with a WGS84 definition using ``+lon_wrap=180``
      instead of plain EPSG:4326, and emit a warning; Default = False
    check_overlap: bool
      Perform a check to verify if shapes contain overlapping geometries.

    Returns
    -------
    xarray.DataArray
      For each grid point, the index of the polygon it intersects (NaN where
      the left join found none).

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> import geopandas as gpd  # doctest: +SKIP
    >>> from xclim.subset import create_mask  # doctest: +SKIP
    >>> ds = xr.open_dataset(path_to_tasmin_file)  # doctest: +SKIP
    >>> polys = gpd.read_file(path_to_multi_shape_file)  # doctest: +SKIP
    ...
    # Get a mask from all polygons in the shape file
    >>> mask = create_mask(x_dim=ds.lon, y_dim=ds.lat, poly=polys)  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions=mask)  # doctest: +SKIP
    ...
    # Operations can be applied to each regions with  `groupby`. Ex:
    >>> ds = ds.groupby('regions').mean()  # doctest: +SKIP
    ...
    # Extra step to retrieve the names of those polygons stored in the "id" column
    >>> region_names = xr.DataArray(polys.id, dims=('regions',))  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions_names=region_names)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)

    if check_overlap:
        _check_has_overlaps(polygons=poly)
    if wrap_lons:
        warnings.warn("Wrapping longitudes at 180 degrees.")

    # Logical ``and`` is required here: the previous bitwise ``&`` binds
    # tighter than ``==`` and turned the test into a chained comparison that
    # was also true for e.g. a 1-D x_dim combined with a 3-D y_dim.
    if len(x_dim.shape) == 1 and len(y_dim.shape) == 1:
        # create a 2d grid of lon, lat values
        lon1, lat1 = np.meshgrid(
            np.asarray(x_dim.values), np.asarray(y_dim.values), indexing="ij"
        )
        dims_out = x_dim.dims + y_dim.dims
        coords_out = dict()
        coords_out[dims_out[0]] = x_dim.values
        coords_out[dims_out[1]] = y_dim.values
    else:
        # Coordinates are taken as already gridded; the reshape below needs
        # them to have at least two dimensions.
        lon1 = x_dim.values
        lat1 = y_dim.values
        dims_out = x_dim.dims
        coords_out = x_dim.coords

    # create pandas Dataframe from NetCDF lat and lon points
    df = pd.DataFrame(
        {
            "id": np.arange(0, lon1.size),
            "lon": lon1.flatten(),
            "lat": lat1.flatten(),
        }
    )
    df["Coordinates"] = list(zip(df.lon, df.lat))
    df["Coordinates"] = df["Coordinates"].apply(Point)

    # create GeoDataFrame (spatially referenced with shifted longitude values if needed).
    if wrap_lons:
        wgs84 = CRS.from_string(
            "+proj=longlat +datum=WGS84 +no_defs +type=crs +lon_wrap=180"
        )
    gdf_points = gpd.GeoDataFrame(df, geometry="Coordinates", crs=wgs84)

    # spatial join geodata points with region polygons and remove duplicates
    point_in_poly = gpd.tools.sjoin(gdf_points, poly, how="left", op="intersects")
    # A point inside several polygons appears once per match; keep the first.
    point_in_poly = point_in_poly.loc[~point_in_poly.index.duplicated(keep="first")]

    # extract polygon ids for points
    mask = point_in_poly["index_right"]
    mask_2d = np.array(mask).reshape(lat1.shape[0], lat1.shape[1])
    mask_2d = xarray.DataArray(mask_2d, dims=dims_out, coords=coords_out)
    return mask_2d
def func_checker(*args, **kwargs):
    """
    Split and reproject polygon vectors in a GeoDataFrame whose values cross the Greenwich Meridian.

    Begins by examining whether the geometry bounds the supplied cross longitude = 0 and if so, proceeds to split
    the polygons at the meridian into new polygons and erase a small buffer to prevent invalid geometries when
    transforming the lons from WGS84 to WGS84 +lon_wrap=180 (longitudes from 0 to 360).

    Calls the wrapped ``func`` with ``kwargs["poly"]`` replaced by the new features in a wrap_lon WGS84
    projection if needed. The call is passed through untouched when ``poly``, ``x_dim`` or ``wrap_lons`` is not
    given as a keyword argument, or when ``wrap_lons`` is falsy.

    Note that features crossing the meridian are replaced in place in the GeoDataFrame passed as ``poly``.
    """
    try:
        poly = kwargs["poly"]
        x_dim = kwargs["x_dim"]
        wrap_lons = kwargs["wrap_lons"]
    except KeyError:
        return func(*args, **kwargs)

    if wrap_lons:
        # ``np.max(x_dim)``: the previous ``np.max >= 180`` compared the
        # function object itself with an int and raised TypeError.
        if (np.min(x_dim) < 0 and np.max(x_dim) >= 360) or (
            np.min(x_dim) < -180 and np.max(x_dim) >= 180
        ):
            # TODO: This should raise an exception, right?
            warnings.warn(
                "DataArray doesn't seem to be using lons between 0 and 360 degrees or between -180 and 180 degrees."
                " Tread with caution.",
                UserWarning,
                stacklevel=4,
            )

        split_flag = False
        for index, feature in poly.iterrows():
            if (feature.geometry.bounds[0] < 0) and (feature.geometry.bounds[2] > 0):
                split_flag = True
                warnings.warn(
                    "Geometry crosses the Greenwich Meridian. Proceeding to split polygon at Greenwich."
                    " This feature is experimental. Output might not be accurate.",
                    UserWarning,
                    stacklevel=4,
                )

                # Create a meridian line at Greenwich, split polygons at this line and erase a buffer line
                if isinstance(feature.geometry, MultiPolygon):
                    union = MultiPolygon(cascaded_union(feature.geometry))
                else:
                    union = Polygon(cascaded_union(feature.geometry))
                meridian = LineString([Point(0, 90), Point(0, -90)])
                buffered = meridian.buffer(0.000000001)
                split_polygons = split(union, meridian)
                buffered_split_polygons = [
                    feat.difference(buffered) for feat in split_polygons
                ]

                # Cannot assign iterable with `at` (pydata/pandas#26333) so a small hack:
                # Load split features into a new GeoDataFrame with WGS84 CRS
                split_gdf = gpd.GeoDataFrame(
                    geometry=[cascaded_union(buffered_split_polygons)],
                    crs=CRS(4326),
                )
                poly.at[[index], "geometry"] = split_gdf.geometry.values

        # Reproject features in WGS84 CRS to use 0 to 360 as longitudinal values
        wrapped_lons = CRS.from_string(
            "+proj=longlat +ellps=WGS84 +lon_wrap=180 +datum=WGS84 +no_defs"
        )

        poly = poly.to_crs(crs=wrapped_lons)
        if split_flag:
            warnings.warn(
                "Rebuffering split polygons to ensure edge inclusion in selection.",
                UserWarning,
                stacklevel=4,
            )
            # Buffering returns bare geometries, so the frame and its CRS
            # are rebuilt afterwards.
            poly = gpd.GeoDataFrame(poly.buffer(0.000000001), columns=["geometry"])
            poly.crs = wrapped_lons

        kwargs["poly"] = poly

    return func(*args, **kwargs)
async def process_source(filename, session: ClientSession):
    """Re-sync one WMS source file with what its server currently advertises.

    The JSON source is loaded and a reference tile is fetched with the stored
    url. ``update_wms`` then provides an updated url and projection list, and
    the file is rewritten only if a tile fetched with the updated url has the
    same image hash and the url or projection list really changed.

    Parameters
    ----------
    filename : path-like
        JSON file describing the source; rewritten in place on change.
    session : ClientSession
        HTTP session handed on to ``get_image`` and ``update_wms``.

    Returns
    -------
    None
        Sources that cannot be checked are skipped. Every exception is logged
        and swallowed, so one broken source cannot abort the whole run.
    """
    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
        source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # check if it is esri rest and not wms
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        # Pick the point used for the test tile.
        if source["geometry"] is None:
            # No geometry: treat the source as covering the whole world and
            # use a fixed test point.
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"], test_zoom_level)

        # Get existing image hash
        original_img_messages = []
        image_hash = await get_image(
            url=source["properties"]["url"],
            available_projections=source["properties"]["available_projections"],
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if image_hash is None:
            # We are finished if it was not possible to get the image
            return

        if max_count(str(image_hash)) == 16:
            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            logging.info(
                "ImageHash {} not useful for: {} || {}".format(
                    str(image_hash), filename, " || ".join(original_img_messages)
                )
            )
            return

        # Update wms
        wms_messages = []
        result = await update_wms(source["properties"]["url"], session, wms_messages)
        if result is None:
            logging.info(
                "Not possible to update wms url for {}: {}".format(
                    filename, " || ".join(wms_messages)
                )
            )
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in result["available_projections"]:
                epsg_image_hash = await get_image(
                    url=result["url"],
                    available_projections=[EPSG],
                    lon=pt.x,
                    lat=pt.y,
                    zoom=test_zoom_level,
                    session=session,
                    messages=[],
                )
                if epsg_image_hash == image_hash:
                    result["available_projections"].add(EPSG)

        # Download image for updated url
        new_img_messages = []
        new_image_hash = await get_image(
            url=result["url"],
            available_projections=result["available_projections"],
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )
        if new_image_hash is None:
            logging.warning(
                "Could not download new image: {}".format(
                    " || ".join(new_img_messages)
                )
            )
            return

        # Only sources are updated where the new query returns the same image
        if not image_hash == new_image_hash:
            logging.info(
                "Image hash not the same for: {}: '{}' vs '{}' | {} | {}".format(
                    filename,
                    image_hash,
                    new_image_hash,
                    " || ".join(original_img_messages),
                    " || ".join(new_img_messages),
                )
            )
            # Stop here: without this return the source was updated anyway,
            # contradicting the rule stated above.
            return

        # Servers might support projections that are not used in the area covered by a source
        # Keep only EPSG codes that are used in the area covered by the sources geometry
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in result["available_projections"]:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    logging.exception(
                        "Could not check area of use for projection {}: {}".format(
                            epsg, str(e)
                        )
                    )
                    continue
            if len(result["available_projections"]) == len(epsg_outside_area_of_use):
                logging.error(
                    "{}: epsg_outside_area_of_use filter removes all EPSG".format(
                        filename
                    )
                )
            result["available_projections"] -= epsg_outside_area_of_use

        # Servers that report a lot of projection may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(result["available_projections"]) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in result["available_projections"]:
                    filtered_projs.add(proj)
            for proj in source["properties"]["available_projections"]:
                if proj in result["available_projections"]:
                    filtered_projs.add(proj)
            result["available_projections"] = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in result["available_projections"]:
            result["available_projections"] -= epsg_3857_alias
        else:
            # if EPSG:3857 not present but alias, keep only alias with highest number to be consistent
            result_epsg_3857_alias = result["available_projections"] & epsg_3857_alias
            result_epsg_3857_alias_sorted = list(
                sorted(
                    result_epsg_3857_alias,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                    reverse=True,
                )
            )
            result["available_projections"] -= set(result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        result["available_projections"].intersection_update(valid_epsgs)

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"], result["url"])
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            result["available_projections"],
        )

        if url_has_changed:
            source["properties"]["url"] = result["url"]
        if projections_have_changed:
            # Stored sorted by authority name, then numeric code.
            source["properties"]["available_projections"] = list(
                sorted(
                    result["available_projections"],
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                )
            )

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source, out, indent=4, sort_keys=False, ensure_ascii=False)
                out.write("\n")
    except Exception as e:
        logging.exception("Failed to check source {}: {}".format(filename, str(e)))
async def get_image(url, available_projections, lon, lat, zoom, session, messages):
    """Download the tile covering (lon, lat) at the given zoom and hash it.

    Parameters
    ----------
    url : str
        WMS url template with ``proj``, ``width``, ``height`` and ``bbox``
        placeholders.
    available_projections : collection
        Projection identifiers the request may use; EPSG:4326 is preferred,
        then EPSG:3857, then the first one (sorted) that pyproj can parse.
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list
        Receives human readable progress / error messages (mutated in place).

    Returns
    -------
    ImageHash or None
        None if no usable projection is available or if all three download
        attempts failed.
    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Probe with a separate name: previously the loop variable itself was
        # ``proj``, so when nothing parsed the last failing candidate leaked
        # out and ``CRS.from_string(proj)`` below raised unhandled.
        for candidate in sorted(available_projections):
            try:
                CRS.from_string(candidate)
            except Exception:
                continue
            proj = candidate
            break

    if proj is None:
        messages.append("No projection left: {}".format(available_projections))
        return None

    crs_from = CRS.from_string("epsg:4326")
    crs_to = CRS.from_string(proj)
    if not proj == "EPSG:4326":
        transformer = Transformer.from_crs(crs_from, crs_to, always_xy=True)
        bounds = list(transformer.transform(bounds[0], bounds[1])) + list(
            transformer.transform(bounds[2], bounds[3])
        )

    # WMS < 1.3.0 assumes x,y coordinate ordering.
    # WMS 1.3.0 expects coordinate ordering defined in CRS.
    if crs_to.axis_info[0].direction == "north" and "=1.3.0" in url:
        bbox = ",".join(map(str, [bounds[1], bounds[0], bounds[3], bounds[2]]))
    else:
        bbox = ",".join(map(str, bounds))

    formatted_url = url.format(proj=proj, width=512, height=512, bbox=bbox)
    messages.append("Image URL: {}".format(formatted_url))

    for i in range(3):
        try:
            # Download image
            async with session.request(
                method="GET", url=formatted_url, ssl=False
            ) as response:
                data = await response.read()
                img = Image.open(io.BytesIO(data))
                img_hash = imagehash.average_hash(img)
                messages.append("ImageHash: {}".format(img_hash))
                return img_hash
        except Exception as e:
            messages.append(
                "Could not download image in try {}: {}".format(i, str(e))
            )
        # Wait before the next attempt.
        await asyncio.sleep(5)

    return None
async def process_source(filename, session: ClientSession):
    """Re-sync one WMS source file with what its server currently advertises.

    The JSON source is loaded and a reference tile is fetched with the stored
    url. ``update_wms`` then provides an updated url and projection list; the
    projection list is filtered in several steps, and the file is rewritten
    only if a tile fetched with the updated url is similar to the reference
    and the url or projection list really changed.

    Parameters
    ----------
    filename : path-like
        JSON file describing the source; rewritten in place on change.
    session : ClientSession
        HTTP session handed on to ``get_image`` and ``update_wms``.

    Returns
    -------
    None
        Sources that cannot be checked are skipped. Every exception is logged
        and swallowed, so one broken source cannot abort the whole run.
    """
    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non wms layers
        if not source["properties"]["type"] == "wms":
            return
        # check if it is esri rest and not wms
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        # Only used in log messages below.
        category = source["properties"].get("category", None)

        if source["geometry"] is None:
            # No geometry: treat the source as covering the whole world and
            # use a fixed test point.
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        # Clamp the test zoom into the zoom range declared by the source.
        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"],
                                  test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"],
                                  test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages = []
        status, image_hash = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if not status == ImageHashStatus.SUCCESS or image_hash is None:
            # We are finished if it was not possible to get the image
            return

        if max_count(str(image_hash)) == 16:

            # Extra warning when a source categorised as photo yields such a hash.
            if ("category" in source["properties"]
                    and "photo" in source["properties"]["category"]):
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename} has category {category} but image hash is {image_hash}:\n\t{msgs}"
                )

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"Image hash {image_hash} not useful for: {filename} ({category}): \n\t{error_msgs}"
            )
            return

        # Update wms
        wms_messages = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(
                f"Not possible to update wms url for {filename}:\n\t{error_msgs}"
            )
            return

        # From here on only new_url / new_projections are used.
        new_url = result["url"]
        new_projections = result["available_projections"]
        del result

        # Download image for updated url
        new_img_messages = []
        new_status, new_image_hash = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )

        if not new_status == ImageHashStatus.SUCCESS or new_image_hash is None:
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"Could not download new image: {new_status}\n\t{error_msgs}")
            return

        # Only sources are updated where the new query returns the same image
        if not image_similar(image_hash, new_image_hash, test_zoom_level):
            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"Image hash not the same for: {filename}: {image_hash} - {new_image_hash}: {image_hash - new_image_hash}\n\t{error_original_img_messages} \n\t{error_new_img_messages}"
            )
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in new_projections:
                epsg_check_messages = []
                epsg_image_status, epsg_image_hash = await get_image(
                    url=new_url,
                    available_projections=[EPSG],
                    lon=pt.x,
                    lat=pt.y,
                    zoom=test_zoom_level,
                    session=session,
                    messages=epsg_check_messages,
                )

                if not epsg_image_status == ImageHashStatus.SUCCESS:
                    continue

                # Relax similarity constraint to account for differences due to reprojection
                hash_diff = image_hash - epsg_image_hash
                # org_hash_msgs = "\n\t".join(original_img_messages)
                # epsg_check_msgs = "\n\t".join(epsg_check_messages)
                if image_similar(image_hash, epsg_image_hash, test_zoom_level):
                    new_projections.add(EPSG)
                    # logging.info(
                    #     f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}\n\t{org_hash_msgs}\n\t{epsg_check_msgs}"
                    # )
                    logging.info(
                        f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )
                elif epsg_image_hash is not None:
                    # logging.info(
                    #     f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}\n\t{org_hash_msgs}\n\t{epsg_check_msgs}"
                    # )
                    logging.info(
                        f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )

        # Servers might support projections that are not used in the area covered by a source
        # Keep only EPSG codes that are used in the area covered by the sources geometry
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in new_projections:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    logging.exception(
                        f"Could not check area of use for projection {epsg}: {e}"
                    )
                    continue
            if len(new_projections) == len(epsg_outside_area_of_use):
                logging.error(
                    f"{filename}: epsg_outside_area_of_use filter removes all EPSG"
                )
            new_projections -= epsg_outside_area_of_use

        # Servers that report a lot of projection may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(new_projections) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in new_projections:
            new_projections -= epsg_3857_alias
        else:
            # if EPSG:3857 not present but alias, keep only alias with highest number to be consistent
            result_epsg_3857_alias = new_projections & epsg_3857_alias
            result_epsg_3857_alias_sorted = list(
                sorted(
                    result_epsg_3857_alias,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                    reverse=True,
                ))
            new_projections -= set(result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        new_projections.intersection_update(valid_epsgs)

        # Check if projections are supported by server
        not_supported_projections = set()
        image_hashes = {}
        for proj in new_projections:
            proj_messages = []
            proj_status, proj_image_hash = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,
                lat=pt.y,
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )

            # Kept per projection for the EPSG:3857 / EPSG:4326 comparison below.
            image_hashes[proj] = {
                "status": proj_status,
                "hash": proj_image_hash,
                "logs": proj_messages,
            }

            if proj_status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                # msgs = "\n\t".join(proj_messages)
                # logging.info(f"{filename} {proj}: {proj_status}:\n\t{msgs}")
            # elif proj_status == ImageHashStatus.SUCCESS and max_count(str(proj_image_hash)) == 16 and not max_count(str(image_hash)) == 16:
            #     # Empty images indicate that server does not support this projection correctly
            #     not_supported_projections.add(proj)
            elif proj_status == ImageHashStatus.NETWORK_ERROR:
                # On a network error, drop the projection unless the source already listed it
                if proj not in old_projections:
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed_projections = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed_projections}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar (a mismatch is only logged)
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes
                and image_hashes["EPSG:3857"]["status"] == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"]["status"] == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"]["hash"]
            img_hash_4326 = image_hashes["EPSG:4326"]["hash"]
            if not image_similar(img_hash_3857, img_hash_4326, test_zoom_level):
                msgs = "\n\t".join(image_hashes["EPSG:3857"]["logs"] +
                                   image_hashes["EPSG:4326"]["logs"])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similiar: {img_hash_3857} - {img_hash_4326}: {img_hash_3857-img_hash_4326}:\n\t{msgs}"
                )

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"], new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            # Stored sorted by authority name, then numeric code.
            source["properties"]["available_projections"] = list(
                sorted(
                    new_projections,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        # Rewrite the file only when something actually changed.
        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source,
                          out,
                          indent=4,
                          sort_keys=False,
                          ensure_ascii=False)
                out.write("\n")
    except Exception as e:
        logging.exception(f"Failed to check source {filename}: {e}")
async def get_image(url, available_projections, lon, lat, zoom, session, messages):
    """Download the tile covering (lon, lat) at the given zoom and hash it.

    Parameters
    ----------
    url : str
        WMS url template with ``proj``, ``width``, ``height`` and ``bbox``
        placeholders.
    available_projections : collection
        Projection identifiers the request may use; EPSG:4326 is preferred,
        then EPSG:3857, then the first one (sorted) that pyproj can parse.
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list
        Receives human readable progress / error messages (mutated in place).

    Returns
    -------
    tuple
        ``(status, img_hash)``: an ``ImageHashStatus`` member and the
        ImageHash. The hash is None unless the status is ``SUCCESS``.
    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    img_hash = None
    status = ImageHashStatus.OTHER

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Take the first projection pyproj can parse. If none parses, the last
        # candidate is deliberately left in ``proj``: ``_get_bbox`` can still
        # handle "CRS:84" without pyproj and returns None for anything else.
        for proj in sorted(available_projections):
            try:
                CRS.from_string(proj)
            except Exception:
                continue
            break

    if proj is None:
        messages.append("No projection left: {}".format(available_projections))
        return status, img_hash

    wms_version = wms_version_from_url(url)
    bbox = _get_bbox(proj, bounds, wms_version)
    if bbox is None:
        messages.append(f"Projection {proj} could not be parsed by pyproj.")
        return status, img_hash

    formatted_url = url.format(
        proj=proj, width=IMAGE_SIZE, height=IMAGE_SIZE, bbox=bbox
    )
    messages.append(f"Image URL: {formatted_url}")

    for i in range(3):
        try:
            # Download image
            async with session.request(
                method="GET", url=formatted_url, ssl=False
            ) as response:
                if response.status == 200:
                    data = await response.read()
                    try:
                        img = Image.open(io.BytesIO(data))
                        img_hash = imagehash.average_hash(img)
                        status = ImageHashStatus.SUCCESS
                        messages.append(f"ImageHash: {img_hash}")
                        return status, img_hash
                    except Exception as e:
                        # The server answered, but not with a readable image.
                        status = ImageHashStatus.IMAGE_ERROR
                        messages.append(str(e))
                        filetype = magic.from_buffer(data)
                        messages.append(
                            f"Could not open recieved data as image (Recieved filetype: {filetype} {formatted_url})"
                        )
                else:
                    status = ImageHashStatus.NETWORK_ERROR
        except Exception as e:
            status = ImageHashStatus.NETWORK_ERROR
            messages.append(f"Could not download image in try {i}: {e}")
        # Wait before the next attempt.
        await asyncio.sleep(5)

    # All attempts failed: report the status of the last one.
    return status, img_hash
async def get_image(url, available_projections, lon, lat, zoom, session, messages):
    """Download the image for the tile at (lon, lat) and compute its hash.

    The tile containing the coordinate on level ``zoom`` is looked up with
    mercantile, its bounds are converted into a bbox parameter, and the
    formatted url is requested up to two times. Status code, response
    headers and body size of every attempt are recorded in ``messages``.

    Parameters
    ----------
    url : str
        URL template; it is formatted with ``proj``, ``width``, ``height``
        and ``bbox``.
    available_projections : collection
        Projection identifiers to choose from. EPSG:4326 is preferred,
        then EPSG:3857, then the first one (in sorted order) that
        ``CRS.from_string`` accepts.
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list
        Diagnostic messages are appended to this list in place.

    Returns
    -------
    tuple
        ``(status, img_hash)``: an ``ImageHashStatus`` member and the
        average hash of the downloaded image, or ``None`` as hash when no
        image could be hashed.
    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    img_hash = None
    status = ImageHashStatus.OTHER

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        for proj in sorted(available_projections):
            try:
                CRS.from_string(proj)
            except Exception:
                # Not parseable, try the next candidate.
                continue
            break
        # NOTE(review): when no candidate parses, proj keeps the last
        # candidate tried instead of None, so the bbox helper below is the
        # one that rejects (or accepts) it. Left unchanged on purpose.
    if proj is None:
        messages.append(f"No projection left: {available_projections}")
        return status, img_hash

    wms_version = wmshelper.wms_version_from_url(url)
    bbox = wmshelper.get_bbox(proj, bounds, wms_version)
    if bbox is None:
        messages.append(f"Projection {proj} could not be parsed by pyproj.")
        return status, img_hash

    formatted_url = url.format(proj=proj,
                               width=IMAGE_SIZE,
                               height=IMAGE_SIZE,
                               bbox=bbox)
    messages.append(f"Image URL: {formatted_url}")
    for i in range(2):
        try:
            # Download image
            async with session.request(method="GET",
                                       url=formatted_url,
                                       ssl=False) as response:
                messages.append(f"Try: {i}: HTTP CODE {response.status}")
                for header in response.headers:
                    messages.append(f"{header}: {response.headers[header]}")
                if response.status == 200:
                    data = await response.read()
                    data_length = len(data)
                    if data_length == 0:
                        messages.append(
                            f"Retrieved empty body, treat as NETWORK_ERROR: {data_length}"
                        )
                        status = ImageHashStatus.NETWORK_ERROR
                    else:
                        messages.append(f"len(data): {data_length}")
                        if "Content-Length" in response.headers:
                            # The size comparison is diagnostic only: a
                            # malformed header must not discard the body.
                            raw_length = response.headers["Content-Length"]
                            try:
                                advertised_length = int(raw_length)
                            except ValueError:
                                messages.append(
                                    f"Could not parse Content-Length header: {raw_length}"
                                )
                            else:
                                if not data_length == advertised_length:
                                    messages.append(
                                        f"Body not same size as advertised: {data_length} vs {advertised_length}"
                                    )
                        try:
                            img = Image.open(io.BytesIO(data))
                            img_hash = imagehash.average_hash(img)
                            status = ImageHashStatus.SUCCESS
                            messages.append(f"ImageHash: {img_hash}")
                            return status, img_hash
                        except Exception as e:
                            status = ImageHashStatus.IMAGE_ERROR
                            messages.append(str(e))
                            filetype = magic.from_buffer(data)
                            messages.append(
                                f"Could not open received data as image (Received filetype: {filetype} Body Length: {data_length} {formatted_url})"
                            )
                else:
                    status = ImageHashStatus.NETWORK_ERROR

                if response.status == 503:  # 503 Service Unavailable
                    # Wait longer before the regular pause below.
                    await asyncio.sleep(30)
        except Exception as e:
            status = ImageHashStatus.NETWORK_ERROR
            messages.append(f"Could not download image in try {i}: {e}")
        # Pause before the next attempt.
        await asyncio.sleep(15)

    return status, img_hash
def from_string(string):
    """Build a CRS object from a projection string.

    Thin wrapper that hands ``string`` unchanged to ``CRS.from_string``
    and returns whatever that call produces.
    """
    crs = CRS.from_string(string)
    return crs
async def process_source(filename, session: ClientSession):
    """Re-check one source file and rewrite it if its url or projections changed.

    The source is read from ``filename``. Sources that are not WMS, look
    like ESRI REST urls, have no ``available_projections``, contain
    ``header`` in the url or have no ``geometry`` key are skipped. For the
    remaining ones an image is fetched for the current url, the url is
    updated through ``update_wms``, and the projection list of the result
    is filtered before the file is written back.

    Parameters
    ----------
    filename : str
        Path of the source file; it is overwritten when something changed.
    session : ClientSession
        Session used for all requests.

    Returns
    -------
    None
    """
    async with aiofiles.open(filename, mode='r', encoding='utf-8') as f:
        contents = await f.read()
        source = json.loads(contents)

    # Exclude sources
    # Skip non wms layers
    if not source['properties']['type'] == 'wms':
        return
    # check if it is esri rest and not wms
    if 'bboxSR' in source['properties']['url']:
        return
    if 'available_projections' not in source['properties']:
        return
    if 'header' in source['properties']['url']:
        return
    if 'geometry' not in source:
        return

    if source['geometry'] is None:
        # No geometry: use the whole world and a fixed test coordinate.
        geom = box(-180, -90, 180, 90)
        pt = Point(7.44, 46.56)
    else:
        geom = parse_eli_geometry(source['geometry'])
        pt = geom.representative_point()

    # Get existing image hash
    # NOTE(review): the `is None` checks below assume get_image returns a
    # bare hash or None - confirm against the get_image definition that is
    # actually in effect for this function.
    original_img_messages = []
    image_hash = await get_image(
        url=source['properties']['url'],
        available_projections=source['properties']['available_projections'],
        lon=pt.x,
        lat=pt.y,
        zoom=ZOOM_LEVEL,
        session=session,
        messages=original_img_messages)
    if image_hash is None:
        # We are finished if it was not possible to get the image
        return

    if max_count(str(image_hash)) == 16:
        # These image hashes indicate that the downloaded image is not useful to determine
        # if the updated query returns the same image
        logging.info("ImageHash {} not useful for: {} || {}".format(
            str(image_hash), filename, " || ".join(original_img_messages)))
        return

    # Update wms
    wms_messages = []
    result = await update_wms(source['properties']['url'], session,
                              wms_messages)
    if result is None:
        logging.info("Not possible to update wms url for {}: {}".format(
            filename, " || ".join(wms_messages)))
        return

    # Test if selected projections work despite not being advertised
    # (a tuple keeps the order of the requests deterministic)
    for candidate in ('EPSG:3857', 'EPSG:4326'):
        if candidate not in result['available_projections']:
            epsg_image_hash = await get_image(url=result['url'],
                                              available_projections=[candidate],
                                              lon=pt.x,
                                              lat=pt.y,
                                              zoom=ZOOM_LEVEL,
                                              session=session,
                                              messages=[])
            if epsg_image_hash == image_hash:
                result['available_projections'].add(candidate)

    # Download image for updated url
    new_img_messages = []
    new_image_hash = await get_image(
        url=result['url'],
        available_projections=result['available_projections'],
        lon=pt.x,
        lat=pt.y,
        zoom=ZOOM_LEVEL,
        session=session,
        messages=new_img_messages)
    if new_image_hash is None:
        logging.warning("Could not download new image: {}".format(
            " || ".join(new_img_messages)))
        return

    # Only sources are updated where the new query returns the same image
    # NOTE(review): a mismatch is only logged here, processing continues -
    # confirm whether an early return was intended.
    if not image_hash == new_image_hash:
        logging.info("Image hash not the same for: {}: {} {} | {} | {}".format(
            filename, image_hash, new_image_hash,
            "||".join(original_img_messages), "||".join(new_img_messages)))

    # Servers might support projections that are not used in the area covered by a source
    # Keep only EPSG codes that are used in the area covered by the sources geometry
    if source['geometry'] is not None:
        epsg_outside_area_of_use = set()
        # Inspect the projections of the update result: these are the ones
        # filtered and counted below.
        for epsg in result['available_projections']:
            try:
                if epsg == 'CRS:84':
                    continue
                crs = CRS.from_string(epsg)
                area_of_use = crs.area_of_use
                crs_box = box(
                    area_of_use.west,
                    area_of_use.south,
                    area_of_use.east,
                    area_of_use.north,
                )
                if not crs_box.intersects(geom):
                    epsg_outside_area_of_use.add(epsg)
            except Exception as e:
                logging.exception(
                    "Could not check area of use for projection {}: {}".format(
                        epsg, str(e)))
                continue
        if len(result["available_projections"]) == len(
                epsg_outside_area_of_use):
            logging.error(
                "{}: epsg_outside_area_of_use filter removes all EPSG".format(
                    filename))
        result["available_projections"] = [
            epsg for epsg in result["available_projections"]
            if epsg not in epsg_outside_area_of_use
        ]

    # Servers that report a lot of projection may be configured wrongly
    # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
    if len(result['available_projections']) > 15:
        filtered_projs = set()
        for proj in ['CRS:84', 'EPSG:3857', 'EPSG:4326']:
            if proj in result['available_projections']:
                filtered_projs.add(proj)
        for proj in source['properties']['available_projections']:
            if proj in result['available_projections']:
                filtered_projs.add(proj)
        result['available_projections'] = filtered_projs

    # Filter alias projections
    if 'EPSG:3857' in result['available_projections']:
        for epsg in epsg_3857_alias:
            if epsg in result['available_projections']:
                result['available_projections'].remove(epsg)

    # Filter deprecated projections
    result["available_projections"] = [
        epsg for epsg in result["available_projections"]
        if epsg == "CRS:84" or (
            epsg in valid_epsgs and epsg not in epsg_3857_alias)
    ]

    # Check if only formatting has changes
    url_has_changed = not compare_urls(source['properties']['url'],
                                       result['url'])
    projections_have_changed = not compare_projs(
        source['properties']['available_projections'],
        result['available_projections'])

    if url_has_changed:
        source['properties']['url'] = result['url']
    if projections_have_changed:
        # Sort by authority name, then by numeric code.
        source['properties']['available_projections'] = list(
            sorted(result['available_projections'],
                   key=lambda x: (x.split(':')[0], int(x.split(':')[1]))))

    if url_has_changed or projections_have_changed:
        with open(filename, 'w', encoding='utf-8') as out:
            json.dump(source,
                      out,
                      indent=4,
                      sort_keys=False,
                      ensure_ascii=False)
            out.write("\n")