async def get_wms_image(tile, source, session):
    bounds = list(mercantile.bounds(tile))
    if "available_projections" not in source["properties"]:
        return None
    available_projections = source["properties"]["available_projections"]
    url = source["properties"]["url"]

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Fall back to the first projection that pyproj can parse.
        for candidate in sorted(available_projections):
            try:
                CRS.from_string(candidate)
            except Exception:
                continue
            proj = candidate
            break
    if proj is None:
        return None

    wms_version = wms_version_from_url(url)
    bbox = _get_bbox(proj, bounds, wms_version)
    if bbox is None:
        return None

    formatted_url = url.format(proj=proj, width=256, height=256, bbox=bbox)

    return formatted_url
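
The `url` value is assumed to be a WMS GetMap template with `{proj}`, `{width}`, `{height}` and `{bbox}` placeholders; a minimal sketch of the formatting step, using a hypothetical template:

# Hypothetical GetMap template; the real source URLs are not shown in these snippets.
template = ("https://example.com/wms?SERVICE=WMS&VERSION=1.3.0&REQUEST=GetMap"
            "&LAYERS=test&STYLES=&CRS={proj}&WIDTH={width}&HEIGHT={height}"
            "&BBOX={bbox}&FORMAT=image/png")
# str.format fills the placeholders computed by get_wms_image
print(template.format(proj="EPSG:4326", width=256, height=256,
                      bbox="46.0,7.0,47.0,8.0"))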
Example #2
def _get_bbox(proj, bounds, wms_version):
    """ Build wms bbox parameter for GetMap request"""
    if proj in {"EPSG:4326", "CRS:84"}:
        if proj == "EPSG:4326" and wms_version == "1.3.0":
            bbox = ",".join(
                map(str, [bounds[1], bounds[0], bounds[3], bounds[2]]))
        else:
            bbox = ",".join(map(str, bounds))
    else:
        try:
            crs_from = CRS.from_string("epsg:4326")
            crs_to = CRS.from_string(proj)
            transformer = get_transformer(crs_from, crs_to)
            bounds = list(transformer.transform(bounds[0], bounds[1])) + list(
                transformer.transform(bounds[2], bounds[3]))
        except Exception:
            return None

        # WMS < 1.3.0 assumes x,y coordinate ordering.
        # WMS 1.3.0 expects coordinate ordering defined in CRS.
        #
        if crs_to.axis_info[0].direction == "north" and wms_version == "1.3.0":
            bbox = ",".join(
                map(str, [bounds[1], bounds[0], bounds[3], bounds[2]]))
        else:
            bbox = ",".join(map(str, bounds))
    return bbox
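
A quick check of the axis-order rule used above, with pyproj (assuming pyproj >= 2):

from pyproj import CRS

# EPSG:4326 declares latitude (axis direction "north") first, so WMS 1.3.0
# requests need the bbox as miny,minx,maxy,maxx.
print(CRS.from_string("EPSG:4326").axis_info[0].direction)  # north
# EPSG:3857 is easting/northing ("east" first), so x,y ordering is kept.
print(CRS.from_string("EPSG:3857").axis_info[0].direction)  # east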
Example #3
def test_proj(proj):
    if proj == 'CRS:84':
        return True
    if 'AUTO' in proj:
        return False
    if 'EPSG' in proj:
        try:
            CRS.from_string(proj)
            return True
        except Exception:
            return False
    return False
Example #4
def test_proj(proj):
    if proj == "CRS:84":
        return True
    if "AUTO" in proj:
        return False
    # 'EPSG:102067' is not valid, should be ESRI:102067: https://epsg.io/102067
    if proj == "EPSG:102067":
        return False
    # 'EPSG:102066' is not valid, should be ESRI:102066: https://epsg.io/102066
    if proj == "EPSG:102066":
        return False
    if "EPSG" in proj:
        try:
            CRS.from_string(proj)
            return True
        except Exception:
            return False
    return False
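
A few sample inputs for this variant of `test_proj` (assuming `CRS` from pyproj is in scope, as in the snippet):

print(test_proj("CRS:84"))       # True
print(test_proj("AUTO2:42003"))  # False: AUTO projections are rejected
print(test_proj("EPSG:102067"))  # False: rejected explicitly, see the comment above
print(test_proj("EPSG:4326"))    # True: parses with pyproj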
Example #5
def create_hist(result_dir, map_raster, alert):

    if not map_raster.is_file():
        raise FileNotFoundError(f'No GFC map found: {map_raster}')

    # reproject the raster to World Mollweide (ESRI:54009)
    with rio.open(map_raster) as src:

        proj_crs = CRS.from_string('ESRI:54009')
        if LooseVersion(rio.__gdal_version__) < LooseVersion("3.0.0"):
            rio_crs = RioCRS.from_wkt(proj_crs.to_wkt(WktVersion.WKT1_GDAL))
        else:
            rio_crs = RioCRS.from_wkt(proj_crs.to_wkt())

        transform, width, height = calculate_default_transform(
            src.crs, rio_crs, src.width, src.height, *src.bounds)

        kwargs = src.meta.copy()

        kwargs.update({
            'crs': rio_crs,
            'transform': transform,
            'width': width,
            'height': height
        })

        map_raster_proj = result_dir / f'{map_raster.stem}_proj.tif'

        with rio.open(map_raster_proj, 'w', **kwargs) as dst:
            for i in range(1, src.count + 1):
                reproject(source=rio.band(src, i),
                          destination=rio.band(dst, i),
                          src_transform=src.transform,
                          src_crs=src.crs,
                          dst_transform=transform,
                          dst_crs=rio_crs,
                          resampling=Resampling.nearest)

            resx, resy = dst.res

    # compute the initial histogram of pixel counts
    hist = pixel_count(map_raster_proj)

    # convert to hectares
    hist['area'] = to_hectar(hist['pixels'], abs(resx), abs(resy))

    return hist
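
`pixel_count` and `to_hectar` are not shown in these snippets; a plausible `to_hectar`, assuming World Mollweide resolutions in metres and 10,000 m² per hectare:

def to_hectar(pixels, resx, resy):
    """Convert a pixel count to an area in hectares, given the pixel size in metres."""
    return pixels * resx * resy / 10_000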
Example #6
def subset_shape(
    ds: Union[xarray.DataArray, xarray.Dataset],
    shape: Union[str, Path, gpd.GeoDataFrame],
    vectorize: bool = True,
    raster_crs: Optional[Union[str, int]] = None,
    shape_crs: Optional[Union[str, int]] = None,
    buffer: Optional[Union[int, float]] = None,
    start_date: Optional[str] = None,
    end_date: Optional[str] = None,
) -> Union[xarray.DataArray, xarray.Dataset]:
    """Subset a DataArray or Dataset spatially (and temporally) using a vector shape and date selection.

    Return a subset of a DataArray or Dataset for grid points falling within the area of a Polygon and/or
    MultiPolygon shape, or grid points along the path of a LineString and/or MultiLineString.

    Parameters
    ----------
    ds : Union[xarray.DataArray, xarray.Dataset]
      Input values.
    shape : Union[str, Path, gpd.GeoDataFrame]
      Path to shape file, or directly a geodataframe. Supports formats compatible with geopandas.
    vectorize : bool
      Whether to use the vectorize backend (True, default) or the spatial join backend (False).
    raster_crs : Optional[Union[str, int]]
      EPSG number or PROJ4 string.
    shape_crs : Optional[Union[str, int]]
      EPSG number or PROJ4 string.
    buffer : Optional[Union[int, float]]
      Buffer the shape in order to select a larger region stemming from it. Units are those of the shape's CRS (degrees or metres).
    start_date : Optional[str]
      Start date of the subset.
      Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day ("%Y-%m-%d").
      Defaults to first day of input data-array.
    end_date : Optional[str]
      End date of the subset.
      Date string format -- can be year ("%Y"), year-month ("%Y-%m") or year-month-day ("%Y-%m-%d").
      Defaults to last day of input data-array.

    Returns
    -------
    Union[xarray.DataArray, xarray.Dataset]
      A subset of `ds`

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> from xclim.subset import subset_shape  # doctest: +SKIP
    >>> pr = xr.open_dataset(path_to_pr_file).pr  # doctest: +SKIP
    ...
    # Subset data array by shape
    >>> prSub = subset_shape(pr, shape=path_to_shape_file)  # doctest: +SKIP
    ...
    # Subset data array by shape and single year
    >>> prSub = subset_shape(pr, shape=path_to_shape_file, start_date='1990-01-01', end_date='1990-12-31')  # doctest: +SKIP
    ...
    # Subset multiple variables in a single dataset
    >>> ds = xr.open_mfdataset([path_to_tasmin_file, path_to_tasmax_file])  # doctest: +SKIP
    >>> dsSub = subset_shape(ds, shape=path_to_shape_file)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)
    # PROJ definition for WGS84 with longitudes wrapped to the 0..360 range.
    wgs84_wrapped = CRS.from_string(
        "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs +lon_wrap=180")

    if isinstance(ds, xarray.DataArray):
        ds_copy = ds._to_temp_dataset()
    else:
        ds_copy = ds.copy()

    if isinstance(shape, gpd.GeoDataFrame):
        poly = shape.copy()
    else:
        poly = gpd.GeoDataFrame.from_file(shape)

    if buffer is not None:
        poly.geometry = poly.buffer(buffer)

    # Get the shape's bounding box.
    minx, miny, maxx, maxy = poly.total_bounds
    lon_bnds = (minx, maxx)
    lat_bnds = (miny, maxy)

    # If polygon doesn't cross prime meridian, subset bbox first to reduce processing time
    # Only case not implemented is when lon_bnds cross the 0 deg meridian but dataset grid has all positive lons
    try:
        ds_copy = subset_bbox(ds_copy, lon_bnds=lon_bnds, lat_bnds=lat_bnds)
    except NotImplementedError:
        pass

    if ds_copy.lon.size == 0 or ds_copy.lat.size == 0:
        raise ValueError(
            "No grid cell centroids found within provided polygon bounding box. "
            'Try using the "buffer" option to create an expanded area.')

    if start_date or end_date:
        ds_copy = subset_time(ds_copy,
                              start_date=start_date,
                              end_date=end_date)

    # Determine whether CRS types are the same between shape and raster
    if shape_crs is not None:
        shape_crs = CRS.from_user_input(shape_crs)
    else:
        shape_crs = CRS(poly.crs)

    wrap_lons = False
    if raster_crs is not None:
        raster_crs = CRS.from_user_input(raster_crs)
    else:
        if np.min(lat_bnds) < -90 or np.max(lat_bnds) > 90:
            raise ValueError(
                "Latitudes exceed domain of WGS84 coordinate system.")
        if np.min(lon_bnds) < -180 or np.max(lon_bnds) > 180:
            raise ValueError(
                "Longitudes exceed domain of WGS84 coordinate system.")

        try:
            # Extract CF-compliant CRS_WKT from crs variable.
            raster_crs = CRS.from_cf(ds_copy.crs.attrs)
        except AttributeError:
            if np.min(ds_copy.lon) >= 0 and np.max(ds_copy.lon) <= 360:
                wrap_lons = True
                raster_crs = wgs84_wrapped
            else:
                raster_crs = wgs84
    _check_crs_compatibility(shape_crs=shape_crs, raster_crs=raster_crs)

    # Create mask using the vectorize or spatial join methods.
    if vectorize:
        mask_2d = create_mask_vectorize(x_dim=ds_copy.lon,
                                        y_dim=ds_copy.lat,
                                        poly=poly,
                                        wrap_lons=wrap_lons)
    else:
        mask_2d = create_mask(x_dim=ds_copy.lon,
                              y_dim=ds_copy.lat,
                              poly=poly,
                              wrap_lons=wrap_lons)

    if np.all(mask_2d.isnull()):
        raise ValueError(
            f"No grid cell centroids found within provided polygon bounds ({poly.bounds}). "
            'Try using the "buffer" option to create an expanded area or verify the polygon.'
        )

    # loop through variables
    for v in ds_copy.data_vars:
        if set.issubset(set(mask_2d.dims), set(ds_copy[v].dims)):
            ds_copy[v] = ds_copy[v].where(mask_2d.notnull())

    # Remove coordinates where all values are outside of region mask
    for dim in mask_2d.dims:
        mask_2d = mask_2d.dropna(dim, how="all")
    ds_copy = ds_copy.sel({dim: mask_2d[dim] for dim in mask_2d.dims})

    # Add a CRS definition using CF conventions and as a global attribute in CRS_WKT for reference purposes
    ds_copy.attrs["crs"] = raster_crs.to_string()
    ds_copy["crs"] = 1
    ds_copy["crs"].attrs.update(raster_crs.to_cf())

    for v in ds_copy.variables:
        if {"lat", "lon"}.issubset(set(ds_copy[v].dims)):
            ds_copy[v].attrs["grid_mapping"] = "crs"

    if isinstance(ds, xarray.DataArray):
        return ds._from_temp_dataset(ds_copy)
    return ds_copy
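
The closing block above attaches the CRS following CF conventions; a minimal self-contained illustration of that pattern:

import numpy as np
import xarray as xr
from pyproj import CRS

# A scalar "crs" variable carries the CRS as CF attributes; data variables
# reference it through their "grid_mapping" attribute.
ds = xr.Dataset({"pr": (("lat", "lon"), np.zeros((2, 2)))},
                coords={"lat": [45.0, 46.0], "lon": [7.0, 8.0]})
ds["crs"] = 1
ds["crs"].attrs.update(CRS(4326).to_cf())
ds["pr"].attrs["grid_mapping"] = "crs"
print(ds["crs"].attrs["grid_mapping_name"])  # latitude_longitude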
Example #7
def create_mask(
    *,
    x_dim: xarray.DataArray = None,
    y_dim: xarray.DataArray = None,
    poly: gpd.GeoDataFrame = None,
    wrap_lons: bool = False,
    check_overlap: bool = False,
):
    """Create a mask with values corresponding to the features in a GeoDataFrame using spatial join methods.

    The returned mask's points have the value of the first geometry of `poly` they fall in.

    Parameters
    ----------
    x_dim : xarray.DataArray
      X or longitudinal dimension of xarray object.
    y_dim : xarray.DataArray
      Y or latitudinal dimension of xarray object.
    poly : gpd.GeoDataFrame
      GeoDataFrame used to create the xarray.DataArray mask.
    wrap_lons : bool
      Shift longitudes from the -180,180 degree range to the 0,360 degree range; Default = False
    check_overlap: bool
      Perform a check to verify if shapes contain overlapping geometries.

    Returns
    -------
    xarray.DataArray

    Examples
    --------
    >>> import xarray as xr  # doctest: +SKIP
    >>> import geopandas as gpd  # doctest: +SKIP
    >>> from xclim.subset import create_mask  # doctest: +SKIP
    >>> ds = xr.open_dataset(path_to_tasmin_file)  # doctest: +SKIP
    >>> polys = gpd.read_file(path_to_multi_shape_file)  # doctest: +SKIP
    ...
    # Get a mask from all polygons in the shape file
    >>> mask = create_mask(x_dim=ds.lon, y_dim=ds.lat, poly=polys)  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions=mask)  # doctest: +SKIP
    ...
    # Operations can be applied to each region with `groupby`. Ex:
    >>> ds = ds.groupby('regions').mean()  # doctest: +SKIP
    ...
    # Extra step to retrieve the names of those polygons stored in the "id" column
    >>> region_names = xr.DataArray(polys.id, dims=('regions',))  # doctest: +SKIP
    >>> ds = ds.assign_coords(regions_names=region_names)  # doctest: +SKIP
    """
    wgs84 = CRS(4326)

    if check_overlap:
        _check_has_overlaps(polygons=poly)
    if wrap_lons:
        warnings.warn("Wrapping longitudes at 180 degrees.")

    if len(x_dim.shape) == 1 and len(y_dim.shape) == 1:
        # create a 2d grid of lon, lat values
        lon1, lat1 = np.meshgrid(np.asarray(x_dim.values),
                                 np.asarray(y_dim.values),
                                 indexing="ij")
        dims_out = x_dim.dims + y_dim.dims
        coords_out = dict()
        coords_out[dims_out[0]] = x_dim.values
        coords_out[dims_out[1]] = y_dim.values
    else:
        lon1 = x_dim.values
        lat1 = y_dim.values
        dims_out = x_dim.dims
        coords_out = x_dim.coords

    # create a pandas DataFrame from the NetCDF lat and lon points
    df = pd.DataFrame({
        "id": np.arange(0, lon1.size),
        "lon": lon1.flatten(),
        "lat": lat1.flatten()
    })
    df["Coordinates"] = list(zip(df.lon, df.lat))
    df["Coordinates"] = df["Coordinates"].apply(Point)

    # create GeoDataFrame (spatially referenced with shifted longitude values if needed).
    if wrap_lons:
        wgs84 = CRS.from_string(
            "+proj=longlat +datum=WGS84 +no_defs +type=crs +lon_wrap=180")
    gdf_points = gpd.GeoDataFrame(df, geometry="Coordinates", crs=wgs84)

    # spatial join geodata points with region polygons and remove duplicates
    point_in_poly = gpd.tools.sjoin(gdf_points,
                                    poly,
                                    how="left",
                                    op="intersects")
    point_in_poly = point_in_poly.loc[~point_in_poly.index.duplicated(
        keep="first")]

    # extract polygon ids for points
    mask = point_in_poly["index_right"]
    mask_2d = np.array(mask).reshape(lat1.shape[0], lat1.shape[1])
    mask_2d = xarray.DataArray(mask_2d, dims=dims_out, coords=coords_out)
    return mask_2d
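
The core of the mask is the left spatial join; a minimal demonstration (newer geopandas uses `predicate=` instead of the `op=` keyword seen above):

import geopandas as gpd
from shapely.geometry import Point, box

polys = gpd.GeoDataFrame(geometry=[box(0, 0, 1, 1)], crs="EPSG:4326")
pts = gpd.GeoDataFrame(geometry=[Point(0.5, 0.5), Point(2, 2)], crs="EPSG:4326")
joined = gpd.tools.sjoin(pts, polys, how="left", predicate="intersects")
# Points inside a polygon get its index; points outside get NaN.
print(joined["index_right"].tolist())  # [0.0, nan]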
Example #8
    def func_checker(*args, **kwargs):
        """
        Split and reproject polygon vectors in a GeoDataFrame whose values cross the Greenwich Meridian.

        Begins by examining whether the bounds of the supplied geometry cross longitude = 0 and, if so, proceeds to
        split the polygons at the meridian into new polygons and erase a small buffer to prevent invalid geometries
        when transforming the lons from WGS84 to WGS84 +lon_wrap=180 (longitudes from 0 to 360).

        Returns a GeoDataFrame with the new features in a wrap_lon WGS84 projection if needed.
        """
        try:
            poly = kwargs["poly"]
            x_dim = kwargs["x_dim"]
            wrap_lons = kwargs["wrap_lons"]
        except KeyError:
            return func(*args, **kwargs)

        if wrap_lons:
            if (np.min(x_dim) < 0 and np.max(x_dim) >= 360) or (
                    np.min(x_dim) < -180 and np.max(x_dim) >= 180):
                # TODO: This should raise an exception, right?
                warnings.warn(
                    "DataArray doesn't seem to be using lons between 0 and 360 degrees or between -180 and 180 degrees."
                    " Tread with caution.",
                    UserWarning,
                    stacklevel=4,
                )
            split_flag = False
            for index, feature in poly.iterrows():
                if (feature.geometry.bounds[0] <
                        0) and (feature.geometry.bounds[2] > 0):
                    split_flag = True
                    warnings.warn(
                        "Geometry crosses the Greenwich Meridian. Proceeding to split polygon at Greenwich."
                        " This feature is experimental. Output might not be accurate.",
                        UserWarning,
                        stacklevel=4,
                    )

                    # Create a meridian line at Greenwich, split polygons at this line and erase a buffer line
                    if isinstance(feature.geometry, MultiPolygon):
                        union = MultiPolygon(cascaded_union(feature.geometry))
                    else:
                        union = Polygon(cascaded_union(feature.geometry))
                    meridian = LineString([Point(0, 90), Point(0, -90)])
                    buffered = meridian.buffer(0.000000001)
                    split_polygons = split(union, meridian)
                    buffered_split_polygons = [
                        feat.difference(buffered) for feat in split_polygons
                    ]

                    # Cannot assign iterable with `at` (pydata/pandas#26333) so a small hack:
                    # Load split features into a new GeoDataFrame with WGS84 CRS
                    split_gdf = gpd.GeoDataFrame(
                        geometry=[cascaded_union(buffered_split_polygons)],
                        crs=CRS(4326),
                    )
                    poly.at[[index], "geometry"] = split_gdf.geometry.values

            # Reproject features in the WGS84 CRS to use 0 to 360 as longitudinal values
            wrapped_lons = CRS.from_string(
                "+proj=longlat +ellps=WGS84 +lon_wrap=180 +datum=WGS84 +no_defs"
            )

            poly = poly.to_crs(crs=wrapped_lons)
            if split_flag:
                warnings.warn(
                    "Rebuffering split polygons to ensure edge inclusion in selection.",
                    UserWarning,
                    stacklevel=4,
                )
                poly = gpd.GeoDataFrame(poly.buffer(0.000000001),
                                        columns=["geometry"])
                poly.crs = wrapped_lons

            kwargs["poly"] = poly

        return func(*args, **kwargs)
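
A minimal illustration of the split step, using `split` from `shapely.ops` as above:

from shapely.geometry import LineString, Point, Polygon
from shapely.ops import split

# A polygon straddling the Greenwich Meridian is cut into two parts.
poly = Polygon([(-10, 40), (10, 40), (10, 50), (-10, 50)])
meridian = LineString([Point(0, 90), Point(0, -90)])
parts = split(poly, meridian)
print(len(parts.geoms))  # 2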
Example #9
async def process_source(filename, session: ClientSession):

    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non-WMS layers
        if not source["properties"]["type"] == "wms":
            return
        # Check if it is ESRI REST and not WMS
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        if source["geometry"] is None:
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"],
                                  test_zoom_level)

        # Get existing image hash
        original_img_messages = []
        image_hash = await get_image(
            url=source["properties"]["url"],
            available_projections=source["properties"]
            ["available_projections"],
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if image_hash is None:
            # We are finished if it was not possible to get the image
            return

        if max_count(str(image_hash)) == 16:
            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            logging.info("ImageHash {} not useful for: {} || {}".format(
                str(image_hash), filename, " || ".join(original_img_messages)))
            return

        # Update wms
        wms_messages = []
        result = await update_wms(source["properties"]["url"], session,
                                  wms_messages)
        if result is None:
            logging.info("Not possible to update wms url for {}: {}".format(
                filename, " || ".join(wms_messages)))
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in result["available_projections"]:
                epsg_image_hash = await get_image(
                    url=result["url"],
                    available_projections=[EPSG],
                    lon=pt.x,
                    lat=pt.y,
                    zoom=test_zoom_level,
                    session=session,
                    messages=[],
                )
                if epsg_image_hash == image_hash:
                    result["available_projections"].add(EPSG)

        # Download image for updated url
        new_img_messages = []
        new_image_hash = await get_image(
            url=result["url"],
            available_projections=result["available_projections"],
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )

        if new_image_hash is None:
            logging.warning("Could not download new image: {}".format(
                " || ".join(new_img_messages)))
            return

        # Only update sources where the new query returns the same image
        if not image_hash == new_image_hash:
            logging.info(
                "Image hash not the same for: {}: '{}' vs '{}' | {} | {}".
                format(
                    filename,
                    image_hash,
                    new_image_hash,
                    " || ".join(original_img_messages),
                    " || ".join(new_img_messages),
                ))

        # Servers might support projections that are not used in the area covered by a source
        # Keep only EPSG codes that are used in the area covered by the source's geometry
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in result["available_projections"]:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    logging.exception(
                        "Could not check area of use for projection {}: {}".
                        format(epsg, str(e)))
                    continue
            if len(result["available_projections"]) == len(
                    epsg_outside_area_of_use):
                logging.error(
                    "{}: epsg_outside_area_of_use filter removes all EPSG".
                    format(filename))
            result["available_projections"] -= epsg_outside_area_of_use

        # Servers that report a lot of projections may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(result["available_projections"]) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in result["available_projections"]:
                    filtered_projs.add(proj)
            for proj in source["properties"]["available_projections"]:
                if proj in result["available_projections"]:
                    filtered_projs.add(proj)
            result["available_projections"] = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in result["available_projections"]:
            result["available_projections"] -= epsg_3857_alias
        else:
            # If EPSG:3857 is not present but an alias is, keep only the alias with the highest number to be consistent
            result_epsg_3857_alias = result[
                "available_projections"] & epsg_3857_alias
            result_epsg_3857_alias_sorted = list(
                sorted(
                    result_epsg_3857_alias,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                    reverse=True,
                ))
            result["available_projections"] -= set(
                result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        result["available_projections"].intersection_update(valid_epsgs)

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"],
                                           result["url"])
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            result["available_projections"],
        )

        if url_has_changed:
            source["properties"]["url"] = result["url"]
        if projections_have_changed:
            source["properties"]["available_projections"] = list(
                sorted(
                    result["available_projections"],
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source,
                          out,
                          indent=4,
                          sort_keys=False,
                          ensure_ascii=False)
                out.write("\n")
    except Exception as e:
        logging.exception("Failed to check source {}: {}".format(
            filename, str(e)))
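
`epsg_3857_alias` is not defined in these snippets; a plausible set built from well-known Web Mercator aliases (the exact list used by the script may differ):

epsg_3857_alias = {
    "EPSG:900913",  # historic "Google" code
    "EPSG:3785",    # deprecated predecessor of EPSG:3857
    "EPSG:102100",  # ESRI WKIDs sometimes advertised with an EPSG prefix
    "EPSG:102113",
    "EPSG:41001",   # OSGEO simple Mercator code seen on some servers
    "EPSG:54004",
}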
Example #10
async def get_image(url, available_projections, lon, lat, zoom, session,
                    messages):
    """Download image (tms tile for coordinate lon,lat on level zoom and calculate image hash

    Parameters
    ----------
    url : str
    available_projections : collection
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list

    Returns
    -------
    ImageHash or None

    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Fall back to the first projection that pyproj can parse.
        for candidate in sorted(available_projections):
            try:
                CRS.from_string(candidate)
            except Exception:
                continue
            proj = candidate
            break
    if proj is None:
        messages.append("No projection left: {}".format(available_projections))
        return None

    crs_from = CRS.from_string("epsg:4326")
    crs_to = CRS.from_string(proj)
    if not proj == "EPSG:4326":
        transformer = Transformer.from_crs(crs_from, crs_to, always_xy=True)
        bounds = list(transformer.transform(bounds[0], bounds[1])) + list(
            transformer.transform(bounds[2], bounds[3]))

    # WMS < 1.3.0 assumes x,y coordinate ordering.
    # WMS 1.3.0 expects coordinate ordering defined in CRS.
    if crs_to.axis_info[0].direction == "north" and "=1.3.0" in url:
        bbox = ",".join(map(str, [bounds[1], bounds[0], bounds[3], bounds[2]]))
    else:
        bbox = ",".join(map(str, bounds))

    formatted_url = url.format(proj=proj, width=512, height=512, bbox=bbox)
    messages.append("Image URL: {}".format(formatted_url))
    for i in range(3):
        try:
            # Download image
            async with session.request(method="GET",
                                       url=formatted_url,
                                       ssl=False) as response:
                data = await response.read()
                img = Image.open(io.BytesIO(data))
                img_hash = imagehash.average_hash(img)
                messages.append("ImageHash: {}".format(img_hash))
                return img_hash
        except Exception as e:
            messages.append("Could not download image in try {}: {}".format(
                i, str(e)))
        await asyncio.sleep(5)

    return None
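
The tile lookup at the top of `get_image` can be checked interactively with mercantile:

import mercantile

# The Web Mercator tile containing Bern (7.44 E, 46.56 N) at zoom 8,
# and its geographic bounds used to build the WMS bbox.
tile = list(mercantile.tiles(7.44, 46.56, 7.44, 46.56, zooms=8))[0]
print(tile)                     # Tile(x=..., y=..., z=8)
print(mercantile.bounds(tile))  # LngLatBbox(west=..., south=..., east=..., north=...)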
Example #11
async def process_source(filename, session: ClientSession):

    try:
        async with aiofiles.open(filename, mode="r", encoding="utf-8") as f:
            contents = await f.read()
            source = json.loads(contents)

        # Exclude sources
        # Skip non-WMS layers
        if not source["properties"]["type"] == "wms":
            return
        # Check if it is ESRI REST and not WMS
        if "bboxSR" in source["properties"]["url"]:
            return
        if "available_projections" not in source["properties"]:
            return
        if "header" in source["properties"]["url"]:
            return
        if "geometry" not in source:
            return

        category = source["properties"].get("category", None)

        if source["geometry"] is None:
            geom = box(-180, -90, 180, 90)
            pt = Point(7.44, 46.56)
        else:
            geom = parse_eli_geometry(source["geometry"])
            pt = geom.representative_point()

        test_zoom_level = ZOOM_LEVEL
        if "min_zoom" in source["properties"]:
            test_zoom_level = max(source["properties"]["min_zoom"],
                                  test_zoom_level)
        if "max_zoom" in source["properties"]:
            test_zoom_level = min(source["properties"]["max_zoom"],
                                  test_zoom_level)

        old_url = source["properties"]["url"]
        old_projections = source["properties"]["available_projections"]

        # Get existing image hash
        original_img_messages = []
        status, image_hash = await get_image(
            url=old_url,
            available_projections=old_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=original_img_messages,
        )
        if not status == ImageHashStatus.SUCCESS or image_hash is None:
            # We are finished if it was not possible to get the image
            return

        if max_count(str(image_hash)) == 16:

            if ("category" in source["properties"]
                    and "photo" in source["properties"]["category"]):
                msgs = "\n\t".join(original_img_messages)
                logging.warning(
                    f"{filename} has category {category} but image hash is {image_hash}:\n\t{msgs}"
                )

            # These image hashes indicate that the downloaded image is not useful to determine
            # if the updated query returns the same image
            error_msgs = "\n\t".join(original_img_messages)
            logging.warning(
                f"Image hash {image_hash} not useful for: {filename} ({category}): \n\t{error_msgs}"
            )
            return

        # Update wms
        wms_messages = []
        result = await update_wms(old_url, session, wms_messages)
        if result is None:
            error_msgs = "\n\t".join(wms_messages)
            logging.info(
                f"Not possible to update wms url for {filename}:\n\t{error_msgs}"
            )
            return
        new_url = result["url"]
        new_projections = result["available_projections"]
        del result

        # Download image for updated url
        new_img_messages = []
        new_status, new_image_hash = await get_image(
            url=new_url,
            available_projections=new_projections,
            lon=pt.x,
            lat=pt.y,
            zoom=test_zoom_level,
            session=session,
            messages=new_img_messages,
        )

        if not new_status == ImageHashStatus.SUCCESS or new_image_hash is None:
            error_msgs = "\n\t".join(new_img_messages)
            logging.warning(
                f"Could not download new image: {new_status}\n\t{error_msgs}")
            return

        # Only update sources where the new query returns the same image
        if not image_similar(image_hash, new_image_hash, test_zoom_level):
            error_original_img_messages = "\n\t".join(original_img_messages)
            error_new_img_messages = "\n\t".join(new_img_messages)
            logging.info(
                f"Image hash not the same for: {filename}: {image_hash} - {new_image_hash}: {image_hash - new_image_hash}\n\t{error_original_img_messages} \n\t{error_new_img_messages}"
            )
            return

        # Test if selected projections work despite not being advertised
        for EPSG in {"EPSG:3857", "EPSG:4326"}:
            if EPSG not in new_projections:
                epsg_check_messages = []
                epsg_image_status, epsg_image_hash = await get_image(
                    url=new_url,
                    available_projections=[EPSG],
                    lon=pt.x,
                    lat=pt.y,
                    zoom=test_zoom_level,
                    session=session,
                    messages=epsg_check_messages,
                )

                if not epsg_image_status == ImageHashStatus.SUCCESS:
                    continue

                # Relax similarity constraint to account for differences due to reprojection
                hash_diff = image_hash - epsg_image_hash
                # org_hash_msgs = "\n\t".join(original_img_messages)
                # epsg_check_msgs = "\n\t".join(epsg_check_messages)
                if image_similar(image_hash, epsg_image_hash, test_zoom_level):
                    new_projections.add(EPSG)
                    # logging.info(
                    #     f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}\n\t{org_hash_msgs}\n\t{epsg_check_msgs}"
                    # )
                    logging.info(
                        f"{filename}: Add {EPSG} despite not being advertised: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )
                elif epsg_image_hash is not None:
                    # logging.info(
                    #     f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}\n\t{org_hash_msgs}\n\t{epsg_check_msgs}"
                    # )
                    logging.info(
                        f"{filename}: Do not add {EPSG} Difference: {epsg_image_hash} - {image_hash}: {hash_diff}"
                    )

        # Servers might support projections that are not used in the area covered by a source
        # Keep only EPSG codes that are used in the area covered by the source's geometry
        if source["geometry"] is not None:
            epsg_outside_area_of_use = set()
            for epsg in new_projections:
                try:
                    if epsg == "CRS:84":
                        continue
                    crs = CRS.from_string(epsg)
                    area_of_use = crs.area_of_use
                    crs_box = box(
                        area_of_use.west,
                        area_of_use.south,
                        area_of_use.east,
                        area_of_use.north,
                    )
                    if not crs_box.intersects(geom):
                        epsg_outside_area_of_use.add(epsg)
                except Exception as e:
                    logging.exception(
                        f"Could not check area of use for projection {epsg}: {e}"
                    )
                    continue
            if len(new_projections) == len(epsg_outside_area_of_use):
                logging.error(
                    f"{filename}: epsg_outside_area_of_use filter removes all EPSG"
                )
            new_projections -= epsg_outside_area_of_use

        # Servers that report a lot of projections may be configured wrongly
        # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
        if len(new_projections) > 15:
            filtered_projs = set()
            for proj in ["CRS:84", "EPSG:3857", "EPSG:4326"]:
                if proj in new_projections:
                    filtered_projs.add(proj)
            for proj in old_projections:
                if proj in new_projections:
                    filtered_projs.add(proj)
            new_projections = filtered_projs

        # Filter alias projections
        if "EPSG:3857" in new_projections:
            new_projections -= epsg_3857_alias
        else:
            # If EPSG:3857 is not present but an alias is, keep only the alias with the highest number to be consistent
            result_epsg_3857_alias = new_projections & epsg_3857_alias
            result_epsg_3857_alias_sorted = list(
                sorted(
                    result_epsg_3857_alias,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                    reverse=True,
                ))
            new_projections -= set(result_epsg_3857_alias_sorted[1:])

        # Filter deprecated projections
        new_projections.intersection_update(valid_epsgs)

        # Check if projections are supported by server
        not_supported_projections = set()
        image_hashes = {}
        for proj in new_projections:
            proj_messages = []
            proj_status, proj_image_hash = await get_image(
                url=new_url,
                available_projections=[proj],
                lon=pt.x,
                lat=pt.y,
                zoom=test_zoom_level,
                session=session,
                messages=proj_messages,
            )
            image_hashes[proj] = {
                "status": proj_status,
                "hash": proj_image_hash,
                "logs": proj_messages,
            }

            if proj_status == ImageHashStatus.IMAGE_ERROR:
                not_supported_projections.add(proj)
                # msgs = "\n\t".join(proj_messages)
                # logging.info(f"{filename} {proj}: {proj_status}:\n\t{msgs}")
            # elif proj_status == ImageHashStatus.SUCCESS and max_count(str(proj_image_hash)) == 16 and not max_count(str(image_hash)) == 16:
            #     # Empty images indicate that server does not support this projection correctly
            #     not_supported_projections.add(proj)
            elif proj_status == ImageHashStatus.NETWORK_ERROR:
                # On an unsuccessful status, keep the projection only if it was previously advertised
                if proj not in old_projections:
                    not_supported_projections.add(proj)

        if len(not_supported_projections) > 0:
            removed_projections = ",".join(not_supported_projections)
            logging.info(
                f"{filename}: remove projections that are advertised but do not return an image: {removed_projections}"
            )
            new_projections -= not_supported_projections

        # Check if EPSG:3857 and EPSG:4326 are similar
        if ("EPSG:3857" in image_hashes and "EPSG:4326" in image_hashes and
                image_hashes["EPSG:3857"]["status"] == ImageHashStatus.SUCCESS
                and image_hashes["EPSG:4326"]["status"]
                == ImageHashStatus.SUCCESS):
            img_hash_3857 = image_hashes["EPSG:3857"]["hash"]
            img_hash_4326 = image_hashes["EPSG:4326"]["hash"]
            if not image_similar(img_hash_3857, img_hash_4326,
                                 test_zoom_level):
                msgs = "\n\t".join(image_hashes["EPSG:3857"]["logs"] +
                                   image_hashes["EPSG:4326"]["logs"])
                logging.warning(
                    f"{filename}: ({category}) ImageHash for EPSG:3857 and EPSG:4326 not similiar: {img_hash_3857} - {img_hash_4326}: {img_hash_3857-img_hash_4326}:\n\t{msgs}"
                )

        # Check if only formatting has changed
        url_has_changed = not compare_urls(source["properties"]["url"],
                                           new_url)
        projections_have_changed = not compare_projs(
            source["properties"]["available_projections"],
            new_projections,
        )

        if url_has_changed:
            source["properties"]["url"] = new_url
        if projections_have_changed:
            source["properties"]["available_projections"] = list(
                sorted(
                    new_projections,
                    key=lambda x: (x.split(":")[0], int(x.split(":")[1])),
                ))

        if url_has_changed or projections_have_changed:
            with open(filename, "w", encoding="utf-8") as out:
                json.dump(source,
                          out,
                          indent=4,
                          sort_keys=False,
                          ensure_ascii=False)
                out.write("\n")
    except Exception as e:
        logging.exception(f"Failed to check source {filename}: {e}")
Example #12
async def get_image(url, available_projections, lon, lat, zoom, session,
                    messages):
    """Download image (tms tile for coordinate lon,lat on level zoom and calculate image hash

    Parameters
    ----------
    url : str
    available_projections : collection
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list

    Returns
    -------
    (ImageHashStatus, ImageHash or None)

    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    img_hash = None
    status = ImageHashStatus.OTHER

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Fall back to the first projection that pyproj can parse.
        for candidate in sorted(available_projections):
            try:
                CRS.from_string(candidate)
            except Exception:
                continue
            proj = candidate
            break
    if proj is None:
        messages.append("No projection left: {}".format(available_projections))
        return status, img_hash

    wms_version = wms_version_from_url(url)
    bbox = _get_bbox(proj, bounds, wms_version)
    if bbox is None:
        messages.append(f"Projection {proj} could not be parsed by pyproj.")
        return status, img_hash

    formatted_url = url.format(proj=proj,
                               width=IMAGE_SIZE,
                               height=IMAGE_SIZE,
                               bbox=bbox)
    messages.append(f"Image URL: {formatted_url}")
    for i in range(3):
        try:
            # Download image
            async with session.request(method="GET",
                                       url=formatted_url,
                                       ssl=False) as response:
                if response.status == 200:
                    data = await response.read()
                    try:
                        img = Image.open(io.BytesIO(data))
                        img_hash = imagehash.average_hash(img)
                        status = ImageHashStatus.SUCCESS
                        messages.append(f"ImageHash: {img_hash}")
                        return status, img_hash
                    except Exception as e:
                        status = ImageHashStatus.IMAGE_ERROR
                        messages.append(str(e))
                        filetype = magic.from_buffer(data)
                        messages.append(
                            f"Could not open received data as image (Received filetype: {filetype} {formatted_url})"
                        )
                else:
                    status = ImageHashStatus.NETWORK_ERROR

        except Exception as e:
            status = ImageHashStatus.NETWORK_ERROR
            messages.append(f"Could not download image in try {i}: {e}")
        await asyncio.sleep(5)

    return status, img_hash
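
`ImageHashStatus` is not defined in these snippets; a plausible definition based on the members used above:

from enum import Enum

class ImageHashStatus(Enum):
    SUCCESS = 1        # image downloaded and hashed
    IMAGE_ERROR = 2    # response body could not be opened as an image
    NETWORK_ERROR = 3  # HTTP error, empty body or request failure
    OTHER = 4          # initial/unknown state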
Example #13
async def get_image(url, available_projections, lon, lat, zoom, session,
                    messages):
    """Download image (tms tile for coordinate lon,lat on level zoom and calculate image hash

    Parameters
    ----------
    url : str
    available_projections : collection
    lon : float
    lat : float
    zoom : int
    session : ClientSession
    messages : list

    Returns
    -------
    (ImageHashStatus, ImageHash or None)

    """
    tile = list(mercantile.tiles(lon, lat, lon, lat, zooms=zoom))[0]
    bounds = list(mercantile.bounds(tile))

    img_hash = None
    status = ImageHashStatus.OTHER

    proj = None
    if "EPSG:4326" in available_projections:
        proj = "EPSG:4326"
    elif "EPSG:3857" in available_projections:
        proj = "EPSG:3857"
    else:
        # Fall back to the first projection that pyproj can parse.
        for candidate in sorted(available_projections):
            try:
                CRS.from_string(candidate)
            except Exception:
                continue
            proj = candidate
            break
    if proj is None:
        messages.append(f"No projection left: {available_projections}")
        return status, img_hash

    wms_version = wmshelper.wms_version_from_url(url)
    bbox = wmshelper.get_bbox(proj, bounds, wms_version)
    if bbox is None:
        messages.append(f"Projection {proj} could not be parsed by pyproj.")
        return status, img_hash

    formatted_url = url.format(proj=proj,
                               width=IMAGE_SIZE,
                               height=IMAGE_SIZE,
                               bbox=bbox)
    messages.append(f"Image URL: {formatted_url}")
    for i in range(2):
        try:
            # Download image
            async with session.request(method="GET",
                                       url=formatted_url,
                                       ssl=False) as response:
                messages.append(f"Try: {i}: HTTP CODE {response.status}")
                for header in response.headers:
                    messages.append(f"{header}: {response.headers[header]}")
                if response.status == 200:
                    data = await response.read()
                    data_length = len(data)
                    if data_length == 0:
                        messages.append(
                            f"Retrieved empty body, treat as NETWORK_ERROR: {data_length}"
                        )
                        status = ImageHashStatus.NETWORK_ERROR
                    else:
                        messages.append(f"len(data): {data_length}")
                        if "Content-Length" in response.headers:
                            advertised_length = int(
                                response.headers["Content-Length"])
                            if not data_length == advertised_length:
                                messages.append(
                                    f"Body not same size as advertised: {data_length} vs {advertised_length}"
                                )
                        try:
                            img = Image.open(io.BytesIO(data))
                            img_hash = imagehash.average_hash(img)
                            status = ImageHashStatus.SUCCESS
                            messages.append(f"ImageHash: {img_hash}")
                            return status, img_hash
                        except Exception as e:
                            status = ImageHashStatus.IMAGE_ERROR
                            messages.append(str(e))
                            filetype = magic.from_buffer(data)
                            messages.append(
                                f"Could not open received data as image (Received filetype: {filetype} Body Length: {data_length} {formatted_url})"
                            )
                else:
                    status = ImageHashStatus.NETWORK_ERROR

                if response.status == 503:  # 503 Service Unavailable
                    await asyncio.sleep(30)

        except Exception as e:
            status = ImageHashStatus.NETWORK_ERROR
            messages.append(f"Could not download image in try {i}: {e}")
        await asyncio.sleep(15)

    return status, img_hash
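
`magic.from_buffer` (from the python-magic package) is used above to identify payloads that are not images; for example:

import magic

# Output depends on the local libmagic database.
print(magic.from_buffer(b"<html><body>Service error</body></html>"))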
Example #14
def from_string(string):
    """Returns a CRS from a proj string"""
    return CRS.from_string(string)
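
Usage (pyproj >= 2):

crs = from_string("EPSG:3857")
print(crs.to_authority())  # ('EPSG', '3857')
print(from_string("+proj=longlat +datum=WGS84 +no_defs").is_geographic)  # True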
Example #15
async def process_source(filename, session: ClientSession):
    async with aiofiles.open(filename, mode='r', encoding='utf-8') as f:
        contents = await f.read()
        source = json.loads(contents)

    # Exclude sources
    # Skip non-WMS layers
    if not source['properties']['type'] == 'wms':
        return
    # Check if it is ESRI REST and not WMS
    if 'bboxSR' in source['properties']['url']:
        return
    if 'available_projections' not in source['properties']:
        return
    if 'header' in source['properties']['url']:
        return
    if 'geometry' not in source:
        return

    if source['geometry'] is None:
        geom = box(-180, -90, 180, 90)
        pt = Point(7.44, 46.56)
    else:
        geom = parse_eli_geometry(source['geometry'])
        pt = geom.representative_point()

    # Get existing image hash
    original_img_messages = []
    image_hash = await get_image(
        url=source['properties']['url'],
        available_projections=source['properties']['available_projections'],
        lon=pt.x,
        lat=pt.y,
        zoom=ZOOM_LEVEL,
        session=session,
        messages=original_img_messages)
    if image_hash is None:
        # We are finished if it was not possible to get the image
        return

    if max_count(str(image_hash)) == 16:
        # These image hashes indicate that the downloaded image is not useful to determine
        # if the updated query returns the same image
        logging.info("ImageHash {} not useful for: {} || {}".format(
            str(image_hash), filename, " || ".join(original_img_messages)))
        return

    # Update wms
    wms_messages = []
    result = await update_wms(source['properties']['url'], session,
                              wms_messages)
    if result is None:
        logging.info("Not possible to update wms url for {}: {}".format(
            filename, " || ".join(wms_messages)))
        return

    # Test if selected projections work despite not being advertised
    for EPSG in {'EPSG:3857', 'EPSG:4326'}:
        if EPSG not in result['available_projections']:
            epsg_image_hash = await get_image(url=result['url'],
                                              available_projections=[EPSG],
                                              lon=pt.x,
                                              lat=pt.y,
                                              zoom=ZOOM_LEVEL,
                                              session=session,
                                              messages=[])
            if epsg_image_hash == image_hash:
                result['available_projections'].add(EPSG)

    # Download image for updated url
    new_img_messages = []
    new_image_hash = await get_image(
        url=result['url'],
        available_projections=result['available_projections'],
        lon=pt.x,
        lat=pt.y,
        zoom=ZOOM_LEVEL,
        session=session,
        messages=new_img_messages)

    if new_image_hash is None:
        logging.warning("Could not download new image: {}".format(
            " || ".join(new_img_messages)))
        return

    # Only update sources where the new query returns the same image
    if not image_hash == new_image_hash:
        logging.info("Image hash not the same for: {}: {} {} | {} | {}".format(
            filename, image_hash, new_image_hash,
            "||".join(original_img_messages), "||".join(new_img_messages)))

    # Servers might support projections that are not used in the area covered by a source
    # Keep only EPSG codes that are used in the area covered by the source's geometry
    if source['geometry'] is not None:
        epsg_outside_area_of_use = set()
        for epsg in source['properties']['available_projections']:
            try:
                if epsg == 'CRS:84':
                    continue
                crs = CRS.from_string(epsg)
                area_of_use = crs.area_of_use
                crs_box = box(
                    area_of_use.west,
                    area_of_use.south,
                    area_of_use.east,
                    area_of_use.north,
                )
                if not crs_box.intersects(geom):
                    epsg_outside_area_of_use.add(epsg)
            except Exception as e:
                logging.exception(
                    "Could not check area of use for projection {}: {}".format(
                        epsg, str(e)))
                continue
        if len(result["available_projections"]) == len(
                epsg_outside_area_of_use):
            logging.error(
                "{}: epsg_outside_area_of_use filter removes all EPSG".format(
                    filename))
        result["available_projections"] = [
            epsg for epsg in result["available_projections"]
            if epsg not in epsg_outside_area_of_use
        ]

    # Servers that report a lot of projections may be configured wrongly
    # Check for CRS:84, EPSG:3857, EPSG:4326 and keep existing projections if still advertised
    if len(result['available_projections']) > 15:
        filtered_projs = set()
        for proj in ['CRS:84', 'EPSG:3857', 'EPSG:4326']:
            if proj in result['available_projections']:
                filtered_projs.add(proj)
        for proj in source['properties']['available_projections']:
            if proj in result['available_projections']:
                filtered_projs.add(proj)
        result['available_projections'] = filtered_projs

    # Filter alias projections
    if 'EPSG:3857' in result['available_projections']:
        for epsg in epsg_3857_alias:
            if epsg in result['available_projections']:
                result['available_projections'].remove(epsg)

    # Filter deprecated projections
    result["available_projections"] = [
        epsg for epsg in result["available_projections"]
        if epsg == "CRS:84" or (
            epsg in valid_epsgs and epsg not in epsg_3857_alias)
    ]

    # Check if only formatting has changed
    url_has_changed = not compare_urls(source['properties']['url'],
                                       result['url'])
    projections_have_changed = not compare_projs(
        source['properties']['available_projections'],
        result['available_projections'])

    if url_has_changed:
        source['properties']['url'] = result['url']
    if projections_have_changed:
        source['properties']['available_projections'] = list(
            sorted(result['available_projections'],
                   key=lambda x: (x.split(':')[0], int(x.split(':')[1]))))

    if url_has_changed or projections_have_changed:
        with open(filename, 'w', encoding='utf-8') as out:
            json.dump(source,
                      out,
                      indent=4,
                      sort_keys=False,
                      ensure_ascii=False)
            out.write("\n")