Example #1
import numpy as np
from osgeo.osr import CoordinateTransformation


def crs_transform(x, y, from_crs, to_crs):
    """Transform coordinate values between two coordinate systems

    Transform coordinate values between two coordinate systems. The shapes
    of the input arrays are preserved.

    :param x:
        First coordinate array
    :type x: numpy.ndarray
    :param y:
        Second coordinate array
    :type y: numpy.ndarray
    :param from_crs:
        Source coordinates reference frame
    :type from_crs: osgeo.osr.SpatialReference
    :param to_crs:
        Transformed coordinates reference frame
    :type to_crs: osgeo.osr.SpatialReference
    :returns:
        (xp, yp), the transformed coordinates
    :rtype: (numpy.ndarray, numpy.ndarray)
    """

    xarr = np.array(x)
    yarr = np.array(y)

    if xarr.size == 0 and yarr.size == 0:
        # Nothing to transform; return the (empty) inputs as a tuple
        return xarr, yarr

    ct = CoordinateTransformation(from_crs, to_crs)

    # Flatten to 1-D and build (x, y, z) triples for TransformPoints
    xrv = xarr.ravel()
    yrv = yarr.ravel()
    points = np.stack([xrv, yrv, np.zeros_like(xrv)]).T
    result = np.array(ct.TransformPoints(points))
    # Restore the original array shapes
    xp = result[:, 0].reshape(xarr.shape)
    yp = result[:, 1].reshape(yarr.shape)
    return xp, yp
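
Below is a minimal usage sketch, not taken from the original source: the CRS pair (WGS84 to Web Mercator) and the coordinate values are illustrative, and the axis-order handling assumes GDAL 3 semantics.

# Hypothetical usage sketch for crs_transform; assumes GDAL/osgeo and numpy are installed.
import numpy as np
from osgeo import osr

wgs84 = osr.SpatialReference()
wgs84.ImportFromEPSG(4326)
if hasattr(wgs84, 'SetAxisMappingStrategy'):
    # GDAL 3+: force lon/lat axis order so x is longitude and y is latitude
    wgs84.SetAxisMappingStrategy(osr.OAMS_TRADITIONAL_GIS_ORDER)

web_mercator = osr.SpatialReference()
web_mercator.ImportFromEPSG(3857)

lon = np.array([[4.0, 5.0], [6.0, 7.0]])     # 2-D inputs; shapes are preserved
lat = np.array([[52.0, 52.5], [53.0, 53.5]])
xp, yp = crs_transform(lon, lat, wgs84, web_mercator)
print(xp.shape, yp.shape)                    # (2, 2) (2, 2)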
Example #2
    def set_netcdf_metadata_attributes(
            self, to_crs='EPSG:4326', do_stats=False):
        '''
        Set all NetCDF metadata attributes, using self.METADATA_MAPPING to map
        from each NetCDF ACDD global attribute name to a metadata path (e.g. xpath).

        Parameters:
            to_crs: EPSG code string (e.g. 'EPSG:4326') or WKT for spatial metadata
            do_stats: Boolean flag indicating whether min/max stats should be computed (slow)
        '''
        assert self.METADATA_MAPPING, 'No metadata mapping defined'
        assert self._netcdf_dataset, 'NetCDF output dataset not defined.'
#        assert self._metadata_dict, 'No metadata acquired'

        # Set geospatial attributes
        try:
            grid_mapping = [variable.grid_mapping
                            for variable in self._netcdf_dataset.variables.values()
                            if hasattr(variable, 'grid_mapping')][0]
        except IndexError:
            logger.error(
                'Unable to determine grid_mapping for spatial reference')
            raise

        crs = self._netcdf_dataset.variables[grid_mapping]

        spatial_ref = crs.spatial_ref
        geoTransform = [float(string)
                        for string in crs.GeoTransform.strip().split()]
        xpixels, ypixels = (
            dimension.size for dimension in self._netcdf_dataset.dimensions.values())
        dimension_names = (
            dimension.name for dimension in self._netcdf_dataset.dimensions.values())

        # Create nested list of bounding box corner coordinates
        bbox_corners = [[geoTransform[0] + (x_pixel_offset * geoTransform[1]) + (y_pixel_offset * geoTransform[2]),
                         geoTransform[3] + (x_pixel_offset * geoTransform[4]) + (y_pixel_offset * geoTransform[5])]
                        for x_pixel_offset in [0, xpixels]
                        for y_pixel_offset in [0, ypixels]]

        if to_crs:  # Coordinate transformation required
            from_spatial_ref = SpatialReference()
            from_spatial_ref.ImportFromWkt(spatial_ref)

            to_spatial_ref = SpatialReference()
            # Check for EPSG then Well Known Text
            epsg_match = re.match(r'^EPSG:(\d+)$', to_crs)
            if epsg_match:
                to_spatial_ref.ImportFromEPSG(int(epsg_match.group(1)))
            else:  # Assume valid WKT definition
                to_spatial_ref.ImportFromWkt(to_crs)

            coord_trans = CoordinateTransformation(
                from_spatial_ref, to_spatial_ref)

            extents = np.array(
                [coord[0:2] for coord in coord_trans.TransformPoints(bbox_corners)])
            spatial_ref = to_spatial_ref.ExportToWkt()

            centre_pixel_coords = [coord[0:2] for coord in coord_trans.TransformPoints(
                [[geoTransform[0] + (x_pixel_offset * geoTransform[1]) + (y_pixel_offset * geoTransform[2]),
                  geoTransform[3] + (x_pixel_offset * geoTransform[4]) + (y_pixel_offset * geoTransform[5])]
                 for x_pixel_offset in [xpixels // 2, xpixels // 2 + 1]
                 for y_pixel_offset in [ypixels // 2, ypixels // 2 + 1]]
            )
            ]

            # Use Pythagoras to compute centre pixel size in new coordinates
            # (never mind the angles)
            yres = np.hypot(centre_pixel_coords[1][0] - centre_pixel_coords[0][0],
                            centre_pixel_coords[1][1] - centre_pixel_coords[0][1])
            xres = np.hypot(centre_pixel_coords[2][0] - centre_pixel_coords[0][0],
                            centre_pixel_coords[2][1] - centre_pixel_coords[0][1])

            # TODO: Make this more robust - could pull single unit from WKT
            if to_spatial_ref.IsGeographic():
                xunits, yunits = ('degrees_east', 'degrees_north')
            elif to_spatial_ref.IsProjected():
                xunits, yunits = ('m', 'm')
            else:
                xunits, yunits = ('unknown', 'unknown')

        else:  # Use native coordinates
            extents = np.array(bbox_corners)
            xres = round(geoTransform[1], Geophys2NetCDF.DECIMAL_PLACES)
            yres = round(geoTransform[5], Geophys2NetCDF.DECIMAL_PLACES)
            xunits, yunits = (self._netcdf_dataset.variables[
                              dimension_name].units for dimension_name in dimension_names)

        xmin = np.min(extents[:, 0])
        ymin = np.min(extents[:, 1])
        xmax = np.max(extents[:, 0])
        ymax = np.max(extents[:, 1])

        attribute_dict = dict(zip(['geospatial_lon_min', 'geospatial_lat_min', 'geospatial_lon_max', 'geospatial_lat_max'],
                                  [xmin, ymin, xmax, ymax]
                                  )
                              )
        attribute_dict['geospatial_lon_resolution'] = xres
        attribute_dict['geospatial_lat_resolution'] = yres
        attribute_dict['geospatial_lon_units'] = xunits
        attribute_dict['geospatial_lat_units'] = yunits

        try:
            # Process dataset in pieces <= 2GB in size
            convex_hull = [coordinate[0:2] for coordinate in coord_trans.TransformPoints(
                netcdf2convex_hull(self.netcdf_dataset, 2000000000))]
        except Exception:
            logger.info('Unable to compute convex hull. Using rectangular bounding box instead.')
            convex_hull = [coordinate[0:2] for coordinate in coord_trans.TransformPoints(bbox_corners + [bbox_corners[0]])]

        attribute_dict['geospatial_bounds'] = 'POLYGON((' + ', '.join([' '.join(
            ['%.4f' % ordinate for ordinate in coordinates]) for coordinates in convex_hull]) + '))'

        attribute_dict['geospatial_bounds_crs'] = spatial_ref

        for key, value in attribute_dict.items():
            setattr(self._netcdf_dataset, key, value)

        # Set attributes defined in self.METADATA_MAPPING
        # Scan list in reverse to give priority to earlier entries
        #TODO: Improve this coding - it's a bit crap
        keys_read = []
        for key, metadata_path in self.METADATA_MAPPING:
            # Skip any keys already read
            if key in keys_read:
                continue

            value = self.get_metadata(metadata_path)
            if value is not None:
                logger.debug('Setting %s to %s', key, value)
                # TODO: Check whether hierarchical metadata required
                setattr(self._netcdf_dataset, key, value)
                keys_read.append(key)
            else:
                logger.warning('Metadata path %s not found', metadata_path)

        unread_keys = sorted(
            list(set([item[0] for item in self.METADATA_MAPPING]) - set(keys_read)))
        if unread_keys:
            logger.warning('No value found for metadata attribute(s) %s',
                           ', '.join(unread_keys))

        # Ensure only one DOI is stored - could be multiple, comma-separated
        # entries
        if hasattr(self._netcdf_dataset, 'doi'):
            url_list = [url.strip()
                        for url in self._netcdf_dataset.doi.split(',')]
            doi_list = [url for url in url_list if url.startswith(
                'http://dx.doi.org/')]
            if len(url_list) > 1:  # If more than one URL in list
                try:  # Give preference to proper DOI URL
                    url = doi_list[0]  # Use first (preferably only) DOI URL
                except IndexError:
                    url = url_list[0]  # Just use first URL if no DOI found
                url = url.replace('&amp;', '&')
                self._netcdf_dataset.doi = url

        # Set metadata_link to NCI metadata URL
        self._netcdf_dataset.metadata_link = 'https://pid.nci.org.au/dataset/%s' % self.uuid

        self._netcdf_dataset.Conventions = 'CF-1.6, ACDD-1.3'

        if do_stats:
            datastats = DataStats(netcdf_dataset=self.netcdf_dataset,
                                  netcdf_path=None, max_bytes=2000000000)  # 2GB pieces
            datastats.data_variable.actual_range = np.array(
                [datastats.value('min'), datastats.value('max')], dtype='float32')

        # Remove old fields - remove this later
        if hasattr(self._netcdf_dataset, 'id'):
            del self._netcdf_dataset.id
        if hasattr(self._netcdf_dataset, 'ga_uuid'):
            del self._netcdf_dataset.ga_uuid
        if hasattr(self._netcdf_dataset, 'keywords_vocabulary'):
            del self._netcdf_dataset.keywords_vocabulary
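
The bounding-box corners and centre-pixel coordinates above all come from the standard GDAL GeoTransform affine mapping. Below is a short standalone sketch of that mapping; the helper name pixel_to_map and the sample geotransform values are illustrative, not part of the original class.

# Sketch of the GDAL GeoTransform convention used above:
#   x = GT[0] + col * GT[1] + row * GT[2]
#   y = GT[3] + col * GT[4] + row * GT[5]
def pixel_to_map(geo_transform, col, row):
    """Map a (col, row) pixel offset to map coordinates (illustrative helper)."""
    x = geo_transform[0] + col * geo_transform[1] + row * geo_transform[2]
    y = geo_transform[3] + col * geo_transform[4] + row * geo_transform[5]
    return x, y

# Example: the four corners of a 100 x 200 pixel, north-up grid
gt = [100000.0, 25.0, 0.0, 500000.0, 0.0, -25.0]
corners = [pixel_to_map(gt, col, row) for col in (0, 100) for row in (0, 200)]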
Example #3
class Transects:
    """
    Wrapper for JARKUS transects
    """

    # CHRISTA: This wrapper was built by people from Deltares. I feel like this could be done in a clearer and more accessible way,
    # e.g. without relying so heavily on 'self', so that people with less coding experience can more easily follow what is going on under the hood.
    # I haven't got around to rebuilding it myself though...
    def __init__(self, *args, **kwargs):
        """
        instantiate the environment
        """
        if 'url' in kwargs:
            self.url = kwargs.pop('url')
        else:
            self.url = 'https://opendap.tudelft.nl/thredds/catalog/data2/deltares/rijkswaterstaat/jarkus/profiles/catalog.html?dataset=scanDatasets2/deltares/rijkswaterstaat/jarkus/profiles/transect.nc'

        try:
            #self.ds = Dataset("C://Users//cijzendoornvan//Documents//DuneForce//JARKUS//Transect_Data//transect_r20180914.nc")
            # CHRISTA: the line above is a hack I used to refer to a local file. By downloading the JARKUS data and pointing to that file here, the scripts become independent of internet access. This could be built in more robustly...
            self.ds = Dataset(self.url)
        except OSError as e:
            err = OSError('%s. "%s" not found.' % (e, self.url))
            logger.error(err)
            raise err

        self.dims = self.ds.dimensions

        # initiate filter
        self.filter = dict.fromkeys(self.dims.keys())
        self.reset_filter()
        self.set_filter(**kwargs)

    def reset_filter(self, *args):
        """
        remove filter for all dimensions (default) or for the specified dimensions only
        """
        if args == ():
            args = self.dims.keys()
        for k in args:
            self.filter[k] = np.ones(len(self.dims[k]), dtype=bool)

    def set_filter(self, **kwargs):
        """
        set filter by one or more keyword arguments
        filters can be specified as boolean (shape must match the dimension's shape), as indices or as variable specification.
        """
        for k, v in kwargs.items():
            isdim = k in self.ds.dimensions.keys()
            isvar = k in self.ds.variables.keys()
            if (k in self.dims and isinstance(v, np.ndarray)
                    and v.dtype == bool and len(v) == len(self.dims[k])):
                self.filter[k] = np.logical_and(self.filter[k], v)
            elif isinstance(
                    v, (int, np.integer)) and k in self.dims and np.all(
                        np.abs(np.asarray(v)) < len(self.dims[k])):
                self.filter[k] = np.zeros(len(self.dims[k]), dtype=bool)
                self.filter[k][v] = True
            elif k == 'year':
                self.filter['time'] = self.year2idx(v)
            elif isvar and not isdim:
                dimname = self.ds.variables[k].dimensions[0]
                self.filter[dimname] = np.logical_and(
                    self.filter[dimname],
                    np.in1d(self.ds.variables[k][:], np.asarray(v)))

    def get_filter(self, key):
        """
        returns filter for specified key
        """
        return self.filter[key]

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """
        close NetCDF file
        """
        self.close()

    def close(self):
        """
        close NetCDF file
        """
        self.ds.close()

    def get_data(self, varname):
        """
        returns data for specified variable and applies available filters
        """
        return self.ds.variables[varname][[
            self.filter[k] for k in self.ds.variables[varname].dimensions
        ]]

    def areaname2areacode(self, areaname):
        """
        returns the areacode for a specified areaname.
        \nToDo: include in another class of the same package "jarkus_transects", eventually.
        """
        # areas according to RWS definition
        areas = {
            "Schiermonnikoog": 2,
            "Ameland": 3,
            "Terschelling": 4,
            "Vlieland": 5,
            "Texel": 6,
            "Noord-Holland": 7,
            "Rijnland": 8,
            "Delfland": 9,
            "Maasvlakte": 10,
            "Voorne": 11,
            "Goeree": 12,
            "Schouwen": 13,
            "Noord-Beveland": 15,
            "Walcheren": 16,
            "Zeeuws-Vlaanderen": 17
        }
        if isinstance(areaname, str):
            return areas.get(areaname)
        if isinstance(areaname, list):
            return list(map(areas.get, areaname))

    def time2year(self, t):
        """
        convert time to year
        """
        time = self.ds.variables['time']
        if isinstance(t, (int, np.integer)):
            return num2date(t, time.units).year
        else:
            return np.asarray(
                [y.year for y in np.asarray(num2date(t, time.units))])

    def year2idx(self, year):
        """
        returns boolean index array to be applied to the time dimension
        """
        #time = self.ds.variables['time']
        #years = [y.year for y in num2date(time, time.units)]
        years = self.time2year(self.ds.variables['time'][:])
        if not year:
            year = years
        idx = np.in1d(years, np.asarray(year))
        return idx

    def cross_shore2xyRD(self, cs, transect_id, axis=None):
        """
        returns RD coordinates (EPSG 28992) for cross-shore coordinate(s) (relative to RSP)
        """
        cs = np.asarray(cs)
        transect_id = np.asarray(transect_id)
        aidx = np.in1d(self.ds.variables['id'], transect_id)
        cs_f = np.array((self.ds.variables['cross_shore'][0],
                         self.ds.variables['cross_shore'][-1]))
        x_f = np.array((self.ds.variables['x'][aidx,
                                               0], self.ds.variables['x'][aidx,
                                                                          -1]))
        y_f = np.array((self.ds.variables['y'][aidx,
                                               0], self.ds.variables['y'][aidx,
                                                                          -1]))
        px = np.polyfit(cs_f, x_f, 1)
        py = np.polyfit(cs_f, y_f, 1)
        x = np.polyval(px, cs)
        y = np.polyval(py, cs)
        return x, y

    def initcc(self):
        """
        initialize coordinate conversion
        """
        if not hasattr(self, 'rd2latlon'):
            from osgeo.osr import SpatialReference, CoordinateTransformation

            # Define the Rijksdriehoek projection system (EPSG 28992)
            epsg28992 = SpatialReference()
            epsg28992.ImportFromEPSG(28992)
            # correct the towgs84
            epsg28992.SetTOWGS84(565.237, 50.0087, 465.658, -0.406857,
                                 0.350733, -1.87035, 4.0812)

            # Define the wgs84 system (EPSG 4326)
            epsg4326 = SpatialReference()
            epsg4326.ImportFromEPSG(4326)
            self.rd2latlon = CoordinateTransformation(epsg28992, epsg4326)
            #latlon2rd = CoordinateTransformation(epsg4326, epsg28992)
            # Check the transformation (in case of a missing towgs84)
            #latlonz = rd2latlon.TransformPoint(155000.0, 446000.0)
            #print latlonz # (5.387202946158022, 52.00237563479786, 43.6057764403522)

    def cross_shore2lonlat(self, cs, transect_id, axis=None):
        """
        returns WGS84 (lat, lon) coordinates (EPSG 4326) for cross-shore coordinate(s) (relative to RSP)
        """
        x, y = self.cross_shore2xyRD(cs, transect_id, axis=axis)
        self.initcc()
        xy = list(zip(x, y))  # TransformPoints needs a sequence, not an iterator
        lat, lon, _ = zip(*self.rd2latlon.TransformPoints(xy))
        return lon, lat

    def MKL(self, x=None, z=None, lower=-1, upper=3):
        """
        volume based instantaneous shoreline position (momentane kustlijn ligging; MKL)
        if x and z are provided, they should be 1D arrays.
        if not, x (cross-shore) and z (altitude) are obtained using the available filter settings
        """
        if (upper - lower) <= 0:
            # boundaries have to be consistent (upper > lower)
            logger.warning(
                'No MKL can be derived with inconsistent boundaries (lower=%g, upper=%g)'
                % (lower, upper))
            return None

        import shapely.geometry

        if x is None and z is None:
            x = self.get_data('cross_shore')
            z = self.get_data('altitude')
            xMKL = np.ones(z.shape[:2]) * np.nan
            zMKL = np.ones(z.shape[:2]) * np.nan
            for it in np.arange(z.shape[0]):
                for il in np.arange(z.shape[1]):
                    mask = z[it, il, ].mask
                    result = self.MKL(x=x[~mask],
                                      z=z[it, il, ].data[~mask],
                                      lower=lower,
                                      upper=upper)
                    if result:
                        xMKL[it, il] = result['mkl'][0]
                        zMKL[it, il] = result['mkl'][1]
            return xMKL, zMKL
#        try:
#            shapelock.acquire()
        if hasattr(z, 'mask'):
            logger.debug('only non-masked values are retained')
            x = x[~z.mask]
            z = z.data[~z.mask]
        if len(x) < 3:
            logger.debug(
                'x vector has only %i elements where at least 3 are required',
                len(x))
            return None
        # look up coordinates
        X = np.c_[x, z]

        # define an interpolation function
        f = interp1d(x, z, kind='linear', bounds_error=False, copy=True)

        # convert them to a shape
        # look up the bounds of the profile
        min_x = x.min()
        min_z = z.min()
        max_x = x.max()

        # we do not want any double points, cause that invalidates a polygon (SFS)
        # go down one extra, because we don't want to go backward through the same points
        coords = np.r_[X, [[max_x, min_z - 1], [min_x, min_z - 1], X[0, :]]]
        # poly_x = asShape(shapely.geometry.asPolygon(coords))
        poly_x = shapely.geometry.Polygon(coords.astype('float'))
        assert poly_x.is_valid

        # look up the lower intersections with the lower and upper boundary
        # lower
        line_lower = shapely.geometry.LineString(
            [[min_x, lower], [max_x, lower]])
        assert line_lower.is_valid
        intersects_lower = (line_lower.intersection(poly_x))
        assert intersects_lower.is_valid
        # upper
        line_upper = shapely.geometry.LineString(
            [[min_x, upper], [max_x, upper]])
        assert line_upper.is_valid
        intersects_upper = (line_upper.intersection(poly_x))
        assert intersects_upper.is_valid

        if intersects_lower.is_empty or intersects_upper.is_empty:
            logger.debug(
                'one or both boundaries do not intersect with the profile')
            return None

        # by using the bounds, the number of intersections doesn't matter
        swb = intersects_lower.bounds[2]
        lwb = intersects_upper.bounds[2]

        # calculate mkl using maximum method
        boundary_box = shapely.geometry.Polygon([[lwb, upper], [lwb, lower],
                                                 [swb, lower], [swb, upper],
                                                 [lwb, upper]])
        mkl_volume = boundary_box.intersection(poly_x)
        if boundary_box.area + mkl_volume.area == 0:
            return None
        mkl_x = lwb + (swb - lwb) * (mkl_volume.area /
                                     (boundary_box.area + mkl_volume.area))
        mkl_y = f(mkl_x)

        result = {}

        result['mkl'] = np.asarray([mkl_x, mkl_y])
        result['lwb'] = np.asarray([lwb, upper])
        result['swb'] = np.asarray([swb, lower])
        result['mkl_volume'] = mkl_volume

        result['X'] = X
        #        finally:
        #            shapelock.release()
        return result

    def get_jrk(self):
        """
        Convert current selection of data to .jrk string
        """
        fmt = '%6i %6i'
        years = self.time2year(self.get_data('time'))
        z = self.get_data('altitude')
        o = self.get_data('origin')
        aids = self.get_data('id')
        x = self.get_data('cross_shore')
        mhw = self.get_data('mean_high_water')
        mlw = self.get_data('mean_low_water')
        max_y = self.get_data('max_altitude_measurement')
        min_y = self.get_data('min_altitude_measurement')
        time_topo = self.get_data('time_topo')
        time_bathy = self.get_data('time_bathy')
        ###s = ''
        s = []
        for ia, aid in enumerate(aids):
            for i, year in enumerate(years):
                zc = np.ma.masked_invalid(np.squeeze(z[i, ia, :]))
                idx = ~zc.mask
                nx = np.count_nonzero(idx)
                if nx == 0:
                    continue
                zc = zc[idx]
                xc = x[idx]
                data = list(zip(xc, zc))
                if nx % 5 != 0:
                    # fill incomplete rows with dummy values
                    dummyvals = [(99999, 999999)] * (5 - nx % 5)
                    data = data + dummyvals
                s.append([
                    aid, year, data, mhw, mlw, max_y, min_y, time_topo,
                    time_bathy
                ])
        return s

    def filter_jrk(self, idx, years_req=[]):
        a = []
        self.set_filter(alongshore=idx, year=years_req)
        a = self.get_jrk()

        # Convert retrieved data for easy visualisation
        x_values = []
        y_values = []
        years_included = []
        for i in range(len(a)):
            years_included.append(a[i][1])

            # Convert dummy fill values to nan so they are not included in the visualisation
            x = np.array([pt[0] for pt in a[i][2]], dtype=float)
            x[x == 99999] = np.nan
            x_values.append(x)

            y = np.array([pt[1] for pt in a[i][2]], dtype=float)
            y[y == 999999] = np.nan
            y_values.append(y)

        # Find difference between years included and those requested
        years_missing = list(set(years_req) - set(years_included))
        # If all requested years were available, report that
        if len(years_missing) == 0:
            print("All requested years were available")
        # If years were missing, show an error
        else:
            print(
                "ERROR - For transect {} the following year(s) were not available:"
                .format(a[0][0]))
            print(years_missing)
        return a, x_values, y_values, years_included

# CHRISTA: I built this function to directly convert the data to pandas dataframes, just because I like working with them better.

    def get_dataframe(self, transect, years_requested):
        import pandas as pd
        ids = self.get_data('id')
        idxs = np.isin(ids, transect)
        df_jrk_all = pd.DataFrame()
        for idx in np.nonzero(idxs)[0]:
            a, x_values, y_values, years_included = self.filter_jrk(
                idx, years_requested)
            df_jrk_yrs = pd.DataFrame()

            for indx, yr in enumerate(years_included):
                df_jrk = pd.DataFrame()
                x = x_values[indx]
                y = y_values[indx]
                mhw = float(a[indx][3][0])
                mlw = float(a[indx][4][0])
                max_y = float(a[indx][5][years_requested.index(yr)])
                min_y = float(a[indx][6][years_requested.index(yr)])
                time_topo = float(a[indx][7][years_requested.index(yr)])
                time_bathy = float(a[indx][8][years_requested.index(yr)])

                trsct = str(a[indx][0])
                yr = str(a[indx][1])

                df_jrk = pd.DataFrame({
                    'transect': trsct,
                    'year': yr,
                    'x': x,
                    'y': y,
                    'mhw': mhw,
                    'mlw': mlw,
                    'max_elevation': max_y,
                    'min_elevation': min_y,
                    'time_topo': time_topo,
                    'time_bathy': time_bathy
                })

                df_jrk_yrs = pd.concat([df_jrk_yrs, df_jrk])
            df_jrk_all = pd.concat([df_jrk_all, df_jrk_yrs])
        df_jrk_all.set_index(['transect', 'year'], inplace=True)
        years_included = [str(y) for y in years_included]

        return df_jrk_all, years_included
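
A hedged usage sketch of the wrapper as a whole; the transect id and year list below are illustrative values (not from the original source), and opening the default OPeNDAP URL requires internet access.

# Hypothetical usage; transect id 7003800 and the year list are illustrative.
tr = Transects()                      # opens the default OPeNDAP dataset
tr.set_filter(id=7003800, year=[2015, 2016])
x = tr.get_data('cross_shore')        # cross-shore coordinates
z = tr.get_data('altitude')           # filtered altitude profiles
x_mkl, z_mkl = tr.MKL()               # momentane kustlijn for the filtered selection
tr.close()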