Esempio n. 1
0
    def __init__(self, file, grid=None, time=None, monthbegin=False):
        """Open the file and try to understand it.

        Parameters
        ----------
        file: path to the netcdf file
        grid: a Grid object. This will override the normal behavior of
        GeoNetcdf, which is to try to understand the grid automatically.
        time: a time array. This will override the normal behavior of
        GeoNetcdf, which is to try to understand the time automatically.
        monthbegin: set to true if you are sure that your data is monthly
        and that the data provider decided to tag the date as the center of
        the month (stupid)
        """

        self._nc = netCDF4.Dataset(file)
        self.variables = self._nc.variables
        if grid is None:
            grid = sio.grid_from_dataset(self._nc)
        if time is None:
            time = sio.netcdf_time(self._nc, monthbegin=monthbegin)
        dn = self._nc.dimensions.keys()
        self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
        self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]
        dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
        self.t_dim = dim[0] if dim else None
        dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
        self.z_dim = dim[0] if dim else None

        GeoDataset.__init__(self, grid, time=time)
Esempio n. 2
0
def _salem_grid_from_dataset(ds):
    """Seek for coordinates that Salem might have created.

    Current convention: x_coord, y_coord, pyproj_srs as attribute
    """

    # Projection
    try:
        proj = ds.pyproj_srs
    except AttributeError:
        proj = None
    proj = gis.check_crs(proj)
    if proj is None:
        return None

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    x = ds.variables[x][:]
    y = ds.variables[y][:]

    # Make the grid
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    args = dict(nxny=(x.shape[0], y.shape[0]),
                proj=proj,
                dxdy=(dx, dy),
                x0y0=(x[0], y[0]))
    return gis.Grid(**args)
Esempio n. 3
0
def _salem_grid_from_dataset(ds):
    """Seek for coordinates that Salem might have created.

    Current convention: x_coord, y_coord, pyproj_srs as attribute
    """


    # Projection
    try:
        proj = ds.pyproj_srs
    except AttributeError:
        proj = None
    proj = gis.check_crs(proj)
    if proj is None:
        return None

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    x = ds.variables[x][:]
    y = ds.variables[y][:]

    # Make the grid
    dx = x[1]-x[0]
    dy = y[1]-y[0]
    args = dict(nxny=(x.shape[0], y.shape[0]), proj=proj, dxdy=(dx, dy))
    args['corner'] = (x[0], y[0])
    return gis.Grid(**args)
Esempio n. 4
0
def _lonlat_grid_from_dataset(ds):
    """Seek for longitude and latitude coordinates."""

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    lon = ds.variables[x][:]
    lat = ds.variables[y][:]

    # double check for dubious variables
    if not utils.str_in_list([x], utils.valid_names['lon_var']) or \
            not utils.str_in_list([y], utils.valid_names['lat_var']):
        # name not usual. see if at least the range follows some conv
        if (np.max(np.abs(lon)) > 360.1) or (np.max(np.abs(lat)) > 90.1):
            return None

    # Make the grid
    dx = lon[1] - lon[0]
    dy = lat[1] - lat[0]
    args = dict(nxny=(lon.shape[0], lat.shape[0]),
                proj=wgs84,
                dxdy=(dx, dy),
                x0y0=(lon[0], lat[0]))
    return gis.Grid(**args)
Esempio n. 5
0
    def __init__(self, xarray_obj):

        self._obj = xarray_obj

        if isinstance(xarray_obj, xr.DataArray):
            xarray_obj = xarray_obj.to_dataset(name='var')
            try:  # maybe there was already some georef
                xarray_obj.attrs['pyproj_srs'] = xarray_obj['var'].pyproj_srs
            except:
                pass

        self.grid = grid_from_dataset(xarray_obj)
        if self.grid is None:
            raise RuntimeError('dataset Grid not understood.')

        dn = xarray_obj.dims.keys()
        self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
        self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]
        dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
        self.t_dim = dim[0] if dim else None
        dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
        self.z_dim = dim[0] if dim else None
Esempio n. 6
0
    def __init__(self, xarray_obj):

        self._obj = xarray_obj

        if isinstance(xarray_obj, xr.DataArray):
            xarray_obj = xarray_obj.to_dataset(name='var')
            try:  # maybe there was already some georef
                xarray_obj.attrs['pyproj_srs'] = xarray_obj['var'].pyproj_srs
            except:
                pass

        self.grid = grid_from_dataset(xarray_obj)
        if self.grid is None:
            raise RuntimeError('dataset Grid not understood.')

        dn = xarray_obj.dims.keys()
        self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
        self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]
        dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
        self.t_dim = dim[0] if dim else None
        dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
        self.z_dim = dim[0] if dim else None
Esempio n. 7
0
def _lonlat_grid_from_dataset(ds):
    """Seek for longitude and latitude coordinates."""

    # Do we have some standard names as variable?
    vns = ds.variables.keys()
    xc = utils.str_in_list(vns, utils.valid_names['x_dim'])
    yc = utils.str_in_list(vns, utils.valid_names['y_dim'])

    # Sometimes there are more than one coordinates, one of which might have
    # more dims (e.g. lons in WRF files): take the first one with ndim = 1:
    x = None
    for xp in xc:
        if len(ds.variables[xp].shape) == 1:
            x = xp
    y = None
    for yp in yc:
        if len(ds.variables[yp].shape) == 1:
            y = yp
    if (x is None) or (y is None):
        return None

    # OK, get it
    lon = ds.variables[x][:]
    lat = ds.variables[y][:]

    # double check for dubious variables
    if not utils.str_in_list([x], utils.valid_names['lon_var']) or \
            not utils.str_in_list([y], utils.valid_names['lat_var']):
        # name not usual. see if at least the range follows some conv
        if (np.max(np.abs(lon)) > 360.1) or (np.max(np.abs(lat)) > 90.1):
            return None

    # Make the grid
    dx = lon[1]-lon[0]
    dy = lat[1]-lat[0]
    args = dict(nxny=(lon.shape[0], lat.shape[0]), proj=wgs84, dxdy=(dx, dy))
    args['corner'] = (lon[0], lat[0])
    return gis.Grid(**args)
Esempio n. 8
0
def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands."""

    import pandas as pd

    time = None
    try:
        vt = utils.str_in_list(ncobj.variables.keys(),
                               utils.valid_names['time_var'])[0]
    except IndexError:
        # no time variable
        return None

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file
        time = []
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            stimes = ncobj.variables['Times'][:]
        for t in stimes:
            time.append(
                pd.to_datetime(t.tobytes().decode(),
                               errors='raise',
                               format='%Y-%m-%d_%H:%M:%S'))
    elif vt is not None:
        # CF time
        var = ncobj.variables[vt]
        try:
            # We want python times because pandas doesn't understand
            # CFtime
            time = cftime.num2date(var[:],
                                   var.units,
                                   only_use_cftime_datetimes=False,
                                   only_use_python_datetimes=True)
        except TypeError:
            # Old versions of cftime did return python times when possible
            time = cftime.num2date(var[:], var.units)

        if monthbegin:
            # sometimes monthly data is centered in the month (stupid)
            time = [datetime(t.year, t.month, 1) for t in time]

    return time
Esempio n. 9
0
def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands."""

    import pandas as pd

    time = None
    try:
        vt = utils.str_in_list(ncobj.variables.keys(),
                               utils.valid_names['time_var'])[0]
    except IndexError:
        # no time variable
        return None

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file
        time = []
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            stimes = ncobj.variables['Times'][:]
        for t in stimes:
            time.append(pd.to_datetime(t.tostring().decode(),
                                       errors='raise',
                                       format='%Y-%m-%d_%H:%M:%S'))
    elif vt is not None:
        # CF time
        var = ncobj.variables[vt]
        time = netCDF4.num2date(var[:], var.units)

        if monthbegin:
            # sometimes monthly data is centered in the month (stupid)
            time = [datetime(t.year, t.month, 1) for t in time]

    return time
Esempio n. 10
0
    def _apply_transform(self, transform, grid, other, return_lut=False):
        """Common transform mixin"""

        was_dataarray = False
        if not isinstance(other, xr.Dataset):
            try:
                other = other.to_dataset(name=other.name)
                was_dataarray = True
            except AttributeError:
                # must be a ndarray
                if return_lut:
                    rdata, lut = transform(other, grid=grid, return_lut=True)
                else:
                    rdata = transform(other, grid=grid)
                # let's guess
                sh = rdata.shape
                nd = len(sh)
                if nd == 2:
                    dims = (self.y_dim, self.x_dim)
                elif nd == 3:
                    newdim = 'new_dim'
                    if self.t_dim and sh[0] == self._obj.dims[self.t_dim]:
                        newdim = self.t_dim
                    if self.z_dim and sh[0] == self._obj.dims[self.z_dim]:
                        newdim = self.z_dim
                    dims = (newdim, self.y_dim, self.x_dim)
                else:
                    raise NotImplementedError('more than 3 dims not ok yet.')

                coords = {}
                for d in dims:
                    if d in self._obj:
                        coords[d] = self._obj[d]

                out = xr.DataArray(rdata, coords=coords, dims=dims)
                out.attrs['pyproj_srs'] = self.grid.proj.srs
                if return_lut:
                    return out, lut
                else:
                    return out

        # go
        out = xr.Dataset()
        for v in other.data_vars:
            var = other[v]
            if return_lut:
                rdata, lut = transform(var, return_lut=True)
            else:
                rdata = transform(var)

            # remove old coords
            dims = [d for d in var.dims]
            coords = {}
            for c in var.coords:
                n = utils.str_in_list([c], utils.valid_names['x_dim'])
                if n:
                    dims = [self.x_dim if x in n else x for x in dims]
                    continue
                n = utils.str_in_list([c], utils.valid_names['y_dim'])
                if n:
                    dims = [self.y_dim if x in n else x for x in dims]
                    continue
                coords[c] = var.coords[c]
            # add new ones
            coords[self.x_dim] = self._obj[self.x_dim]
            coords[self.y_dim] = self._obj[self.y_dim]

            rdata = xr.DataArray(rdata,
                                 coords=coords,
                                 attrs=var.attrs,
                                 dims=dims)
            rdata.attrs['pyproj_srs'] = self.grid.proj.srs
            out[v] = rdata

        if was_dataarray:
            out = out[v]
        else:
            out.attrs['pyproj_srs'] = self.grid.proj.srs

        if return_lut:
            return out, lut
        else:
            return out
Esempio n. 11
0
    def transform(self, other, grid=None, interp='nearest', ks=3):
        """Reprojects an other Dataset or DataArray onto this grid.

        Parameters
        ----------
        other: Dataset, DataArray or ndarray
            the data to project onto self
        grid: salem.Grid
            in case the input dataset does not carry georef info
        interp : str
            'nearest' (default), 'linear', or 'spline'
        ks : int
            Degree of the bivariate spline. Default is 3.

        Returns
        -------
        a dataset or a dataarray
        """

        was_dataarray = False
        if not isinstance(other, xr.Dataset):
            try:
                other = other.to_dataset(name=other.name)
                was_dataarray = True
            except AttributeError:
                # must be a ndarray
                rdata = self.grid.map_gridded_data(other, grid=grid,
                                                   interp=interp, ks=ks)
                # let's guess
                sh = rdata.shape
                nd = len(sh)
                if nd == 2:
                    dims = (self.y_dim, self.x_dim)
                elif nd == 3:
                    newdim = 'new_dim'
                    if self.t_dim and sh[0] == self._obj.dims[self.t_dim]:
                        newdim = self.t_dim
                    if self.z_dim and sh[0] == self._obj.dims[self.z_dim]:
                        newdim = self.z_dim
                    dims = (newdim, self.y_dim, self.x_dim)
                else:
                    raise NotImplementedError('more than 3 dims not ok yet.')

                coords = {}
                for d in dims:
                    if d in self._obj:
                        coords[d] = self._obj[d]

                out = xr.DataArray(rdata, coords=coords, dims=dims)
                out.attrs['pyproj_srs'] = self.grid.proj.srs
                return out

        # go
        out = xr.Dataset()
        for v in other.data_vars:
            var = other[v]
            rdata = self.grid.map_gridded_data(var, interp=interp, ks=ks)

            # remove old coords
            dims = [d for d in var.dims]
            coords = {}
            for c in var.coords:
                n = utils.str_in_list([c], utils.valid_names['x_dim'])
                if n:
                    dims = [self.x_dim if x in n else x for x in dims]
                    continue
                n = utils.str_in_list([c], utils.valid_names['y_dim'])
                if n:
                    dims = [self.y_dim if x in n else x for x in dims]
                    continue
                coords[c] = var.coords[c]
            # add new ones
            coords[self.x_dim] = self._obj[self.x_dim]
            coords[self.y_dim] = self._obj[self.y_dim]

            rdata = xr.DataArray(rdata, coords=coords, attrs=var.attrs, dims=dims)
            rdata.attrs['pyproj_srs'] = self.grid.proj.srs
            out[v] = rdata

        if was_dataarray:
            out = out[v]
        else:
            out.attrs['pyproj_srs'] = self.grid.proj.srs
        return out