def __init__(self, file, grid=None, time=None, monthbegin=False):
    """Open a netCDF file and work out its grid, time and dimension names.

    Parameters
    ----------
    file : path to the netcdf file
    grid : a Grid object, optional
        Used as is when given. When None, the grid is obtained from
        ``sio.grid_from_dataset``.
    time : a time array, optional
        Used as is when given. When None, the time is obtained from
        ``sio.netcdf_time``.
    monthbegin : bool
        Forwarded to ``sio.netcdf_time``. Set to True if you are sure that
        your data is monthly and that the data provider tagged each date
        at the center of the month.
    """
    dataset = netCDF4.Dataset(file)
    self._nc = dataset
    self.variables = dataset.variables

    # Only guess what the caller did not provide explicitly
    if grid is None:
        grid = sio.grid_from_dataset(dataset)
    if time is None:
        time = sio.netcdf_time(dataset, monthbegin=monthbegin)

    dim_names = dataset.dimensions.keys()

    # Horizontal dimensions are mandatory: the first match is taken and
    # there is no fallback when nothing matches
    self.x_dim = utils.str_in_list(dim_names, utils.valid_names['x_dim'])[0]
    self.y_dim = utils.str_in_list(dim_names, utils.valid_names['y_dim'])[0]

    # Time and vertical dimensions are optional: None when not found
    for attr in ('t_dim', 'z_dim'):
        found = utils.str_in_list(dim_names, utils.valid_names[attr])
        setattr(self, attr, found[0] if found else None)

    GeoDataset.__init__(self, grid, time=time)
def _salem_grid_from_dataset(ds):
    """Build a grid from coordinates that Salem might have written itself.

    Current convention: x and y coordinate variables with a recognised
    name, and a ``pyproj_srs`` attribute on the dataset.

    Returns a ``gis.Grid``, or None when the projection is not accepted by
    ``gis.check_crs`` or when no one-dimensional x / y coordinate is found.
    """
    # Projection: a missing attribute is handed over to check_crs as None
    crs = gis.check_crs(getattr(ds, 'pyproj_srs', None))
    if crs is None:
        return None

    # Variables carrying one of the standard coordinate names
    names = ds.variables.keys()
    x_names = utils.str_in_list(names, utils.valid_names['x_dim'])
    y_names = utils.str_in_list(names, utils.valid_names['y_dim'])

    # There can be several candidates, some with more dims (e.g. lons in
    # WRF files): keep the 1D ones only
    x_1d = [n for n in x_names if len(ds.variables[n].shape) == 1]
    y_1d = [n for n in y_names if len(ds.variables[n].shape) == 1]
    if not x_1d or not y_1d:
        return None

    # When several 1D candidates exist, the last one listed is used
    xvals = ds.variables[x_1d[-1]][:]
    yvals = ds.variables[y_1d[-1]][:]

    # Spacing is taken from the first two points of each coordinate
    spacing = (xvals[1] - xvals[0], yvals[1] - yvals[0])
    return gis.Grid(nxny=(xvals.shape[0], yvals.shape[0]), proj=crs,
                    dxdy=spacing, x0y0=(xvals[0], yvals[0]))
def _salem_grid_from_dataset(ds):
    """Build a grid from coordinates that Salem might have written itself.

    Current convention: x and y coordinate variables with a recognised
    name, and a ``pyproj_srs`` attribute on the dataset.

    Returns a ``gis.Grid``, or None when the projection is not accepted by
    ``gis.check_crs`` or when no one-dimensional x / y coordinate is found.
    """
    # Projection: a missing attribute is handed over to check_crs as None
    crs = gis.check_crs(getattr(ds, 'pyproj_srs', None))
    if crs is None:
        return None

    # Variables carrying one of the standard coordinate names
    names = ds.variables.keys()
    x_names = utils.str_in_list(names, utils.valid_names['x_dim'])
    y_names = utils.str_in_list(names, utils.valid_names['y_dim'])

    # There can be several candidates, some with more dims (e.g. lons in
    # WRF files): keep the 1D ones only
    x_1d = [n for n in x_names if len(ds.variables[n].shape) == 1]
    y_1d = [n for n in y_names if len(ds.variables[n].shape) == 1]
    if not x_1d or not y_1d:
        return None

    # When several 1D candidates exist, the last one listed is used
    xvals = ds.variables[x_1d[-1]][:]
    yvals = ds.variables[y_1d[-1]][:]

    # Spacing is taken from the first two points of each coordinate; the
    # first point of each coordinate is passed as the grid corner
    spacing = (xvals[1] - xvals[0], yvals[1] - yvals[0])
    return gis.Grid(nxny=(xvals.shape[0], yvals.shape[0]), proj=crs,
                    dxdy=spacing, corner=(xvals[0], yvals[0]))
def _lonlat_grid_from_dataset(ds):
    """Build a regular lon/lat grid from 1D coordinate variables.

    Returns a ``gis.Grid`` in ``wgs84``, or None when no one-dimensional
    x / y coordinate is found, or when the coordinates have unusual names
    and values outside of the longitude / latitude range.
    """
    # Variables carrying one of the standard coordinate names
    names = ds.variables.keys()
    x_names = utils.str_in_list(names, utils.valid_names['x_dim'])
    y_names = utils.str_in_list(names, utils.valid_names['y_dim'])

    # There can be several candidates, some with more dims (e.g. lons in
    # WRF files): keep the 1D ones only
    x_1d = [n for n in x_names if len(ds.variables[n].shape) == 1]
    y_1d = [n for n in y_names if len(ds.variables[n].shape) == 1]
    if not x_1d or not y_1d:
        return None

    # When several 1D candidates exist, the last one listed is used
    x_name, y_name = x_1d[-1], y_1d[-1]
    lon = ds.variables[x_name][:]
    lat = ds.variables[y_name][:]

    # Names that are not the usual lon/lat ones are dubious: accept them
    # only if the values at least stay within the conventional range
    usual_names = (
        utils.str_in_list([x_name], utils.valid_names['lon_var'])
        and utils.str_in_list([y_name], utils.valid_names['lat_var'])
    )
    if not usual_names:
        if np.max(np.abs(lon)) > 360.1:
            return None
        if np.max(np.abs(lat)) > 90.1:
            return None

    # Spacing is taken from the first two points of each coordinate
    spacing = (lon[1] - lon[0], lat[1] - lat[0])
    return gis.Grid(nxny=(lon.shape[0], lat.shape[0]), proj=wgs84,
                    dxdy=spacing, x0y0=(lon[0], lat[0]))
def __init__(self, xarray_obj):
    """Attach georeferencing information to an xarray object.

    Parameters
    ----------
    xarray_obj : xr.Dataset or xr.DataArray
        The wrapped object, stored unchanged as ``self._obj``. A DataArray
        is converted to a temporary Dataset (variable name ``'var'``) only
        for the purpose of reading the grid and the dimension names.

    Raises
    ------
    RuntimeError
        If ``grid_from_dataset`` cannot make a grid out of the object.
    """
    self._obj = xarray_obj

    if isinstance(xarray_obj, xr.DataArray):
        xarray_obj = xarray_obj.to_dataset(name='var')
        try:
            # maybe there was already some georef on the array: copy it to
            # the temporary dataset so that the grid lookup can see it
            xarray_obj.attrs['pyproj_srs'] = xarray_obj['var'].pyproj_srs
        except (AttributeError, KeyError):
            # No projection information on the array. This is a
            # best-effort step, but only the "not there" errors are
            # ignored so that e.g. KeyboardInterrupt is not swallowed.
            pass

    self.grid = grid_from_dataset(xarray_obj)
    if self.grid is None:
        raise RuntimeError('dataset Grid not understood.')

    dn = xarray_obj.dims.keys()

    # Horizontal dimensions are mandatory: the first match is taken
    self.x_dim = utils.str_in_list(dn, utils.valid_names['x_dim'])[0]
    self.y_dim = utils.str_in_list(dn, utils.valid_names['y_dim'])[0]

    # Time and vertical dimensions are optional: None when not found
    dim = utils.str_in_list(dn, utils.valid_names['t_dim'])
    self.t_dim = dim[0] if dim else None
    dim = utils.str_in_list(dn, utils.valid_names['z_dim'])
    self.z_dim = dim[0] if dim else None
def _lonlat_grid_from_dataset(ds):
    """Build a regular lon/lat grid from 1D coordinate variables.

    Returns a ``gis.Grid`` in ``wgs84``, or None when no one-dimensional
    x / y coordinate is found, or when the coordinates have unusual names
    and values outside of the longitude / latitude range.
    """
    # Variables carrying one of the standard coordinate names
    names = ds.variables.keys()
    x_names = utils.str_in_list(names, utils.valid_names['x_dim'])
    y_names = utils.str_in_list(names, utils.valid_names['y_dim'])

    # There can be several candidates, some with more dims (e.g. lons in
    # WRF files): keep the 1D ones only
    x_1d = [n for n in x_names if len(ds.variables[n].shape) == 1]
    y_1d = [n for n in y_names if len(ds.variables[n].shape) == 1]
    if not x_1d or not y_1d:
        return None

    # When several 1D candidates exist, the last one listed is used
    x_name, y_name = x_1d[-1], y_1d[-1]
    lon = ds.variables[x_name][:]
    lat = ds.variables[y_name][:]

    # Names that are not the usual lon/lat ones are dubious: accept them
    # only if the values at least stay within the conventional range
    usual_names = (
        utils.str_in_list([x_name], utils.valid_names['lon_var'])
        and utils.str_in_list([y_name], utils.valid_names['lat_var'])
    )
    if not usual_names:
        if np.max(np.abs(lon)) > 360.1:
            return None
        if np.max(np.abs(lat)) > 90.1:
            return None

    # Spacing is taken from the first two points of each coordinate; the
    # first point of each coordinate is passed as the grid corner
    spacing = (lon[1] - lon[0], lat[1] - lat[0])
    return gis.Grid(nxny=(lon.shape[0], lat.shape[0]), proj=wgs84,
                    dxdy=spacing, corner=(lon[0], lat[0]))
def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands.

    Parameters
    ----------
    ncobj : an opened dataset
        Must expose ``variables`` and item access by variable name.
    monthbegin : bool
        If True, every decoded time is replaced by the first day of its
        month (for monthly data tagged at the center of the month).

    Returns
    -------
    None if there is no time variable or if the file is a geogrid file.
    Otherwise the decoded times: a list of ``pd.to_datetime`` results for
    WRF files, the output of ``cftime.num2date`` for CF files, or a list
    of ``datetime`` when ``monthbegin`` is set.
    """
    import pandas as pd

    time = None
    try:
        vt = utils.str_in_list(ncobj.variables.keys(),
                               utils.valid_names['time_var'])[0]
    except IndexError:
        # no time variable
        return None

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file: time is left undefined
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file: times are stored as character arrays
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            # no ``.values`` on the sliced variable: use the slice itself
            stimes = ncobj.variables['Times'][:]
        time = [
            pd.to_datetime(t.tobytes().decode(), errors='raise',
                           format='%Y-%m-%d_%H:%M:%S')
            for t in stimes
        ]
    else:
        # CF time
        var = ncobj.variables[vt]
        try:
            # We want python times because pandas doesn't understand
            # CFtime
            time = cftime.num2date(var[:], var.units,
                                   only_use_cftime_datetimes=False,
                                   only_use_python_datetimes=True)
        except TypeError:
            # Old versions of cftime did return python times when possible
            time = cftime.num2date(var[:], var.units)

    # time is still None for geogrid files: nothing to shift in that case
    if monthbegin and time is not None:
        # sometimes monthly data is centered in the month
        time = [datetime(t.year, t.month, 1) for t in time]
    return time
def netcdf_time(ncobj, monthbegin=False):
    """Check if the netcdf file contains a time that Salem understands.

    Parameters
    ----------
    ncobj : an opened dataset
        Must expose ``variables`` and item access by variable name.
    monthbegin : bool
        If True, every decoded time is replaced by the first day of its
        month (for monthly data tagged at the center of the month).

    Returns
    -------
    None if there is no time variable or if the file is a geogrid file.
    Otherwise the decoded times: a list of ``pd.to_datetime`` results for
    WRF files, the output of ``netCDF4.num2date`` for CF files, or a list
    of ``datetime`` when ``monthbegin`` is set.
    """
    import pandas as pd

    time = None
    try:
        vt = utils.str_in_list(ncobj.variables.keys(),
                               utils.valid_names['time_var'])[0]
    except IndexError:
        # no time variable
        return None

    if hasattr(ncobj, 'TITLE') and 'GEOGRID' in ncobj.TITLE:
        # geogrid file: time is left undefined
        pass
    elif ncobj[vt].dtype in ['|S1', '|S19']:
        # WRF file: times are stored as character arrays
        try:
            stimes = ncobj.variables['Times'][:].values
        except AttributeError:
            # no ``.values`` on the sliced variable: use the slice itself
            stimes = ncobj.variables['Times'][:]
        # tobytes() replaces the deprecated ndarray.tostring() alias
        time = [
            pd.to_datetime(t.tobytes().decode(), errors='raise',
                           format='%Y-%m-%d_%H:%M:%S')
            for t in stimes
        ]
    else:
        # CF time
        var = ncobj.variables[vt]
        time = netCDF4.num2date(var[:], var.units)

    # time is still None for geogrid files: nothing to shift in that case
    if monthbegin and time is not None:
        # sometimes monthly data is centered in the month
        time = [datetime(t.year, t.month, 1) for t in time]
    return time
def _apply_transform(self, transform, grid, other, return_lut=False):
    """Run ``transform`` on ``other`` and dress the result with our coords.

    Parameters
    ----------
    transform : callable
        Called as ``transform(array, grid=grid)`` for plain arrays and as
        ``transform(variable)`` for each data variable otherwise. When
        ``return_lut`` is set, ``return_lut=True`` is added to the call and
        the callable must return a ``(data, lut)`` pair.
    grid : forwarded to ``transform`` for plain arrays only
    other : Dataset, DataArray or ndarray
        the data to transform
    return_lut : bool
        also return the lookup table given back by ``transform``

    Returns
    -------
    A Dataset if a Dataset was given, a DataArray otherwise; or the tuple
    ``(result, lut)`` when ``return_lut`` is set. With several data
    variables, the lut of the last one is returned.
    """
    lut_kwargs = {'return_lut': True} if return_lut else {}

    from_dataarray = False
    if not isinstance(other, xr.Dataset):
        try:
            other = other.to_dataset(name=other.name)
            from_dataarray = True
        except AttributeError:
            # must be a ndarray
            result = transform(other, grid=grid, **lut_kwargs)
            if return_lut:
                values, lut = result
            else:
                values = result

            # No metadata available: guess the dims from the shape
            shape = values.shape
            ndim = len(shape)
            if ndim not in (2, 3):
                raise NotImplementedError('more than 3 dims not ok yet.')
            if ndim == 2:
                dims = (self.y_dim, self.x_dim)
            else:
                # The leading dim is matched by size against our time and
                # vertical dims; the vertical one wins if both match
                leading = 'new_dim'
                for candidate in (self.t_dim, self.z_dim):
                    if candidate and shape[0] == self._obj.dims[candidate]:
                        leading = candidate
                dims = (leading, self.y_dim, self.x_dim)

            coords = {d: self._obj[d] for d in dims if d in self._obj}
            out = xr.DataArray(values, coords=coords, dims=dims)
            out.attrs['pyproj_srs'] = self.grid.proj.srs
            return (out, lut) if return_lut else out

    # Dataset (or converted DataArray): transform each variable in turn
    out = xr.Dataset()
    for vname in other.data_vars:
        var = other[vname]
        result = transform(var, **lut_kwargs)
        if return_lut:
            values, lut = result
        else:
            values = result

        # Drop the old horizontal coords and rename their dims to ours;
        # every other coord is carried over unchanged
        dims = list(var.dims)
        coords = {}
        for cname in var.coords:
            x_hit = utils.str_in_list([cname], utils.valid_names['x_dim'])
            if x_hit:
                dims = [self.x_dim if d in x_hit else d for d in dims]
            else:
                y_hit = utils.str_in_list([cname],
                                          utils.valid_names['y_dim'])
                if y_hit:
                    dims = [self.y_dim if d in y_hit else d for d in dims]
                else:
                    coords[cname] = var.coords[cname]

        # Add our own horizontal coords
        coords[self.x_dim] = self._obj[self.x_dim]
        coords[self.y_dim] = self._obj[self.y_dim]

        new_var = xr.DataArray(values, coords=coords, attrs=var.attrs,
                               dims=dims)
        new_var.attrs['pyproj_srs'] = self.grid.proj.srs
        out[vname] = new_var

    if from_dataarray:
        # Give back the same kind of object as we were given
        out = out[vname]
    else:
        out.attrs['pyproj_srs'] = self.grid.proj.srs

    return (out, lut) if return_lut else out
def transform(self, other, grid=None, interp='nearest', ks=3):
    """Reproject another Dataset, DataArray or ndarray onto this grid.

    Parameters
    ----------
    other : Dataset, DataArray or ndarray
        the data to project onto self
    grid : salem.Grid
        in case the input does not carry georef info; only forwarded to
        ``map_gridded_data`` when ``other`` is a plain array
    interp : str
        'nearest' (default), 'linear', or 'spline'
    ks : int
        Degree of the bivariate spline. Default is 3.

    Returns
    -------
    a Dataset if a Dataset was given, a DataArray otherwise
    """
    from_dataarray = False
    if not isinstance(other, xr.Dataset):
        try:
            other = other.to_dataset(name=other.name)
            from_dataarray = True
        except AttributeError:
            # must be a ndarray
            values = self.grid.map_gridded_data(other, grid=grid,
                                                interp=interp, ks=ks)

            # No metadata available: guess the dims from the shape
            shape = values.shape
            ndim = len(shape)
            if ndim not in (2, 3):
                raise NotImplementedError('more than 3 dims not ok yet.')
            if ndim == 2:
                dims = (self.y_dim, self.x_dim)
            else:
                # The leading dim is matched by size against our time and
                # vertical dims; the vertical one wins if both match
                leading = 'new_dim'
                for candidate in (self.t_dim, self.z_dim):
                    if candidate and shape[0] == self._obj.dims[candidate]:
                        leading = candidate
                dims = (leading, self.y_dim, self.x_dim)

            coords = {d: self._obj[d] for d in dims if d in self._obj}
            out = xr.DataArray(values, coords=coords, dims=dims)
            out.attrs['pyproj_srs'] = self.grid.proj.srs
            return out

    # Dataset (or converted DataArray): reproject each variable in turn
    out = xr.Dataset()
    for vname in other.data_vars:
        var = other[vname]
        values = self.grid.map_gridded_data(var, interp=interp, ks=ks)

        # Drop the old horizontal coords and rename their dims to ours;
        # every other coord is carried over unchanged
        dims = list(var.dims)
        coords = {}
        for cname in var.coords:
            x_hit = utils.str_in_list([cname], utils.valid_names['x_dim'])
            if x_hit:
                dims = [self.x_dim if d in x_hit else d for d in dims]
            else:
                y_hit = utils.str_in_list([cname],
                                          utils.valid_names['y_dim'])
                if y_hit:
                    dims = [self.y_dim if d in y_hit else d for d in dims]
                else:
                    coords[cname] = var.coords[cname]

        # Add our own horizontal coords
        coords[self.x_dim] = self._obj[self.x_dim]
        coords[self.y_dim] = self._obj[self.y_dim]

        new_var = xr.DataArray(values, coords=coords, attrs=var.attrs,
                               dims=dims)
        new_var.attrs['pyproj_srs'] = self.grid.proj.srs
        out[vname] = new_var

    if from_dataarray:
        # Give back the same kind of object as we were given
        out = out[vname]
    else:
        out.attrs['pyproj_srs'] = self.grid.proj.srs
    return out