def test_read_write(self):
    """
    Write a three-variable data set and check it is read back unchanged.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    db_attrs = {'unit': 'dB'}
    degree_attrs = {'unit': 'degree', 'fill_value': -9999}

    data_vars = {
        'sig': (dim_names, cube, db_attrs),
        'inc': (dim_names, cube, degree_attrs),
        'azi': (dim_names, cube, degree_attrs)
    }
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    with NcFile(self.filename, mode='w') as nc:
        nc.write(self.ds)

    with NcFile(self.filename) as nc:
        ds = nc.read()

        # every variable must survive the round trip untouched
        for var_name in ('sig', 'inc', 'azi'):
            np.testing.assert_array_equal(ds[var_name][:],
                                          self.ds[var_name][:])
def test_append(self):
    """
    Write the same data set three times (create, then append twice) and
    check that the file holds the data repeated three times.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    data_vars = {'sig': (dim_names, cube), 'inc': (dim_names, cube)}
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    # first pass creates the file, the next two append to it
    for file_mode in ('w', 'a', 'a'):
        with NcFile(self.filename, mode=file_mode) as nc:
            nc.write(self.ds)

    with NcFile(self.filename) as nc:
        ds = nc.read()

        expected = np.repeat(self.ds['sig'][:], 3, axis=0)
        np.testing.assert_array_equal(ds['sig'][:], expected)
def test_geotransform(self):
    """
    Check the x and y coordinates derived from the geotransformation
    parameters passed at write time.
    """
    xdim = 100
    ydim = 200
    cube = np.ones((100, ydim, xdim), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    data_vars = {'sig': (dim_names, cube), 'inc': (dim_names, cube)}
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    geotrans = (3000000.0, 500.0, 0.0, 1800000.0, 0.0, -500.0)

    with NcFile(self.filename, mode='w', geotrans=geotrans) as nc:
        nc.write(self.ds)

    with NcFile(self.filename) as nc:
        ds = nc.read()

        # expected coordinates use index + 0.5 as the pixel position
        x_pos = 0.5 + np.arange(xdim)
        y_pos = 0.5 + np.arange(ydim)
        x = geotrans[0] + x_pos * geotrans[1] + x_pos * geotrans[2]
        y = geotrans[3] + y_pos * geotrans[4] + y_pos * geotrans[5]

        np.testing.assert_array_equal(ds['x'].values, x)
        np.testing.assert_array_equal(ds['y'].values, y)
def test_chunk_cache(self):
    """
    Check that the variable chunk cache settings handed to NcFile are
    reported by the 'sig' variable, both after writing and when the file
    is re-opened in 'r_netcdf' mode.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    data_vars = {'sig': (dim_names, cube), 'inc': (dim_names, cube)}
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    # (size, nelems, preemption)
    var_chunk_cache = (1024 * 64, 500, 0.75)

    with NcFile(self.filename, mode='w',
                var_chunk_cache=var_chunk_cache) as nc:
        nc.write(self.ds)
        reported = nc.src['sig'].get_var_chunk_cache()
        self.assertEqual(var_chunk_cache, reported)

    with NcFile(self.filename, mode='r_netcdf',
                var_chunk_cache=var_chunk_cache) as nc:
        reported = nc.src['sig'].get_var_chunk_cache()
        self.assertEqual(var_chunk_cache, reported)
def test_auto_decoding(self):
    """
    Check reading with and without automatic decoding in both the
    'r_xarray' and the 'r_netcdf' mode.

    Without decoding the values must equal the written ones; with decoding
    the scale factor and offset stored in the variable attributes must be
    applied.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'x', 'y']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    sig_attrs = {
        'unit': 'dB',
        'scale_factor': 2,
        'add_offset': 3,
        'fill_value': -9999
    }
    inc_attrs = {
        'unit': 'degree',
        'fill_value': -9999,
        'scale_factor': 2,
        'add_offset': 0
    }
    azi_attrs = {'unit': 'degree', 'fill_value': -9999}

    data_vars = {
        'sig': (dim_names, cube, sig_attrs),
        'inc': (dim_names, cube, inc_attrs),
        'azi': (dim_names, cube, azi_attrs)
    }
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    with NcFile(self.filename, mode='w') as nc:
        nc.write(self.ds)

    for read_mode in ('r_xarray', 'r_netcdf'):
        # raw values: nothing is scaled or shifted
        with NcFile(self.filename, mode=read_mode,
                    auto_decode=False) as nc:
            ds = nc.read()
            for var_name in ('sig', 'inc', 'azi'):
                np.testing.assert_array_equal(ds[var_name][:],
                                              self.ds[var_name][:])

        # decoded values: scale factor and offset are applied
        with NcFile(self.filename, mode=read_mode,
                    auto_decode=True) as nc:
            ds = nc.read()
            np.testing.assert_array_equal(ds['sig'][:],
                                          self.ds['sig'][:] * 2 + 3)
            np.testing.assert_array_equal(ds['inc'][:],
                                          self.ds['inc'][:] * 2)
            np.testing.assert_array_equal(ds['azi'][:],
                                          self.ds['azi'][:])
def write(self, ds):
    """
    Write data set into raster time stack.

    With a stack size of 'single' the whole data set goes into one file
    named from the file name prefix and suffix. Otherwise the stack size is
    used as a `strftime` pattern on the time index and one file is written
    per distinct formatted label. Existing files are appended to, new files
    are created.

    Parameters
    ----------
    ds : xarray.Dataset
        Input data set.

    Returns
    -------
    pandas.DataFrame
        Data frame with one column 'filenames' holding the full path of
        every file written.
    """
    # each job is (full file path, time selection); a selection of None
    # means the complete data set is written
    if self.stack_size == 'single':
        single_name = '{:}{:}'.format(self.fn_prefix, self.fn_suffix)
        jobs = [(os.path.join(self.out_path, single_name), None)]
    else:
        labels = ds['time'].to_index().strftime(self.stack_size)
        unique_labels, first_pos = np.unique(labels, return_index=True)
        # append the total length so that consecutive entries bound a stack
        bounds = np.hstack((first_pos, len(labels)))

        jobs = []
        for pos, label in enumerate(unique_labels):
            stack_name = '{:}{:}{:}'.format(self.fn_prefix, label,
                                            self.fn_suffix)
            jobs.append((os.path.join(self.out_path, stack_name),
                         np.arange(bounds[pos], bounds[pos + 1])))

    for target, time_sel in jobs:
        file_mode = 'a' if os.path.exists(target) else 'w'
        with NcFile(target,
                    mode=file_mode,
                    complevel=self.compression,
                    geotransform=self.geotransform,
                    spatialref=self.spatialref,
                    chunksizes=self.chunksizes) as nc:
            nc.write(ds if time_sel is None else ds.isel(time=time_sel))

    return pd.DataFrame({'filenames': [target for target, _ in jobs]})
def write(self, ds, filepath, band=None, encoder=None, nodataval=None, encoder_kwargs=None, auto_scale=False):
    """
    Write a data set to a NetCDF file, appending if the file exists.

    Parameters
    ----------
    ds : xarray.Dataset
        Input data set.
    filepath : str
        Path of the NetCDF file. An existing file is opened in append mode,
        otherwise a new file is created.
    band : optional
        Forwarded to `NcFile.write` as `band`.
    encoder : optional
        Forwarded to `NcFile.write` as `encoder`.
    nodataval : optional
        Forwarded to `NcFile.write` as `nodataval`.
    encoder_kwargs : optional
        Forwarded to `NcFile.write` as `encoder_kwargs`.
    auto_scale : bool, optional
        Accepted for interface compatibility; it is not forwarded to
        `NcFile.write` by this method.
    """
    file_mode = 'a' if os.path.exists(filepath) else 'w'

    file_options = {
        'mode': file_mode,
        'complevel': self.compression,
        'geotrans': self.geotrans,
        'sref': self.sref,
        'chunksizes': self.chunksizes
    }

    with NcFile(filepath, **file_options) as nc:
        nc.write(ds,
                 band=band,
                 nodataval=nodataval,
                 encoder=encoder,
                 encoder_kwargs=encoder_kwargs)
def _build_stack(self):
    """
    Open all files of the inventory as one multi-file xarray data set.

    Files with two dimensions are concatenated along a new dimension named
    after the inventory index, which also provides the coordinate values.
    Other files are combined by their own coordinates.

    Raises
    ------
    RuntimeError
        If no inventory is available.
    """
    if self.inventory is None:
        raise RuntimeError('Building stack failed')

    is_two_dim = self._dims == 2
    filepaths = self.inventory.dropna()['filepath'].tolist()

    if not is_two_dim:
        self.mfdataset = xr.open_mfdataset(filepaths,
                                           chunks=self.chunks,
                                           combine='by_coords',
                                           mask_and_scale=self.auto_decode,
                                           use_cftime=False)
        return

    stack_dim = self.inventory.index.name
    self.mfdataset = xr.open_mfdataset(filepaths,
                                       chunks=self.chunks,
                                       combine="nested",
                                       concat_dim=stack_dim,
                                       mask_and_scale=self.auto_decode,
                                       use_cftime=False)
    self.mfdataset = self.mfdataset.assign_coords(
        {stack_dim: self.inventory.index})

    # the grid mapping variable was concatenated along the stack dimension
    # as well; keep a single copy and drop that dimension again
    gm_name = NcFile.get_gm_name(self.mfdataset)
    if gm_name is not None:
        stack_selector = {stack_dim: 0}
        self.mfdataset[gm_name] = self.mfdataset[gm_name].sel(
            **stack_selector, drop=True)
def write_netcdfs(self, ds, dir_path, stack_size="%Y%m", fn_prefix='', fn_suffix='.nc'):
    """
    Split a data set along time and write one NetCDF file per stack.

    Parameters
    ----------
    ds : xarray.Dataset
        Input data set with a 'time' coordinate.
    dir_path : str
        Directory the files are written to.
    stack_size : str, optional
        Defines the stacks (default is "%Y%m"). If it contains 'H', 'min'
        or 'T' it is used as a frequency to floor the time stamps,
        otherwise as a `strftime` pattern.
    fn_prefix : str, optional
        Prefix of every file name (default is '').
    fn_suffix : str, optional
        Suffix of every file name (default is '.nc').

    Returns
    -------
    pandas.DataFrame
        Data frame with the column 'filepath', indexed by the time stamp
        of each stack.
    """
    # stacks are smaller than 1D if the stack size is a sub-daily frequency
    is_sub_daily = any(token in stack_size for token in ['H', 'min', 'T'])

    time_index = ds['time'].to_index()
    if is_sub_daily:
        labels = time_index.floor(stack_size)
    else:
        labels = time_index.strftime(stack_size)

    unique_labels, first_pos = np.unique(labels, return_index=True)
    # inclusive left, exclusive right
    bounds = np.hstack((first_pos, len(labels)))

    filepaths = []
    timestamps = []
    for pos, label in enumerate(unique_labels):
        time_sel = np.arange(bounds[pos], bounds[pos + 1])
        first_time = ds['time'][[bounds[pos]]].to_index()

        if is_sub_daily:
            timestamp = first_time.floor(stack_size)[0].to_datetime64()
            label = pd.to_datetime(str(label)).strftime('%Y%m%d_%H%M%S')
        else:
            timestamp = datetime.strptime(
                first_time.strftime(stack_size)[0], stack_size)
        timestamps.append(timestamp)

        filepath = os.path.join(
            dir_path, '{:}{:}{:}'.format(fn_prefix, label, fn_suffix))
        filepaths.append(filepath)

        file_mode = 'a' if os.path.exists(filepath) else 'w'
        with NcFile(filepath,
                    mode=file_mode,
                    complevel=self.compression,
                    geotrans=self.geotrans,
                    sref=self.sref,
                    chunksizes=self.chunksizes) as nc:
            nc.write(ds.isel(time=time_sel))

    return pd.DataFrame({'filepath': filepaths}, index=timestamps)
def test_time_units(self):
    """
    Write and read with explicit time units and check that the time
    coordinate read back equals the original date range.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])
    coords = {'time': time_index}

    data_vars = {'sig': (dim_names, cube), 'inc': (dim_names, cube)}
    self.ds = xr.Dataset(data_vars, coords=coords)

    time_units = 'days since 2000-01-01 00:00:00'

    with NcFile(self.filename, mode='w', time_units=time_units) as nc:
        nc.write(self.ds)

    with NcFile(self.filename, time_units=time_units) as nc:
        ds = nc.read()

        read_times = pd.DatetimeIndex(ds['time'].data)
        np.testing.assert_array_equal(read_times, coords['time'])
def test_chunksizes(self):
    """
    Check that the chunk sizes given at write time are reported for both
    variables when the file is read in 'r_netcdf' mode.
    """
    cube = np.ones((100, 100, 100), dtype=np.float32)
    dim_names = ['time', 'y', 'x']
    time_index = pd.date_range('2000-01-01', periods=cube.shape[0])

    data_vars = {'sig': (dim_names, cube), 'inc': (dim_names, cube)}
    self.ds = xr.Dataset(data_vars, coords={'time': time_index})

    chunksizes = (100, 10, 10)

    with NcFile(self.filename, mode='w', chunksizes=chunksizes) as nc:
        nc.write(self.ds)

    with NcFile(self.filename, mode='r_netcdf') as nc:
        ds = nc.read()

        for var_name in ('sig', 'inc'):
            self.assertEqual(ds[var_name].data.chunksize, chunksizes)
def _open(self):
    """
    Read reference metadata from the first usable file and build the stack.

    Nothing is done if no inventory is set. Otherwise the first file path
    among the inventory rows without missing values is opened, and its
    spatial reference, geotransformation parameters, metadata and number
    of dimensions are copied to this object. The stack shape combines the
    inventory length with the last two dimensions of that file. Finally the
    file stack is built.

    Raises
    ------
    IndexError
        If the inventory contains no row without missing values.
    """
    if self.inventory is None:
        return

    # Masking a data frame with `notnull()` keeps all rows (missing cells
    # simply stay missing), and `[0]` looks up the index label 0 rather
    # than the first entry. Drop incomplete rows, as done when building the
    # stack, and select the first remaining path by position.
    valid_filepaths = self.inventory.dropna()['filepath']
    ref_filepath = valid_filepaths.iloc[0]

    with NcFile(ref_filepath, mode='r') as netcdf:
        self.sref = netcdf.sref
        self.geotrans = netcdf.geotrans
        self.metadata = netcdf.metadata
        self.shape = (len(self.inventory), netcdf.shape[-2],
                      netcdf.shape[-1])
        self._dims = len(netcdf.shape)

    self._build_stack()
def setup_nc_single_test_data():
    """
    Creates test data as a multi-time and multi-variable NetCDF file.

    The file is only written if it does not exist yet.

    Returns
    -------
    str
        NetCDF test data filepath.
    list of pandas.Timestamp
        List of timestamps of the test data.
    """
    root_dirpath = os.path.join(dirpath_test(), 'data', 'Sentinel-1_CSAR')

    # create target folders
    dirpath = os.path.join(root_dirpath, 'IWGRDH', 'products', 'datasets',
                           'resampled', 'T0101', 'EQUI7_EU500M',
                           'E048N012T6', 'data')

    timestamps = [datetime(2016, 1, 1), datetime(2016, 2, 1),
                  datetime(2017, 1, 1), datetime(2017, 2, 1)]
    var_names = ["SIG0", "GMR-"]
    directions = ["A", "D"]
    combs = itertools.product(var_names, directions, timestamps)

    rows, cols = np.meshgrid(np.arange(0, 1200), np.arange(0, 1200))
    data = rows + cols

    equi7 = Equi7Grid(500)
    # NOTE(review): this tile name differs from the 'E048N012T6' used in
    # the directory and file name -- confirm that this is intended.
    tile_oi = equi7.EU.tilesys.create_tile(name="E042N012T6")

    filepath = os.path.join(
        dirpath,
        "D20160101_20170201_PREPRO---_S1AIWGRDH1VV-_146_T0101_EU500M_E048N012T6.nc")

    os.makedirs(dirpath, exist_ok=True)

    if not os.path.exists(filepath):
        xr_dss = []
        for var_name, direction, timestamp in combs:
            tags = {'direction': direction}
            # offset the data by the position of the timestamp so that
            # the layers of different dates can be told apart
            data_i = data + timestamps.index(timestamp)
            xr_ar = xr.DataArray(data=data_i[None, :, :],
                                 coords={'time': [timestamp]},
                                 dims=['time', 'y', 'x'],
                                 attrs=tags)
            xr_dss.append(
                xr.Dataset(data_vars={var_name.strip('-'): xr_ar}))

        # merge before opening the file, so that a failing merge does not
        # leave an empty file behind
        xr_ds = xr.merge(xr_dss)

        # the context manager releases the file even if writing fails
        with NcFile(filepath,
                    mode='w',
                    geotransform=tile_oi.geotransform(),
                    spatialref=tile_oi.get_geotags()['spatialreference']
                    ) as nc_file:
            nc_file.write(xr_ds)

    timestamps = [pd.Timestamp(timestamp.strftime("%Y%m%d"))
                  for timestamp in timestamps]

    return filepath, timestamps
def setup_nc_multi_test_data():
    """
    Creates test data as single-time and single-variable NetCDF files.

    One file is created per combination of polarisation, direction and
    timestamp. Files that already exist are not rewritten.

    Returns
    -------
    list of str
        List of NetCDF test data filepaths.
    list of pandas.Timestamp
        List of timestamps of the test data.
    """
    root_dirpath = os.path.join(dirpath_test(), 'data', 'Sentinel-1_CSAR')

    # create target folders
    dirpath = os.path.join(root_dirpath, 'IWGRDH', 'parameters', 'datasets',
                           'resampled', 'T0101', 'EQUI7_EU500M',
                           'E042N012T6', 'sig0')

    timestamps = [datetime(2016, 1, 1), datetime(2016, 2, 1),
                  datetime(2017, 1, 1), datetime(2017, 2, 1)]
    pols = ["VV", "VH"]
    directions = ["A", "D"]
    filename_fmt = "D{}_000000--_SIG0-----_S1AIWGRDH1{}{}_146_T0101_EU500M_E042N012T6.nc"
    combs = itertools.product(pols, directions, timestamps)

    rows, cols = np.meshgrid(np.arange(0, 1200), np.arange(0, 1200))
    data = (rows + cols).astype(float)

    equi7 = Equi7Grid(500)
    tile_oi = equi7.EU.tilesys.create_tile(name="E042N012T6")

    os.makedirs(dirpath, exist_ok=True)

    filepaths = []
    for pol, direction, timestamp in combs:
        filename = filename_fmt.format(timestamp.strftime("%Y%m%d"), pol,
                                       direction)
        filepath = os.path.join(dirpath, filename)
        filepaths.append(filepath)

        if os.path.exists(filepath):
            continue

        tags = {'direction': direction}
        # offset the data by the position of the timestamp so that the
        # files of different dates can be told apart
        data_i = data + timestamps.index(timestamp)
        xr_ar = xr.DataArray(data=data_i[None, :, :],
                             coords={'time': [timestamp]},
                             dims=['time', 'y', 'x'])
        xr_ds = xr.Dataset(data_vars={'1': xr_ar}, attrs=tags)

        # the context manager releases the file even if writing fails
        with NcFile(filepath,
                    mode='w',
                    geotransform=tile_oi.geotransform(),
                    spatialref=tile_oi.get_geotags()['spatialreference']
                    ) as nc_file:
            nc_file.write(xr_ds)

    timestamps = [pd.Timestamp(timestamp.strftime("%Y%m%d"))
                  for timestamp in timestamps]

    return filepaths, timestamps
def read(self, row=None, col=None, n_rows=1, n_cols=1, band="1", nodataval=-9999, decoder=None, decoder_kwargs=None):
    """
    Read data from the stacked NetCDF files.

    Parameters
    ----------
    row : int, optional
        Row number/index.
        If None and `col` is not None, then all rows of the selected
        columns will be loaded.
    col : int, optional
        Column number/index.
        If None and `row` is not None, then all columns of the selected
        rows will be loaded.
    n_rows : int, optional
        Number of rows to read (default is 1). Ignored if `row` is None.
    n_cols : int, optional
        Number of columns to read (default is 1). Ignored if `col` is None.
    band : str, optional
        Name of the data variable to read (default is "1").
    nodataval : int or float, optional
        No data value handed to `decoder` as its second positional
        argument (default is -9999).
    decoder : function, optional
        Decoding function called as
        `decoder(data, nodataval, **decoder_kwargs)`.
    decoder_kwargs : dict, optional
        Keyword arguments for the decoder.

    Returns
    -------
    data : xarray.Dataset
        Data set holding the selected data variable, the grid mapping
        variable (if one is found) and the attributes of the stacked data
        set, after being passed through `self._fill_nan`.
    """
    decoder_kwargs = {} if decoder_kwargs is None else decoder_kwargs

    n_rows_total = self.shape[-2]
    n_cols_total = self.shape[-1]

    if row is None and col is None:
        # read whole dataset
        row, col = 0, 0
        n_rows, n_cols = n_rows_total, n_cols_total
    elif row is None:
        # only columns selected: read them over the full row extent
        row = 0
        n_rows = n_rows_total
    elif col is None:
        # only rows selected: read them over the full column extent
        col = 0
        n_cols = n_cols_total

    row_slice = slice(row, row + n_rows)
    col_slice = slice(col, col + n_cols)
    if len(self.shape) == 3:
        # leading dimension of the stack is read completely
        slices = (slice(None), row_slice, col_slice)
    else:
        slices = (row_slice, col_slice)

    data_ar = self.mfdataset[band][slices]
    if decoder is not None:
        data_ar.data = decoder(data_ar.data, nodataval, **decoder_kwargs)
    data = data_ar.to_dataset()

    # a float-typed time coordinate still holds encoded numbers; convert
    # them to datetimes using the time units of this object
    if 'time' in data.dims and data.variables['time'].dtype == 'float':
        timestamps = netCDF4.num2date(data['time'],
                                      self.time_units,
                                      only_use_cftime_datetimes=False)
        data = data.assign_coords({'time': timestamps})

    # add projection information again
    gm_name = NcFile.get_gm_name(self.mfdataset)
    if gm_name is not None:
        data[gm_name] = self.mfdataset[gm_name]

    # add attributes
    data.attrs = self.mfdataset.attrs

    return self._fill_nan(data)