def intersect_grids(grids, out_path=None):
    """
    Get a grid from common GPIs of a list of grids.

    Parameters
    ----------
    grids : list
        Grid objects and/or paths to grid files. Every element that is a
        string is loaded with ``load_grid``; all other elements are used
        as they are.
    out_path : str, optional (default: None)
        Directory where the intersected grid is stored as 'common_grid.nc'.
        If None is passed, the grid is not stored.

    Returns
    -------
    common_grid : pgg.CellGrid
        Subgrid of the first passed grid, containing only the GPIs that
        were in all passed grids.

    Raises
    ------
    ValueError
        If no grid was passed.
    """
    # Load element-wise, so that lists mixing paths and grid objects work too.
    grids = [load_grid(g) if isinstance(g, str) else g for g in grids]
    if not grids:
        raise ValueError("At least one grid is needed to build an intersection.")

    grid_points = [grid.get_grid_points()[0] for grid in grids]
    common_gpis = functools.reduce(np.intersect1d, grid_points)
    common_grid = grids[0].subgrid_from_gpis(common_gpis)  # type: pgg.BasicGrid

    if out_path is not None:
        pgg.netcdf.save_grid(
            os.path.join(out_path, 'common_grid.nc'),
            common_grid,
            subset_name='common_adjusted',
            subset_meaning='LMP HOM QCM common adjusted points')

    return common_grid
def __init__(self, ts_path, grid_path=None, **kwargs):
    """
    Load the grid and initialise the parent reader with it.

    Parameters
    ----------
    ts_path : str
        Path that is passed to the parent class; also the place where
        'grid.nc' is looked up when no grid_path is given.
    grid_path : str, optional (default: None)
        Path to the grid file. If None, 'grid.nc' in ts_path is used.
    **kwargs
        Passed on unchanged to the parent class constructor.
    """
    grid_file = os.path.join(ts_path, "grid.nc") if grid_path is None else grid_path
    super(ERATs, self).__init__(ts_path, load_grid(grid_file), **kwargs)
def __init__(
        self,
        data_path,
        parameters=[
            'SWI_001', 'SWI_005', 'SWI_010', 'SWI_015', 'SWI_020',
            'SWI_040', 'SWI_060', 'SWI_100', 'SSF'
        ],
        dt='201612310000',
        version='3.0.1',
        grid_fname=None,
        read_bulk=True,
        fname_template='c_gls_SWI-TS_{dt}_C{{:04d}}_ASCAT_V{version}'):
    """
    Set up the SWI time series reader.

    Parameters
    ----------
    data_path : str
        Path passed to the parent class; the default grid file is also
        looked up here.
    parameters : list, optional
        Names of the variables to read. The list is copied before it is
        handed to the parent class, so neither the shared default nor a
        list owned by the caller can be modified through this object.
    dt : str, optional (default: '201612310000')
        Value inserted for ``{dt}`` in fname_template.
    version : str, optional (default: '3.0.1')
        Value inserted for ``{version}`` in fname_template.
    grid_fname : str, optional (default: None)
        Path to the grid file. If None, the static DGG file
        'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc' in
        data_path is used.
    read_bulk : bool, optional (default: True)
        Passed to the IO class as 'read_bulk'.
    fname_template : str, optional
        File name template; after inserting dt and version it is passed
        to the parent class as ``fn_format``.
    """
    if grid_fname is None:
        grid_fname = os.path.join(
            data_path,
            'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
    grid = netcdf.load_grid(grid_fname,
                            location_var_name='location_id',
                            subset_flag='land_flag')

    # Never hand out the (shared) default list itself.
    if parameters is not None:
        parameters = list(parameters)

    swi_names = [
        'SWI_001', 'SWI_005', 'SWI_010', 'SWI_015', 'SWI_020',
        'SWI_040', 'SWI_060', 'SWI_100'
    ]
    scale_factors = {name: 0.5 for name in swi_names}
    scale_factors['SSF'] = 1
    dtypes = {name: np.uint8 for name in swi_names + ['SSF']}

    super(SWI_TS, self).__init__(
        data_path,
        grid,
        fn_format=fname_template.format(dt=dt, version=version),
        parameters=parameters,
        scale_factors=scale_factors,
        dtypes=dtypes,
        autoscale=False,
        automask=False,
        ioclass_kws={
            'read_bulk': read_bulk,
            'loc_ids_name': 'locations'
        })
def __init__(self,
             ts_path,
             grid_path=None,
             remove_nans=False,
             drop_tz=True,
             **kwargs):
    """
    Class for reading C3S SM time series after reshuffling.

    Parameters
    ----------
    ts_path : str
        Directory where the netcdf time series files are stored
    grid_path : str, optional (default: None)
        Path to grid file, that is used to organize the location of time
        series to read. If None is passed, grid.nc is searched for in the
        ts_path.
    remove_nans : bool or dict, optional (default: False)
        Replace fill values in SM time series. Either
            - dict of form {parameter: {val_to_replace: replacement_val}, ... }
            - dict of form {parameter : val_to_set_NaN ...}
            - True to replace -9999 with nan anywhere
            - False to do nothing
        A passed dict is not modified; a normalised copy is stored in
        ``self.remove_nans``.
    drop_tz: bool, optional (default: True)
        Drop time zone information from time series

    Optional keyword arguments that are passed to the Gridded Base:
    ------------------------------------------------------------------------
        parameters : list, optional (default: None)
            Specific variable names to read, if None are selected, all are
            read.
        offsets : dict, optional (default:None)
            Offsets (values) that are added to the parameters (keys)
        scale_factors : dict, optional (default:None)
            Factor (value) that the parameters (key) is multiplied with
        ioclass_kws: dict
            Optional keyword arguments to pass to OrthoMultiTs class:
            ----------------------------------------------------------------
                read_bulk : boolean, optional (default:False)
                    if set to True the data of all locations is read into
                    memory, and subsequent calls to read_ts read from the
                    cache and not from disk this makes reading complete
                    files faster
                read_dates : boolean, optional (default:False)
                    if false dates will not be read automatically but only
                    on specific request useable for bulk reading because
                    currently the netCDF num2date routine is very slow for
                    big datasets
    """
    if isinstance(remove_nans, dict):
        # Bring the short form {param: value} into the long form
        # {param: {value: nan}}. Build a new dict instead of changing
        # the one owned by the caller.
        remove_nans = {
            var: (is_should if isinstance(is_should, dict)
                  else {is_should: np.nan})
            for var, is_should in remove_nans.items()
        }

    self.remove_nans = remove_nans

    if grid_path is None:
        grid_path = os.path.join(ts_path, "grid.nc")

    grid = load_grid(grid_path)

    self.drop_tz = drop_tz
    super(C3STs, self).__init__(ts_path, grid=grid, **kwargs)
def __init__(self, ts_path, grid_path=None):
    """
    Load 'grid.nc' and initialise the parent reader with it.

    Parameters
    ----------
    ts_path : str
        Path that is passed to the parent class.
    grid_path : str, optional (default: None)
        Directory that contains 'grid.nc' (the file name is always
        appended). If None, ts_path is searched instead.
    """
    grid_dir = ts_path if grid_path is None else grid_path
    grid = netcdf.load_grid(os.path.join(grid_dir, "grid.nc"))
    super(ERA5Ts, self).__init__(ts_path, grid)
def __init__(self, ts_path, exact_index=False, grid_path=None, **kwargs):
    """
    Load the grid and initialise the parent reader (with automask=True).

    Parameters
    ----------
    ts_path : str
        Path that is passed to the parent class; also the place where
        'grid.nc' is looked up when no grid_path is given.
    exact_index : bool, optional (default: False)
        If True and a parameter selection is set, the variable named by
        ``self._t0`` is added to the parameters to read.
    grid_path : str, optional (default: None)
        Path to the grid file. If None, 'grid.nc' in ts_path is used.
    **kwargs
        Passed on to the parent class constructor.
    """
    if grid_path is None:
        grid_path = os.path.join(ts_path, "grid.nc")

    grid = nc.load_grid(grid_path)
    super(LPRMTs, self).__init__(ts_path, grid, automask=True, **kwargs)

    self.exact_index = exact_index
    # Only add the time variable if it was not selected already, otherwise
    # it would show up twice in the parameter list.
    if exact_index and (self.parameters is not None) and \
            (self._t0 not in self.parameters):
        self.parameters.append(self._t0)
def __init__(self,
             ts_path,
             grid_path=None,
             index_add_time=False,
             drop_missing=True,
             **kwargs):
    """
    Class for reading SMOS time series after reshuffling images.
    Missing images are represented in time series as lines where all
    variables are NaN.

    Parameters
    ----------
    ts_path : str
        Directory where the netcdf time series files are stored
    grid_path : str, optional (default: None)
        Path to grid file, that is used to organize the location of time
        series to read. If None is passed, grid.nc is searched for in the
        ts_path.
    index_add_time : bool, optional (default: False)
        Add overpass time stamps to the data frame index. This needs the
        'Days' and 'UTC_Seconds' variable available in the time series
        files.
    drop_missing : bool, optional (default: True)
        Drop Lines in TS where ALL variables are missing.

    Optional keyword arguments that are passed to the Gridded Base:
    ------------------------------------------------------------------------
        parameters : list, optional (default: None)
            Specific variable names to read, if None are selected, all are
            read.
        offsets : dict, optional (default:None)
            Offsets (values) that are added to the parameters (keys)
        scale_factors : dict, optional (default:None)
            Factor (value) that the parameters (key) is multiplied with
        ioclass_kws: dict
            Optional keyword arguments to pass to OrthoMultiTs class:
            ----------------------------------------------------------------
                read_bulk : boolean, optional (default:False)
                    if set to True the data of all locations is read into
                    memory, and subsequent calls to read_ts read from the
                    cache and not from disk this makes reading complete
                    files faster
                read_dates : boolean, optional (default:False)
                    if false dates will not be read automatically but only
                    on specific request useable for bulk reading because
                    currently the netCDF num2date routine is very slow for
                    big datasets
                autofill : bool, (default: True)
                    Fill missing values with nans
    """
    if grid_path is None:
        grid_path = os.path.join(ts_path, "grid.nc")

    self.drop_missing = drop_missing
    grid = load_grid(grid_path)
    super(SMOSTs, self).__init__(ts_path, grid, **kwargs)

    self.index_add_time = index_add_time
    if (self.parameters is not None) and self.index_add_time:
        # The time variables must be read to build the index; skip those
        # that were selected already so no parameter is listed twice.
        for v in self._t0_vars.values():
            if v not in self.parameters:
                self.parameters.append(v)
def calc_errors(gpis):
    """
    Calculate correlation and triple collocation statistics between GLEAM
    and the ASCAT, AMSR2 and SMAP time series for the passed grid points.

    For every GPI the 'w1' variable of the GLEAM time series file and the
    'sm' variable of the three satellite readers are combined (rows with
    missing values in any data set are dropped). Pearson correlations are
    computed for the data set pairs and ``TCA_calc`` is applied to the
    triplets (GLEAM, ASCAT, AMSR2) and (GLEAM, ASCAT, SMAP).

    NOTE: writing the results is currently disabled (see the commented
    ``write_output`` call), so the statistics are computed but not stored.

    Parameters
    ----------
    gpis : int or array-like of int
        GLEAM grid point indices to process. They are also used as index
        into the flipped 720 x 1440 index array to find the CCI GPI.
    """
    outpath = Path('/work/GLEAM/errors')
    outpath.mkdir(parents=True, exist_ok=True)

    # fname = outpath / ('part_%i.csv' % gpis[0])

    # Look-up from the index used here to the GPI of the (flipped) CCI grid
    cci_gpis = np.flipud(np.arange(720 * 1440).reshape((720, 1440))).flatten()
    cci_grid = ncgrid.load_grid(
        '/data_sets/ESA_CCI_L2/ESA-CCI-SOILMOISTURE-LAND_AND_RAINFOREST_MASK-fv04.2.nc',
        subset_flag='land', subset_value=1.)

    asc_io = CCIDs('/data_sets/ESA_CCI_L2/data/ascata', grid=cci_grid)
    ams_io = CCIDs('/data_sets/ESA_CCI_L2/data/amsr2', grid=cci_grid)
    sma_io = CCIDs('/data_sets/ESA_CCI_L2/data/smap', grid=cci_grid)

    gpi_list = np.atleast_1d(gpis)
    n_gpis = len(gpi_list)

    for cnt, gpi in enumerate(gpi_list):
        try:
            # The context manager closes the file even if reading fails.
            with Dataset('/data_sets/GLEAM/_output/timeseries/%i.nc' % gpi) as gleam_io:
                gle_ts = pd.Series(
                    gleam_io.variables['w1'][:, 0],
                    index=num2date(gleam_io['time'][:],
                                   units=gleam_io['time'].units),
                    name='GLEAM')

            asc_ts = asc_io.read(cci_gpis[gpi], only_valid=True)['sm']
            asc_ts.name = 'ASCAT'
            ams_ts = ams_io.read(cci_gpis[gpi], only_valid=True)['sm']
            ams_ts.name = 'AMSR2'
            sma_ts = sma_io.read(cci_gpis[gpi], only_valid=True)['sm']
            sma_ts.name = 'SMAP'

            df = pd.concat((gle_ts, asc_ts, ams_ts, sma_ts),
                           axis='columns').dropna()

            result = {'n': len(df)}

            # Correlation of every data set with all data sets after it
            for i, ds1 in enumerate(['GLEAM', 'ASCAT', 'AMSR2']):
                for ds2 in ['ASCAT', 'AMSR2', 'SMAP'][i::]:
                    R, p = pearsonr(df[ds1].values, df[ds2].values)
                    result['R_' + ds1 + '_' + ds2] = R
                    result['p_' + ds1 + '_' + ds2] = p

            tc1 = TCA_calc(df[['GLEAM', 'ASCAT', 'AMSR2']], ref_ind=0)
            tc2 = TCA_calc(df[['GLEAM', 'ASCAT', 'SMAP']], ref_ind=0)

            for i, ds in enumerate(['GLEAM', 'ASCAT', 'AMSR2']):
                result['TC1_R2_' + ds] = tc1[0][i]
                result['TC1_RMSE_' + ds] = tc1[1][i]

            for i, ds in enumerate(['GLEAM', 'ASCAT', 'SMAP']):
                result['TC2_R2_' + ds] = tc2[0][i]
                result['TC2_RMSE_' + ds] = tc2[1][i]

            # write_output(fname, result, gpi)

            print('gpi %i finished (%i / %i).' % (gpi, cnt + 1, n_gpis))
        except Exception as err:
            # Best effort: a failing GPI must not stop the whole run, but
            # the reason is reported instead of being dropped silently.
            # KeyboardInterrupt / SystemExit are deliberately not caught.
            print('gpi %i failed (%i / %i): %r' % (gpi, cnt + 1, n_gpis, err))
            continue
def __init__(self,
             ts_path,
             grid=None,
             exact_index=False,
             clip_dates=None,
             ioclass_kws=None,
             **kwargs):
    """
    Read ESA CCI SM in time series format from netcdf files

    Parameters
    ----------
    ts_path : str
        Path to where the data is stored
    grid : str or pygeogrids.CellGrid, optional (default: None)
        Grid that the time series are searched on. A CellGrid is used as
        it is, anything else is loaded as grid file. If None, 'grid.nc'
        in ts_path is loaded.
    exact_index : bool, optional (default: False)
        Apply t0 to daily time stamps to read exact observations times.
    clip_dates : tuple[datetime, datetime], optional (default: None)
        Cut the time series to this date range (start, end)
    ioclass_kws : dict, optional (default: None)
        IO class kwargs used by pynetcf. 'read_bulk' is set to True if it
        is not given. The passed dict itself is not modified.
    kwargs:
        Additional kwargs are given to pynetcf OrthoMultiTs.
    """
    self.t0 = 't0'  # observation time stamp variable

    if grid is None:
        grid = os.path.join(ts_path, "grid.nc")

    # Work on a copy, so that adding the default does not leak into the
    # dict of the caller.
    ioclass_kws = {} if ioclass_kws is None else dict(ioclass_kws)
    ioclass_kws.setdefault('read_bulk', True)

    if not isinstance(grid, CellGrid):
        grid = nc.load_grid(grid)

    super(SmecvTs, self).__init__(ts_path,
                                  grid,
                                  automask=True,
                                  ioclass_kws=ioclass_kws,
                                  **kwargs)

    self.clip_dates = clip_dates
    self.exact_index = exact_index

    if (self.parameters is not None) and self.exact_index and \
            (self.t0 not in self.parameters):
        self.parameters.append(self.t0)
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load the ECV grid from the grid definition file.

    The file is the one returned by ``get_grid_definition_filename()``
    (called without arguments).

    Parameters
    ----------
    subset_flag : str, optional (default: 'land')
        Passed to the grid loader as ``subset_flag``.

    Returns
    -------
    grid : pygeogrids.CellGrid
        The grid object created by ``ncgrid.load_grid``.
    """
    definition_file = get_grid_definition_filename()
    return ncgrid.load_grid(definition_file, subset_flag=subset_flag)
def __init__(self, ts_path=None, grid_path=None, exact_index=False,
             **kwargs):
    """
    Load the grid and initialise the parent reader with it.

    Parameters
    ----------
    ts_path : str, optional (default: None)
        Path that is passed to the parent class. Must be given when no
        grid_path is passed, because 'grid.nc' is then looked up here.
    grid_path : str, optional (default: None)
        Path to the grid file. If None, 'grid.nc' in ts_path is used.
    exact_index : bool, optional (default: False)
        If True and a parameter selection is set, the variables in
        ``self._t0_vars`` are added to the parameters to read.
    **kwargs
        Passed on unchanged to the parent class constructor.
    """
    if grid_path is None:
        grid_path = os.path.join(ts_path, "grid.nc")

    grid = load_grid(grid_path)
    super(SMOSTs, self).__init__(ts_path, grid, **kwargs)

    self.exact_index = exact_index
    if (self.parameters is not None) and self.exact_index:
        # Skip time variables that were selected already, so that no
        # parameter is listed twice.
        for v in self._t0_vars.values():
            if v not in self.parameters:
                self.parameters.append(v)
def test_store_load_regular_2D_grid():
    """
    Test the storing/loading of a 2D grid with explicitly passed gpis
    and shape.
    """
    import os

    londim = np.arange(-180.0, 180.0, 60)
    latdim = np.arange(90.0, -90.0, -30)
    lons, lats = np.meshgrid(londim, latdim)
    gpis = np.arange(lons.flatten().size).reshape(lons.shape)
    grid = grids.BasicGrid(lons.flatten(), lats.flatten(),
                           gpis.flatten(), shape=lons.shape)

    # Write into a private temporary directory that is removed afterwards,
    # instead of relying on the name of an already deleted temporary file.
    with tempfile.TemporaryDirectory() as tmpdir:
        testfile = os.path.join(tmpdir, 'grid.nc')
        grid_nc.save_grid(testfile, grid)
        grid_loaded = grid_nc.load_grid(testfile)

    assert grid == grid_loaded
def __init__(self, ts_path=None, grid_path=None, exact_index=False,
             **kwargs):
    """
    Load the grid and initialise the parent reader with it.

    Parameters
    ----------
    ts_path : str, optional (default: None)
        Path that is passed to the parent class; 'grid.nc' is looked up
        here when no grid_path is given.
    grid_path : str, optional (default: None)
        Path to the grid file. If None, 'grid.nc' in ts_path is used.
    exact_index : bool, optional (default: False)
        If True and a parameter selection is set, the variable named by
        ``self._t0_var`` is added to the parameters (unless it is already
        contained).
    **kwargs
        Passed on unchanged to the parent class constructor.
    """
    grid_file = os.path.join(ts_path, "grid.nc") if grid_path is None else grid_path
    super(SMAPTs, self).__init__(ts_path, load_grid(grid_file), **kwargs)

    self.exact_index = exact_index

    if not self.exact_index:
        return
    if self.parameters is None:
        return
    if self._t0_var not in self.parameters:
        self.parameters.append(self._t0_var)
def pd_from_2Dnetcdf(filename, grid='global'):
    # type: (str, str) -> pd.DataFrame
    '''
    Read all ('lat', 'lon') variables of a netcdf file into a data frame.

    :param filename: Path to netcdf file
    :param grid: Set "global" (global grid points) or "land" (only land
        points) to return specific points
    :return: Dataframe with GPIs as index and data from netcdf in columns
    :raises Exception: if grid is neither 'global' nor 'land'
    '''
    # TODO: Delete this function when everything runs
    # Read everything while the file is open; the context manager makes
    # sure the file is closed again, also when reading fails.
    with Dataset(filename) as ncfile:
        lons_file = ncfile.variables['lon'][:]
        # lats in same order as glob grid ascending
        lats_file = np.flipud(ncfile.variables['lat'][:])

        var_names = [
            var_name for var_name in ncfile.variables.keys()
            if ncfile.variables[var_name].dimensions == ('lat', 'lon')
        ]

        # data in same order as glob grid ascending
        data = {
            name: np.flipud(ncfile.variables[name][:])
            for name in var_names
        }

    # global grid how it is in netcdf file
    lons_file, lats_file = np.meshgrid(lons_file, lats_file)
    data['lon'] = lons_file
    data['lat'] = lats_file

    # .items() works on Python 2 and 3 (.iteritems() is Python 2 only)
    data_flat = {name: values.flatten() for name, values in data.items()}
    dataframe = pd.DataFrame(data=data_flat)

    if grid == 'global':
        thegrid = globalCellgrid()
    elif grid == 'land':
        thegrid = nc.load_grid(
            r"D:\users\wpreimes\datasets\grids\qdeg_land_grid.nc")
    else:
        raise Exception("select 'land' or 'global' for returned GPIs")

    grid_points = thegrid.get_grid_points()[0]

    return dataframe.loc[grid_points]
def __init__(self, path, grid_path,
             grid_info_filename='TUW_WARP5_grid_info_2_1.nc',
             variables=None):
    """
    Load the land subset of the grid and initialise the parent reader.

    Parameters
    ----------
    path : str
        Path that is passed to the parent class.
    grid_path : str
        Directory that contains the grid info file.
    grid_info_filename : str, optional
        Name of the grid info file inside grid_path.
    variables : list, optional (default: None)
        Stored as ``self.variables``. If None, ['vod'] is used.
    """
    grid_file = os.path.join(grid_path, grid_info_filename)
    grid = ncgrid.load_grid(grid_file, subset_flag='land')

    self.path = path
    self.grid_path = grid_path
    self.grid_info_filename = grid_info_filename
    self.variables = ['vod'] if variables is None else variables

    super(AscatVodTs, self).__init__(path, grid)
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load the ECV grid (definition file version '04.2') from netcdf.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. 'land' or 'rainforest'.
        Points where the flag has the value 1 are selected.

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset. In Quarter Degree
    """
    definition_file = get_grid_definition_filename(version='04.2')
    grid = ncgrid.load_grid(definition_file,
                            subset_flag=subset_flag,
                            subset_value=1.)
    return grid
def _load_subset(subset_flag: {str, None}, subset_value: {int, list}) -> {np.array, None}: """ Load grid points for the subset from definition file""" if subset_flag is not None: subset_grid = ncgrid.load_grid( get_grid_definition_filename(version='05.2'), subset_flag=subset_flag, subset_value=subset_value) if isinstance(subset_grid.activegpis, np.ma.masked_array): subset = subset_grid.activegpis.data else: subset = subset_grid.activegpis else: subset = None return subset
def __init__(self, ts_path, grid_path=None, remove_nans=False, **kwargs):
    '''
    Class for reading C3S SM time series after reshuffling.

    Parameters
    ----------
    ts_path : str
        Directory where the netcdf time series files are stored
    grid_path : str, optional (default: None)
        Path to the grid file that organises the locations of the time
        series. If None is passed, grid.nc is searched for in the ts_path.
    remove_nans : bool, optional (default: False)
        Replace -9999 with np.nan in time series

    Optional keyword arguments that are passed to the Gridded Base:
    ------------------------------------------------------------------------
        parameters : list, optional (default: None)
            Specific variable names to read, if None are selected, all are
            read.
        offsets : dict, optional (default:None)
            Offsets (values) that are added to the parameters (keys)
        scale_factors : dict, optional (default:None)
            Factor (value) that the parameters (key) is multiplied with
        ioclass_kws: dict
            Optional keyword arguments to pass to OrthoMultiTs class:
            ----------------------------------------------------------------
                read_bulk : boolean, optional (default:False)
                    if True, the data of all locations is read into memory
                    and later calls to read_ts use this cache instead of
                    the disk, which makes reading complete files faster
                read_dates : boolean, optional (default:False)
                    if False, dates are only read on specific request;
                    useful for bulk reading because the netCDF num2date
                    routine is very slow for big datasets
    '''
    self.remove_nans = remove_nans

    grid_file = os.path.join(ts_path, "grid.nc") if grid_path is None else grid_path
    super(C3STs, self).__init__(ts_path, grid=load_grid(grid_file), **kwargs)
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load a SMECV Grid as used in the production of ESA CCI SM v4.
    This grid has 2D shape information, also a rainforest mask is included.

    Deprecated: a DeprecationWarning is issued on every call.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. 'land' or 'rainforest'.
        If None, the grid is created without a subset.

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset. In Quarter Degree
        resolution.
    """
    # stacklevel=2 attributes the warning to the calling code. Without it
    # the warning points into this module and is hidden by the default
    # warning filters.
    warnings.warn(
        "SMECV Grid v4 is deperecated. Please use a newer grid version.",
        DeprecationWarning,
        stacklevel=2)

    lon, lat, gpis, cells, shape = meshgrid(resolution=0.25,
                                            cellsize=5.,
                                            flip_lats=True)

    if subset_flag is not None:
        # The definition file is only used to look up the subset points
        # (points where the flag variable equals 1).
        subset_grid = ncgrid.load_grid(
            get_grid_definition_filename(version='04.2'),
            subset_flag=subset_flag,
            subset_value=1.)
        subset = subset_grid.subset
    else:
        subset = None

    return CellGrid(lon, lat, gpis=gpis, subset=subset, cells=cells,
                    shape=shape)
def __init__(self, ts_path, grid_path=None, **kwargs):
    '''
    Class for reading SMAP time series after reshuffling.

    Parameters
    ----------
    ts_path : str
        Directory where the netcdf time series files are stored
    grid_path : str, optional (default: None)
        Path to the grid file that organises the locations of the time
        series. If None is passed, grid.nc is searched for in the ts_path.

    Optional keyword arguments that are passed to the Gridded Base:
    ------------------------------------------------------------------------
        parameters : list, optional (default: None)
            Specific variable names to read, if None are selected, all are
            read.
        offsets : dict, optional (default:None)
            Offsets (values) that are added to the parameters (keys)
        scale_factors : dict, optional (default:None)
            Factor (value) that the parameters (key) is multiplied with
        ioclass_kws: dict, (optional)
            Optional keyword arguments to pass to OrthoMultiTs class:
            ----------------------------------------------------------------
                read_bulk : boolean, optional (default:False)
                    if True, the data of all locations is read into memory
                    and later calls to read_ts use this cache instead of
                    the disk, which makes reading complete files faster
                read_dates : boolean, optional (default:False)
                    if False, dates are only read on specific request;
                    useful for bulk reading because the netCDF num2date
                    routine is very slow for big datasets.
    '''
    grid_file = os.path.join(ts_path, "grid.nc") if grid_path is None else grid_path
    super(SMAPTs, self).__init__(ts_path, ncdf.load_grid(grid_file), **kwargs)
def SMECV_Grid_v052(subset_flag='land', subset_value=1.):
    """
    Load the ECV grid (definition file version '05.2') from netcdf.
    This grid has 2D shape information, also a rainforest mask is included.
    The land mask is the same that is defined in gridv4.
    This version contains land cover information as well that can be used
    for filtering.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. land, high_vod,
        rainforest, cci_lc
    subset_value : float or list, optional (default: 1.)
        Select one or more values of the variable that defines the subset,
        i.e 1. for masks (high_vod, land) or a float or list of floats for
        one or multiple ESA CCI Landcover classes (e.g 190 to load urban
        points only)

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset. In Quarter Degree
    """
    definition_file = get_grid_definition_filename(version='05.2')
    grid = ncgrid.load_grid(definition_file,
                            subset_flag=subset_flag,
                            subset_value=subset_value)
    return grid
def __init__(self,
             data_path,
             parameters=[
                 'SWI_001', 'SWI_005', 'SWI_010', 'SWI_015', 'SWI_020',
                 'SWI_040', 'SWI_060', 'SWI_100', 'SSF'
             ],
             dt=None,
             version=None,
             grid_fname=None,
             read_bulk=True,
             fname_template='c_gls_SWI-TS_{dt}_C{cell}_ASCAT_V{version}',
             cell_fn='{:04d}'):
    """
    Set up the SWI time series reader.

    Parameters
    ----------
    data_path : str
        Path passed to the parent class; the default grid file and the
        data files (for detecting dt and version) are looked up here.
    parameters : list, optional
        Names of the variables to read. The list is copied before it is
        handed to the parent class, so neither the shared default nor a
        list owned by the caller can be modified through this object.
    dt : str, optional (default: None)
        Value inserted for ``{dt}`` in fname_template. If None, it is
        taken from the name of a data file found in data_path.
    version : str, optional (default: None)
        Value inserted for ``{version}`` in fname_template. If None, it is
        taken from the name of a data file found in data_path.
    grid_fname : str, optional (default: None)
        Path to the grid file. If None, the static DGG file
        'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc' in
        data_path is used.
    read_bulk : bool, optional (default: True)
        Passed to the IO class as 'read_bulk'.
    fname_template : str, optional
        File name template with the fields dt, cell and version.
    cell_fn : str, optional (default: '{:04d}')
        Value inserted for ``{cell}`` in fname_template.

    Raises
    ------
    IOError
        If dt or version have to be detected but no file matching the
        template is found in data_path.
    """
    if grid_fname is None:
        grid_fname = os.path.join(
            data_path,
            'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
    grid = netcdf.load_grid(grid_fname,
                            location_var_name='location_id',
                            subset_flag='land_flag')

    # detect datetime and version if not given
    if dt is None or version is None:
        globstring = fname_template.format(dt="*", cell="*", version="*")
        found_files = glob.glob(os.path.join(data_path, globstring))
        if len(found_files) == 0:
            raise IOError("No data found in {}".format(data_path))
        fn = found_files[0]
        fn = os.path.splitext(os.path.basename(fn))[0]
        parts = fn.split('_')
    if dt is None:
        # this only works if the files follow the CGLS naming convention
        # for everything else dt should be given as a keyword
        dt = parts[3]
    if version is None:
        version = parts[-1][1:]

    # Never hand out the (shared) default list itself.
    if parameters is not None:
        parameters = list(parameters)

    t_values = ('001', '005', '010', '015', '020', '040', '060', '100')
    scaled_names = ['SWI_' + t for t in t_values] + \
                   ['QFLAG_' + t for t in t_values]
    scale_factors = {name: 0.5 for name in scaled_names}
    scale_factors['SSF'] = 1
    dtypes = {name: np.uint8 for name in scaled_names + ['SSF']}

    super(SWI_TS, self).__init__(
        data_path,
        grid,
        fn_format=fname_template.format(dt=dt,
                                        version=version,
                                        cell=cell_fn),
        parameters=parameters,
        scale_factors=scale_factors,
        dtypes=dtypes,
        autoscale=False,
        automask=False,
        ioclass_kws={
            'read_bulk': read_bulk,
            'loc_ids_name': 'locations'
        })
def run_adjustment(): adjust_obj = Adjust(r"H:\HomogeneityTesting_data\output\CCI31EGU", 'adjusted_cci', 'merra2', 0.1) grid_path = r"D:\users\wpreimes\datasets\grids\qdeg_land_grid.nc" cell_grid = nc.load_grid(grid_path) cells = cells_for_continent('Australia') adjusted_data_path = r"D:\users\wpreimes\datasets\CCI_31_D\adjusted_temp" # TODO: Add own list for points should have been adjusted but could not unadjusted_gps = { 'gpi': [], 'slope': [], 'intercept': [], 'adjustment_class': [] } # 0= UNadjusted, 1=adjusted dataset = GriddedNcIndexedRaggedTs(path=adjusted_data_path, grid=cell_grid, mode='w') adjusted_gps = GriddedPointData(os.path.join(adjusted_data_path, 'adjusted_gps.nc'), cell_grid, mode='w') for cell_index, cell in enumerate(cells): gpis = cell_grid.grid_points_for_cell(cell)[0] adjustment_status = {'gpi': [], 'status': []} adjustment_stats = {} for index, gpi in enumerate(gpis): if index % 50 == 0: print('%i of %i' % (index, gpis.size)) try: adj_settings, adjusted_data = adjust_obj.adjust_ts(gpi) except: adjustment_status['gpi'].append(gpi) adjustment_status['status'].append(0) continue for breaktime_str, settings in adj_settings.iteritems(): if breaktime_str not in adjustment_stats.keys(): adjustment_stats[breaktime_str] = { 'gpi': [], 'intercept': [], 'slope': [] } adjustment_stats[breaktime_str]['gpi'].append(gpi) adjustment_stats[breaktime_str]['intercept'].append( adj_settings[breaktime_str]['intercept']) adjustment_stats[breaktime_str]['slope'].append( adj_settings[breaktime_str]['slope']) if adjusted_data.columns.values[0] == 'not_adjusted': adjustment_status['gpi'].append(gpi) adjustment_status['status'].append(0) if adjusted_data.columns.values[0] == 'adjusted': adjustment_status['gpi'].append(gpi) adjustment_status['status'].append(1) dataset.write(gpi, adjusted_data) dataset.close() points_to_netcdf(pd.DataFrame( index=adjustment_status['gpi'], data={'status': adjustment_status['status']}), path=adjusted_data_path, 
filename='adjustment_status') for breaktime_str in adjustment_stats.keys(): points_to_netcdf(pd.DataFrame( index=adjustment_stats[breaktime_str]['gpi'], data={ 'intercept': adjustment_stats[breaktime_str]['intercept'], 'slope': adjustment_stats[breaktime_str]['slope'] }), path=adjusted_data_path, filename=breaktime_str + '_adj_stats') ''' adjusted_gps = {} adjusted_ts_data = pd.DataFrame() if os.path.isfile(os.path.join(adjusted_data_path, str(cell) + '.nc')): continue # already processed print('cell %i of %i' % (cell_index, len(cells))) gpis = cell_grid.grid_points_for_cell(cell)[0] for index, gpi in enumerate(gpis): if index % 50 == 0: print('%i of %i' % (index, gpis.size)) try: adj_settings, adjusted_data = adjust_obj.adjust_ts(gpi) except: continue column_name = adjusted_data.columns.values[0] if column_name == 'not_adjusted': unadjusted_gps['gpi'].append(gpi) if column_name == 'adjusted': for breaktime_str, settings in adj_settings.iteritems(): if breaktime_str not in adjusted_gps.keys(): adjusted_gps[breaktime_str] = pd.DataFrame(index=cell_grid.grid_points_for_cell(cell)[0], data={'intercept':np.nan, 'slope':np.nan}) adjusted_gps[breaktime_str].loc[gpi,['intercept','slope']]=[settings['slope'], settings['intercept']] if adjusted_ts_data.index.size == 0: adjusted_ts_data = adjusted_data.rename(columns={column_name: gpi}) else: adjusted_ts_data[gpi] = adjusted_data dataset.write(gpi,adjusted_data) for breaktime_str, data in adjusted_gps.iteritems(): points_to_netcdf(dataframe=data, path=adjusted_data_path, filename='adjustment_stats_' + breaktime_str) ''' '''
def test_save_load_basicgrid_irregular(self):
    """Save the irregular basic grid, load it again and compare both."""
    expected = self.basic_irregular
    grid_nc.save_grid(self.testfile, expected)
    reloaded = grid_nc.load_grid(self.testfile)
    assert expected == reloaded
def test_save_load_cellgrid(self):
    """Save the cell grid, load it again and compare both."""
    expected = self.cellgrid
    grid_nc.save_grid(self.testfile, expected)
    reloaded = grid_nc.load_grid(self.testfile)
    assert expected == reloaded
def time_to_netcdf(dataframe,
                   path,
                   gpi,
                   index_col_name=None,
                   filename=None,
                   file_meta_dict=None,
                   var_meta_dicts=None,
                   overwrite_gpi=None):
    """
    Write the time series of one grid point into a netcdf cell file.

    The file is opened in append mode if it exists already, otherwise it
    is created with one location slot per grid point of the cell. Every
    column of the data frame is written as one variable.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Data to write, one variable per column.
    path : str
        Directory where the netcdf file is stored.
    gpi : int
        Grid point index (in the GLDAS land grid) the data belongs to.
    index_col_name : str, optional (default: None)
        Name of the column that contains the dates. If not set, the index
        of the data frame is used.
    filename : str, optional (default: None)
        File name (without '.nc'). If not set, the name created by
        ``create_cellfile_name`` for the GPI is used.
    file_meta_dict, var_meta_dicts, overwrite_gpi : optional
        Currently not used; kept so that existing calls keep working.
    """
    grid = nc.load_grid(
        os.path.join(root.r, 'Datapool_processed', 'GLDAS',
                     'GLDAS_NOAH025_3H.020', 'ancillary',
                     'GLDASv2_025_land_grid.nc'))

    if index_col_name:
        dates = dataframe[index_col_name]
    else:
        dates = dataframe.index

    cell, default_filename = create_cellfile_name(gpi, grid)
    if not filename:
        filename = default_filename

    grid_points = grid.grid_points_for_cell(cell)[0]
    filepath = os.path.join(path, filename + '.nc')
    lonlat = grid.gpi2lonlat(gpi)

    dates = np.asarray(
        [np.datetime64(date).astype(datetime) for date in dates])

    is_new_file = not os.path.isfile(filepath)
    if is_new_file:
        ncfile = OrthoMultiTs(filepath, mode='w', n_loc=grid_points.size)
    else:
        ncfile = OrthoMultiTs(filepath, mode='a')

    try:
        if is_new_file:
            # without this error after 2nd file
            ncfile.variables['location_id'][:] = grid_points

        for var in dataframe.columns.values:
            ncfile.write_ts(loc_id=gpi,
                            data={var: dataframe[var].values},
                            dates=dates,
                            lon=lonlat[0],
                            lat=lonlat[1],
                            dates_direct=False)
    finally:
        # Close the file also when writing fails, so that no open handle
        # is left behind.
        ncfile.close()
def test_save_load_basicgrid(self):
    """Save the basic grid, load it again and compare both."""
    expected = self.basic
    grid_nc.save_grid(self.testfile, expected)
    reloaded = grid_nc.load_grid(self.testfile)
    assert expected == reloaded
def test_save_load_basicgrid_shape_gpis(self):
    """Save the basic grid with shape and gpis, load it again and compare."""
    expected = self.basic_shape_gpis
    grid_nc.save_grid(self.testfile, expected)
    reloaded = grid_nc.load_grid(self.testfile)
    assert expected == reloaded
def points_to_netcdf(dataframe,
                     path,
                     index_col_name=None,
                     filename=None,
                     file_meta_dict=None,
                     var_meta_dicts=None):
    '''
    Write spatial data (data series, data frame) to file:
        -pandas object must contain GPIs as index or in the selected
         column (index_col_name)

    Errors while filling the file are reported with a message and do not
    raise; the file is closed in any case.

    Parameters
    ----------
    dataframe (mandatory): pandas data frame or data series
        pandas object with data for writing to file, gpi as index (or in
        the column index_col_name). Every column is written as variable.
    path (mandatory): string
        path where netcdf file is saved to
    index_col_name (optional): string
        name of the column with location data in the pandas object
    filename (optional): string
        name of the file (without '.nc'), 'global' if not set. An existing
        file is opened in append mode.
    file_meta_dict (optional): dictionary
        additional meta information on the netcdf file (only applied if
        the file has no global attributes yet)
    var_meta_dicts (optional): dictionary of dictionaries
        additional meta information on the written variables, one
        dictionary per column name. Columns without entry get
        automatically generated meta data.
    '''
    grid = nc.load_grid(
        os.path.join(root.r, 'Datapool_processed', 'GLDAS',
                     'GLDAS_NOAH025_3H.020', 'ancillary',
                     'GLDASv2_025_land_grid.nc'))

    if not filename:
        filename = 'global'

    # Create or open netcdf file
    filepath = os.path.join(path, filename + '.nc')
    mode = "a" if os.path.isfile(filepath) else "w"
    ncfile = Dataset(filepath, mode, format="NETCDF4")

    try:
        globgrid = globalCellgrid()
        grid_points = grid.get_grid_points()
        global_grid_points = globgrid.get_grid_points()

        # TODO: Why -1
        latitudes = np.unique(global_grid_points[2])[::-1]
        longitudes = np.unique(global_grid_points[1])
        locations = grid_points[0]

        if index_col_name:
            locs = dataframe[index_col_name]
        else:
            locs = dataframe.index

        # pos contains the indices of points to process in the grid
        pos = datamask(np.array(locations), np.array(locs))

        n_gpis = locations.size

        # Create data dimensions for time series and global image
        if not ncfile.dimensions:
            ncfile.createDimension(dimname='locations', size=n_gpis)
            ncfile.createDimension(dimname='lat', size=latitudes.size)
            ncfile.createDimension(dimname='lon', size=longitudes.size)

        # TODO: Add Metadata for netcdf file to dict
        if not ncfile.ncattrs():
            # NOTE(review): 'geosptial_lon_max' looks like a typo of
            # 'geospatial_lon_max'. It is kept as it is, because existing
            # files and readers may rely on this attribute name.
            meta_dict = {
                'geospatial_lon_min': longitudes[0],
                'geosptial_lon_max': longitudes[-1],
                'geospatial_lat_min': latitudes[-1],
                'geospatial_lat_max': latitudes[0],
                'id': 'global',
                'date_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
            if file_meta_dict:
                meta_dict.update(file_meta_dict)
            ncfile.setncatts(meta_dict)

        # Create variables for the locations and add the values.
        # GPI, lat and lon are always filled when creating the file,
        # depending on the grid and independent of the processed GPIs.
        # No attributes are set for the location_id variable.
        update_loc_var(ncfile, locations, u'location_id', grid, pos)

        meta = {
            'units': 'degrees_east',
            'long_name': 'location longitude',
            'standard_name': 'longitude',
            'valid_range': '[-180. 180.]'
        }
        update_loc_var(ncfile, longitudes, u'lon', grid, None)
        ncfile.variables[u'lon'].setncatts(meta)

        meta = {
            'units': 'degrees_north',
            'long_name': 'location latitude',
            'standard_name': 'latitude',
            'valid_range': '[-90. 90.]'
        }
        update_loc_var(ncfile, latitudes, u'lat', grid, None)
        ncfile.variables[u'lat'].setncatts(meta)

        # Positions of the points in the global grid; the same for all
        # columns, therefore computed only once.
        glob_pos = datamask(global_grid_points[0], locs.values)

        for var in dataframe.columns.values:
            update_loc_var(ncfile, dataframe[var].values, var,
                           [globgrid, grid], glob_pos)

            # Use passed meta data if there is any for this variable. A
            # missing dict (None) must not abort writing the remaining
            # columns.
            var_meta = (var_meta_dicts or {}).get(var)
            if var_meta is None:
                # TODO: Make more useful auto meta data
                var_meta = {
                    'name': var,
                    'info': 'Automatically generated meta data'
                }
            ncfile.variables[var].setncatts(var_meta)

    except Exception as err:
        # Best effort as before (no exception is raised), but the problem
        # is reported instead of being dropped silently.
        print('Error during filling file %s: %r' % (filename, err))
    finally:
        ncfile.close()
def __init__(self, timeframe, breaktime, max_depth=0.1, path_ismn=None,
             gpi_cache_file=None, grid_path=None):
    """
    Set up the ISMN reader and the mapping from grid points to stations.

    For every ISMN station that measures soil moisture at a suitable depth
    and within the time frame, the nearest point of the land grid is
    looked up. Stations sharing the same nearest grid point are collected
    in one list, so that their data can be merged when importing data for
    that grid point. The mapping is cached in a pickle file and re-used
    on later instantiations.

    Parameters
    ----------
    timeframe : sequence
        Two elements (start, end). Both must provide ``strftime`` and be
        comparable with the station observation timestamps.
    breaktime : datetime-like
        Stored on the instance; also used (via ``strftime``) in the name
        of the default cache file.
    max_depth : float, optional (default: 0.1)
        A station is used if any of its soil moisture sensors has a lower
        depth boundary (rounded to 2 decimals) less than or equal to this
        value.
    path_ismn : str, optional (default: None)
        Directory with the ISMN data. If None, the original hard-coded
        location on drive ``U:`` is used.
    gpi_cache_file : str, optional (default: None)
        Pickle file that stores the grid point -> stations mapping. If
        None, the original hard-coded location on drive ``H:`` is used,
        with the time frame and break time encoded in the file name.
    grid_path : str, optional (default: None)
        Grid file used for the nearest grid point search. If None, the
        original hard-coded GLDAS grid on drive ``R:`` is used. The grid
        is only loaded when the mapping has to be (re)built.
    """
    if path_ismn is None:
        path_ismn = os.path.join(
            'U:\\', 'datasets', 'ISMN', 'insituUSA',
            'Data_seperate_files_19500101_20170321_2365493_xzeO_20170321')
    if gpi_cache_file is None:
        gpi_cache_file = r'H:\HomogeneityTesting_data\ismn_files\USA_gpinetsta_%s_%s_%s.pkl' % (
            timeframe[0].strftime('%Y-%m-%d'),
            breaktime.strftime('%Y-%m-%d'),
            timeframe[1].strftime('%Y-%m-%d'))
    if grid_path is None:
        grid_path = r"R:\Datapool_processed\GLDAS\GLDAS_NOAH025SUBP_3H\ancillary\GLDAS_025_grid.nc"

    self.breaktime = breaktime
    self.timeframe = timeframe
    self.max_depth = max_depth
    self.path_ismn = path_ismn
    self.ISMN_reader = ismn.ISMN_Interface(self.path_ismn)

    if os.path.isfile(gpi_cache_file):
        # NOTE: unpickling can execute arbitrary code. Only point
        # gpi_cache_file at files that were written by this class.
        with open(gpi_cache_file, 'rb') as f:
            self.gpis_with_netsta = pickle.load(f)
        return

    print('File for stations near GPI not found. Creating...')
    # The grid and the network list are only needed to build the mapping.
    land_grid = load_grid(grid_path)
    networks = self.ISMN_reader.list_networks()

    self.gpis_with_netsta = {}
    for i, network in enumerate(networks):
        print(network, '%i of %i' % (i, len(networks) - 1))
        for station in self.ISMN_reader.list_stations(network=network):
            station_obj = self.ISMN_reader.get_station(
                stationname=station, network=network)

            if 'soil moisture' not in station_obj.get_variables():
                continue

            # Check if any sensor measured in the correct depth
            _, depths_to = station_obj.get_depths('soil moisture')
            depths_to = np.unique(depths_to)
            if not any(np.around(depths_to, decimals=2) <= self.max_depth):
                continue

            # Check if station measured during the timeframe
            station_timeframe = station_obj.get_min_max_obs_timestamp()
            overlaps = (station_timeframe[0] < self.timeframe[1]) and \
                       (station_timeframe[1] > self.timeframe[0])
            if not overlaps:
                continue

            # Nearest grid point is only looked up for usable stations;
            # the returned distance is not needed.
            gpi, _ = land_grid.find_nearest_gpi(
                station_obj.longitude, station_obj.latitude)
            self.gpis_with_netsta.setdefault(gpi, []).append(
                (network, station))

    with open(gpi_cache_file, 'wb') as f:
        pickle.dump(self.gpis_with_netsta, f, pickle.HIGHEST_PROTOCOL)
def test_save_load_cellgrid_shape(self):
    """
    Round-trip check: the cell grid fixture ``cellgrid_shape`` written to
    ``testfile`` must compare equal to the grid loaded back from it.
    """
    target = self.testfile
    grid_nc.save_grid(target, self.cellgrid_shape)
    restored = grid_nc.load_grid(target)
    # Keep the fixture on the left so its __eq__ decides the comparison.
    assert self.cellgrid_shape == restored
def __init__(self, data_path,
             parameters=['SWI_001', 'SWI_005', 'SWI_010', 'SWI_015',
                         'SWI_020', 'SWI_040', 'SWI_060', 'SWI_100', 'SSF'],
             dt=None, version=None, grid_fname=None, read_bulk=True,
             fname_template='c_gls_SWI-TS_{dt}_C{cell}_ASCAT_V{version}',
             cell_fn='{:04d}'):
    """
    Set up the SWI time series reader.

    Parameters
    ----------
    data_path : str
        Directory that contains the time series files (and, by default,
        the static grid file).
    parameters : list, optional
        Variables to read; passed on to the parent reader. A list is
        copied first so the caller's list (or the shared default) cannot
        be modified through this object.
    dt : str, optional (default: None)
        Date part of the file names. If None, it is taken from the name
        of a file found in ``data_path``; this only works if the files
        follow the CGLS naming convention.
    version : str, optional (default: None)
        Version part of the file names (without the leading 'V'). If
        None, it is taken from the name of a file found in ``data_path``.
    grid_fname : str, optional (default: None)
        Grid file. If None, the static DGG file
        'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc' in
        ``data_path`` is used.
    read_bulk : bool, optional (default: True)
        Passed on to the I/O class via ``ioclass_kws``.
    fname_template : str, optional
        File name template with the fields ``{dt}``, ``{cell}`` and
        ``{version}``.
    cell_fn : str, optional (default: '{:04d}')
        Format string inserted for ``{cell}``, so the resulting file name
        format still contains a placeholder for the cell number.

    Raises
    ------
    IOError
        If ``dt`` or ``version`` has to be detected but no matching file
        is found in ``data_path``.
    """
    # Protect the mutable default (and caller-owned lists) from changes
    # made further down the line. Other values, e.g. None, are passed on
    # untouched.
    if isinstance(parameters, list):
        parameters = list(parameters)

    if grid_fname is None:
        grid_fname = os.path.join(
            data_path,
            'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
    grid = netcdf.load_grid(grid_fname, location_var_name='location_id',
                            subset_flag='land_flag')

    # detect datetime and version if not given
    if dt is None or version is None:
        # Wildcard only the unknown parts, so a detected value always
        # belongs to a file that matches the value that was passed.
        globstring = fname_template.format(
            dt="*" if dt is None else dt,
            cell="*",
            version="*" if version is None else version)
        # glob order is arbitrary; sort to make the detection repeatable.
        found_files = sorted(glob.glob(os.path.join(data_path, globstring)))
        if not found_files:
            raise IOError("No data found in {}".format(data_path))
        fn = os.path.splitext(os.path.basename(found_files[0]))[0]
        parts = fn.split('_')
        if dt is None:
            # this only works if the files follow the CGLS naming
            # convention; for everything else dt should be given as a
            # keyword
            dt = parts[3]
        if version is None:
            # last part is 'V<version>'; drop the leading 'V'
            version = parts[-1][1:]

    # All SWI and QFLAG variables share scale factor and dtype; SSF is
    # stored with scale factor 1.
    t_values = ('001', '005', '010', '015', '020', '040', '060', '100')
    scaled_vars = (['SWI_' + t for t in t_values] +
                   ['QFLAG_' + t for t in t_values])

    scale_factors = {name: 0.5 for name in scaled_vars}
    scale_factors['SSF'] = 1

    dtypes = {name: np.uint8 for name in scaled_vars}
    dtypes['SSF'] = np.uint8

    super(SWI_TS, self).__init__(
        data_path, grid,
        fn_format=fname_template.format(dt=dt, version=version,
                                        cell=cell_fn),
        parameters=parameters,
        scale_factors=scale_factors,
        dtypes=dtypes,
        autoscale=False,
        automask=False,
        ioclass_kws={'read_bulk': read_bulk,
                     'loc_ids_name': 'locations'})
def __init__(self, ts_path, grid_path=None, **kwargs):
    """
    Set up the CCI time series reader.

    Parameters
    ----------
    ts_path : str
        Directory that contains the time series files.
    grid_path : str, optional (default: None)
        Grid file to load. If None, 'grid.nc' inside ``ts_path`` is used.
    **kwargs
        Additional keyword arguments, passed on unchanged to the parent
        class constructor. Which keywords are accepted depends on the
        parent class, which is not visible here.
    """
    if grid_path is None:
        grid_path = os.path.join(ts_path, "grid.nc")

    grid = load_grid(grid_path)
    super(CCITs, self).__init__(ts_path, grid, **kwargs)