Example #1
def intersect_grids(grids, out_path=None):
    """
    Create a grid from the GPIs that are common to a list of grids.

    Parameters
    ----------
    grids : list
        Either a list of grid objects or a list of paths to grid files to load.
    out_path : str, optional (default: None)
        Directory where the intersected grid is stored as 'common_grid.nc'.
        If None is passed, the grid is not stored.

    Returns
    -------
    common_grid : pgg.CellGrid
        A grid containing only the GPIs that occur in all passed grids.
    """
    if all(isinstance(g, str) for g in grids):
        grids = [load_grid(path) for path in grids]
    grid_points = tuple(grid.get_grid_points()[0] for grid in grids)

    common_gpis = functools.reduce(np.intersect1d, grid_points)
    common_grid = grids[0].subgrid_from_gpis(common_gpis)  # type: pgg.CellGrid

    if out_path is not None:
        pgg.netcdf.save_grid(os.path.join(out_path, 'common_grid.nc'), common_grid,
                             subset_name='common_adjusted',
                             subset_meaning='LMP HOM QCM common adjusted points')
    return common_grid
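A minimal usage sketch for this helper (the grid file paths below are
hypothetical; it assumes all grids share one GPI numbering):

    common = intersect_grids(['/path/to/grid_a.nc', '/path/to/grid_b.nc'],
                             out_path='/path/to/output')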
Example #2
    def __init__(self, ts_path, grid_path=None, **kwargs):

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)
        super(ERATs, self).__init__(ts_path, grid, **kwargs)
Example #3
    def __init__(
            self,
            data_path,
            parameters=[
                'SWI_001', 'SWI_005', 'SWI_010', 'SWI_015', 'SWI_020',
                'SWI_040', 'SWI_060', 'SWI_100', 'SSF'
            ],
            dt='201612310000',
            version='3.0.1',
            grid_fname=None,
            read_bulk=True,
            fname_template='c_gls_SWI-TS_{dt}_C{{:04d}}_ASCAT_V{version}'):

        if grid_fname is None:
            grid_fname = os.path.join(
                data_path,
                'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
        grid = netcdf.load_grid(grid_fname,
                                location_var_name='location_id',
                                subset_flag='land_flag')

        scale_factors = {
            'SWI_001': 0.5,
            'SWI_005': 0.5,
            'SWI_010': 0.5,
            'SWI_015': 0.5,
            'SWI_020': 0.5,
            'SWI_040': 0.5,
            'SWI_060': 0.5,
            'SWI_100': 0.5,
            'SSF': 1
        }

        dtypes = {
            'SWI_001': np.uint8,
            'SWI_005': np.uint8,
            'SWI_010': np.uint8,
            'SWI_015': np.uint8,
            'SWI_020': np.uint8,
            'SWI_040': np.uint8,
            'SWI_060': np.uint8,
            'SWI_100': np.uint8,
            'SSF': np.uint8
        }

        super(SWI_TS,
              self).__init__(data_path,
                             grid,
                             fn_format=fname_template.format(dt=dt,
                                                             version=version),
                             parameters=parameters,
                             scale_factors=scale_factors,
                             dtypes=dtypes,
                             autoscale=False,
                             automask=False,
                             ioclass_kws={
                                 'read_bulk': read_bulk,
                                 'loc_ids_name': 'locations'
                             })
Example #4
    def __init__(self, ts_path, grid_path=None, remove_nans=False, drop_tz=True,
                 **kwargs):

        """
        Class for reading C3S SM time series after reshuffling.

        Parameters
        ----------
        ts_path : str
            Directory where the netcdf time series files are stored
        grid_path : str, optional (default: None)
            Path to the grid file that is used to organize the locations of
            the time series to read. If None is passed, grid.nc is searched
            for in ts_path.
        remove_nans : bool or dict, optional (default: False)
            Replace fill values in the SM time series. Either
                - a dict of form {parameter: {val_to_replace: replacement_val}, ...}
                - a dict of form {parameter: val_to_set_NaN, ...}
                - True to replace -9999 with NaN everywhere
                - False to do nothing
        drop_tz: bool, optional (default: True)
            Drop time zone information from time series

        Optional keyword arguments that are passed to the Gridded Base:
        ------------------------------------------------------------------------
            parameters : list, optional (default: None)
                Specific variable names to read; if None is passed, all are
                read.
            offsets : dict, optional (default: None)
                Offsets (values) that are added to the parameters (keys)
            scale_factors : dict, optional (default: None)
                Scale factors (values) that the parameters (keys) are
                multiplied with
            ioclass_kws : dict
                Optional keyword arguments to pass to the OrthoMultiTs class:
                ----------------------------------------------------------------
                    read_bulk : boolean, optional (default: False)
                        If set to True, the data of all locations is read into
                        memory and subsequent calls to read_ts read from the
                        cache and not from disk. This makes reading complete
                        files faster.
                    read_dates : boolean, optional (default: False)
                        If False, dates will not be read automatically but
                        only on specific request. Useful for bulk reading,
                        because the netCDF num2date routine is currently very
                        slow for big datasets.
        """

        # normalize the {parameter: value} shorthand to {parameter: {value: np.nan}}
        if isinstance(remove_nans, dict):
            for var, val in remove_nans.copy().items():
                if not isinstance(val, dict):
                    remove_nans[var] = {val: np.nan}

        self.remove_nans = remove_nans

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)

        self.drop_tz = drop_tz
        super(C3STs, self).__init__(ts_path, grid=grid, **kwargs)
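A short usage sketch for the remove_nans forms documented above (the data
path is hypothetical):

    reader = C3STs('/path/to/c3s_ts',
                   remove_nans={'sm': {-9999.: np.nan},  # explicit mapping
                                'flag': 255})  # shorthand: replace 255 with NaN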
Example #5
    def __init__(self, ts_path, grid_path=None):

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")
        else:
            grid_path = os.path.join(grid_path, "grid.nc")

        grid = netcdf.load_grid(grid_path)
        super(ERA5Ts, self).__init__(ts_path, grid)
Example #6
    def __init__(self, ts_path, exact_index=False, grid_path=None, **kwargs):
        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = nc.load_grid(grid_path)
        super(LPRMTs, self).__init__(ts_path, grid, automask=True, **kwargs)

        self.exact_index = exact_index
        if exact_index and (self.parameters is not None):
            self.parameters.append(self._t0)
Example #7
    def __init__(self, ts_path, grid_path=None, index_add_time=False,
                 drop_missing=True, **kwargs):
        """
        Class for reading SMOS time series after reshuffling images.
        Missing images are represented in the time series as rows where all
        variables are NaN.

        Parameters
        ----------
        ts_path : str
            Directory where the netcdf time series files are stored
        grid_path : str, optional (default: None)
            Path to the grid file that is used to organize the locations of
            the time series to read. If None is passed, grid.nc is searched
            for in ts_path.
        index_add_time : bool, optional (default: False)
            Add overpass time stamps to the data frame index. This requires
            the 'Days' and 'UTC_Seconds' variables to be available in the
            time series files.
        drop_missing : bool, optional (default: True)
            Drop rows in the time series where ALL variables are missing.

        Optional keyword arguments that are passed to the Gridded Base:
        ------------------------------------------------------------------------
            parameters : list, optional (default: None)
                Specific variable names to read; if None is passed, all are
                read.
            offsets : dict, optional (default: None)
                Offsets (values) that are added to the parameters (keys)
            scale_factors : dict, optional (default: None)
                Scale factors (values) that the parameters (keys) are
                multiplied with
            ioclass_kws : dict
                Optional keyword arguments to pass to the OrthoMultiTs class:
                ----------------------------------------------------------------
                    read_bulk : boolean, optional (default: False)
                        If set to True, the data of all locations is read into
                        memory and subsequent calls to read_ts read from the
                        cache and not from disk. This makes reading complete
                        files faster.
                    read_dates : boolean, optional (default: False)
                        If False, dates will not be read automatically but
                        only on specific request. Useful for bulk reading,
                        because the netCDF num2date routine is currently very
                        slow for big datasets.
                    autofill : bool, optional (default: True)
                        Fill missing values with NaNs
        """

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        self.drop_missing = drop_missing
        grid = load_grid(grid_path)
        super(SMOSTs, self).__init__(ts_path, grid, **kwargs)

        self.index_add_time = index_add_time
        if (self.parameters is not None) and self.index_add_time:
            for v in self._t0_vars.values():
                self.parameters.append(v)
Example #8
def calc_errors(gpis):

    outpath = Path('/work/GLEAM/errors')

    if not outpath.exists():
        outpath.mkdir(parents=True)

    # fname = outpath / ('part_%i.csv' % gpis[0])

    cci_gpis = np.flipud(np.arange(720 * 1440).reshape((720, 1440))).flatten()
    cci_grid = ncgrid.load_grid('/data_sets/ESA_CCI_L2/ESA-CCI-SOILMOISTURE-LAND_AND_RAINFOREST_MASK-fv04.2.nc',
                            subset_flag='land', subset_value=1.)

    asc_io = CCIDs('/data_sets/ESA_CCI_L2/data/ascata', grid=cci_grid)
    ams_io = CCIDs('/data_sets/ESA_CCI_L2/data/amsr2', grid=cci_grid)
    sma_io = CCIDs('/data_sets/ESA_CCI_L2/data/smap', grid=cci_grid)

    for cnt, gpi in enumerate(np.atleast_1d(gpis)):

        try:
            gleam_io = Dataset('/data_sets/GLEAM/_output/timeseries/%i.nc' % gpi)
            gle_ts = pd.Series(gleam_io.variables['w1'][:, 0],
                               index=num2date(gleam_io['time'][:], units=gleam_io['time'].units), name='GLEAM')

            asc_ts = asc_io.read(cci_gpis[gpi], only_valid=True)['sm']
            asc_ts.name = 'ASCAT'
            ams_ts = ams_io.read(cci_gpis[gpi], only_valid=True)['sm']
            ams_ts.name = 'AMSR2'
            sma_ts = sma_io.read(cci_gpis[gpi], only_valid=True)['sm']
            sma_ts.name = 'SMAP'

            df = pd.concat((gle_ts, asc_ts, ams_ts, sma_ts), axis='columns').dropna()

            result = {'n': len(df)}

            for i, ds1 in enumerate(['GLEAM', 'ASCAT', 'AMSR2']):
                for ds2 in ['ASCAT', 'AMSR2', 'SMAP'][i:]:
                    R, p = pearsonr(df[ds1].values, df[ds2].values)
                    result['R_'+ds1+'_'+ds2] = R
                    result['p_'+ds1+'_'+ds2] = p

            tc1 = TCA_calc(df[['GLEAM','ASCAT','AMSR2']], ref_ind=0)
            tc2 = TCA_calc(df[['GLEAM','ASCAT','SMAP']], ref_ind=0)

            for i,ds in enumerate(['GLEAM','ASCAT','AMSR2']):
                result['TC1_R2_'+ds] = tc1[0][i]
                result['TC1_RMSE_'+ds] = tc1[1][i]

            for i,ds in enumerate(['GLEAM','ASCAT','SMAP']):
                result['TC2_R2_'+ds] = tc2[0][i]
                result['TC2_RMSE_'+ds] = tc2[1][i]

            # write_output(fname, result, gpi)

            print('gpi %i finished (%i / %i).' % (gpi, cnt+1, len(np.atleast_1d(gpis))))

        except Exception:
            continue
Example #9
    def __init__(self,
                 ts_path,
                 grid=None,
                 exact_index=False,
                 clip_dates=None,
                 ioclass_kws=None,
                 **kwargs):
        """
        Read ESA CCI SM in time series format from netcdf files

        Parameters
        ----------
        ts_path : str
            Path to where the data is stored
        grid : str or pygeogrids.CellGrid, optional (default: None)
            Grid that the time series are searched on
        exact_index : bool, optional (default: False)
            Apply t0 to the daily time stamps to read the exact observation times.
        clip_dates : tuple[datetime, datetime], optional (default: None)
            Cut the time series to this date range (start, end)
        ioclass_kws : dict, optional (default: None)
            IO class kwargs used by pynetcf
        kwargs:
            Additional kwargs are given to pynetcf OrthoMultiTs.
        """
        self.t0 = 't0'  # observation time stamp variable

        if grid is None:
            grid = os.path.join(ts_path, "grid.nc")

        if ioclass_kws is None:
            ioclass_kws = {'read_bulk': True}
        else:
            ioclass_kws.setdefault('read_bulk', True)

        if not isinstance(grid, CellGrid):
            grid = nc.load_grid(grid)

        super(SmecvTs, self).__init__(ts_path,
                                      grid,
                                      automask=True,
                                      ioclass_kws=ioclass_kws,
                                      **kwargs)

        self.clip_dates = clip_dates

        self.exact_index = exact_index

        if (self.parameters is not None) and self.exact_index and \
                (self.t0 not in self.parameters):
            self.parameters.append(self.t0)
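A usage sketch for the clip_dates option documented above (the data path is
hypothetical):

    from datetime import datetime

    reader = SmecvTs('/path/to/smecv_ts', exact_index=True,
                     clip_dates=(datetime(2000, 1, 1), datetime(2010, 12, 31)))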
Example #10
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load the ECV grid from a netcdf file.
    This grid has 2D shape information; a rainforest mask is also included.
    The land mask is the same as the one defined in grid v4.

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object
    """
    return ncgrid.load_grid(get_grid_definition_filename(),
                            subset_flag=subset_flag)
Example #11
    def __init__(self, ts_path=None, grid_path=None, exact_index=False,
                 **kwargs):

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)
        super(SMOSTs, self).__init__(ts_path, grid, **kwargs)

        self.exact_index = exact_index
        if (self.parameters is not None) and self.exact_index:
            for v in self._t0_vars.values():
                self.parameters.append(v)
Example #12
def test_store_load_regular_2D_grid():
    """
    Test the storing/loading of a 2D grid when the gpis are in a custom
    ordering.
    """
    londim = np.arange(-180.0, 180.0, 60)
    latdim = np.arange(90.0, -90.0, -30)
    lons, lats = np.meshgrid(londim, latdim)
    gpis = np.arange(lons.flatten().size).reshape(lons.shape)
    grid = grids.BasicGrid(lons.flatten(), lats.flatten(),
                           gpis.flatten(), shape=lons.shape)
    testfile = tempfile.NamedTemporaryFile().name
    grid_nc.save_grid(testfile, grid)
    grid_loaded = grid_nc.load_grid(testfile)
    assert grid == grid_loaded
Example #13
    def __init__(self,
                 ts_path=None,
                 grid_path=None,
                 exact_index=False,
                 **kwargs):
        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)
        super(SMAPTs, self).__init__(ts_path, grid, **kwargs)

        self.exact_index = exact_index

        if self.exact_index and \
                (self.parameters is not None and self._t0_var not in self.parameters):
            self.parameters.append(self._t0_var)
Example #14
def pd_from_2Dnetcdf(filename, grid='global'):
    # type: (str, str) -> pd.DataFrame
    '''
    :param filename: Path to the netcdf file
    :param grid: Set "global" (all grid points) or "land" (only land points)
        to select which points are returned
    :return: DataFrame with GPIs as index and the data from the netcdf file
        as columns
    '''
    # TODO: Delete this function when everything runs
    ncfile = Dataset(filename)

    lons_file = ncfile.variables['lon'][:]
    # lats in same order as glob grid ascending
    lats_file = np.flipud(ncfile.variables['lat'][:])

    #global grid how it is in netcdf file
    lons_file, lats_file = np.meshgrid(lons_file, lats_file)

    var_names = []
    for var_name in ncfile.variables.keys():
        if ncfile.variables[var_name].dimensions == ('lat', 'lon'):
            var_names.append(var_name)

    # data in same order as glob grid ascending

    data = {name: np.flipud(ncfile.variables[name][:]) for name in var_names}

    data['lon'] = lons_file
    data['lat'] = lats_file

    data_flat = {}
    for name, arr in data.items():
        data_flat[name] = arr.flatten()

    dataframe = pd.DataFrame(data=data_flat)

    if grid == 'global':
        thegrid = globalCellgrid()
    elif grid == 'land':
        thegrid = nc.load_grid(
            r"D:\users\wpreimes\datasets\grids\qdeg_land_grid.nc")
    else:
        raise Exception("select 'land' or 'global' for returned GPIs")

    grid_points = thegrid.get_grid_points()[0]

    return dataframe.loc[grid_points]
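A usage sketch (the netcdf path is hypothetical; the file must contain
variables with ('lat', 'lon') dimensions, and the 'land' option relies on the
hard-coded land grid path above):

    df = pd_from_2Dnetcdf('/path/to/image.nc', grid='land')
    # GPIs as index, one column per 2D variable plus 'lon' and 'lat'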
Example #15
    def __init__(self, path, grid_path,
                 grid_info_filename='TUW_WARP5_grid_info_2_1.nc',
                 variables=None):

        grid = ncgrid.load_grid(os.path.join(grid_path, grid_info_filename),
                                subset_flag='land')

        self.path = path
        self.grid_path = grid_path
        self.grid_info_filename = grid_info_filename

        self.variables = variables

        if self.variables is None:
            self.variables = ['vod']

        super(AscatVodTs, self).__init__(path, grid)
Example #16
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load the ECV grid from a netcdf file.
    This grid has 2D shape information; a rainforest mask is also included.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. 'land' or 'rainforest'

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset, in quarter degree resolution.
    """
    return ncgrid.load_grid(get_grid_definition_filename(version='04.2'),
                            subset_flag=subset_flag,
                            subset_value=1.)
Example #17
    def _load_subset(subset_flag: Optional[str],
                     subset_value: Union[int, list]) -> Optional[np.ndarray]:
        """Load grid points for the subset from the definition file."""

        if subset_flag is not None:
            subset_grid = ncgrid.load_grid(
                get_grid_definition_filename(version='05.2'),
                subset_flag=subset_flag,
                subset_value=subset_value)

            if isinstance(subset_grid.activegpis, np.ma.masked_array):
                subset = subset_grid.activegpis.data
            else:
                subset = subset_grid.activegpis
        else:
            subset = None

        return subset
Example #18
    def __init__(self, ts_path, grid_path=None, remove_nans=False, **kwargs):

        '''
        Class for reading C3S SM time series after reshuffling.

        Parameters
        ----------
        ts_path : str
            Directory where the netcdf time series files are stored
        grid_path : str, optional (default: None)
            Path to the grid file that is used to organize the locations of
            the time series to read. If None is passed, grid.nc is searched
            for in ts_path.
        remove_nans : bool, optional (default: False)
            Replace -9999 with np.nan in the time series

        Optional keyword arguments that are passed to the Gridded Base:
        ------------------------------------------------------------------------
            parameters : list, optional (default: None)
                Specific variable names to read; if None is passed, all are
                read.
            offsets : dict, optional (default: None)
                Offsets (values) that are added to the parameters (keys)
            scale_factors : dict, optional (default: None)
                Scale factors (values) that the parameters (keys) are
                multiplied with
            ioclass_kws : dict
                Optional keyword arguments to pass to the OrthoMultiTs class:
                ----------------------------------------------------------------
                    read_bulk : boolean, optional (default: False)
                        If set to True, the data of all locations is read into
                        memory and subsequent calls to read_ts read from the
                        cache and not from disk. This makes reading complete
                        files faster.
                    read_dates : boolean, optional (default: False)
                        If False, dates will not be read automatically but
                        only on specific request. Useful for bulk reading,
                        because the netCDF num2date routine is currently very
                        slow for big datasets.
        '''
        self.remove_nans = remove_nans

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)

        super(C3STs, self).__init__(ts_path, grid=grid, **kwargs)
Example #19
    def __init__(self,
                 path,
                 grid_path,
                 grid_info_filename='TUW_WARP5_grid_info_2_1.nc',
                 variables=None):

        grid = ncgrid.load_grid(os.path.join(grid_path, grid_info_filename),
                                subset_flag='land')

        self.path = path
        self.grid_path = grid_path
        self.grid_info_filename = grid_info_filename

        self.variables = variables

        if self.variables is None:
            self.variables = ['vod']

        super(AscatVodTs, self).__init__(path, grid)
Example #20
def SMECV_Grid_v042(subset_flag='land'):
    """
    Load a SMECV Grid as used in the production of ESA CCI SM v4.
    This grid has 2D shape information; a rainforest mask is also included.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. 'land' or 'rainforest'

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset. In Quarter Degree resolution.
    """

    warnings.warn(
        "SMECV Grid v4 is deprecated. Please use a newer grid version.",
        DeprecationWarning)

    lon, lat, gpis, cells, shape = meshgrid(resolution=0.25,
                                            cellsize=5.,
                                            flip_lats=True)

    if subset_flag is not None:
        subset_grid = ncgrid.load_grid(
            get_grid_definition_filename(version='04.2'),
            subset_flag=subset_flag,
            subset_value=1.)
        subset = subset_grid.subset
    else:
        subset = None

    return CellGrid(lon,
                    lat,
                    gpis=gpis,
                    subset=subset,
                    cells=cells,
                    shape=shape)
Example #21
    def __init__(self, ts_path, grid_path=None, **kwargs):
        '''
        Class for reading SMAP time series after reshuffling.

        Parameters
        ----------
        ts_path : str
            Directory where the netcdf time series files are stored
        grid_path : str, optional (default: None)
            Path to the grid file that is used to organize the locations of
            the time series to read. If None is passed, grid.nc is searched
            for in ts_path.

        Optional keyword arguments that are passed to the Gridded Base:
        ------------------------------------------------------------------------
            parameters : list, optional (default: None)
                Specific variable names to read; if None is passed, all are
                read.
            offsets : dict, optional (default: None)
                Offsets (values) that are added to the parameters (keys)
            scale_factors : dict, optional (default: None)
                Scale factors (values) that the parameters (keys) are
                multiplied with
            ioclass_kws : dict, optional
                Optional keyword arguments to pass to the OrthoMultiTs class:
                ----------------------------------------------------------------
                    read_bulk : boolean, optional (default: False)
                        If set to True, the data of all locations is read into
                        memory and subsequent calls to read_ts read from the
                        cache and not from disk. This makes reading complete
                        files faster.
                    read_dates : boolean, optional (default: False)
                        If False, dates will not be read automatically but
                        only on specific request. Useful for bulk reading,
                        because the netCDF num2date routine is currently very
                        slow for big datasets.
        '''

        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = ncdf.load_grid(grid_path)
        super(SMAPTs, self).__init__(ts_path, grid, **kwargs)
Example #22
def SMECV_Grid_v052(subset_flag='land', subset_value=1.):
    """
    Load the ECV grid from a netcdf file.
    This grid has 2D shape information; a rainforest mask is also included.
    The land mask is the same as the one defined in grid v4. This version
    also contains land cover information that can be used for filtering.

    Parameters
    ----------
    subset_flag : str or None, optional (default: 'land')
        Select a subset that should be loaded, e.g. 'land', 'high_vod',
        'rainforest' or 'cci_lc'
    subset_value : float or list, optional (default: 1.)
        Select one or more values of the variable that defines the subset,
        i.e. 1. for masks (high_vod, land), or a float or list of floats for
        one or multiple ESA CCI Landcover classes (e.g. 190 to load urban
        points only)

    Returns
    -------
    grid : pygeogrids.CellGrid
        CellGrid object of the selected subset, in quarter degree resolution.
    """
    return ncgrid.load_grid(get_grid_definition_filename(version='05.2'),
                            subset_flag=subset_flag,
                            subset_value=subset_value)
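A usage sketch based on the subset parameters documented above, e.g. loading
only points of the ESA CCI Landcover class 190 (urban):

    land_grid = SMECV_Grid_v052()  # default: 'land' mask with subset_value=1.
    urban_grid = SMECV_Grid_v052(subset_flag='cci_lc', subset_value=190.)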
Example #23
    def __init__(self,
                 data_path,
                 parameters=[
                     'SWI_001', 'SWI_005', 'SWI_010', 'SWI_015', 'SWI_020',
                     'SWI_040', 'SWI_060', 'SWI_100', 'SSF'
                 ],
                 dt=None,
                 version=None,
                 grid_fname=None,
                 read_bulk=True,
                 fname_template='c_gls_SWI-TS_{dt}_C{cell}_ASCAT_V{version}',
                 cell_fn='{:04d}'):

        if grid_fname is None:
            grid_fname = os.path.join(
                data_path,
                'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
        grid = netcdf.load_grid(grid_fname,
                                location_var_name='location_id',
                                subset_flag='land_flag')

        # detect datetime and version if not given
        if dt is None or version is None:
            globstring = fname_template.format(dt="*", cell="*", version="*")
            found_files = glob.glob(os.path.join(data_path, globstring))
            if len(found_files) == 0:
                raise IOError("No data found in {}".format(data_path))
            fn = found_files[0]
            fn = os.path.splitext(os.path.basename(fn))[0]
            parts = fn.split('_')
        if dt is None:
            # this only works if the files follow the CGLS naming convention
            # for everything else dt should be given as a keyword
            dt = parts[3]
        if version is None:
            version = parts[-1][1:]

        scale_factors = {
            'SWI_001': 0.5,
            'SWI_005': 0.5,
            'SWI_010': 0.5,
            'SWI_015': 0.5,
            'SWI_020': 0.5,
            'SWI_040': 0.5,
            'SWI_060': 0.5,
            'SWI_100': 0.5,
            'QFLAG_001': 0.5,
            'QFLAG_005': 0.5,
            'QFLAG_010': 0.5,
            'QFLAG_015': 0.5,
            'QFLAG_020': 0.5,
            'QFLAG_040': 0.5,
            'QFLAG_060': 0.5,
            'QFLAG_100': 0.5,
            'SSF': 1
        }

        dtypes = {
            'SWI_001': np.uint8,
            'SWI_005': np.uint8,
            'SWI_010': np.uint8,
            'SWI_015': np.uint8,
            'SWI_020': np.uint8,
            'SWI_040': np.uint8,
            'SWI_060': np.uint8,
            'SWI_100': np.uint8,
            'QFLAG_001': np.uint8,
            'QFLAG_005': np.uint8,
            'QFLAG_010': np.uint8,
            'QFLAG_015': np.uint8,
            'QFLAG_020': np.uint8,
            'QFLAG_040': np.uint8,
            'QFLAG_060': np.uint8,
            'QFLAG_100': np.uint8,
            'SSF': np.uint8
        }

        super(SWI_TS,
              self).__init__(data_path,
                             grid,
                             fn_format=fname_template.format(dt=dt,
                                                             version=version,
                                                             cell=cell_fn),
                             parameters=parameters,
                             scale_factors=scale_factors,
                             dtypes=dtypes,
                             autoscale=False,
                             automask=False,
                             ioclass_kws={
                                 'read_bulk': read_bulk,
                                 'loc_ids_name': 'locations'
                             })
Example #24
def run_adjustment():

    adjust_obj = Adjust(r"H:\HomogeneityTesting_data\output\CCI31EGU",
                        'adjusted_cci', 'merra2', 0.1)

    grid_path = r"D:\users\wpreimes\datasets\grids\qdeg_land_grid.nc"
    cell_grid = nc.load_grid(grid_path)
    cells = cells_for_continent('Australia')

    adjusted_data_path = r"D:\users\wpreimes\datasets\CCI_31_D\adjusted_temp"
    # TODO: Add a separate list for points that should have been adjusted but could not be
    unadjusted_gps = {
        'gpi': [],
        'slope': [],
        'intercept': [],
        'adjustment_class': []
    }  # 0 = unadjusted, 1 = adjusted

    dataset = GriddedNcIndexedRaggedTs(path=adjusted_data_path,
                                       grid=cell_grid,
                                       mode='w')
    adjusted_gps = GriddedPointData(os.path.join(adjusted_data_path,
                                                 'adjusted_gps.nc'),
                                    cell_grid,
                                    mode='w')

    for cell_index, cell in enumerate(cells):
        gpis = cell_grid.grid_points_for_cell(cell)[0]
        adjustment_status = {'gpi': [], 'status': []}
        adjustment_stats = {}
        for index, gpi in enumerate(gpis):
            if index % 50 == 0:
                print('%i of %i' % (index, gpis.size))
            try:
                adj_settings, adjusted_data = adjust_obj.adjust_ts(gpi)
            except Exception:
                adjustment_status['gpi'].append(gpi)
                adjustment_status['status'].append(0)
                continue
            for breaktime_str, settings in adj_settings.items():
                if breaktime_str not in adjustment_stats:
                    adjustment_stats[breaktime_str] = {
                        'gpi': [],
                        'intercept': [],
                        'slope': []
                    }

                adjustment_stats[breaktime_str]['gpi'].append(gpi)
                adjustment_stats[breaktime_str]['intercept'].append(
                    adj_settings[breaktime_str]['intercept'])
                adjustment_stats[breaktime_str]['slope'].append(
                    adj_settings[breaktime_str]['slope'])

            if adjusted_data.columns.values[0] == 'not_adjusted':
                adjustment_status['gpi'].append(gpi)
                adjustment_status['status'].append(0)
            if adjusted_data.columns.values[0] == 'adjusted':
                adjustment_status['gpi'].append(gpi)
                adjustment_status['status'].append(1)

            dataset.write(gpi, adjusted_data)
        dataset.close()

        points_to_netcdf(pd.DataFrame(
            index=adjustment_status['gpi'],
            data={'status': adjustment_status['status']}),
                         path=adjusted_data_path,
                         filename='adjustment_status')
        for breaktime_str in adjustment_stats.keys():
            points_to_netcdf(pd.DataFrame(
                index=adjustment_stats[breaktime_str]['gpi'],
                data={
                    'intercept': adjustment_stats[breaktime_str]['intercept'],
                    'slope': adjustment_stats[breaktime_str]['slope']
                }),
                             path=adjusted_data_path,
                             filename=breaktime_str + '_adj_stats')
        '''
        adjusted_gps = {}
        adjusted_ts_data = pd.DataFrame()
        if os.path.isfile(os.path.join(adjusted_data_path, str(cell) + '.nc')): continue  # already processed
        print('cell %i of %i' % (cell_index, len(cells)))
        gpis = cell_grid.grid_points_for_cell(cell)[0]
        for index, gpi in enumerate(gpis):
            if index % 50 == 0:
                print('%i of %i' % (index, gpis.size))
            try:
                adj_settings, adjusted_data = adjust_obj.adjust_ts(gpi)
            except:
                continue
            column_name = adjusted_data.columns.values[0]
            if column_name == 'not_adjusted':
                unadjusted_gps['gpi'].append(gpi)
            if column_name == 'adjusted':
                for breaktime_str, settings in adj_settings.iteritems():
                    if breaktime_str not in adjusted_gps.keys():
                        adjusted_gps[breaktime_str] = pd.DataFrame(index=cell_grid.grid_points_for_cell(cell)[0],
                                                                   data={'intercept':np.nan, 'slope':np.nan})

                    adjusted_gps[breaktime_str].loc[gpi,['intercept','slope']]=[settings['slope'], settings['intercept']]

            if adjusted_ts_data.index.size == 0:
                adjusted_ts_data = adjusted_data.rename(columns={column_name: gpi})
            else:
                adjusted_ts_data[gpi] = adjusted_data

            dataset.write(gpi,adjusted_data)

        for breaktime_str, data in adjusted_gps.iteritems():
            points_to_netcdf(dataframe=data, path=adjusted_data_path,
                             filename='adjustment_stats_' + breaktime_str)

        '''
Example #25
    def test_save_load_basicgrid_irregular(self):
        grid_nc.save_grid(self.testfile,
                          self.basic_irregular)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic_irregular == loaded_grid
Example #26
    def test_save_load_cellgrid(self):
        grid_nc.save_grid(self.testfile,
                          self.cellgrid)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.cellgrid == loaded_grid
Example #27
def time_to_netcdf(dataframe,
                   path,
                   gpi,
                   index_col_name=None,
                   filename=None,
                   file_meta_dict=None,
                   var_meta_dicts=None,
                   overwrite_gpi=None):

    grid = nc.load_grid(
        os.path.join(root.r, 'Datapool_processed', 'GLDAS',
                     'GLDAS_NOAH025_3H.020', 'ancillary',
                     'GLDASv2_025_land_grid.nc'))

    if index_col_name:
        dates = dataframe[index_col_name]
    else:
        dates = dataframe.index

    calendar = 'standard'
    units = 'days since 1900-01-01 00:00:00'

    dates_num = np.sort(date2num(dates.tolist(), units, calendar))

    if not filename:
        cell, filename = create_cellfile_name(gpi, grid)
    else:
        cell, _ = create_cellfile_name(gpi, grid)

    grid_points = grid.grid_points_for_cell(cell)[0]

    filepath = os.path.join(path, filename + '.nc')

    lonlat = grid.gpi2lonlat(gpi)

    if os.path.isfile(filepath):
        ncfile = OrthoMultiTs(filepath, mode='a')
    else:
        ncfile = OrthoMultiTs(filepath, mode='w', n_loc=grid_points.size)
        ncfile.variables[
            'location_id'][:] = grid_points  # without this, an error occurs after the 2nd file

    dates = [np.datetime64(date).astype(datetime) for date in dates]
    dates = np.asarray(dates)

    for var in dataframe.columns.values:
        ncfile.write_ts(loc_id=gpi,
                        data={var: dataframe[var].values},
                        dates=dates,
                        lon=lonlat[0],
                        lat=lonlat[1],
                        dates_direct=False)
    '''
    if ncfile.get_time_variable_overlap(dates).size!=dataframe.index.size:
        ncfile.extend_time(np.ndarray.tolist(dates))
        sort_order=np.argsort(ncfile.variables['time'][:])
        if all(sort_order == np.array(range(sort_order.size)))==False:
            ncfile.variables['time'][:]=ncfile.variables['time'][:][sort_order]
        
        for var in dataframe.columns.values: 
           ncfile.write_ts(loc_id=gpi,data={var:dataframe[var].values},
                           dates=dates,
                           lon=lonlat[0],lat=lonlat[1],dates_direct=False)
    else:
        for var in dataframe.columns.values: 
           ncfile.write_ts(loc_id=[gpi],data={var:dataframe[var].values},
                           dates=dates,lon=[lonlat[0]],lat=[lonlat[1]],
                           dates_direct=False) 
    
    
        for var in dataframe.columns.values:
            for idx in range(ncfile.variables[var].shape[0]):
                ncfile.variables[var][:][idx].mask=new_dates_mask
    
    new_dates_mask=np.in1d(ncfile.variables['time'][:],dates_num,invert=True)
    for var in dataframe.columns.values:
        ncfile.write_ts(loc_id=gpi,data={var:dataframe[var].values},dates=dates,lon=lonlat[0],lat=lonlat[1],dates_direct=False)
    
        #if type(ncfile.variables[var][:])!=np.ma.core.MaskedArray:
            #ncfile.variables[var][:]=np.ma.masked_array(data=ncfile.variables[var][:],mask=np.full((ncfile.variables[var][:].shape),False))

        
        if sort_order:
            for i,ts in enumerate(ncfile.variables[var][:]):
                ncfile.variables[var][:][i]=ncfile.variables[var][:][i][sort_order]
        if ncfile.get_time_variable_overlap(dates).size==dataframe.index.size and \
           var in ncfile.variables.keys():
            if overwrite_gpi==False: 
                continue
        else:
        
            
         
        #Calculate sort order, in case that the added time values are BEFORE the existing ones, sort time and time dependent values     
        #TODO: Make this faster or change package
        sort_order=np.argsort(ncfile.variables['time'][:])
        if not all(sort_order == np.array(range(sort_order.size))):
            ncfile.variables['time'][:]=ncfile.variables['time'][:][sort_order]  
            for var in dataframe.columns.values:
                for i,ts in enumerate(ncfile.variables[var][:]):
                    ncfile.variables[var][:][i]=ncfile.variables[var][:][i][sort_order]
    '''

    ncfile.close()
Example #28
    def test_save_load_basicgrid(self):
        grid_nc.save_grid(self.testfile,
                          self.basic)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic == loaded_grid
Example #29
    def test_save_load_basicgrid_shape_gpis(self):
        grid_nc.save_grid(self.testfile,
                          self.basic_shape_gpis)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic_shape_gpis == loaded_grid
Example #30
def points_to_netcdf(dataframe,
                     path,
                     index_col_name=None,
                     filename=None,
                     file_meta_dict=None,
                     var_meta_dicts=None):
    '''
    Write spatial data (data series, data frame) to file.
    The pandas object must contain GPIs as index or in the selected
    column (index_col_name).

    Parameters
    ----------
    dataframe (mandatory): pandas data frame or data series
        pandas object with data for writing to file
        for time series data: date time as index
        for spatial data: gpi as index
    path (mandatory): string
        path where the netcdf file is saved to
    index_col_name (optional): string
        name of the column with time/location data in the pandas object
    filename (optional): string
        for time series data: the filename is automatically "*cell*.nc"
        for spatial data: select a file name
    file_meta_dict (optional): dictionary
        additional meta information on the netcdf file
    var_meta_dicts (optional): dictionary of dictionaries
        additional meta information on the written variables,
        one dictionary per column in the dataframe
    '''

    grid = nc.load_grid(
        os.path.join(root.r, 'Datapool_processed', 'GLDAS',
                     'GLDAS_NOAH025_3H.020', 'ancillary',
                     'GLDASv2_025_land_grid.nc'))

    if not filename:
        filename = 'global'

    #Create or open netcdf cell file
    if os.path.isfile(os.path.join(path, filename + '.nc')):
        ncfile = Dataset(os.path.join(path, filename + '.nc'),
                         "a",
                         format="NETCDF4")
    else:
        ncfile = Dataset(os.path.join(path, filename + '.nc'),
                         "w",
                         format="NETCDF4")
    try:
        globgrid = globalCellgrid()
        grid_points = grid.get_grid_points()
        global_grid_points = globgrid.get_grid_points()

        #TODO: Why -1
        latitudes, longitudes = np.unique(
            global_grid_points[2])[::-1], np.unique(global_grid_points[1])
        locations = grid_points[0]

        if index_col_name:
            locs = dataframe[index_col_name]
        else:
            locs = dataframe.index
        #glob_pos contains the indices of points to process in the overall grid
        pos = datamask(np.array(locations), np.array(locs))

        n_gpis = locations.size

        #Create data dimensions for Time series and global image
        if not ncfile.dimensions:
            ncfile.createDimension(dimname='locations', size=n_gpis)
            ncfile.createDimension(dimname='lat', size=latitudes.size)
            ncfile.createDimension(dimname='lon', size=longitudes.size)
        #TODO: Add Metadata for netcdf file to dict
        if not ncfile.ncattrs():
            meta_dict = {
                'geospatial_lon_min': longitudes[0],
                'geospatial_lon_max': longitudes[-1],
                'geospatial_lat_min': latitudes[-1],
                'geospatial_lat_max': latitudes[0],
                'id': 'global',
                'date_created': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }
            if file_meta_dict:
                meta_dict.update(file_meta_dict)
            ncfile.setncatts(meta_dict)

        #Create variable for locations and add value
        # GPI, lat and lon are always filled on creation according to the
        # grid, independent of the processed GPI.
        # Instead of None: gpi_index: only for the processed gpi are the
        # index, lat and lon saved to the file.
        meta = {
            'long_name': 'Location Index',
            'standard_name': 'GPI',
            'valid_range': '[0 Grid Dependent]'
        }
        update_loc_var(ncfile, locations, u'location_id', grid, pos)
        meta = {
            'units': 'degrees_east',
            'long_name': 'location longitude',
            'standard_name': 'longitude',
            'valid_range': '[-180. 180.]'
        }
        update_loc_var(ncfile, longitudes, u'lon', grid, None)
        ncfile.variables[u'lon'].setncatts(meta)
        meta = {
            'units': 'degrees_north',
            'long_name': 'location latitude',
            'standard_name': 'latitude',
            'valid_range': '[-90. 90.]'
        }
        update_loc_var(ncfile, latitudes, u'lat', grid, None)
        ncfile.variables[u'lat'].setncatts(meta)

        for i, var in enumerate(dataframe.columns.values):
            glob_pos = datamask(global_grid_points[0], locs.values)
            update_loc_var(ncfile, dataframe[var].values, var,
                           [globgrid, grid], glob_pos)
            try:
                ncfile.variables[var].setncatts(var_meta_dicts[var])
            except KeyError:
                ##TODO: Make more useful auto meta data
                var_meta_auto = {
                    'name': var,
                    'info': 'Automatically generated meta data'
                }
                ncfile.variables[var].setncatts(var_meta_auto)

    except Exception:
        #TODO: handle the case that no metadata was passed
        #print('Error during filling file %s'%filename)
        pass

    ncfile.close()
Example #31
    def __init__(self, timeframe, breaktime, max_depth=0.1):
        # Create a list of gpis nearest to the stations of the dataset
        # If a gpi is nearest for multiple stations,
        # create a list of stations for these gpis that have to be merged
        # when importing data for the gpi
        path_ismn_usa = os.path.join(
            'U:\\', 'datasets', 'ISMN', 'insituUSA',
            'Data_seperate_files_19500101_20170321_2365493_xzeO_20170321')

        self.breaktime = breaktime
        self.timeframe = timeframe
        self.max_depth = max_depth
        self.path_ismn = path_ismn_usa
        self.ISMN_reader = ismn.ISMN_Interface(self.path_ismn)
        networks = self.ISMN_reader.list_networks()

        defaultfile = r'H:\HomogeneityTesting_data\ismn_files\USA_gpinetsta_%s_%s_%s.pkl' % (
            timeframe[0].strftime('%Y-%m-%d'), breaktime.strftime('%Y-%m-%d'),
            timeframe[1].strftime('%Y-%m-%d'))

        land_grid = load_grid(
            r"R:\Datapool_processed\GLDAS\GLDAS_NOAH025SUBP_3H\ancillary\GLDAS_025_grid.nc"
        )

        if os.path.isfile(defaultfile):
            with open(defaultfile, 'rb') as f:
                self.gpis_with_netsta = pickle.load(f)
        else:
            print('File for stations near GPI not found. Creating...')
            self.gpis_with_netsta = {}
            # IDS of measurements of valid variable and depth

            for i, network in enumerate(networks):
                print(network, '%i of %i' % (i, len(networks) - 1))
                stations = self.ISMN_reader.list_stations(network=network)
                for station in stations:
                    station_obj = self.ISMN_reader.get_station(
                        stationname=station, network=network)
                    gpi, dist = land_grid.find_nearest_gpi(
                        station_obj.longitude, station_obj.latitude)

                    variables = station_obj.get_variables()
                    if 'soil moisture' in variables:
                        depths_from, depths_to = station_obj.get_depths(
                            'soil moisture')
                        depths_from = np.unique(depths_from)
                        depths_to = np.unique(depths_to)

                        # Check if any sensor measured in the correct depth
                        if any(
                                np.around(depths_to, decimals=2) <=
                                self.max_depth):
                            station_timeframe = \
                                station_obj.get_min_max_obs_timestamp()
                            # Check if station measured during the timeframe

                            if (station_timeframe[0] < self.timeframe[1]) and \
                                    (station_timeframe[1] > self.timeframe[0]):

                                if gpi in self.gpis_with_netsta.keys():
                                    self.gpis_with_netsta[gpi].append(
                                        (network, station))
                                else:
                                    self.gpis_with_netsta.update(
                                        {gpi: [(network, station)]})

            with open(defaultfile, 'wb') as f:
                pickle.dump(self.gpis_with_netsta, f, pickle.HIGHEST_PROTOCOL)
Example #32
    def test_save_load_basicgrid_shape_gpis(self):
        grid_nc.save_grid(self.testfile,
                          self.basic_shape_gpis)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic_shape_gpis == loaded_grid
Example #33
    def test_save_load_basicgrid_irregular(self):
        grid_nc.save_grid(self.testfile,
                          self.basic_irregular)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic_irregular == loaded_grid
Example #34
    def test_save_load_cellgrid_shape(self):
        grid_nc.save_grid(self.testfile,
                          self.cellgrid_shape)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.cellgrid_shape == loaded_grid
Example #35
    def __init__(self, data_path, parameters=['SWI_001', 'SWI_005', 'SWI_010',
                                              'SWI_015', 'SWI_020', 'SWI_040',
                                              'SWI_060', 'SWI_100', 'SSF'],
                 dt=None, version=None,
                 grid_fname=None, read_bulk=True,
                 fname_template='c_gls_SWI-TS_{dt}_C{cell}_ASCAT_V{version}',
                 cell_fn='{:04d}'):

        if grid_fname is None:
            grid_fname = os.path.join(
                data_path, 'c_gls_SWI-STATIC-DGG_201501010000_GLOBE_ASCAT_V3.0.1.nc')
        grid = netcdf.load_grid(grid_fname, location_var_name='location_id',
                                subset_flag='land_flag')

        # detect datetime and version if not given
        if dt is None or version is None:
            globstring = fname_template.format(dt="*",
                                               cell="*",
                                               version="*")
            found_files = glob.glob(os.path.join(data_path, globstring))
            if len(found_files) == 0:
                raise IOError("No data found in {}".format(data_path))
            fn = found_files[0]
            fn = os.path.splitext(os.path.basename(fn))[0]
            parts = fn.split('_')
        if dt is None:
            # this only works if the files follow the CGLS naming convention
            # for everything else dt should be given as a keyword
            dt = parts[3]
        if version is None:
            version = parts[-1][1:]

        scale_factors = {'SWI_001': 0.5,
                         'SWI_005': 0.5,
                         'SWI_010': 0.5,
                         'SWI_015': 0.5,
                         'SWI_020': 0.5,
                         'SWI_040': 0.5,
                         'SWI_060': 0.5,
                         'SWI_100': 0.5,
                         'QFLAG_001': 0.5,
                         'QFLAG_005': 0.5,
                         'QFLAG_010': 0.5,
                         'QFLAG_015': 0.5,
                         'QFLAG_020': 0.5,
                         'QFLAG_040': 0.5,
                         'QFLAG_060': 0.5,
                         'QFLAG_100': 0.5,
                         'SSF': 1}

        dtypes = {'SWI_001': np.uint8,
                  'SWI_005': np.uint8,
                  'SWI_010': np.uint8,
                  'SWI_015': np.uint8,
                  'SWI_020': np.uint8,
                  'SWI_040': np.uint8,
                  'SWI_060': np.uint8,
                  'SWI_100': np.uint8,
                  'QFLAG_001': np.uint8,
                  'QFLAG_005': np.uint8,
                  'QFLAG_010': np.uint8,
                  'QFLAG_015': np.uint8,
                  'QFLAG_020': np.uint8,
                  'QFLAG_040': np.uint8,
                  'QFLAG_060': np.uint8,
                  'QFLAG_100': np.uint8,
                  'SSF': np.uint8}

        super(SWI_TS, self).__init__(
            data_path, grid,
            fn_format=fname_template.format(dt=dt, version=version,
                                            cell=cell_fn),
            parameters=parameters, scale_factors=scale_factors,
            dtypes=dtypes, autoscale=False,
            automask=False, ioclass_kws={'read_bulk': read_bulk,
                                         'loc_ids_name': 'locations'})
Example #36
    def test_save_load_basicgrid(self):
        grid_nc.save_grid(self.testfile,
                          self.basic)

        loaded_grid = grid_nc.load_grid(self.testfile)
        assert self.basic == loaded_grid
Example #37
    def __init__(self, ts_path, grid_path=None):
        if grid_path is None:
            grid_path = os.path.join(ts_path, "grid.nc")

        grid = load_grid(grid_path)
        super(CCITs, self).__init__(ts_path, grid)
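Most readers above share the pygeobase-style time series interface; a hedged
usage sketch (the data path is hypothetical):

    reader = CCITs('/path/to/cci_ts')
    ts = reader.read(12345)  # read a time series by grid point index (GPI)
    ts = reader.read(16.37, 48.21)  # or by longitude, latitude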