Example #1
    def test_save_basicgrid_irregular_nc(self):
        grid_nc.save_grid(self.testfilename, self.basic_irregular, global_attrs={"test": "test_attribute"})

        with Dataset(self.testfilename) as nc_data:
            nptest.assert_array_equal(self.lats, nc_data.variables["lat"][:])
            nptest.assert_array_equal(self.lons, nc_data.variables["lon"][:])
            nptest.assert_array_equal(self.subset, np.where(nc_data.variables["subset_flag"][:] == 1)[0])
            assert nc_data.test == "test_attribute"
            assert nc_data.shape == 64800
Example #2
    def test_save_cellgrid_nc(self):
        grid_nc.save_grid(self.testfilename, self.cellgrid, global_attrs={"test": "test_attribute"})

        with Dataset(self.testfilename) as nc_data:
            nptest.assert_array_equal(self.lats, nc_data.variables["lat"][:])
            nptest.assert_array_equal(self.lons, nc_data.variables["lon"][:])
            nptest.assert_array_equal(self.cells, nc_data.variables["cell"][:])
            nptest.assert_array_equal(self.subset, np.where(nc_data.variables["subset_flag"][:] == 1)[0])
            assert nc_data.test == "test_attribute"
            assert nc_data.gpidirect == 0x1B
Example #3
    def test_save_basicgrid_nc(self):
        grid_nc.save_grid(self.testfilename, self.basic, global_attrs={"test": "test_attribute"})

        with Dataset(self.testfilename) as nc_data:
            nptest.assert_array_equal(np.unique(self.lats)[::-1], nc_data.variables["lat"][:])
            nptest.assert_array_equal(np.unique(self.lons), nc_data.variables["lon"][:])

            nptest.assert_array_equal(self.subset, np.where(nc_data.variables["subset_flag"][:].flatten() == 1)[0])
            assert nc_data.test == "test_attribute"
            assert nc_data.shape[0] == 360
            assert nc_data.shape[1] == 180
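Note: the tests in Examples #1 to #3 (and the round-trip tests further below) reference a fixture that is not shown here: self.testfilename, self.lats, self.lons, self.subset, self.cells and the grid objects. A minimal sketch of what such a setUp could look like, assuming grid_nc is pygeogrids.netcdf and that the shape ordering follows the assertions above; the fixture values are illustrative, not the original test data:

    import os
    import tempfile
    import unittest

    import numpy as np
    import pygeogrids.grids as grids
    import pygeogrids.netcdf as grid_nc   # alias used by the tests above


    class GridNcTest(unittest.TestCase):
        # hypothetical fixture; attribute names mirror the tests above

        def setUp(self):
            self.testfilename = os.path.join(tempfile.mkdtemp(), "test_grid.nc")

            # regular 1-degree global grid, 360 x 180 = 64800 points
            lat = np.arange(89.5, -90.0, -1.0)
            lon = np.arange(-179.5, 180.0, 1.0)
            lons, lats = np.meshgrid(lon, lat)
            self.lats = lats.flatten()
            self.lons = lons.flatten()

            # arbitrary subset of grid point indices, stored as subset_flag == 1
            self.subset = np.arange(0, 64800, 100)

            # 2D grid; the shape ordering follows the assertions above
            self.basic = grids.BasicGrid(self.lons, self.lats,
                                         subset=self.subset, shape=(360, 180))
            # same points without shape information, i.e. an "irregular" grid
            self.basic_irregular = grids.BasicGrid(self.lons, self.lats,
                                                   subset=self.subset)
            # 5-degree cell grid derived from the basic grid
            self.cellgrid = self.basic.to_cell_grid(cellsize=5.0)
            self.cells = self.cellgrid.arrcell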
Example #4
    def CDItoNetCDF(self,
                    region=None,
                    ip=None,
                    separatefile=True,
                    exclude=None):
        """
        Creates NetCDF that contains CDI for all timestamps.

        Parameters
        ----------
        region : str, list of str, optional
            Region(s) of of interest; must be one of the regions as set in the
            CDIPoet instance; Defaults the regions attribute value of the
            CDIPoet instance.
        ip : int, list of int, optional
            Interest period for calculating the DI; must be one of the ip
            as set in the CDIPoet instance; Defaults to the ip  attribute value
            in the CDIPoet instance.
        separatefile : bool
            If True, writes weights to separate file; If False, writes weights
            to NetCDF database file.
        exclude : string, optional
            Variable which should not be used for calculation of CDI.
        """

        if region is None:
            region = self.regions
        else:
            if isinstance(region, str):
                region = [region]

        if ip is None:
            ip = self.ip
        else:
            if isinstance(ip, int):
                ip = [ip]

        if not os.path.exists(self.cdi_path):
            os.mkdir(self.cdi_path)

        for reg in region:
            grid = grids.ShapeGrid(reg, self.spatial_resolution)
            gps = grid.get_gridpoints().index

            for ipe in ip:
                key = 'ECDI_' + str(ipe)

                print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe))

                if separatefile:
                    dest_file = os.path.join(self.cdi_path,
                                             reg + '_' + key + '.nc')
                else:
                    dest_file = os.path.join(
                        self.data_path,
                        reg + '_' + str(self.spatial_resolution) + '_' +
                        self.temporal_resolution + '.nc')

                wfile = os.path.join(self.weights_path,
                                     reg + '_weights_' + str(ipe) + '.nc')

                if not os.path.isfile(dest_file):
                    grid = grids.ShapeGrid(reg, self.spatial_resolution)
                    save_grid(dest_file, grid)

                with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:

                    if 'time' not in cdifile.dimensions.keys():
                        dt = get_dtindex(self.temporal_resolution,
                                         self.start_date)
                        cdifile.createDimension("time", None)

                        times = cdifile.createVariable('time', 'uint16',
                                                       ('time', ))

                        times.units = 'days since ' + str(self.start_date)
                        times.calendar = 'standard'
                        times[:] = date2num(dt.tolist(),
                                            units=times.units,
                                            calendar=times.calendar)

                    else:
                        times = cdifile.variables['time']

                    if key not in cdifile.variables.keys():
                        dim = ('time', 'lat', 'lon')
                        cdi = cdifile.createVariable(key,
                                                     'f8',
                                                     dim,
                                                     fill_value=-99)
                    else:
                        cdi = cdifile.variables[key]

                    for k, gp in enumerate(gps):

                        if k % 100 == 0:
                            print('.', end='')

                        position = np.where(cdifile.variables['gpi'][:] == gp)
                        lat_pos = position[0][0]
                        lon_pos = position[1][0]

                        weights = {}

                        parnum = (len(self.sources.keys()) -
                                  len(self.staticsources))

                        if exclude is not None:
                            parnum = parnum - 1

                        dat = np.zeros((parnum, cdi.shape[0]), dtype=float)

                        # dat = np.zeros((len(self.sources.keys()), cdi.shape[0]),
                        #               dtype=np.float)
                        dat[dat == 0] = self.nan_value
                        dat = np.ma.masked_values(dat, self.nan_value)

                        # extract data from DI files and calc weights
                        i = 0

                        for param in self.sources.keys():
                            if param in self.staticsources:
                                continue
                            if param == exclude:
                                continue

                            difile = os.path.join(
                                self.di_path,
                                reg + '_' + param + '_DI_' + str(ipe) + '.nc')

                            with Dataset(difile, 'r', format='NETCDF4') as nc:
                                for var in nc.variables.keys():
                                    if param in var:
                                        for j in range(
                                                0, nc.variables[var].shape[0]):
                                            dat[i, j] = (
                                                nc.variables[var][j, lat_pos,
                                                                  lon_pos])

                            with Dataset(wfile, 'r', format='NETCDF4') as nc:
                                for var in nc.variables.keys():
                                    if param in var:
                                        weights[param] = (
                                            nc.variables[var][lat_pos,
                                                              lon_pos])
                            i += 1

                        dat = np.ma.masked_where(dat == self.nan_value, dat)
                        dat = np.nan_to_num(dat)
                        dat = np.ma.masked_where(dat == 0., dat)

                        avg = np.ma.average(dat,
                                            axis=0,
                                            weights=list(weights.values()))

                        cdi[:, lat_pos, lon_pos] = avg

                    print('Done!')

        print('Done!')
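The core of the inner loop above is a weighted, masked average over the drought indices of one grid point: entries equal to nan_value are masked (the original additionally masks zeros), and np.ma.average renormalizes the weights over the unmasked values. A small standalone sketch of that step, with made-up values and parameter names:

    import numpy as np

    nan_value = -99.0
    # three parameters (rows) x four time steps (columns); nan_value marks missing data
    dat = np.array([[0.2, 0.4, nan_value, 0.6],
                    [0.1, 0.5, 0.3, nan_value],
                    [0.3, 0.2, 0.4, 0.5]])
    weights = {'param_a': 0.5, 'param_b': 0.3, 'param_c': 0.2}

    dat = np.ma.masked_values(dat, nan_value)
    # weighted average over the parameter axis; masked entries are skipped and
    # the remaining weights are renormalized per time step
    ecdi = np.ma.average(dat, axis=0, weights=list(weights.values()))
    print(ecdi)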
Example #5
    def __writeWeight(self, gp, region, refparam, ip, exclude=None):
        """
        Parameters
        ----------
        exclude : string, optional
            Variable which should not be used for calculation of the weights.
        """

        refparam += '_' + str(ip)

        df = pd.DataFrame()

        for param in self.sources.keys():

            difile = os.path.join(
                self.di_path, region + '_' + param + '_DI_' + str(ip) + '.nc')

            if not os.path.exists(difile):
                continue

            with Dataset(difile, 'r', format='NETCDF4') as nc:
                if len(df.index.values) == 0:
                    time = nc.variables['time']
                    dates = num2date(time[:],
                                     units=time.units,
                                     calendar=time.calendar)
                    df = pd.DataFrame(index=pd.DatetimeIndex(dates))

                ncvar = None
                for var in nc.variables.keys():
                    if param in var:
                        ncvar = var
                        continue

                position = np.where(nc.variables['gpi'][:] == gp)
                lat_pos = position[0][0]
                lon_pos = position[1][0]

                df[ncvar] = np.nan
                for i in range(0, nc.variables[ncvar].shape[0] - 1):
                    df[ncvar][i] = nc.variables[ncvar][i, lat_pos, lon_pos]

                    if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                        vvar = nc.variables[ncvar]
                        if vvar.getncattr('scaling_factor') < 0:
                            df[ncvar] = (
                                df[ncvar] *
                                float(vvar.getncattr('scaling_factor')))
                        else:
                            df[ncvar] = (
                                df[ncvar] /
                                float(vvar.getncattr('scaling_factor')))

        weights = cdi.calc_weights(df,
                                   refparam,
                                   lags=self.lags,
                                   exclude=exclude)

        dest_file = os.path.join(self.weights_path,
                                 region + '_weights_' + str(ip) + '.nc')

        if not os.path.isfile(dest_file):
            grid = grids.ShapeGrid(region, self.spatial_resolution)
            save_grid(dest_file, grid)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
            dim = ('lat', 'lon')

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            keys = []
            if exclude is not None:
                for par in df.keys():
                    if exclude in par:
                        continue
                    keys.append(par)
            else:
                keys = df.keys()

            for i, dataset in enumerate(keys):

                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            'd',
                                            dim,
                                            fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]

                var[lat_pos, lon_pos] = weights[i]
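Both writer methods locate a grid point in the 2D NetCDF arrays by searching the gpi variable and reusing the resulting lat/lon indices. The same pattern can be used to read a stored weight back out of the weights file; a sketch, assuming a file written by __writeWeight (the helper name is made up):

    import numpy as np
    from netCDF4 import Dataset

    def read_weight(weights_file, variable, gp):
        """Return the weight stored for `variable` at grid point `gp`.

        Illustrative helper, not part of the original code.
        """
        with Dataset(weights_file, 'r') as nc:
            # gpi is a 2D (lat, lon) variable holding the grid point index of each cell
            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]
            return nc.variables[variable][lat_pos, lon_pos]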
Example #6
    def __writeDI(self,
                  region,
                  src,
                  gridpoints,
                  grid,
                  ip,
                  suffix='',
                  scaled=True,
                  modf_all=True,
                  start=None):

        if start is not None:
            dt = get_dtindex('dekad', start)
        else:
            dt = get_dtindex('dekad', self.start_date)

        dest_file = os.path.join(
            self.di_path, region + '_' + src + '_DI' + '_' + str(ip) + '.nc')

        if not os.path.isfile(dest_file):
            save_grid(dest_file, grid)

        for i, gp in enumerate(gridpoints):

            if i % 100 == 0:
                print('.', end='')

            ts = self.read_timeseries(src, gp, region)
            if start is not None:
                sel = (ts.index >= start)
                ts = ts[sel]

            inverse = False
            if src == 'MODIS_LST':
                inverse = True

            ts_di = cdi.calc_DI(ts.copy(),
                                inverse, [ip],
                                scale_zero=False,
                                scaled=scaled,
                                modf_all=modf_all)

            with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
                if 'time' not in nc.dimensions.keys():
                    nc.createDimension("time", None)

                    times = nc.createVariable('time', 'uint16', ('time', ))

                    times.units = 'days since ' + str(self.start_date)
                    times.calendar = 'standard'
                    times[:] = date2num(dt.tolist(),
                                        units=times.units,
                                        calendar=times.calendar)

                else:
                    times = nc.variables['time']

                dim = ('time', 'lat', 'lon')

                position = np.where(nc.variables['gpi'][:] == gp)
                lat_pos = position[0][0]
                lon_pos = position[1][0]

                # extend times variable in NetCDF
                tsdates = date2num(ts_di.index.tolist(),
                                   units=times.units,
                                   calendar=times.calendar).astype(int)
                begin = np.where(times == tsdates[0])[0][0]
                times[begin:] = tsdates

                for dataset in ts_di.keys():

                    if dataset not in nc.variables.keys():
                        var = nc.createVariable(dataset,
                                                ts_di[dataset].dtype.char,
                                                dim,
                                                fill_value=self.nan_value)
                    else:
                        var = nc.variables[dataset]

                    var[begin:, lat_pos, lon_pos] = ts_di[dataset].values
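The "extend times variable" step relies on the unlimited time dimension of the NetCDF file: assigning past the current end of the time variable grows it, so later calls can append additional time steps. A minimal standalone demonstration of that behaviour with netCDF4 (file name and values are arbitrary):

    import numpy as np
    from netCDF4 import Dataset

    with Dataset('time_demo.nc', 'w', format='NETCDF4') as nc:
        nc.createDimension('time', None)                       # unlimited dimension
        times = nc.createVariable('time', 'uint16', ('time',))
        times[0:5] = np.arange(5, dtype='uint16')              # 5 initial time steps

        new_steps = np.arange(3, 9, dtype='uint16')            # overlaps the old range and extends it
        begin = np.where(times[:] == new_steps[0])[0][0]
        times[begin:] = new_steps                               # variable now holds 9 entries
        print(times[:])                                         # [0 1 2 3 4 5 6 7 8]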
Example #7
    def test_save_load_cellgrid(self):
        grid_nc.save_grid(self.testfilename, self.cellgrid)

        loaded_grid = grid_nc.load_grid(self.testfilename)
        assert self.cellgrid == loaded_grid
Example #8
    def test_save_load_basicgrid_irregular(self):
        grid_nc.save_grid(self.testfilename, self.basic_irregular)

        loaded_grid = grid_nc.load_grid(self.testfilename)
        assert self.basic_irregular == loaded_grid
Example #9
    def test_save_load_basicgrid(self):
        grid_nc.save_grid(self.testfilename, self.basic)

        loaded_grid = grid_nc.load_grid(self.testfilename)
        assert self.basic == loaded_grid
Example #10
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    Creates a datetimeindex over the whole period defined in the settings file

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile. If the default shapefile is
        used, this would be the FIPS country code.
    metadata : dict
        NetCDF metadata from source file.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res :  int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        Value used to mark missing data (NetCDF fill value), defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """

    if region == 'global':
        grid = grids.RegularGrid(sp_res)
    else:
        grid = grids.ShapeGrid(region, sp_res, shapefile)

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    dt = get_dtindex(temp_res, start_date)

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions.keys():
            ncfile.createDimension("time", None)

            if compression:
                times = ncfile.createVariable('time', 'uint16', ('time',),
                                              zlib=True, complevel=4)

            else:
                times = ncfile.createVariable('time', 'uint16', ('time',))

            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)

        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image.keys():

            if key not in ncfile.variables.keys():

                if compression:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, zlib=True, complevel=4,
                                                fill_value=nan_value)
                else:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, fill_value=nan_value)
            else:
                var = ncfile.variables[key]

            if numdate in times[:]:
                var_index = np.where(times[:] == numdate)[0][0]
            else:
                times[times[:].size] = numdate
                var_index = times[:].size - 1

            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    if item in var.ncattrs():
                        continue
                    else:
                        var.setncattr(str(item), metadata[key][item])
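A hypothetical call of save_image for one dekadal field on a global 0.25 degree grid; the variable name, attribute values and file names below are illustrative only and assume save_image and its dependencies are importable:

    from datetime import datetime

    import numpy as np

    # one 2D array per variable; the shape must match the region's grid
    # (720 x 1440 for a global 0.25 degree grid)
    image = {'NDVI': np.full((720, 1440), -99, dtype=np.float32)}
    metadata = {'NDVI': {'units': '-',
                         'long_name': 'Normalized Difference Vegetation Index'}}

    save_image(image, timestamp=datetime(2014, 1, 11), region='global',
               metadata=metadata, dest_file='global_0.25_dekad.nc',
               start_date=datetime(2014, 1, 1), sp_res=0.25,
               nan_value=-99, temp_res='dekad', compression=True)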