Example no. 1
    def WeightsToNetCDF(self, refparam, region=None, ip=None, exclude=None):
        """
        Parameters
        ----------
        exclude : string, optional
            Variable which should not be used for calculation of the weights.
        """

        if region is None:
            region = self.regions
        else:
            if isinstance(region, str):
                region = [region]

        if ip is None:
            ip = self.ip
        else:
            if isinstance(ip, int):
                ip = [ip]

        if not os.path.exists(self.weights_path):
            os.mkdir(self.weights_path)

        for reg in region:

            grid = grids.ShapeGrid(reg, self.spatial_resolution)
            gps = grid.get_gridpoints().index

            for ipe in ip:
                print '[INFO] calc weights ' + reg + ' IP' + str(ipe),
                for i, gp in enumerate(gps):
                    if i % 100 == 0:
                        print '.',
                    self.__writeWeight(gp, reg, refparam, ipe, exclude)
                print ' done!'
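
A minimal usage sketch for the method above. It assumes an already configured CDIPoet instance named poet; the parameter and source names passed in are placeholders, not values shipped with the library:

    # Hypothetical sketch: 'poet' is a configured CDIPoet instance and the
    # source names below are placeholders.
    poet.WeightsToNetCDF(refparam='ref_source',   # reference parameter for the weights
                         region='UG',             # str or list of str
                         ip=30,                   # int or list of int
                         exclude='other_source')  # left out of the weighting

This writes one weights file per region and interest period into poet.weights_path.
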
Example no. 2
    def DItoNetCDF(self, region=None, source=None, ip=None):
        """
        Calculates the Drought Index (DI) for the given source(s) over one or
        more regions and stores the results as NetCDF files.

        Parameters
        ----------
        region : str, list of str, optional
            Region(s) of interest; must be one of the regions set in the
            CDIPoet instance; defaults to the regions attribute value of the
            CDIPoet instance.
        source : str, list of str, optional
            Source parameter(s) for which to calculate the DI; must be one of
            the sources set in the CDIPoet instance; defaults to all sources
            set in the CDIPoet instance.
        ip : int, list of int, optional
            Interest period(s) for calculating the DI; must be one of the ip
            values set in the CDIPoet instance; defaults to the ip attribute
            value of the CDIPoet instance.
        """

        if region is None:
            region = self.regions
        else:
            if isinstance(region, str):
                region = [region]

        if source is None:
            source = self.sources.keys()
        else:
            if isinstance(source, str):
                source = [source]

        if ip is None:
            ip = self.ip
        else:
            if isinstance(ip, int):
                ip = [ip]

        if not os.path.exists(self.di_path):
            os.mkdir(self.di_path)

        for reg in region:

            grid = grids.ShapeGrid(reg, self.spatial_resolution)
            gps = grid.get_gridpoints().index

            for ipe in ip:
                for src in source:
                    if src in self.staticsources:
                        continue
                    if reg not in self.sources[src].valid_regions:
                        continue
                    print('[INFO] calc DI ' + reg + ' IP' + str(ipe) + ' ' +
                          src),
                    self.__writeDI(reg, src, gps, grid, ipe)
                    print ' done!'
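
A similar hypothetical sketch for DItoNetCDF, again assuming a configured CDIPoet instance named poet; the region codes and source names are placeholders:

    # Hypothetical sketch: calculate the DI for two regions, two sources and
    # two interest periods in one call. The per-source results end up under
    # poet.di_path, where the other methods read them back.
    poet.DItoNetCDF(region=['UG', 'KE'],
                    source=['source_a', 'source_b'],
                    ip=[30, 60])
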
Example no. 3
    def setUp(self):
        self.sp_res = 60
        self.region = 'UG'
        self.timestamp = datetime.today()
        self.start_date = datetime.today()
        self.temp_res = 'day'
        self.fill_value = -99
        self.variable = 'data'

        # create image
        self.shape = (3, 6)
        self.mask = np.array([[1, 0, 1, 0, 1, 0], [0, 1, 0, 1, 0, 1],
                              [1, 0, 1, 0, 1, 0]])

        self.image = {}
        self.data = np.ma.array(np.ones(self.shape),
                                mask=self.mask,
                                fill_value=self.fill_value)
        self.data.data[np.where(self.mask == 1)] = self.fill_value

        self.image['data'] = self.data
        self.image['data2'] = self.data * 2

        # create metadata
        self.metadata = {
            'data': {
                'Attribute1': 'Value1'
            },
            'data2': {
                'Attribut2': 'Value2'
            },
            'data3': {
                'Attribut3': 'Value3'
            }
        }

        if not os.path.exists(os.path.join(curpath(), 'data')):
            os.mkdir(os.path.join(curpath(), 'data'))

        self.grid = gr.ShapeGrid(self.region, self.sp_res)
        self.globalgrid = gr.RegularGrid(sp_res=self.sp_res)

        # Build NetCDF testfile
        self.ncfile = os.path.join(curpath(), 'data', 'test_nc.nc')
        if os.path.exists(self.ncfile):
            os.remove(self.ncfile)

        save_image(self.image,
                   self.timestamp,
                   'global',
                   self.metadata,
                   self.ncfile,
                   self.start_date,
                   self.sp_res,
                   temp_res=self.temp_res)

        # Build HDF5 testfile
        self.h5file = os.path.join(curpath(), 'data', 'tests_hdf5.h5')
        if os.path.exists(self.h5file):
            os.remove(self.h5file)

        with h5py.File(self.h5file, 'w') as hdf5_file:

            group = hdf5_file.create_group('group')
            for dataset_name in self.image.keys():
                attributes = self.metadata[dataset_name]
                write_data = self.image[dataset_name]
                dataset = group.create_dataset(dataset_name, write_data.shape,
                                               write_data.dtype, write_data)
                for key in attributes:
                    dataset.attrs[key] = attributes[key]

        # Build png Testfile
        self.pngfile = os.path.join(curpath(), 'data', 'test_png.png')
        if os.path.exists(self.pngfile):
            os.remove(self.pngfile)

        n = 60
        pngimg = np.kron(np.copy(self.data), np.ones((n, n)))
        pngimg[pngimg == self.fill_value] = np.NAN
        plt.imsave(self.pngfile, pngimg)
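
A minimal sketch of a companion test that reads the NetCDF fixture built above back in and checks its contents. The test name is hypothetical and it assumes netCDF4.Dataset is imported at module level:

    def test_netcdf_fixture(self):
        # Hypothetical companion test for the fixture built in setUp().
        with Dataset(self.ncfile, 'r') as nc:
            # each key of self.image becomes a NetCDF variable
            assert 'data' in nc.variables.keys()
            assert 'data2' in nc.variables.keys()
            # metadata entries are stored as variable attributes
            assert nc.variables['data'].getncattr('Attribute1') == 'Value1'
            # variables are (time, lat, lon); lat/lon match the image shape
            assert nc.variables['data'].shape[1:] == self.shape
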
Example no. 4
    def CDItoNetCDF(self,
                    region=None,
                    ip=None,
                    separatefile=True,
                    exclude=None):
        """
        Creates a NetCDF file that contains the CDI for all timestamps.

        Parameters
        ----------
        region : str, list of str, optional
            Region(s) of interest; must be one of the regions set in the
            CDIPoet instance; defaults to the regions attribute value of the
            CDIPoet instance.
        ip : int, list of int, optional
            Interest period(s) for calculating the CDI; must be one of the ip
            values set in the CDIPoet instance; defaults to the ip attribute
            value of the CDIPoet instance.
        separatefile : bool
            If True, writes the CDI to a separate file; if False, writes the
            CDI to the NetCDF database file.
        exclude : string, optional
            Variable which should not be used for calculation of the CDI.
        """

        if region is None:
            region = self.regions
        else:
            if isinstance(region, str):
                region = [region]

        if ip is None:
            ip = self.ip
        else:
            if isinstance(ip, int):
                ip = [ip]

        if not os.path.exists(self.cdi_path):
            os.mkdir(self.cdi_path)

        for reg in region:
            grid = grids.ShapeGrid(reg, self.spatial_resolution)
            gps = grid.get_gridpoints().index

            for ipe in ip:
                key = 'ECDI_' + str(ipe)

                print('[INFO] calc ECDI ' + reg + ' IP' + str(ipe))

                if separatefile:
                    dest_file = os.path.join(self.cdi_path,
                                             reg + '_' + key + '.nc')
                else:
                    dest_file = os.path.join(
                        self.data_path,
                        reg + '_' + str(self.spatial_resolution) + '_' +
                        self.temporal_resolution + '.nc')

                wfile = os.path.join(self.weights_path,
                                     reg + '_weights_' + str(ipe) + '.nc')

                if not os.path.isfile(dest_file):
                    grid = grids.ShapeGrid(reg, self.spatial_resolution)
                    save_grid(dest_file, grid)

                with Dataset(dest_file, 'r+', format='NETCDF4') as cdifile:

                    if 'time' not in cdifile.dimensions.keys():
                        dt = get_dtindex(self.temporal_resolution,
                                         self.start_date)
                        cdifile.createDimension("time", None)

                        times = cdifile.createVariable('time', 'uint16',
                                                       ('time', ))

                        times.units = 'days since ' + str(self.start_date)
                        times.calendar = 'standard'
                        times[:] = date2num(dt.tolist(),
                                            units=times.units,
                                            calendar=times.calendar)

                    else:
                        times = cdifile.variables['time']

                    if key not in cdifile.variables.keys():
                        dim = ('time', 'lat', 'lon')
                        cdi = cdifile.createVariable(key,
                                                     'f8',
                                                     dim,
                                                     fill_value=-99)
                    else:
                        cdi = cdifile.variables[key]

                    for k, gp in enumerate(gps):

                        if k % 100 == 0:
                            print '.',

                        position = np.where(cdifile.variables['gpi'][:] == gp)
                        lat_pos = position[0][0]
                        lon_pos = position[1][0]

                        weights = {}

                        parnum = (len(self.sources.keys()) -
                                  len(self.staticsources))

                        if exclude is not None:
                            parnum = parnum - 1

                        dat = np.zeros((parnum, cdi.shape[0]), dtype=np.float)

                        # dat = np.zeros((len(self.sources.keys()), cdi.shape[0]),
                        #               dtype=np.float)
                        dat[dat == 0] = self.nan_value
                        dat = np.ma.masked_values(dat, self.nan_value)

                        # extract data from DI files and calc weights
                        i = 0

                        for param in self.sources.keys():
                            if param in self.staticsources:
                                continue
                            if param == exclude:
                                continue

                            difile = os.path.join(
                                self.di_path,
                                reg + '_' + param + '_DI_' + str(ipe) + '.nc')

                            with Dataset(difile, 'r', format='NETCDF4') as nc:
                                for var in nc.variables.keys():
                                    if param in var:
                                        for j in range(
                                                0, nc.variables[var].shape[0]):
                                            dat[i, j] = (
                                                nc.variables[var][j, lat_pos,
                                                                  lon_pos])

                            with Dataset(wfile, 'r', format='NETCDF4') as nc:
                                for var in nc.variables.keys():
                                    if param in var:
                                        weights[param] = (
                                            nc.variables[var][lat_pos,
                                                              lon_pos])
                            i += 1

                        dat = np.ma.masked_where(dat == self.nan_value, dat)
                        dat = np.nan_to_num(dat)
                        dat = np.ma.masked_where(dat == 0., dat)

                        avg = np.ma.average(dat,
                                            axis=0,
                                            weights=weights.values())

                        cdi[:, lat_pos, lon_pos] = avg

                    print 'Done!'

        print 'Done!'
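
The core of the per-gridpoint loop above is a masked, weighted average over the per-source DI series: masked (missing) sources are skipped and the remaining weights are renormalised. A small self-contained sketch of that step with made-up numbers:

    import numpy as np

    nan_value = -99
    # three sources, four time steps; the second source has one missing value
    dat = np.array([[0.2, 0.4, 0.6, 0.8],
                    [0.1, nan_value, 0.5, 0.7],
                    [0.3, 0.3, 0.3, 0.3]], dtype=float)
    dat = np.ma.masked_values(dat, nan_value)

    weights = [0.5, 0.3, 0.2]
    # np.ma.average ignores masked entries and renormalises the weights
    cdi_series = np.ma.average(dat, axis=0, weights=weights)
    print(cdi_series)  # the second value uses only the first and third source
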
Example no. 5
    def __writeWeight(self, gp, region, refparam, ip, exclude=None):
        """
        Parameters
        ----------
        exclude : string, optional
            Variable which should not be used for calculation of the weights.
        """

        refparam += '_' + str(ip)

        df = pd.DataFrame()

        for param in self.sources.keys():

            difile = os.path.join(
                self.di_path, region + '_' + param + '_DI_' + str(ip) + '.nc')

            if not os.path.exists(difile):
                continue

            with Dataset(difile, 'r', format='NETCDF4') as nc:
                if len(df.index.values) == 0:
                    time = nc.variables['time']
                    dates = num2date(time[:],
                                     units=time.units,
                                     calendar=time.calendar)
                    df = pd.DataFrame(index=pd.DatetimeIndex(dates))

                ncvar = None
                for var in nc.variables.keys():
                    if param in var:
                        ncvar = var
                        continue

                position = np.where(nc.variables['gpi'][:] == gp)
                lat_pos = position[0][0]
                lon_pos = position[1][0]

                df[ncvar] = np.NAN
                for i in range(0, nc.variables[ncvar].shape[0] - 1):
                    df[ncvar][i] = nc.variables[ncvar][i, lat_pos, lon_pos]

                # apply the scaling factor once, after the column is filled
                if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                    vvar = nc.variables[ncvar]
                    if vvar.getncattr('scaling_factor') < 0:
                        df[ncvar] = (df[ncvar] *
                                     float(vvar.getncattr('scaling_factor')))
                    else:
                        df[ncvar] = (df[ncvar] /
                                     float(vvar.getncattr('scaling_factor')))

        weights = cdi.calc_weights(df,
                                   refparam,
                                   lags=self.lags,
                                   exclude=exclude)

        dest_file = os.path.join(self.weights_path,
                                 region + '_weights_' + str(ip) + '.nc')

        if not os.path.isfile(dest_file):
            grid = grids.ShapeGrid(region, self.spatial_resolution)
            save_grid(dest_file, grid)

        with Dataset(dest_file, 'r+', format='NETCDF4') as nc:
            dim = ('lat', 'lon')

            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]

            keys = []
            if exclude is not None:
                for par in df.keys():
                    if exclude in par:
                        continue
                    keys.append(par)
            else:
                keys = df.keys()

            for i, dataset in enumerate(keys):

                if dataset not in nc.variables.keys():
                    var = nc.createVariable(dataset,
                                            'd',
                                            dim,
                                            fill_value=self.nan_value)
                else:
                    var = nc.variables[dataset]

                var[lat_pos, lon_pos] = weights[i]
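
Once WeightsToNetCDF has run, the stored weights can be looked up for a single grid point in the same way the methods above do it, via the 2-D gpi variable written by save_grid. A read-back sketch; the file path and grid point number are illustrative:

    import os
    import numpy as np
    from netCDF4 import Dataset

    # Illustrative values; in practice they come from the CDIPoet instance.
    wfile = os.path.join('weights', 'UG_weights_30.nc')
    gp = 12345

    with Dataset(wfile, 'r') as nc:
        # locate the grid point in the 2-D gpi field
        position = np.where(nc.variables['gpi'][:] == gp)
        lat_pos, lon_pos = position[0][0], position[1][0]
        # the weight variables are 2-D (lat, lon) fields named after the
        # DI parameters; print them all except the gpi field itself
        for name, var in nc.variables.items():
            if name != 'gpi' and var.ndim == 2:
                print(name, var[lat_pos, lon_pos])
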
Example no. 6
    def _resample_spatial(self,
                          region,
                          begin,
                          end,
                          delete_rawdata,
                          shapefile=None):
        """Helper method that calls spatial resampling routines.

        Parameters
        ----------
        region : str
            FIPS country code (https://en.wikipedia.org/wiki/FIPS_country_code)
        begin : datetime
            Start date of resampling.
        end : datetime
            End date of resampling.
        delete_rawdata : bool
            If True, original downloaded files are deleted after resampling.
        shapefile : str, optional
            Path to a shapefile defining the region; if None, the default
            shapefile is used.
        """

        dest_file = self._get_tmp_filepath('spatial', region)

        dirList = os.listdir(self.rawdata_path)
        dirList.sort()

        if region == 'global':
            grid = gr.RegularGrid(sp_res=self.dest_sp_res)
        else:
            grid = gr.ShapeGrid(region, self.dest_sp_res, shapefile)

        for item in dirList:

            src_file = os.path.join(self.rawdata_path, item)

            fdate = get_file_date(item, self.filedate)

            if begin is not None:
                if fdate < begin:
                    continue

            if end is not None:
                if fdate > end:
                    continue

            if check_compressed(src_file):
                dirname = os.path.splitext(item)[0]
                dirpath = os.path.join(self.rawdata_path, dirname)
                unpack(src_file)
                src_file = select_file(os.listdir(dirpath))
                src_file = os.path.join(dirpath, src_file)

            if begin is not None:
                if fdate < begin:
                    if check_compressed(item):
                        shutil.rmtree(
                            os.path.join(self.rawdata_path,
                                         os.path.splitext(item)[0]))
                    continue
            if end is not None:
                if fdate > end:
                    if check_compressed(item):
                        shutil.rmtree(
                            os.path.join(self.rawdata_path,
                                         os.path.splitext(item)[0]))
                    continue

            print '.',

            try:
                image, _, _, _, timestamp, metadata = \
                    resample_to_shape(src_file, region, self.dest_sp_res, grid,
                                      self.name, self.nan_value,
                                      self.dest_nan_value, self.variables,
                                      shapefile)
            except ValueError:
                print "[INFO] no data available for that region."
                return "[INFO] no data available for that region."

            if timestamp is None:
                timestamp = get_file_date(item, self.filedate)

            if self.temp_res == self.dest_temp_res:
                filename = (region + '_' + str(self.dest_sp_res) + '_' +
                            str(self.dest_temp_res) + '.nc')
                dfile = os.path.join(self.data_path, filename)
                nc.save_image(image, timestamp, region, metadata, dfile,
                              self.dest_start_date, self.dest_sp_res,
                              self.dest_nan_value, shapefile,
                              self.dest_temp_res)
            else:
                nc.write_tmp_file(image, timestamp, region, metadata,
                                  dest_file, self.dest_start_date,
                                  self.dest_sp_res, self.dest_nan_value,
                                  shapefile)

            # delete unpacked files if they exist
            if check_compressed(item):
                shutil.rmtree(
                    os.path.join(self.rawdata_path,
                                 os.path.splitext(item)[0]))

        print ''
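
The begin/end checks in the loop above implement an inclusive date window on the file dates returned by get_file_date: files dated strictly before begin or strictly after end are skipped, boundary dates are kept. A standalone sketch of just that filter:

    from datetime import datetime

    begin = datetime(2014, 1, 1)
    end = datetime(2014, 1, 31)

    # file dates as they would come from get_file_date()
    fdates = [datetime(2013, 12, 31), datetime(2014, 1, 1),
              datetime(2014, 1, 15), datetime(2014, 2, 1)]

    for fdate in fdates:
        if begin is not None and fdate < begin:
            continue
        if end is not None and fdate > end:
            continue
        print(fdate)  # 2014-01-01 and 2014-01-15 pass the window
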
Example no. 7
def save_image(image, timestamp, region, metadata, dest_file, start_date,
               sp_res, nan_value=-99, shapefile=None, temp_res='dekad',
               compression=False):
    """Saves numpy.ndarray images as multidimensional netCDF4 file.

    Creates a datetimeindex over the whole period defined in the settings file

    Parameters
    ----------
    image : dict of numpy.ndarrays
        Input image.
    timestamp : datetime.datetime
        Timestamp of image.
    region : str
        Identifier of the region in the shapefile. If the default shapefile is
        used, this would be the FIPS country code.
    metadata : dict
        NetCDF metadata from source file.
    dest_file : str
        Path to the output file.
    start_date : datetime.datetime
        First date of available data.
    sp_res : int or float
        Spatial resolution of the grid.
    nan_value : int, optional
        No-data (fill) value for the dataset, defaults to -99.
    shapefile : str, optional
        Path to shape file, uses "world country admin boundary shapefile" by
        default.
    temp_res : string or int, optional
        Temporal resolution of the output NetCDF4 file, defaults to dekad.
    compression : bool, optional
        If True, ncfile compression is active.
    """

    if region == 'global':
        grid = grids.RegularGrid(sp_res)
    else:
        grid = grids.ShapeGrid(region, sp_res, shapefile)

    if not os.path.isfile(dest_file):
        save_grid(dest_file, grid)

    dt = get_dtindex(temp_res, start_date)

    with Dataset(dest_file, 'r+', format='NETCDF4') as ncfile:

        if 'time' not in ncfile.dimensions.keys():
            ncfile.createDimension("time", None)

            if compression:
                times = ncfile.createVariable('time', 'uint16', ('time',),
                                              zlib=True, complevel=4)

            else:
                times = ncfile.createVariable('time', 'uint16', ('time',))

            times.units = 'days since ' + str(start_date)
            times.calendar = 'standard'
            times[:] = date2num(dt.tolist(), units=times.units,
                                calendar=times.calendar)

        else:
            times = ncfile.variables['time']

        dim = ('time', 'lat', 'lon')

        numdate = date2num(timestamp, units=times.units,
                           calendar=times.calendar)

        for key in image.keys():

            if key not in ncfile.variables.keys():

                if compression:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, zlib=True, complevel=4,
                                                fill_value=nan_value)
                else:
                    var = ncfile.createVariable(key, image[key].dtype.char,
                                                dim, fill_value=nan_value)
            else:
                var = ncfile.variables[key]

            if numdate in times[:]:
                var_index = np.where(times[:] == numdate)[0][0]
            else:
                times[times[:].size] = numdate
                var_index = times[:].size - 1

            var[var_index] = image[key]

            if metadata is not None:
                for item in metadata[key]:
                    if item in var.ncattrs():
                        continue
                    else:
                        var.setncattr(str(item), metadata[key][item])
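
A minimal usage sketch of save_image, mirroring the test fixture from Example no. 3: on a 60 degree global grid the image is a (3, 6) array, written for a single timestamp. It assumes save_image and its helpers are importable from the package these snippets belong to; the file names and dates are illustrative:

    from datetime import datetime
    import numpy as np

    # from <package> import save_image  # module path depends on the package layout

    fill_value = -99
    data = np.ma.array(np.ones((3, 6)), mask=np.zeros((3, 6)),
                       fill_value=fill_value)

    save_image(image={'data': data},
               timestamp=datetime(2014, 1, 1),
               region='global',
               metadata={'data': {'Attribute1': 'Value1'}},
               dest_file='test_nc.nc',
               start_date=datetime(2014, 1, 1),
               sp_res=60,                 # 60 degrees -> 3 x 6 global grid
               nan_value=fill_value,
               temp_res='day')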