Example #1
0
    def average_timeseries(self, source, region, variable=None):
        """
        Calculates mean of all time series in a region.

        Parameters
        ----------
        source : str
            Data source from which time series should be read.
        region : str
            Region of interest. Either an entry of ``self.regions`` or a
            name contained in one of the entries of ``self.sub_regions``.
        variable : str, optional
            Variable passed through to ``bulkread_timeseries``.

        Returns
        -------
        df_mean : pd.DataFrame
            One ``<variable>_mean`` column per variable returned by
            ``bulkread_timeseries``, holding the row-wise mean over all
            grid points.
        n_gpis : int
            Number of grid points that were read.

        Notes
        -----
        If the region contains no grid points, a plain error string is
        returned instead of the tuple (kept for backward compatibility).

        Raises
        ------
        ValueError
            If `region` is neither a known region nor part of a sub-region.
        """

        # The grid is always built for the name the caller passed in, even
        # when that name is a sub-region.
        grid = ShapeGrid(region, self.spatial_resolution, self.shapefile)
        points = grid.get_gridpoints()

        if region in self.regions:
            # Known region: address the points by their grid point index.
            locations = list(points.index)
        else:
            # Sub-region: read from the parent region that sits at the same
            # position in self.regions, addressing points by lon/lat.
            for idx, sub_region in enumerate(self.sub_regions):
                if region in sub_region:
                    break
            else:
                raise ValueError(
                    'Region %r is neither a defined region nor part of a '
                    'sub-region.' % (region,))
            region = self.regions[idx]
            locations = list(zip(points['lon'].tolist(),
                                 points['lat'].tolist()))

        if len(points) < 1:
            return 'ERROR: No points available in the selected region.'

        data, gpis = self.bulkread_timeseries(source,
                                              locations,
                                              region,
                                              variable=variable)

        df_mean = pd.DataFrame()

        for var in data.keys():
            df_mean[var + '_mean'] = data[var].mean(axis=1)

        return df_mean, len(gpis)
Example #2
0
    def average_timeseries(self, source, region, variable=None):
        """
        Calculates mean of all time series in a region.

        Parameters
        ----------
        source : str
            Data source from which time series should be read.
        region : str
            Region of interest. Either an entry of ``self.regions`` or a
            name contained in one of the entries of ``self.sub_regions``.
        variable : str, optional
            Variable passed through to ``bulkread_timeseries``.

        Returns
        -------
        df_mean : pd.DataFrame
            One ``<variable>_mean`` column per variable returned by
            ``bulkread_timeseries``, holding the row-wise mean over all
            grid points.
        n_gpis : int
            Number of grid points that were read.

        Notes
        -----
        If the region contains no grid points, a plain error string is
        returned instead of the tuple (kept for backward compatibility).

        Raises
        ------
        ValueError
            If `region` is neither a known region nor part of a sub-region.
        """

        # The grid is always built for the name the caller passed in, even
        # when that name is a sub-region.
        grid = ShapeGrid(region, self.spatial_resolution, self.shapefile)
        points = grid.get_gridpoints()

        if region in self.regions:
            # Known region: address the points by their grid point index.
            locations = list(points.index)
        else:
            # Sub-region: read from the parent region that sits at the same
            # position in self.regions, addressing points by lon/lat.
            for idx, sub_region in enumerate(self.sub_regions):
                if region in sub_region:
                    break
            else:
                raise ValueError(
                    'Region %r is neither a defined region nor part of a '
                    'sub-region.' % (region,))
            region = self.regions[idx]
            locations = list(zip(points['lon'].tolist(),
                                 points['lat'].tolist()))

        if len(points) < 1:
            return 'ERROR: No points available in the selected region.'

        data, gpis = self.bulkread_timeseries(source, locations, region,
                                              variable=variable)

        df_mean = pd.DataFrame()

        for var in data.keys():
            df_mean[var + '_mean'] = data[var].mean(axis=1)

        return df_mean, len(gpis)
Example #3
0
    def test_ShapeGrid(self):
        """Check extent, shape and point counts reported by ShapeGrid."""
        # Region and resolution configured on the test case.
        grid = ShapeGrid(self.region, sp_res=self.sp_res)
        lats, lons = grid.arrlat, grid.arrlon
        extent = (lats.min(), lats.max(), lons.min(), lons.max())

        assert grid.get_grid_points()[0].size == 290
        assert extent == (46.625, 48.875, 9.875, 16.875)
        assert grid.get_gridpoints().shape == (158, 2)

        # 'NZ' special case: compare first/last coordinates, not min/max.
        nz_grid = ShapeGrid('NZ', sp_res=self.sp_res1)
        nz_lats, nz_lons = nz_grid.arrlat, nz_grid.arrlon
        corners = (nz_lats[0], nz_lats[-1], nz_lons[0], nz_lons[-1])

        assert nz_grid.shape == (34, 24)
        assert nz_grid.get_gridpoints().shape == (116, 2)
        assert corners == (-46.75, -35.25, 167.37, -176.5)
Example #4
0
    def get_gridpoints(self):
        """Returns the gridpoints of every configured region.

        For the single region ``'global'`` the points are read from the
        NetCDF file ``global_<spatial_resolution>_<temporal_resolution>.nc``
        inside ``self.data_path``. For any other configuration they are
        taken from a `ShapeGrid` built for each region.

        Returns
        -------
        gridpoints : dict of pandas.DataFrame
            Dict containing Dataframes with gridpoint index as index,
            longitudes and latitudes as columns for each region.
        """

        gridpoints = {}

        if self.regions == ['global']:
            filename = (self.regions[0] + '_' + str(self.spatial_resolution) +
                        '_' + str(self.temporal_resolution) + '.nc')
            ncfile = os.path.join(self.data_path, filename)

            # Nothing is written here, so open read-only; 'r+' would demand
            # write access to the data file for no reason.
            with Dataset(ncfile, 'r', format='NETCDF4') as nc:
                gpis = nc.variables['gpi'][:]
                lons = nc.variables['lon'][:]
                lats = nc.variables['lat'][:]
                gpis = gpis.flatten()
                # Expand the two coordinate axes to one lon/lat pair per
                # cell so they line up with the flattened gpi array.
                lons, lats = np.meshgrid(lons, lats)
                lons = lons.flatten()
                lats = lats.flatten()

            points = pd.DataFrame(index=gpis)
            points['lon'] = lons
            points['lat'] = lats
            gridpoints['global'] = points
        else:
            for region in self.regions:
                grid = ShapeGrid(region, self.spatial_resolution,
                                 self.shapefile)
                points = grid.get_gridpoints()
                gridpoints[region] = points

        return gridpoints
Example #5
0
    def get_gridpoints(self):
        """Returns the gridpoints of every configured region.

        For the single region ``'global'`` the points are read from the
        NetCDF file ``global_<spatial_resolution>_<temporal_resolution>.nc``
        inside ``self.data_path``. For any other configuration they are
        taken from a `ShapeGrid` built for each region.

        Returns
        -------
        gridpoints : dict of pandas.DataFrame
            Dict containing Dataframes with gridpoint index as index,
            longitudes and latitudes as columns for each region.
        """

        gridpoints = {}

        if self.regions == ['global']:
            filename = (self.regions[0] + '_' + str(self.spatial_resolution)
                        + '_' + str(self.temporal_resolution) + '.nc')
            ncfile = os.path.join(self.data_path, filename)

            # Nothing is written here, so open read-only; 'r+' would demand
            # write access to the data file for no reason.
            with Dataset(ncfile, 'r', format='NETCDF4') as nc:
                gpis = nc.variables['gpi'][:]
                lons = nc.variables['lon'][:]
                lats = nc.variables['lat'][:]
                gpis = gpis.flatten()
                # Expand the two coordinate axes to one lon/lat pair per
                # cell so they line up with the flattened gpi array.
                lons, lats = np.meshgrid(lons, lats)
                lons = lons.flatten()
                lats = lats.flatten()

            points = pd.DataFrame(index=gpis)
            points['lon'] = lons
            points['lat'] = lats
            gridpoints['global'] = points
        else:
            for region in self.regions:
                grid = ShapeGrid(region, self.spatial_resolution,
                                 self.shapefile)
                points = grid.get_gridpoints()
                gridpoints[region] = points

        return gridpoints
Example #6
0
def image_bounds(country, sp_res, shapefile=None):
    """
    Derives bounding box, center coordinates and zoom level of an image
    for web overlay purposes.

    The box is the min/max of the grid's longitude and latitude arrays,
    widened by half of `sp_res` on every side.

    Parameters
    ----------
    country : str
        FIPS country code (https://en.wikipedia.org/wiki/FIPS_country_code)
    sp_res : numeric
        Spatial resolution of the image.
    shapefile : str, optional
        Path to a custom shapefile.

    Returns
    -------
    lon_min : numeric
        Minimum longitude.
    lon_max : numeric
        Maximum longitude.
    lat_min : numeric
        Minimum latitude.
    lat_max : numeric
        Maximum latitude.
    c_lat : numeric
        Center latitude of image.
    c_lon : numeric
        Center longitude of image.
    zoom : int
        Zoom level for openlayers.
    """
    grid = ShapeGrid(country, sp_res, shapefile=shapefile)

    # Half a cell of padding on each side of the coordinate range.
    pad = sp_res / 2
    lon_min, lon_max = grid.arrlon.min() - pad, grid.arrlon.max() + pad
    lat_min, lat_max = grid.arrlat.min() - pad, grid.arrlat.max() + pad

    # Extents of the padded box and its center point.
    e_lon = lon_max - lon_min
    e_lat = lat_max - lat_min
    c_lon = lon_min + e_lon / 2
    c_lat = lat_min + e_lat / 2

    # Count how often the reference width can be halved while the half is
    # still wider than the longitude extent.
    zoom = 0
    width = 1024  # To be replaced with the width of the map container!
    while width / 2 > e_lon:
        width = width / 2
        zoom += 1

    return lon_min, lon_max, lat_min, lat_max, c_lat, c_lon, zoom
Example #7
0
    def test_ShapeGrid(self):
        """Check extent, shape and point counts reported by ShapeGrid."""
        # Region and resolution configured on the test case.
        grid = ShapeGrid(self.region, sp_res=self.sp_res)
        lats, lons = grid.arrlat, grid.arrlon
        extent = (lats.min(), lats.max(), lons.min(), lons.max())

        assert grid.get_grid_points()[0].size == 290
        assert extent == (46.625, 48.875, 9.875, 16.875)
        assert grid.get_gridpoints().shape == (158, 2)

        # 'NZ' special case: compare first/last coordinates, not min/max.
        nz_grid = ShapeGrid('NZ', sp_res=self.sp_res1)
        nz_lats, nz_lons = nz_grid.arrlat, nz_grid.arrlon
        corners = (nz_lats[0], nz_lats[-1], nz_lons[0], nz_lons[-1])

        assert nz_grid.shape == (34, 24)
        assert nz_grid.get_gridpoints().shape == (116, 2)
        assert corners == (-46.75, -35.25, 167.37, -176.5)
Example #8
0
    def bulkread_ts(self,
                    locations,
                    region=None,
                    variable=None,
                    shapefile=None,
                    scaled=True,
                    grid=None):
        """Gets timeseries from netCDF file for a number of gridpoints.

        Parameters
        ----------
        locations : list of int or list of tuples
            Either a list of Grid point indices given as integer value [0,1,2]
            or a list of Longitude/Latitude tuples [(0.0, 0.0),(45.0, 45.0)].
            The type of the first element decides how the list is read.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If true, data will be scaled to a predefined range; if false, data
            will be shown as given in rawdata file; defaults to True
        grid : poets.grid.grids RegularGrid or ShapeGrid, optional
            Grid with point and lon/lat information; defaults to None.

        Returns
        -------
        df_list : dict of pd.DataFrame
            One DataFrame per variable name, indexed by date, with one
            ``gpi_<index>`` column per grid point.
        gpis : list of int
            List of gpi values.
        """

        if region is None:
            region = self.valid_regions[0]

        if isinstance(locations[0], tuple):
            # Lon/lat input: resolve every pair to its nearest grid point.
            if grid is None:
                if region == 'global':
                    grid = RegularGrid(self.dest_sp_res)
                else:
                    grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gpis = [grid.find_nearest_gpi(loc[0], loc[1])[0]
                    for loc in locations]
        else:
            gpis = locations

        variable = self._set_variable(variable)

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        df_list = {}

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)
            index = pd.DatetimeIndex(dates)

            # Read the gpi variable once instead of once per grid point.
            gpi_values = nc.variables['gpi'][:]
            lat_pos = []
            lon_pos = []
            for gp in gpis:
                position = np.where(gpi_values == gp)
                lat_pos.append(int(position[0][0]))
                lon_pos.append(int(position[1][0]))

            for ncv in variable:
                nc_var = nc.variables[ncv]
                begin = np.where(dates == var_dates[region][ncv][0])[0][0]
                end = np.where(dates == var_dates[region][ncv][1])[0][0]

                # Pairing the two index lists picks one cell per grid point,
                # so the result has one column per entry of gpis.
                ts = nc_var[begin:end + 1, :, :]
                ts = ts[:, lat_pos, lon_pos]

                # A fresh frame per variable: reusing one frame made every
                # entry of df_list share (and overwrite) the same columns.
                df = pd.DataFrame(index=index)
                for idx, gp in enumerate(gpis):
                    column = pd.Series(np.nan, index=index)
                    column.iloc[begin:end + 1] = ts[:, idx]
                    df['gpi_' + str(gp)] = column

                fill_value = getattr(nc_var, '_FillValue', None)
                if fill_value is not None:
                    df = df.replace(fill_value, np.nan)

                # Scaling applies to every grid point column; the frame has
                # no column named after the variable itself.
                if 'scaling_factor' in nc_var.ncattrs():
                    factor = float(nc_var.getncattr('scaling_factor'))
                    if factor < 0:
                        df = df * factor
                    else:
                        df = df / factor

                if (scaled and self.valid_range is not None
                        and self.data_range is not None):
                    for column_name in df.columns:
                        df[column_name] = self._scale_values(df[column_name])

                df_list[ncv] = df

        return df_list, gpis
Example #9
0
    def read_ts(self,
                location,
                region=None,
                variable=None,
                shapefile=None,
                scaled=True):
        """Gets timeseries from netCDF file for one gridpoint.

        Parameters
        ----------
        location : int or tuple of floats
            Either Grid point index as integer value or Longitude/Latitude
            given as tuple.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If true, data will be scaled to a predefined range; if false, data
            will be shown as given in rawdata file; defaults to True

        Returns
        -------
        df : pd.DataFrame
            Timeseries for selected variables, indexed by date, with one
            column per variable.
        """

        if region is None:
            region = self.valid_regions[0]

        if isinstance(location, tuple):
            # Lon/lat input: resolve the pair to its nearest grid point.
            if region == 'global':
                grid = RegularGrid(self.dest_sp_res)
            else:
                grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gp, _ = grid.find_nearest_gpi(location[0], location[1])
        else:
            gp = location

        variable = self._set_variable(variable)

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)
            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]
            index = pd.DatetimeIndex(dates)
            df = pd.DataFrame(index=index)

            for ncvar in variable:
                nc_var = nc.variables[ncvar]
                begin = np.where(dates == var_dates[region][ncvar][0])[0][0]
                end = np.where(dates == var_dates[region][ncvar][1])[0][0]

                # Build the column on its own and attach it at the end;
                # chained `df[col][a:b] = ...` may write to a temporary copy.
                column = pd.Series(np.nan, index=index)
                column.iloc[begin:end + 1] = nc_var[begin:end + 1,
                                                    lat_pos, lon_pos]

                # Only this variable's column is cleaned, so columns of
                # variables handled earlier are left untouched.
                fill_value = getattr(nc_var, '_FillValue', None)
                if fill_value is not None:
                    column = column.replace(fill_value, np.nan)

                if 'scaling_factor' in nc_var.ncattrs():
                    factor = float(nc_var.getncattr('scaling_factor'))
                    if factor < 0:
                        column = column * factor
                    else:
                        column = column / factor

                if (scaled and self.valid_range is not None
                        and self.data_range is not None):
                    column = self._scale_values(column)

                df[ncvar] = column

        return df