Example #1
    def read_ts(self, location, region=None, variable=None, shapefile=None,
                scaled=True):
        """Gets timeseries from netCDF file for a gridpoint.

        Parameters
        ----------
        location : int or tuple of floats
            Either the grid point index as an integer or longitude/latitude
            given as a tuple of floats.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If True, data is scaled to a predefined range; if False, data is
            returned as given in the raw data file. Defaults to True.

        Returns
        -------
        df : pd.DataFrame
            Timeseries for selected variables.
        """

        if region is None:
            region = self.dest_regions[0]

        if isinstance(location, tuple):
            if region == 'global':
                grid = RegularGrid(self.dest_sp_res)
            else:
                grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gp, _ = grid.find_nearest_gpi(location[0], location[1])
        else:
            gp = location

        if variable is None:
            variable = self.get_variables()
        else:
            variable = self.check_variable(variable)
            variable = [variable]

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)
            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]
            df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            for ncvar in variable:
                begin = np.where(dates == var_dates[region][ncvar][0])[0][0]
                end = np.where(dates == var_dates[region][ncvar][1])[0][0]
                df[ncvar] = np.nan
                col = df.columns.get_loc(ncvar)
                for i in range(begin, end + 1):
                    df.iloc[i, col] = nc.variables[ncvar][i, lat_pos, lon_pos]

                if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                    vvar = nc.variables[ncvar]
                    if vvar.getncattr('scaling_factor') < 0:
                        df[ncvar] = (df[ncvar] *
                                     float(vvar.getncattr('scaling_factor')))
                    else:
                        df[ncvar] = (df[ncvar] /
                                     float(vvar.getncattr('scaling_factor')))
                if scaled:
                    if self.valid_range is not None:
                        if self.data_range is not None:
                            df[ncvar] = self._scale_values(df[ncvar])

        return df
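
For context, a call to read_ts might look like the sketch below. The `source` object, region name, and variable name are hypothetical placeholders for an already initialised poets source; they are not taken from the example above.

    # Hypothetical usage sketch: `source` stands for an initialised poets
    # source object exposing read_ts(); 'AT' and 'TCI' are made-up names.
    df = source.read_ts((16.37, 48.21), region='AT', variable='TCI')
    print(df.head())
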
Example #2
    def bulkread_ts(self,
                    locations,
                    region=None,
                    variable=None,
                    shapefile=None,
                    scaled=True,
                    grid=None):
        """Gets timeseries from netCDF file for a number of gridpoints.

        Parameters
        ----------
        locations : list of int or list of tuples
            Either a list of grid point indices given as integers, e.g.
            [0, 1, 2], or a list of longitude/latitude tuples, e.g.
            [(0.0, 0.0), (45.0, 45.0)].
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If True, data is scaled to a predefined range; if False, data is
            returned as given in the raw data file. Defaults to True.
        grid : poets.grid.grids RegularGrid or ShapeGrid, optional
            Grid with point and lon/lat information; defaults to None.

        Returns
        -------
        df_list : dict of pd.DataFrame
            Dictionary mapping each selected variable to a DataFrame with one
            timeseries column per grid point.
        gpis : list of int
            List of gpi values.
        """

        if region is None:
            region = self.valid_regions[0]

        if isinstance(locations[0], tuple):
            if grid is None:
                if region == 'global':
                    grid = RegularGrid(self.dest_sp_res)
                else:
                    grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gpis = []
            for loc in locations:
                gp, _ = grid.find_nearest_gpi(loc[0], loc[1])
                gpis.append(gp)
        else:
            gpis = locations

        variable = self._set_variable(variable)

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        df_list = {}
        lat_pos = []
        lon_pos = []

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)

            for gp in gpis:
                position = np.where(nc.variables['gpi'][:] == gp)
                lat_pos.append(int(position[0][0]))
                lon_pos.append(int(position[1][0]))
            index = pd.DatetimeIndex(dates)

            for ncv in variable:
                begin = np.where(dates == var_dates[region][ncv][0])[0][0]
                end = np.where(dates == var_dates[region][ncv][1])[0][0]

                # Read the requested time span once and select the grid
                # points via fancy indexing on the lat/lon dimensions.
                ts = nc.variables[ncv][begin:end + 1, :, :]
                ts = ts[:, lat_pos, lon_pos]
                ts = np.swapaxes(ts, 1, 0)

                # One DataFrame per variable, one column per grid point.
                df = pd.DataFrame(index=index)
                for idx, gp in enumerate(gpis):
                    col = 'gpi_' + str(gp)
                    df[col] = np.nan
                    df.iloc[begin:end + 1, df.columns.get_loc(col)] = ts[idx, :]

                nv = nc.variables[ncv]

                if '_FillValue' in nv.ncattrs():
                    df = df.replace(nv._FillValue, np.nan)

                if 'scaling_factor' in nv.ncattrs():
                    factor = float(nv.getncattr('scaling_factor'))
                    if factor < 0:
                        df = df * factor
                    else:
                        df = df / factor

                if scaled:
                    if self.valid_range is not None:
                        if self.data_range is not None:
                            for col in df.columns:
                                df[col] = self._scale_values(df[col])

                df_list[ncv] = df

        return df_list, gpis
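
The position lookup above assumes 'gpi' is stored as a 2-D array on the lat/lon grid, so np.where returns one row index and one column index per grid point, and the collected positions can then be used for pointwise fancy indexing. A minimal, self-contained sketch of that pattern with made-up toy arrays (not the netCDF variables used above):

    import numpy as np

    # Toy stand-ins for nc.variables['gpi'][:] and a (time, lat, lon) cube.
    gpi = np.arange(12).reshape(3, 4)     # 2-D grid point index array
    data = np.random.rand(5, 3, 4)        # 5 time steps on the same grid

    gpis = [1, 6, 11]                     # grid points of interest
    lat_pos, lon_pos = [], []
    for gp in gpis:
        position = np.where(gpi == gp)    # (row indices, column indices)
        lat_pos.append(int(position[0][0]))
        lon_pos.append(int(position[1][0]))

    ts = data[:, lat_pos, lon_pos]        # shape (time, n_points)
    ts = np.swapaxes(ts, 1, 0)            # shape (n_points, time)
    print(ts.shape)                       # (3, 5)
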
Example #3
    def bulkread_ts(self, locations, region=None, variable=None,
                    shapefile=None, scaled=True, grid=None):
        """Gets timeseries from netCDF file for a number of gridpoints.

        Parameters
        ----------
        locations : list of int or list of tuples
            Either a list of grid point indices given as integers, e.g.
            [0, 1, 2], or a list of longitude/latitude tuples, e.g.
            [(0.0, 0.0), (45.0, 45.0)].
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If True, data is scaled to a predefined range; if False, data is
            returned as given in the raw data file. Defaults to True.
        grid : poets.grid.grids RegularGrid or ShapeGrid, optional
            Grid with point and lon/lat information; defaults to None.

        Returns
        -------
        df_list : dict of pd.DataFrame
            Dictionary mapping each selected variable to a DataFrame with one
            timeseries column per grid point.
        gpis : list of int
            List of gpi values.
        """

        if region is None:
            region = self.valid_regions[0]

        if isinstance(locations[0], tuple):
            if grid is None:
                if region == 'global':
                    grid = RegularGrid(self.dest_sp_res)
                else:
                    grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gpis = []
            for loc in locations:
                gp, _ = grid.find_nearest_gpi(loc[0], loc[1])
                gpis.append(gp)
        else:
            gpis = locations

        variable = self._set_variable(variable)

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        df_list = {}
        lat_pos = []
        lon_pos = []

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)

            for gp in gpis:
                position = np.where(nc.variables['gpi'][:] == gp)
                lat_pos.append(int(position[0][0]))
                lon_pos.append(int(position[1][0]))
            index = pd.DatetimeIndex(dates)

            for ncv in variable:
                begin = np.where(dates == var_dates[region][ncv][0])[0][0]
                end = np.where(dates == var_dates[region][ncv][1])[0][0]

                # Read the requested time span once and select the grid
                # points via fancy indexing on the lat/lon dimensions.
                ts = nc.variables[ncv][begin:end + 1, :, :]
                ts = ts[:, lat_pos, lon_pos]
                ts = np.swapaxes(ts, 1, 0)

                # One DataFrame per variable, one column per grid point.
                df = pd.DataFrame(index=index)
                for idx, gp in enumerate(gpis):
                    col = 'gpi_' + str(gp)
                    df[col] = np.nan
                    df.iloc[begin:end + 1, df.columns.get_loc(col)] = ts[idx, :]

                nv = nc.variables[ncv]

                if '_FillValue' in nv.ncattrs():
                    df = df.replace(nv._FillValue, np.nan)

                if 'scaling_factor' in nv.ncattrs():
                    factor = float(nv.getncattr('scaling_factor'))
                    if factor < 0:
                        df = df * factor
                    else:
                        df = df / factor

                if scaled:
                    if self.valid_range is not None:
                        if self.data_range is not None:
                            for col in df.columns:
                                df[col] = self._scale_values(df[col])

                df_list[ncv] = df

        return df_list, gpis
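
For context, a call to bulkread_ts might look like the sketch below. The `source` object, the region and variable names, and the 0.25 degree resolution are hypothetical placeholders; RegularGrid is the grid class already used in the code above, and passing a pre-built grid avoids rebuilding it on every call.

    # Hypothetical usage sketch: `source` stands for an initialised poets
    # source object exposing bulkread_ts(); 'var' is a made-up variable name.
    grid = RegularGrid(0.25)
    df_list, gpis = source.bulkread_ts([(0.0, 0.0), (45.0, 45.0)],
                                       region='global', variable='var',
                                       grid=grid)
    ts = df_list['var']    # one timeseries column per grid point
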
Example #4
    def read_ts(self,
                location,
                region=None,
                variable=None,
                shapefile=None,
                scaled=True):
        """Gets timeseries from netCDF file for one gridpoint.

        Parameters
        ----------
        location : int or tuple of floats
            Either the grid point index as an integer or longitude/latitude
            given as a tuple of floats.
        region : str, optional
            Region of interest, set to first defined region if not set.
        variable : str, optional
            Variable to display, selects all available variables if None.
        shapefile : str, optional
            Path to custom shapefile.
        scaled : bool, optional
            If True, data is scaled to a predefined range; if False, data is
            returned as given in the raw data file. Defaults to True.

        Returns
        -------
        df : pd.DataFrame
            Timeseries for selected variables.
        """

        if region is None:
            region = self.valid_regions[0]

        if isinstance(location, tuple):
            if region == 'global':
                grid = RegularGrid(self.dest_sp_res)
            else:
                grid = ShapeGrid(region, self.dest_sp_res, shapefile)

            gp, _ = grid.find_nearest_gpi(location[0], location[1])
        else:
            gp = location

        variable = self._set_variable(variable)

        source_file = self.src_file[region]

        var_dates = self._check_current_date()

        with Dataset(source_file, 'r', format='NETCDF4') as nc:

            time = nc.variables['time']
            dates = num2date(time[:], units=time.units, calendar=time.calendar)
            position = np.where(nc.variables['gpi'][:] == gp)
            lat_pos = position[0][0]
            lon_pos = position[1][0]
            df = pd.DataFrame(index=pd.DatetimeIndex(dates))

            for ncvar in variable:
                begin = np.where(dates == var_dates[region][ncvar][0])[0][0]
                end = np.where(dates == var_dates[region][ncvar][1])[0][0]
                df[ncvar] = np.nan

                ts = nc.variables[ncvar][begin:end + 1, lat_pos, lon_pos]
                df.iloc[begin:end + 1, df.columns.get_loc(ncvar)] = ts

                if '_FillValue' in nc.variables[ncvar].ncattrs():
                    df = df.replace(nc.variables[ncvar]._FillValue, np.nan)

                if 'scaling_factor' in nc.variables[ncvar].ncattrs():
                    vvar = nc.variables[ncvar]
                    if vvar.getncattr('scaling_factor') < 0:
                        df[ncvar] = (df[ncvar] *
                                     float(vvar.getncattr('scaling_factor')))
                    else:
                        df[ncvar] = (df[ncvar] /
                                     float(vvar.getncattr('scaling_factor')))
                if scaled:
                    if self.valid_range is not None:
                        if self.data_range is not None:
                            df[ncvar] = self._scale_values(df[ncvar])

        return df
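
As with the first example, a hypothetical call sketch; `source` and the grid point index 1234 are made-up placeholders, not values from the code above.

    # Read raw, unscaled values for a single grid point index.
    df = source.read_ts(1234, scaled=False)
    print(df.describe())
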