def _format_series_data(data_series):
    """
    Convert a series into a form that can be rasterized, when possible.

    Categorical series are replaced by their integer category codes.
    Datetime series are converted to ``numpy.float64`` numbers and a
    warning is logged. Any other series is returned untouched.

    Parameters
    ----------
    data_series: :obj:`geopandas.GeoSeries`
        The series to be converted.

    Returns
    -------
    :obj:`geopandas.GeoSeries`:
        The converted series, or the input itself when no conversion applies.
    """
    dtype_name = str(data_series.dtype)
    if dtype_name == "category":
        return data_series.cat.codes
    if "datetime" not in dtype_name:
        # nothing to convert
        return data_series

    data_series = pandas.to_numeric(data_series).astype(numpy.float64)
    get_logger().warning(
        f"The series '{data_series.name}' was converted from a date to a number to "
        "rasterize the data. To load the data back in as a date, "
        "use 'pandas.to_datetime()'."
    )
    return data_series
def test_log_to_console(capsys):
    """Check that an INFO record reaches stderr when console logging is set to INFO."""
    log_to_console(False)  # clear any other loggers
    log_to_console(level="INFO")
    get_logger().info("here")
    stderr_text = capsys.readouterr().err
    assert "INFO-geocube: here" in stderr_text
def load_vector_data(vector_data):
    """
    Load vector data and validate that it can be rasterized.

    Parameters
    ----------
    vector_data: str, path-like object or :obj:`geopandas.GeoDataFrame`
        A file path to an OGR supported source or GeoDataFrame
        containing the vector data. Anything else is handed to the
        ``GeoDataFrame`` constructor.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` containing the vector data.

    Raises
    ------
    VectorDataError
        If the data is empty or has no 'geometry' column.
    """
    import os  # local import: only needed for path-like detection

    logger = get_logger()

    if isinstance(vector_data, (str, os.PathLike)):
        # os.fspath turns pathlib.Path & friends into a plain path so
        # they are read as files instead of being passed to the
        # GeoDataFrame constructor.
        frame = gpd.read_file(os.fspath(vector_data))
    elif isinstance(vector_data, gpd.GeoDataFrame):
        frame = vector_data
    else:
        frame = gpd.GeoDataFrame(vector_data)

    if frame.empty:
        raise VectorDataError("Empty GeoDataFrame.")
    if "geometry" not in frame.columns:
        raise VectorDataError(
            f"'geometry' column missing. Columns in file: {frame.columns.values.tolist()}"
        )

    # make sure projection is set
    if not frame.crs:
        # NOTE(review): the {"init": ...} form is reported as deprecated by
        # newer pyproj/geopandas releases; kept unchanged here - confirm the
        # supported versions before switching to "EPSG:4326".
        frame.crs = {"init": "epsg:4326"}
        logger.warning(
            "Projection not defined in `vector_data`."
            " Setting to geographic (EPSG:4326)."
        )
    return frame.fillna(numpy.nan)
def load_vector_data(vector_data):
    """
    Load vector data and validate that it can be rasterized.

    Parameters
    ----------
    vector_data: str, path-like object or :obj:`geopandas.GeoDataFrame`
        A file path to an OGR supported source or GeoDataFrame
        containing the vector data. Anything else is handed to the
        ``GeoDataFrame`` constructor.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` containing the vector data.

    Raises
    ------
    VectorDataError
        If the data is empty or has no 'geometry' column.
    """
    logger = get_logger()

    if isinstance(vector_data, (str, os.PathLike)):
        frame = geopandas.read_file(vector_data)
    elif isinstance(vector_data, geopandas.GeoDataFrame):
        frame = vector_data
    else:
        frame = geopandas.GeoDataFrame(vector_data)

    if frame.empty:
        raise VectorDataError("Empty GeoDataFrame.")
    if "geometry" not in frame.columns:
        raise VectorDataError(
            "'geometry' column missing. Columns in file: "
            f"{frame.columns.values.tolist()}"
        )

    if frame.crs:
        return frame

    # make sure projection is set
    frame.crs = "EPSG:4326"
    logger.warning(
        "Projection not defined in `vector_data`."
        " Setting to geographic (EPSG:4326)."
    )
    return frame
def _get_grouped_grid(self, grouped_dataframe, measurement_name, group_by):
    """Rasterize one measurement for every group of a grouped dataframe.

    Parameters
    ----------
    grouped_dataframe: pandas GroupBy object
        The grouped dataframe; iterated as ``(key, group)`` pairs.
    measurement_name: str
        Name of the column to rasterize.
    group_by: str
        Name placed first in the returned dimension tuple.

    Returns
    -------
    tuple or None:
        ``((group_by, "y", "x"), image_array, attrs, encoding)`` - the
        options needed to create an :obj:`xarray.DataArray` - or None
        when the rasterize function returns None for any group.
    """
    logger = get_logger()
    rasters = []
    last_group = None
    nodata = self._fill
    for _, last_group in grouped_dataframe:
        group_series = last_group[measurement_name]
        # category codes are rasterized, so -1 is used as the fill value
        if str(group_series.dtype) == "category":
            nodata = -1
        else:
            nodata = self._fill
        raster = self._rasterize_function(
            geometry_array=last_group.geometry,
            data_values=_format_series_data(group_series).values,
            geobox=self._geobox,
            grid_coords=self._grid_coords,
            fill=nodata,
        )
        if raster is None:
            logger.warning(
                f"Skipping attribute {measurement_name} due to missing data..."
            )
            return None
        rasters.append(raster)

    # attributes use the fill value of the last group that was processed
    attrs = self._get_attrs(measurement_name, nodata)
    stacked = numpy.array(rasters)

    # it was converted to numeric date value
    was_datetime = (
        last_group is not None
        and "datetime" in str(last_group[measurement_name].dtype)
    )
    if was_datetime:
        self._update_time_attrs(attrs, stacked)

    dims = (group_by, "y", "x")
    encoding = {"grid_mapping": DEFAULT_GRID_MAP}
    return dims, stacked, attrs, encoding
def rasterize_image(
    geometry_array,
    data_values,
    geobox,
    fill,
    merge_alg=MergeAlg.replace,
    filter_nan=False,
    **ignored_kwargs,
):
    """
    Rasterize geometries and their values onto the grid of a GeoBox.

    Parameters
    -----------
    geometry_array: geopandas.GeometryArray
        The geometries to rasterize.
    data_values: list
        Data values associated with the geometries.
    geobox: :obj:`datacube.utils.geometry.GeoBox`
        Supplies the height, width and affine transform of the output.
    fill: float
        The value to fill in the grid with for nodata.
    merge_alg: `rasterio.enums.MergeAlg`, optional
        The algorithm for merging values into one cell.
        Default is `MergeAlg.replace`.
    filter_nan: bool, optional
        If True, missing data is removed from the values and geometries
        before rasterization. Default is False.
    **ignored_kwargs:
        Accepted for compatibility with other rasterization methods
        and ignored.

    Returns
    -------
    :obj:`numpy.ndarray` or None
        The rasterized data, or None when rasterization fails with the
        "cannot perform reduce with flexible type" TypeError.
    """
    logger = get_logger()
    try:
        if filter_nan:
            data_values, geometry_array = _remove_missing_data(
                data_values, geometry_array
            )
        shape_value_pairs = zip(geometry_array.apply(mapping).values, data_values)
        return rasterio.features.rasterize(
            shape_value_pairs,
            out_shape=(geobox.height, geobox.width),
            transform=geobox.affine,
            fill=fill,
            merge_alg=merge_alg,
            dtype=numpy.float64,
        )
    except TypeError as ter:
        # only this specific failure is tolerated; anything else propagates
        if "cannot perform reduce with flexible type" not in str(ter):
            raise
        logger.warning(f"{ter}")
        return None
def test_log_to_file(mock_user_log_dir, tmpdir):
    """Check that an INFO record lands in geocube.log when file logging is set to INFO."""
    # point the mocked user log directory at the temporary directory
    mock_user_log_dir.return_value = str(tmpdir)
    log_to_file(False)  # clear any other loggers
    log_to_file(level="INFO")
    get_logger().info("here")
    log_path = tmpdir.join("geocube.log")
    with open(log_path) as log_handle:
        log_contents = log_handle.read()
    assert "INFO-geocube: here" in log_contents
def test_log_to_console__warning(capsys):
    """Check that, with default arguments, console logging drops INFO but keeps WARNING."""
    log_to_console(False)  # clear any other loggers
    log_to_console()
    geocube_logger = get_logger()
    geocube_logger.info("here")
    geocube_logger.warning("there")
    stderr_text = capsys.readouterr().err
    assert "INFO-geocube: here" not in stderr_text
    assert "WARNING-geocube: there" in stderr_text
def test_log_to_file__warning(mock_user_log_dir, tmpdir):
    """Check that, with default arguments, file logging drops INFO but keeps WARNING."""
    # point the mocked user log directory at the temporary directory
    mock_user_log_dir.return_value = str(tmpdir)
    log_to_file(False)  # clear any other loggers
    log_to_file()
    geocube_logger = get_logger()
    geocube_logger.info("here")
    geocube_logger.warning("there")
    log_path = tmpdir.join("geocube.log")
    with open(log_path) as log_handle:
        log_contents = log_handle.read()
    assert "INFO-geocube: here" not in log_contents
    assert "WARNING-geocube: there" in log_contents
def rasterize_points_radial(
    geometry_array,
    data_values,
    grid_coords,
    method="linear",
    filter_nan=False,
    **ignored_kwargs,
):
    """
    Interpolate point data onto a grid with scipy.interpolate.Rbf.

    Parameters
    ----------
    geometry_array: geopandas.GeometryArray
        A geometry array of points.
    data_values: list
        Data values associated with the points.
    grid_coords: dict
        Output from `rioxarray.rioxarray.affine_to_coords`; its "x" and
        "y" entries define the target grid.
    method: str, optional
        The function to use for interpolation in `scipy.interpolate.Rbf`.
        {'multiquadric', 'inverse', 'gaussian', 'linear',
        'cubic', 'quintic', 'thin_plate'}
    filter_nan: bool, optional
        If True, missing data is removed from the values and geometries
        before interpolation. Default is False.
    **ignored_kwargs:
        Accepted for compatibility with other rasterization methods
        and ignored.

    Returns
    -------
    :class:`numpy.ndarray` or None:
        The interpolated grid, or None when interpolation fails with the
        "object arrays are not supported" ValueError.
    """
    logger = get_logger()
    try:
        if filter_nan:
            data_values, geometry_array = _remove_missing_data(
                data_values, geometry_array
            )
        interpolator = Rbf(
            geometry_array.x, geometry_array.y, data_values, function=method
        )
        grid_x, grid_y = numpy.meshgrid(grid_coords["x"], grid_coords["y"])
        return interpolator(grid_x, grid_y)
    except ValueError as ter:
        # only this specific failure is tolerated; anything else propagates
        if "object arrays are not supported" not in str(ter):
            raise
        logger.warning(f"{ter}")
        return None
def _get_grid(self, dataframe, measurement_name):
    """Rasterize one measurement of a regular :obj:`geopandas.GeoDataFrame`.

    Parameters
    ----------
    dataframe: :obj:`geopandas.GeoDataFrame`
        A geopandas GeoDataFrame object to rasterize.
    measurement_name: str
        Name of the column to rasterize.

    Returns
    -------
    tuple or None:
        ``(("y", "x"), image_array, attrs, encoding)`` - the options
        needed to create an :obj:`xarray.DataArray` - or None when the
        rasterize function returns None.
    """
    logger = get_logger()
    column = dataframe[measurement_name]
    column_dtype = str(column.dtype)

    # category codes are rasterized, so -1 is used as the fill value
    nodata = -1 if column_dtype == "category" else self._fill

    raster = self._rasterize_function(
        geometry_array=dataframe.geometry,
        data_values=_format_series_data(column).values,
        geobox=self._geobox,
        grid_coords=self._grid_coords,
        fill=nodata,
    )
    if raster is None:
        logger.warning(
            f"Skipping attribute {measurement_name} due to missing data..."
        )
        return None

    attrs = self._get_attrs(measurement_name, nodata)

    # it was converted to numeric date value
    if "datetime" in column_dtype:
        self._update_time_attrs(attrs, raster)

    dims = ("y", "x")
    encoding = {"grid_mapping": DEFAULT_GRID_MAP}
    return dims, numpy.array(raster), attrs, encoding
def rasterize_points_radial(
    geometry_array, data_values, grid_coords, method="linear", **ignored_kwargs
):
    """
    Interpolate point data onto a grid with scipy.interpolate.Rbf.

    Parameters
    ----------
    geometry_array: geopandas.GeometryArray
        A geometry array of points.
    data_values: list
        Data values associated with the points.
    grid_coords: dict
        Output from `rioxarray.rioxarray.affine_to_coords`; its "x" and
        "y" entries define the target grid.
    method: str, optional
        The function to use for interpolation in `scipy.interpolate.Rbf`.
        {'multiquadric', 'inverse', 'gaussian', 'linear',
        'cubic', 'quintic', 'thin_plate'}
    **ignored_kwargs:
        Accepted for compatibility with other rasterization methods
        and ignored. This includes ``fill``: it is not a parameter of
        this function and has no effect on the result.

    Returns
    -------
    :class:`numpy.ndarray` or None:
        The interpolated grid, or None when interpolation fails with the
        "object arrays are not supported" ValueError.
    """
    logger = get_logger()
    try:
        interpolator = Rbf(
            geometry_array.x, geometry_array.y, data_values, function=method
        )
        grid_x, grid_y = numpy.meshgrid(grid_coords["x"], grid_coords["y"])
        return interpolator(grid_x, grid_y)
    except ValueError as ter:
        # only this specific failure is tolerated; anything else propagates
        if "object arrays are not supported" not in str(ter):
            raise
        logger.warning(f"{ter}")
        return None
def rasterize_image(geojson_shapes, data_values, geobox, fill=-9999.0):
    """
    Rasterize shapes and their values onto the grid of a GeoBox.

    Parameters
    -----------
    geojson_shapes: list
        List of geojson shapes to rasterize.
    data_values: list
        Data values associated with the list of geojson shapes
    geobox: :obj:`datacube.utils.geometry.GeoBox`
        Supplies the height, width and affine transform of the output.
    fill: float, optional
        The value to fill in the grid with for nodata. Default is -9999.0.

    Returns
    -------
    :obj:`numpy.ndarray` or None
        The rasterized data, or None when rasterization fails with the
        "cannot perform reduce with flexible type" TypeError.
    """
    logger = get_logger()
    try:
        shape_value_pairs = zip(geojson_shapes, data_values)
        return rasterio.features.rasterize(
            shape_value_pairs,
            out_shape=(geobox.height, geobox.width),
            transform=geobox.affine,
            fill=fill,
            dtype=numpy.float64,
        )
    except TypeError as ter:
        # only this specific failure is tolerated; anything else propagates
        if "cannot perform reduce with flexible type" not in str(ter):
            raise
        logger.warning("{warning}".format(warning=ter))
        return None