Esempio n. 1
0
def _format_series_data(data_series):
    """
    Make a series rasterizeable where a conversion is known.

    Datetime series are turned into 64-bit floats (a warning is logged) and
    categorical series are replaced by their integer category codes. Any other
    series is handed back untouched.

    Parameters
    ----------
    data_series: :obj:`geopandas.GeoSeries`
        The series to be converted.

    Returns
    -------
    :obj:`geopandas.GeoSeries`: The converted series, or the input itself when
        no conversion applies.

    """
    dtype_name = str(data_series.dtype)

    if dtype_name == "category":
        # categorical data is rasterized through its integer codes
        return data_series.cat.codes
    if "datetime" not in dtype_name:
        # nothing to convert
        return data_series

    data_series = pandas.to_numeric(data_series).astype(numpy.float64)
    get_logger().warning(
        f"The series '{data_series.name}' was converted from a date to a number to "
        "rasterize the data. To load the data back in as a date, "
        "use 'pandas.to_datetime()'."
    )
    return data_series
Esempio n. 2
0
def test_log_to_console(capsys):
    """An INFO message shows up on stderr when console logging is set to INFO."""
    log_to_console(False)  # clear any other loggers
    log_to_console(level="INFO")
    get_logger().info("here")
    stderr_text = capsys.readouterr().err
    assert "INFO-geocube: here" in stderr_text
Esempio n. 3
0
def load_vector_data(vector_data):
    """
    Load vector data into a GeoDataFrame and validate it.

    Parameters
    ----------
    vector_data: str, path-like object or :obj:`geopandas.GeoDataFrame`
        A file path to an OGR supported source or GeoDataFrame containing
        the vector data. Any other object is passed to the
        :obj:`geopandas.GeoDataFrame` constructor.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` containing the vector data.

    Raises
    ------
    VectorDataError
        If the loaded data is empty or has no 'geometry' column.

    """
    import os  # local import: only needed to recognise path-like inputs

    logger = get_logger()

    if isinstance(vector_data, (str, os.PathLike)):
        # os.fspath() hands the reader a plain path for pathlib.Path-style input
        vector_data = gpd.read_file(os.fspath(vector_data))
    elif not isinstance(vector_data, gpd.GeoDataFrame):
        vector_data = gpd.GeoDataFrame(vector_data)

    if vector_data.empty:
        raise VectorDataError("Empty GeoDataFrame.")
    if "geometry" not in vector_data.columns:
        raise VectorDataError(
            "'geometry' column missing. Columns in file: "
            f"{vector_data.columns.values.tolist()}"
        )

    # make sure projection is set
    if not vector_data.crs:
        # Use the authority string; the {"init": "epsg:..."} dict form is
        # deprecated by pyproj 2+.
        vector_data.crs = "EPSG:4326"
        logger.warning("Projection not defined in `vector_data`."
                       " Setting to geographic (EPSG:4326).")
    # NOTE(review): presumably normalises missing markers (e.g. None) to NaN
    return vector_data.fillna(numpy.nan)
Esempio n. 4
0
def load_vector_data(vector_data):
    """
    Load vector data into a GeoDataFrame and validate it.

    Parameters
    ----------
    vector_data: str, path-like object or :obj:`geopandas.GeoDataFrame`
        A file path to an OGR supported source or GeoDataFrame containing
        the vector data. Any other object is passed to the
        :obj:`geopandas.GeoDataFrame` constructor.

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` containing the vector data.

    Raises
    ------
    VectorDataError
        If the loaded data is empty or has no 'geometry' column.

    """
    logger = get_logger()

    if not isinstance(vector_data, geopandas.GeoDataFrame):
        if isinstance(vector_data, (str, os.PathLike)):
            vector_data = geopandas.read_file(vector_data)
        else:
            vector_data = geopandas.GeoDataFrame(vector_data)

    if vector_data.empty:
        raise VectorDataError("Empty GeoDataFrame.")

    has_geometry = "geometry" in vector_data.columns
    if not has_geometry:
        raise VectorDataError("'geometry' column missing. Columns in file: "
                              f"{vector_data.columns.values.tolist()}")

    # a projection is already defined: nothing left to do
    if vector_data.crs:
        return vector_data

    vector_data.crs = "EPSG:4326"
    logger.warning("Projection not defined in `vector_data`."
                   " Setting to geographic (EPSG:4326).")
    return vector_data
Esempio n. 5
0
    def _get_grouped_grid(self, grouped_dataframe, measurement_name, group_by):
        """Rasterize one measurement for every group of a grouped dataframe.

        Each group is rasterized separately and the resulting images are
        stacked along a leading dimension named after ``group_by``.

        Parameters
        ----------
        grouped_dataframe: pandas GroupBy object
            The dataframe to rasterize, already grouped.
        measurement_name: str
            Name of the column to rasterize.
        group_by: str
            Name used for the leading (group) dimension of the output.

        Returns
        -------
        tuple or None: Options needed to create an :obj:`xarray.DataArray`
            (dimension names, data, attributes, encoding), or None when the
            rasterize function returns None for any group.

        """
        logger = get_logger()

        group_images = []
        last_group = None
        fill_value = self._fill
        for _, last_group in grouped_dataframe:
            group_series = last_group[measurement_name]
            # category columns are rasterized with -1 as the fill value
            if str(group_series.dtype) == "category":
                fill_value = -1
            else:
                fill_value = self._fill

            group_image = self._rasterize_function(
                geometry_array=last_group.geometry,
                data_values=_format_series_data(group_series).values,
                geobox=self._geobox,
                grid_coords=self._grid_coords,
                fill=fill_value,
            )
            if group_image is None:
                logger.warning(
                    f"Skipping attribute {measurement_name} due to missing data..."
                )
                return None
            group_images.append(group_image)

        attrs = self._get_attrs(measurement_name, fill_value)
        stacked_images = numpy.array(group_images)

        # it was converted to numeric date value
        is_datetime = last_group is not None and "datetime" in str(
            last_group[measurement_name].dtype
        )
        if is_datetime:
            self._update_time_attrs(attrs, stacked_images)

        dims = (group_by, "y", "x")
        encoding = {"grid_mapping": DEFAULT_GRID_MAP}
        return (dims, stacked_images, attrs, encoding)
Esempio n. 6
0
def rasterize_image(
    geometry_array,
    data_values,
    geobox,
    fill,
    merge_alg=MergeAlg.replace,
    filter_nan=False,
    **ignored_kwargs,
):
    """
    Burn geometries and their values into a grid described by a GeoBox.

    Parameters
    -----------
    geometry_array: geopandas.GeometryArray
        The geometries to rasterize.
    data_values: list
        Data values associated with the geometries, in the same order.
    geobox: :obj:`datacube.utils.geometry.GeoBox`
        Supplies the height, width and affine transform of the output image.
    fill: float
        The value to fill in the grid with for nodata.
    merge_alg: `rasterio.enums.MergeAlg`, optional
        The algorithm for merging values into one cell. Default is `MergeAlg.replace`.
    filter_nan: bool, optional
        If True, will remove nodata values from the data before rasterization.
        Default is False.
    **ignored_kwargs:
        Accepted so that all rasterization methods can be called the same way;
        they are not used.

    Returns
    -------
    :obj:`numpy.ndarray` or None
        The rasterized data as float64, or None when rasterization fails with
        the "flexible type" TypeError (the error is logged as a warning).

    """
    logger = get_logger()

    try:
        if filter_nan:
            data_values, geometry_array = _remove_missing_data(
                data_values, geometry_array
            )
        geojson_shapes = geometry_array.apply(mapping).values
        return rasterio.features.rasterize(
            zip(geojson_shapes, data_values),
            out_shape=(geobox.height, geobox.width),
            transform=geobox.affine,
            fill=fill,
            merge_alg=merge_alg,
            dtype=numpy.float64,
        )
    except TypeError as ter:
        # only this specific failure is downgraded to a warning
        if "cannot perform reduce with flexible type" not in str(ter):
            raise
        logger.warning(f"{ter}")
        return None
Esempio n. 7
0
def test_log_to_file(mock_user_log_dir, tmpdir):
    """An INFO message is written to geocube.log when file logging is set to INFO."""
    mock_user_log_dir.return_value = str(tmpdir)
    log_to_file(False)  # clear any other loggers
    log_to_file(level="INFO")
    get_logger().info("here")
    log_path = tmpdir.join("geocube.log")
    with open(log_path) as log_handle:
        log_text = log_handle.read()
    assert "INFO-geocube: here" in log_text
Esempio n. 8
0
def test_log_to_console__warning(capsys):
    """With no level given, console logging emits WARNING but not INFO."""
    log_to_console(False)  # clear any other loggers
    log_to_console()
    geocube_logger = get_logger()
    geocube_logger.info("here")
    geocube_logger.warning("there")
    stderr_text = capsys.readouterr().err
    assert "INFO-geocube: here" not in stderr_text
    assert "WARNING-geocube: there" in stderr_text
Esempio n. 9
0
def test_log_to_file__warning(mock_user_log_dir, tmpdir):
    """With no level given, file logging records WARNING but not INFO."""
    mock_user_log_dir.return_value = str(tmpdir)
    log_to_file(False)  # clear any other loggers
    log_to_file()
    geocube_logger = get_logger()
    geocube_logger.info("here")
    geocube_logger.warning("there")
    log_path = tmpdir.join("geocube.log")
    with open(log_path) as log_handle:
        log_text = log_handle.read()
    assert "INFO-geocube: here" not in log_text
    assert "WARNING-geocube: there" in log_text
Esempio n. 10
0
def rasterize_points_radial(
    geometry_array,
    data_values,
    grid_coords,
    method="linear",
    filter_nan=False,
    **ignored_kwargs,
):
    """
    Interpolate point data onto a grid with scipy.interpolate.Rbf.

    Parameters
    ----------
    geometry_array: geopandas.GeometryArray
        A geometry array of points.
    data_values: list
        Data values associated with the points, in the same order.
    grid_coords: dict
        Output from `rioxarray.rioxarray.affine_to_coords`; the "x" and "y"
        entries define the output grid.
    method: str, optional
        The function to use for interpolation in `scipy.interpolate.Rbf`.
        {'multiquadric', 'inverse', 'gaussian', 'linear',
        'cubic', 'quintic', 'thin_plate'}
    filter_nan: bool, optional
        If True, will remove nodata values from the data before rasterization.
        Default is False.
    **ignored_kwargs:
        Accepted so that all rasterization methods can be called the same way;
        they are not used.

    Returns
    -------
    :class:`numpy.ndarray` or None: The interpolated grid, or None when the
        interpolation fails because object arrays are not supported (the
        error is logged as a warning).

    """
    logger = get_logger()

    try:
        if filter_nan:
            data_values, geometry_array = _remove_missing_data(
                data_values, geometry_array
            )
        interpolator = Rbf(
            geometry_array.x, geometry_array.y, data_values, function=method
        )
        grid_x, grid_y = numpy.meshgrid(grid_coords["x"], grid_coords["y"])
        return interpolator(grid_x, grid_y)
    except ValueError as ter:
        # only this specific failure is downgraded to a warning
        if "object arrays are not supported" not in str(ter):
            raise
        logger.warning(f"{ter}")
        return None
Esempio n. 11
0
    def _get_grid(self, dataframe, measurement_name):
        """Rasterize one measurement of a regular :obj:`geopandas.GeoDataFrame`.

        Parameters
        ----------
        dataframe: :obj:`geopandas.GeoDataFrame`
            A geopandas GeoDataFrame object to rasterize.
        measurement_name: str
            Name of the column to rasterize.

        Returns
        -------
        tuple or None: Options needed to create an :obj:`xarray.DataArray`
            (dimension names, data, attributes, encoding), or None when the
            rasterize function returns None.

        """
        logger = get_logger()

        measurement = dataframe[measurement_name]
        # category columns are rasterized with -1 as the fill value
        if str(measurement.dtype) == "category":
            fill_value = -1
        else:
            fill_value = self._fill

        image_data = self._rasterize_function(
            geometry_array=dataframe.geometry,
            data_values=_format_series_data(measurement).values,
            geobox=self._geobox,
            grid_coords=self._grid_coords,
            fill=fill_value,
        )
        if image_data is None:
            logger.warning(
                f"Skipping attribute {measurement_name} due to missing data..."
            )
            return None

        attrs = self._get_attrs(measurement_name, fill_value)

        # it was converted to numeric date value
        was_datetime = "datetime" in str(measurement.dtype)
        if was_datetime:
            self._update_time_attrs(attrs, image_data)

        dims = ("y", "x")
        encoding = {"grid_mapping": DEFAULT_GRID_MAP}
        return (dims, numpy.array(image_data), attrs, encoding)
Esempio n. 12
0
def rasterize_points_radial(geometry_array,
                            data_values,
                            grid_coords,
                            method="linear",
                            **ignored_kwargs):
    """
    Interpolate point data onto a grid with scipy.interpolate.Rbf.

    Parameters
    ----------
    geometry_array: geopandas.GeometryArray
        A geometry array of points.
    data_values: list
        Data values associated with the points, in the same order.
    grid_coords: dict
        Output from `rioxarray.rioxarray.affine_to_coords`; the "x" and "y"
        entries define the output grid.
    method: str, optional
        The function to use for interpolation in `scipy.interpolate.Rbf`.
        {'multiquadric', 'inverse', 'gaussian', 'linear',
        'cubic', 'quintic', 'thin_plate'}
    **ignored_kwargs:
        Accepted so that all rasterization methods can be called the same way
        (e.g. with ``fill``); they are not used.

    Returns
    -------
    :class:`numpy.ndarray` or None: The interpolated grid, or None when the
        interpolation fails because object arrays are not supported (the
        error is logged as a warning).

    """
    logger = get_logger()

    try:
        interpolator = Rbf(
            geometry_array.x, geometry_array.y, data_values, function=method
        )
        grid_x, grid_y = numpy.meshgrid(grid_coords["x"], grid_coords["y"])
        return interpolator(grid_x, grid_y)
    except ValueError as ter:
        # only this specific failure is downgraded to a warning
        if "object arrays are not supported" not in str(ter):
            raise
        logger.warning("{warning}".format(warning=ter))
        return None
Esempio n. 13
0
def rasterize_image(geojson_shapes, data_values, geobox, fill=-9999.0):
    """
    Burn geojson shapes and their values into a grid described by a GeoBox.

    Parameters
    -----------
    geojson_shapes: list
        List of geojson shapes to rasterize.
    data_values: list
        Data values associated with the shapes, in the same order.
    geobox: :obj:`datacube.utils.geometry.GeoBox`
        Supplies the height, width and affine transform of the output image.
    fill: float, optional
        The value to fill in the grid with for nodata. Default is -9999.0.

    Returns
    -------
    :obj:`numpy.ndarray` or None
        The rasterized data as float64, or None when rasterization fails with
        the "flexible type" TypeError (the error is logged as a warning).

    """
    logger = get_logger()

    try:
        shape_value_pairs = zip(geojson_shapes, data_values)
        return rasterio.features.rasterize(
            shape_value_pairs,
            out_shape=(geobox.height, geobox.width),
            transform=geobox.affine,
            fill=fill,
            dtype=numpy.float64,
        )
    except TypeError as ter:
        # only this specific failure is downgraded to a warning
        if "cannot perform reduce with flexible type" not in str(ter):
            raise
        logger.warning("{warning}".format(warning=ter))
        return None