Beispiel #1
0
    def __init__(self, vector_data, geobox_maker, fill=None, categorical_enums=None):
        """
        Initialize the GeoCube class.

        Parameters
        ----------
        vector_data: str or :obj:`geopandas.GeoDataFrame`
            A file path to an OGR supported source or GeoDataFrame containing
            the vector data.
        geobox_maker: :obj:`geocube.geo_utils.geobox.GeoBoxMaker`
            The geobox for the grid to be generated from the vector data.
        fill: float, optional
            The value to fill in the grid with for nodata. Default is NaN.
        categorical_enums: dict, optional
            A dictionary of all categories for the table columns containing
            categorical data. The categories will be made unique and sorted
            if they are not already. A trailing ``"nodata"`` category is
            always appended; if the caller already lists ``"nodata"`` it is
            kept only once, in that final position.
            E.g. {'column_name': ['a', 'b'], 'other_column': ['c', 'd']}

        """
        self._vector_data = load_vector_data(vector_data)
        self._geobox = geobox_maker.from_vector(self._vector_data)
        self._grid_coords = affine_to_coords(
            self._geobox.affine, self._geobox.width, self._geobox.height
        )
        self._fill = fill if fill is not None else numpy.nan
        if categorical_enums is not None:
            for column_name, categories in categorical_enums.items():
                # "nodata" is appended unconditionally below, so drop any
                # caller-supplied copy first: CategoricalDtype refuses
                # duplicate categories.
                unique_categories = set(categories) - {"nodata"}
                category_type = pandas.api.types.CategoricalDtype(
                    categories=sorted(unique_categories) + ["nodata"]
                )
                # cast the column in place on the loaded vector data
                self._vector_data[column_name] = self._vector_data[column_name].astype(
                    category_type
                )

        # define defaults
        self._rasterize_function = rasterize_image
        self._datetime_measurements = ()
        self._categorical_enums = {}
Beispiel #2
0
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    mask_and_scale=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates: bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
        Forced to True when ``mask_and_scale`` is True.
    mask_and_scale: bool, optional
        Lazily scale (using the `scales` and `offsets` from rasterio) and mask.
        If the _Unsigned attribute is present treat integer arrays as unsigned.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    :obj:`xarray.Dataset` | :obj:`xarray.DataArray` | List[:obj:`xarray.Dataset`]:
        The newly created dataset(s).

    Raises
    ------
    ValueError
        If the opened dataset reports fewer than one band.
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates
    # scaling implies masking
    masked = masked or mask_and_scale
    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        # remember the warp settings so the VRT can be rebuilt on top of the
        # source dataset that the file manager re-opens by name
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            src_crs=vrt.src_crs.to_string(),
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            tolerance=vrt.tolerance,
            src_nodata=vrt.src_nodata,
            nodata=vrt.nodata,
            width=vrt.width,
            height=vrt.height,
            src_transform=vrt.src_transform,
            transform=vrt.transform,
            dtype=vrt.working_dtype,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    with warnings.catch_warnings(record=True) as rio_warnings:
        manager = CachingFileManager(rasterio.open,
                                     filename,
                                     lock=lock,
                                     mode="r",
                                     kwargs=open_kwargs)
        riods = manager.acquire()
        captured_warnings = rio_warnings.copy()
    # re-emit the warnings captured while opening; the only one suppressed is
    # NotGeoreferencedWarning when the file is a container of subdatasets
    for rio_warning in captured_warnings:
        if not riods.subdatasets or not isinstance(rio_warning.message,
                                                   NotGeoreferencedWarning):
            warnings.warn(str(rio_warning.message), type(rio_warning.message))

    # open the subdatasets if they exist
    if riods.subdatasets:
        return _load_subdatasets(
            riods=riods,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
            mask_and_scale=mask_and_scale,
        )

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")

    # parse tags & load alternate coords
    attrs = _get_rasterio_attrs(riods=riods)
    coords = _load_netcdf_1d_coords(riods.tags())
    _parse_driver_tags(riods=riods, attrs=attrs, coords=coords)
    # use the first coordinate that has a matching NETCDF_DIM_* attribute as
    # the leading dimension; otherwise fall back to a plain "band" dimension
    for coord in coords:
        if f"NETCDF_DIM_{coord}" in attrs:
            coord_name = coord
            attrs.pop(f"NETCDF_DIM_{coord}")
            break
    else:
        coord_name = "band"
        coords[coord_name] = np.asarray(riods.indexes)

    # Get geospatial coordinates
    transform = _rio_transform(riods)
    if parse_coordinates and transform.is_rectilinear:
        # 1d coordinates
        coords.update(
            affine_to_coords(riods.transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    unsigned = False
    encoding = {}
    if mask_and_scale and "_Unsigned" in attrs:
        unsigned = variables.pop_to(attrs, encoding, "_Unsigned") == "true"

    da_name = attrs.pop("NETCDF_VARNAME", default_name)
    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(
            manager,
            lock,
            name=da_name,
            vrt_params=vrt_params,
            masked=masked,
            mask_and_scale=mask_and_scale,
            unsigned=unsigned,
        ))

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    result = DataArray(data=data,
                       dims=(coord_name, "y", "x"),
                       coords=coords,
                       attrs=attrs,
                       name=da_name)
    result.encoding = encoding

    # update attributes from NetCDF attributes
    _load_netcdf_attrs(riods.tags(), result)
    result = _decode_datetime_cf(result)

    # make sure the _FillValue is the correct dtype. The value has to be read
    # from and written to ``result.attrs``: ``result`` was updated from the
    # NetCDF tags and rebound above, so touching the local ``attrs`` dict that
    # was handed to the constructor would not change the returned object.
    if "_FillValue" in result.attrs:
        result.attrs["_FillValue"] = result.dtype.type(
            result.attrs["_FillValue"])

    # handle encoding: move the CF packing/masking attributes out of attrs
    if mask_and_scale:
        if "scale_factor" in result.attrs:
            variables.pop_to(result.attrs,
                             result.encoding,
                             "scale_factor",
                             name=da_name)
        if "add_offset" in result.attrs:
            variables.pop_to(result.attrs,
                             result.encoding,
                             "add_offset",
                             name=da_name)
    if masked:
        if "_FillValue" in result.attrs:
            variables.pop_to(result.attrs,
                             result.encoding,
                             "_FillValue",
                             name=da_name)
        if "missing_value" in result.attrs:
            variables.pop_to(result.attrs,
                             result.encoding,
                             "missing_value",
                             name=da_name)

    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    result.rio.write_transform(riods.transform, inplace=True)
    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, riods, filename, chunks)

    # Make the file closeable
    result._file_obj = manager

    return result
Beispiel #3
0
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    **open_kwargs
):
    """Open a file with rasterio (experimental).

    This should work with any file that rasterio can open (most often:
    geoTIFF). The x and y coordinates are generated automatically from the
    file's geoinformation, shifted to the center of each pixel (see
    `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_
    for more information).

    You can generate 2D coordinates from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename : str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path to the file to open. Or already open rasterio dataset.
    parse_coordinates : bool, optional
        Whether to parse the x and y coordinates out of the file's
        ``transform`` attribute or not. The default is to automatically
        parse the coordinates only if they are rectilinear (1D).
        It can be useful to set ``parse_coordinates=False``
        if your files are very large or if you don't need the coordinates.
    chunks : int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. If chunks is provided, it is used to load the
        new DataArray into a dask array. Chunks can also be set to
        ``True`` or ``"auto"`` to choose sensible chunk sizes according to
        ``dask.config.get("array.chunk-size")``.
    cache : bool, optional
        If True, cache data loaded from the underlying datastore in memory as
        NumPy arrays when accessed to avoid reading from the underlying data-
        store multiple times. Defaults to True unless you specify the `chunks`
        argument to use dask, in which case it defaults to False.
    lock : False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is
        used to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked : bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    data : DataArray or Dataset
        The newly created DataArray. If the file exposes subdatasets, a
        Dataset holding one DataArray per subdataset is returned instead.

    Raises
    ------
    ValueError
        If the opened dataset reports fewer than one band.
    NotImplementedError
        If automatic chunking is requested with dask older than 0.18.0.
    """
    parse_coordinates = True if parse_coordinates is None else parse_coordinates

    import rasterio
    from rasterio.vrt import WarpedVRT

    vrt_params = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        # remember the warp settings so the VRT can be rebuilt on top of the
        # source dataset that the file manager re-opens by name
        vrt = filename
        filename = vrt.src_dataset.name
        vrt_params = dict(
            crs=vrt.crs.to_string(),
            resampling=vrt.resampling,
            src_nodata=vrt.src_nodata,
            dst_nodata=vrt.dst_nodata,
            tolerance=vrt.tolerance,
            transform=vrt.transform,
            width=vrt.width,
            height=vrt.height,
            warp_extras=vrt.warp_extras,
        )

    if lock is None:
        lock = RASTERIO_LOCK

    # ensure default for sharing is False
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs["sharing"] = open_kwargs.get("sharing", False)
    manager = CachingFileManager(
        rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
    )
    riods = manager.acquire()

    # open the subdatasets if they exist
    if riods.subdatasets:
        data_arrays = {}
        for iii, subdataset in enumerate(riods.subdatasets):
            rioda = open_rasterio(
                subdataset,
                # only the first subdataset parses the x/y coordinates
                parse_coordinates=iii == 0 and parse_coordinates,
                chunks=chunks,
                cache=cache,
                lock=lock,
                masked=masked,
            )
            data_arrays[rioda.name] = rioda
        return Dataset(data_arrays)

    if vrt_params is not None:
        riods = WarpedVRT(riods, **vrt_params)

    if cache is None:
        cache = chunks is None

    coords = OrderedDict()

    # Get bands
    if riods.count < 1:
        raise ValueError("Unknown dims")
    coords["band"] = np.asarray(riods.indexes)

    # Get coordinates: before rasterio 1.0 the Affine object is exposed as
    # ``affine``; from 1.0 on it is ``transform``
    if LooseVersion(rasterio.__version__) < LooseVersion("1.0"):
        transform = riods.affine
    else:
        transform = riods.transform

    if transform.is_rectilinear and parse_coordinates:
        # 1d coordinates; pass the version-safe ``transform`` selected above
        # rather than ``riods.transform`` so old rasterio gets the Affine too
        coords.update(affine_to_coords(transform, riods.width, riods.height))
    elif parse_coordinates:
        # 2d coordinates
        warnings.warn(
            "The file coordinates' transformation isn't "
            "rectilinear: xarray won't parse the coordinates "
            "in this case. Set `parse_coordinates=False` to "
            "suppress this warning.",
            RuntimeWarning,
            stacklevel=3,
        )

    # Attributes
    attrs = _parse_tags(riods.tags(1))
    encoding = {}
    # Affine transformation matrix (always available)
    # This describes coefficients mapping pixel coordinates to CRS
    # For serialization store as tuple of 6 floats, the last row being
    # always (0, 0, 1) per definition (see
    # https://github.com/sgillies/affine)
    attrs["transform"] = tuple(transform)[:6]
    if hasattr(riods, "nodata") and riods.nodata is not None:
        # The nodata values for the raster bands; stored in the encoding when
        # masking so it is not exposed as a plain attribute
        if masked:
            encoding["_FillValue"] = riods.nodata
        else:
            attrs["_FillValue"] = riods.nodata
    if hasattr(riods, "scales"):
        # The scale values for the raster bands
        attrs["scales"] = riods.scales
    if hasattr(riods, "offsets"):
        # The offset values for the raster bands
        attrs["offsets"] = riods.offsets
    if hasattr(riods, "descriptions") and any(riods.descriptions):
        # Descriptions for each dataset band
        attrs["descriptions"] = riods.descriptions
    if hasattr(riods, "units") and any(riods.units):
        # A list of units string for each dataset band
        attrs["units"] = riods.units

    # Parse extra metadata from tags, if supported
    parsers = {"ENVI": _parse_envi}

    driver = riods.driver
    if driver in parsers:
        meta = parsers[driver](riods.tags(ns=driver))

        for k, v in meta.items():
            # Add values as coordinates if they match the band count,
            # as attributes otherwise
            if isinstance(v, (list, np.ndarray)) and len(v) == riods.count:
                coords[k] = ("band", np.asarray(v))
            else:
                attrs[k] = v

    data = indexing.LazilyOuterIndexedArray(
        RasterioArrayWrapper(manager, lock, vrt_params, masked=masked)
    )

    # this lets you write arrays loaded with rasterio
    data = indexing.CopyOnWriteArray(data)
    if cache and chunks is None:
        data = indexing.MemoryCachedArray(data)

    da_name = attrs.pop("NETCDF_VARNAME", None)
    result = DataArray(
        data=data, dims=("band", "y", "x"), coords=coords, attrs=attrs, name=da_name
    )
    result.encoding = encoding

    if hasattr(riods, "crs") and riods.crs:
        result.rio.write_crs(riods.crs, inplace=True)

    if chunks is not None:
        from dask.base import tokenize

        # augment the token with the file modification time
        try:
            mtime = os.path.getmtime(filename)
        except OSError:
            # the filename is probably an s3 bucket rather than a regular file
            mtime = None

        if chunks in (True, "auto"):
            from dask.array.core import normalize_chunks
            import dask

            if LooseVersion(dask.__version__) < LooseVersion("0.18.0"):
                msg = (
                    "Automatic chunking requires dask.__version__ >= 0.18.0 . "
                    "You currently have version %s" % dask.__version__
                )
                raise NotImplementedError(msg)
            # seed the automatic chunking with the file's native block layout
            # of the first band, one band per chunk
            block_shape = (1,) + riods.block_shapes[0]
            chunks = normalize_chunks(
                chunks=(1, "auto", "auto"),
                shape=(riods.count, riods.height, riods.width),
                dtype=riods.dtypes[0],
                previous_chunks=tuple((c,) for c in block_shape),
            )
        token = tokenize(filename, mtime, chunks)
        name_prefix = "open_rasterio-%s" % token
        result = result.chunk(chunks, name_prefix=name_prefix, token=token)

    # Make the file closeable
    result._file_obj = manager

    return result
Beispiel #4
0
def open_rasterio(
    filename,
    parse_coordinates=None,
    chunks=None,
    cache=None,
    lock=None,
    masked=False,
    variable=None,
    group=None,
    default_name=None,
    **open_kwargs,
):
    """Open a raster file through rasterio as a lazily loaded array (experimental).

    Anything rasterio can open should work (typically geoTIFF). The x and y
    coordinates are derived from the file's geoinformation and refer to pixel
    centers (see `"PixelIsArea" Raster Space
    <http://web.archive.org/web/20160326194152/http://remotesensing.org/geotiff/spec/geotiff2.5.html#2.5.2>`_).

    2D coordinates can be generated from the file's attributes with::

        from affine import Affine
        da = xr.open_rasterio('path_to_file.tif')
        transform = Affine.from_gdal(*da.attrs['transform'])
        nx, ny = da.sizes['x'], da.sizes['y']
        x, y = np.meshgrid(np.arange(nx)+0.5, np.arange(ny)+0.5) * transform


    Parameters
    ----------
    filename: str, rasterio.DatasetReader, or rasterio.WarpedVRT
        Path of the file to open, or an already opened rasterio dataset.
    parse_coordinates: bool, optional
        Whether to build the x and y coordinates from the file's transform.
        ``None`` (the default) is treated as ``True``; coordinates are only
        generated when the transform is rectilinear (1D), otherwise a
        ``RuntimeWarning`` is issued. Pass ``False`` for very large files or
        when the coordinates are not needed.
    chunks: int, tuple or dict, optional
        Chunk sizes along each dimension, e.g., ``5``, ``(5, 5)`` or
        ``{'x': 5, 'y': 5}``. When given, the new DataArray is loaded into a
        dask array. ``True`` or ``"auto"`` picks sensible chunk sizes
        according to ``dask.config.get("array.chunk-size")``.
    cache: bool, optional
        If True, keep data read from the underlying datastore in memory as
        NumPy arrays so it is not read repeatedly. Defaults to True, unless
        ``chunks`` is given, in which case it defaults to False.
    lock: False, True or threading.Lock, optional
        If chunks is provided, this argument is passed on to
        :py:func:`dask.array.from_array`. By default, a global lock is used
        to avoid issues with concurrent access to the same file when using
        dask's multithreaded backend.
    masked: bool, optional
        If True, read the mask and set values to NaN. Defaults to False.
    variable: str or list or tuple, optional
        Variable name or names to use to filter loading.
    group: str or list or tuple, optional
        Group name or names to use to filter loading.
    default_name: str, optional
        The name of the data array if none exists. Default is None.
    **open_kwargs: kwargs, optional
        Optional keyword arguments to pass into rasterio.open().

    Returns
    -------
    data : DataArray
        The newly created DataArray. When the file exposes subdatasets, the
        result of ``_load_subdatasets`` is returned instead.

    Raises
    ------
    ValueError
        If the opened dataset reports fewer than one band.
    """
    if parse_coordinates is None:
        parse_coordinates = True

    # When handed a WarpedVRT, keep its warp settings so an equivalent VRT can
    # be recreated on the dataset that the file manager opens by name.
    warp_options = None
    if isinstance(filename, rasterio.io.DatasetReader):
        filename = filename.name
    elif isinstance(filename, rasterio.vrt.WarpedVRT):
        source_vrt = filename
        filename = source_vrt.src_dataset.name
        warp_options = {
            "crs": source_vrt.crs.to_string(),
            "resampling": source_vrt.resampling,
            "src_nodata": source_vrt.src_nodata,
            "dst_nodata": source_vrt.dst_nodata,
            "tolerance": source_vrt.tolerance,
            "transform": source_vrt.transform,
            "width": source_vrt.width,
            "height": source_vrt.height,
            "warp_extras": source_vrt.warp_extras,
        }

    lock = RASTERIO_LOCK if lock is None else lock

    # rasterio's "sharing" option is switched off unless the caller set it
    # ref https://github.com/mapbox/rasterio/issues/1504
    open_kwargs.setdefault("sharing", False)
    file_manager = CachingFileManager(
        rasterio.open, filename, lock=lock, mode="r", kwargs=open_kwargs
    )
    dataset = file_manager.acquire()

    # container files are delegated to the subdataset loader
    if dataset.subdatasets:
        return _load_subdatasets(
            riods=dataset,
            group=group,
            variable=variable,
            parse_coordinates=parse_coordinates,
            chunks=chunks,
            cache=cache,
            lock=lock,
            masked=masked,
        )

    if warp_options is not None:
        dataset = WarpedVRT(dataset, **warp_options)

    # in-memory caching is only the default when dask is not involved
    if cache is None:
        cache = chunks is None

    # the band dimension
    if dataset.count < 1:
        raise ValueError("Unknown dims")
    coordinates = OrderedDict()
    coordinates["band"] = np.asarray(dataset.indexes)

    # attributes/encoding from the dataset plus driver specific tags
    attributes, array_encoding = _get_rasterio_attrs(riods=dataset, masked=masked)
    _parse_driver_tags(riods=dataset, attrs=attributes, coords=coordinates)

    # the spatial dimensions
    affine = _rio_transform(dataset)
    if parse_coordinates:
        if affine.is_rectilinear:
            # 1d coordinates
            spatial = affine_to_coords(
                dataset.transform, dataset.width, dataset.height
            )
            coordinates.update(spatial)
        else:
            # 2d coordinates are not generated
            warnings.warn(
                "The file coordinates' transformation isn't "
                "rectilinear: xarray won't parse the coordinates "
                "in this case. Set `parse_coordinates=False` to "
                "suppress this warning.",
                RuntimeWarning,
                stacklevel=3,
            )

    # lazy, copy-on-write (so loaded arrays can be written to) and, when
    # requested without dask, memory cached
    wrapper = RasterioArrayWrapper(file_manager, lock, warp_options, masked=masked)
    lazy_data = indexing.CopyOnWriteArray(indexing.LazilyOuterIndexedArray(wrapper))
    if cache and chunks is None:
        lazy_data = indexing.MemoryCachedArray(lazy_data)

    # assemble the output array
    array_name = attributes.pop("NETCDF_VARNAME", default_name)
    result = DataArray(
        data=lazy_data,
        dims=("band", "y", "x"),
        coords=coordinates,
        attrs=attributes,
        name=array_name,
    )
    result.encoding = array_encoding

    dataset_crs = getattr(dataset, "crs", None)
    if dataset_crs:
        result.rio.write_crs(dataset_crs, inplace=True)

    if chunks is not None:
        result = _prepare_dask(result, dataset, filename, chunks)

    # keep a handle on the manager so the file can be closed
    result._file_obj = file_manager

    return result
Beispiel #5
0
def rasterize_points(
    config=None,
    emission_types=None,
):
    """Rasterize per-day ship emission points and write one NetCDF file per type.

    Every file found in ``~/<intermediate_data>/ship_emissions`` is read as a
    CSV, its points are burned onto a regular grid (values falling on the same
    cell are summed) and the resulting grids are stacked along a ``time``
    dimension. One ``<short name>.nc`` file per emission type is written to
    ``~/<result_data>``.

    Parameters
    ----------
    config : dict, optional
        Settings with the keys ``"resolution"`` (grid resolution, ordered
        y, x), ``"bounding_box"`` (minx, miny, maxx, maxy),
        ``"intermediate_data"`` and ``"result_data"`` (both paths relative to
        the user's home directory). When omitted, the settings are loaded
        from ``config.json`` in the current working directory.
    emission_types : dict, optional
        Maps the CSV column to rasterize to the short name used for the
        output file. Defaults to fuel consumption, NOx and CO2.

    Notes
    -----
    Each CSV needs ``lon`` and ``lat`` columns plus the requested emission
    columns; its first column is used as a date-parsed index and the
    day-of-year of the first row becomes the ``time`` coordinate value.
    """
    if emission_types is None:
        # built per call instead of as a (shared, mutable) default argument
        emission_types = {
            "Fuel Consumption [kg]": "Fuel",
            "NOx [kg]": "NOX",
            "CO2 [kg]": "CO2",
        }

    if config is None:
        with open("config.json") as file:
            config = json.load(file)

    resolution = config["resolution"]
    bbox = config["bounding_box"]

    datapath = os.path.join(
        os.path.expanduser("~"),
        config["intermediate_data"],
        "ship_emissions",
    )

    # process the input files in name order
    filepaths = sorted(
        os.path.join(datapath, name) for name in os.listdir(datapath)
    )

    # path to store data
    result_data = os.path.join(os.path.expanduser("~"), config["result_data"])
    os.makedirs(result_data, exist_ok=True)

    # target CRS of the grid; an LCC alternative would be
    # "+proj=lcc +lat_1=30 +lat_2=60 +lat_0=55 +lon_0=10 +y_0=1e+06 +x_0=1275000 +a=6370997 +b=6370997 +units=km +no_defs"
    crs = "epsg:4326"

    bounding_box = box(bbox[0], bbox[1], bbox[2], bbox[3])

    json_box = mapping(bounding_box)  # minx miny maxx maxy

    json_box["crs"] = {"properties": {"name": crs}}

    geopoly = geometry.Geometry(
        json_box,
        crs=crs,
    )
    geobox = geometry.GeoBox.from_geopolygon(
        geopoly,
        resolution,
        crs=crs,
    )  # resolution y,x

    # 1d x/y coordinates of the grid
    coords = affine_to_coords(geobox.affine, geobox.width, geobox.height)

    for emission_type, short_name in emission_types.items():
        # Grids and their dates are kept as parallel lists so the data and the
        # time coordinate always have the same length, even when two files
        # start on the same day of the year.
        dates = []
        daily_grids = []
        for file in filepaths:
            df = pd.read_csv(file, index_col=[0], parse_dates=True)

            geodf = gpd.GeoDataFrame(
                df,
                crs="epsg:4326",
                geometry=gpd.points_from_xy(df.lon, df.lat),
            )

            # the points are lon/lat; reproject only for an LCC target grid
            if "lcc" in crs:
                geodf = geodf.to_crs(crs)

            # burn (geometry, value) pairs onto the grid, adding up the values
            # that land in the same cell
            arr = rasterize(
                zip(
                    geodf.geometry.apply(mapping).values,
                    geodf[emission_type],
                ),
                out_shape=(
                    geobox.height,
                    geobox.width,
                ),
                transform=geobox.affine,
                merge_alg=MergeAlg.add,
                all_touched=True,
            )

            dates.append(df.index[0].dayofyear)
            daily_grids.append(arr)

        da = xr.DataArray(
            daily_grids,
            dims=[
                "time",
                "lat",
                "lon",
            ],
            coords=[
                np.array(dates),
                coords["y"],
                coords["x"],
            ],
        )

        da = da.rename("sum")
        da = da.astype("float64")
        da.attrs = {"units": "kg d-1"}

        # NOTE(review): the time values are 1-based day-of-year numbers while
        # the units below count days *since* 2015-01-01 - confirm whether an
        # offset of one day (and the fixed year) is intended.
        da.coords["time"].attrs = {
            "standard_name": "time",
            "calendar": "proleptic_gregorian",
            "units": "days since 2015-01-01",
            "axis": "T",
        }
        da.coords["lon"].attrs = {
            "standard_name": "longitude",
            "long_name": "longitude",
            "units": "degrees_east",
            "axis": "X",
        }
        da.coords["lat"].attrs = {
            "standard_name": "latitude",
            "long_name": "latitude",
            "units": "degrees_north",
            "axis": "Y",
        }

        da.to_netcdf(
            # write to the shorter file name
            os.path.join(result_data, short_name + ".nc"),
            encoding={
                "lat": {
                    "dtype": "float32"
                },
                "lon": {
                    "dtype": "float32"
                },
                "sum": {
                    "dtype": "float32"
                },
            },
        )