def preprocess_file(ncfile, tbl, product):
    '''Preprocess a single file and write it to the database

    :param ncfile: path to the ncfile to preprocess
    :type ncfile: str
    :param tbl: Table to write the data to
    :type tbl: sqlalchemy.sql.schema.Table
    :param product: The name of the product to load from the files
    :type product: str

    '''
    logger.info("Reading '%s' from file '%s'", product, ncfile)
    scan = s5a.load_ncfile(ncfile, data_variable_name=product)

    logger.info("Filtering %s points by quality of file '%s'", len(scan),
                ncfile)
    scan = s5a.filter_by_quality(scan)

    # Skip file if no points are present after filtering
    if len(scan) == 0:
        logger.warning("No points left after filtering of '%s'", ncfile)
        return

    logger.info("Apply H3 grid to '%s' points of file '%s'", len(scan), ncfile)
    scan = s5a.point_to_h3(scan, resolution=emissionsapi.db.resolution)
    scan = s5a.aggregate_h3(scan)
    scan = s5a.h3_to_point(scan)

    logger.info("Writing %s points from '%s' to database", len(scan), ncfile)
    write_to_database(scan, ncfile, tbl)

    logger.info("Finished writing points from '%s' to database", ncfile)
Example #2
def to_pickle(directory: str, resolution: int = 5) -> None:
    """
    save the data as pickled file (.pkl)  after filtering the values to with h3 to a resolution of
    # LEVEL 5. 8.54 * 0.866 is circa the radii of 7.4
    # LEVEL 6. 3.23 * 0.866 is circa the radii of 2.8
    # LEVEL 6. 1.22 * 0.866 is circa the radii of 1.6
    :param directory: str
    :param resolution: int h3 resolution see: https://uber.github.io/h3/#/documentation/core-library/resolution-table
    :return: None
    """
    _files = [file for file in os.listdir(directory) if file.endswith(".nc")]

    data = []

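    # Load every NetCDF (.nc) file, skipping any that cannot be read.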
    for file in _files:
        f = os.path.join(directory, file)
        try:
            data.append(s5a.load_ncfile(f))
        except OSError:
            print("OSERROR: {}".format(f))

    data = pd.concat(data, ignore_index=True)

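    # Aggregate the points on the H3 grid and convert the cells back to points.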
    data = s5a.point_to_h3(data, resolution=resolution)
    data = s5a.aggregate_h3(data)
    data = s5a.h3_to_point(data)

    data.to_pickle(path=os.path.join(directory, "data.pkl"))
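A hypothetical call, assuming the .nc files live in data/NO2Files; the pickle written by to_pickle can then be read back with pandas:

import pandas as pd

# Aggregate every file in the directory at H3 resolution 5, then reload the
# combined result from the pickle that to_pickle writes next to the inputs.
to_pickle("data/NO2Files", resolution=5)
data = pd.read_pickle("data/NO2Files/data.pkl")
print(data.head())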
Example #3
    def test_h3(self):
        # Create H3 indices
        d = self.data.copy()
        d = s5a.point_to_h3(d)
        self.assertIn('h3', d.columns)

        # Aggregate
        self.assertEqual(len(s5a.aggregate_h3(d)), 5)

        keys = ['longitude', 'latitude']

        # Check if all longitudes and latitudes are updated
        d = s5a.h3_to_point(d)
        unchanged = (d[keys] - self.data[keys] == 0).sum()
        for key in keys:
            self.assertEqual(unchanged[key], 0)

        # Check if longitude and latitude are added if not exist
        d = d.drop(columns=keys)
        d = s5a.h3_to_point(d)
        for key in keys:
            self.assertIn(key, d.columns)
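The self.data fixture is not part of this excerpt. A minimal setUp consistent with the assertions above might look like the sketch below (assuming `import pandas` at module level); the coordinates and column names are illustrative assumptions.

    def setUp(self):
        # Hypothetical fixture: five points far enough apart to land in five
        # distinct H3 cells; the column names are assumptions as well.
        self.data = pandas.DataFrame({
            'longitude': [6.08, 13.40, 2.35, -0.13, 37.62],
            'latitude': [50.78, 52.52, 48.86, 51.51, 55.75],
            'value': [0.1, 0.2, 0.3, 0.4, 0.5],
        })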
Example #4
import os
import sys
import s5a
import glob

no2path = os.path.join(os.path.dirname(sys.path[0]), "data/NO2Files/")

print(no2path)

# Only consider the NetCDF files; sort for a deterministic order.
files = sorted(f for f in os.listdir(no2path) if f.endswith(".nc"))

data = s5a.load_ncfile(os.path.join(no2path, files[0]))
dataraw = data  # keep a reference to the unfiltered scan
data = s5a.filter_by_quality(data)
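# Aggregate the points on the H3 grid (resolution 10) and convert back to points.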
data = s5a.point_to_h3(data, resolution=10)
data = s5a.aggregate_h3(data)
data = s5a.h3_to_point(data)

datafiltered = data
# print(datafiltered.head())

import geopandas

geometry = geopandas.points_from_xy(data.longitude, data.latitude)
data = geopandas.GeoDataFrame(data,
                              geometry=geometry,
                              crs="EPSG:4326")

world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
worldplot = world.plot(figsize=(10, 5))

robinson_projection = '+a=6378137.0 +proj=robin +lon_0=0 +no_defs'
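The Robinson projection string is defined but not yet used at this point. A hypothetical continuation could reproject both layers and overlay the aggregated points on the world map; the 'value' column name and the plot styling are assumptions.

import matplotlib.pyplot as plt

# Hypothetical continuation: reproject to the Robinson projection and draw the
# aggregated NO2 points on top of the world map.
world_robinson = world.to_crs(robinson_projection)
data_robinson = data.to_crs(robinson_projection)

ax = world_robinson.plot(figsize=(10, 5), color='lightgrey')
data_robinson.plot(ax=ax, markersize=1, column='value')
plt.show()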