Python read_csv Beispiele, swisslandstats.read_csv Python Beispiele

Beispiel #1

0

Datei anzeigen

def test_geometry():
    """Check the geopandas exports and the clip methods of a land data frame."""
    import geopandas as gpd
    from shapely.geometry import Polygon

    land_df = sls.read_csv("tests/input_data/dataset.csv")

    # export to a geo series: same length and CRS as the land data frame
    geo_series = land_df.get_geoseries()
    assert type(geo_series) == gpd.GeoSeries
    assert len(land_df) == len(geo_series)
    assert land_df.crs == geo_series.crs

    # export to a geo data frame: same length and CRS, and neither of the
    # coordinate columns may show up among the exported columns
    geo_df = land_df.to_geodataframe()
    assert type(geo_df) == gpd.GeoDataFrame
    assert len(land_df) == len(geo_df)
    exported_columns = geo_df.columns
    assert not (land_df.x_column in exported_columns
                or land_df.y_column in exported_columns)
    assert land_df.crs == geo_df.crs

    # clipping by an explicit geometry: exactly one row is expected to remain
    # for this square
    square = Polygon([(0, 0), (0, 150), (150, 150), (150, 0)])
    assert len(land_df.clip_by_geometry(square)) == 1

    # clipping by a place name: no row of the test dataset is expected to
    # remain for this query
    assert len(land_df.clip_by_nominatim("Lausanne, Switzerland")) == 0

Beispiel #2

0

Datei anzeigen

Datei: test_sls.py Projekt: weikang9009/swisslandstats-geopy

def test_slsdataframe():
    """Check array/raster export, plotting, typing and merging of a land data frame."""
    import tempfile
    import matplotlib.pyplot as plt
    plt.switch_backend('agg')  # only for testing purposes
    import numpy as np
    import pandas as pd

    ldf = sls.read_csv('tests/input_data/dataset.csv')

    assert np.all(
        ldf.to_ndarray('AS09_4') == np.arange(4, dtype=np.uint8).reshape(2, 2))
    # use a context manager so that the temporary file is explicitly closed
    # (and removed) instead of being left to the garbage collector
    with tempfile.TemporaryFile() as dst:
        ldf.to_geotiff(dst, 'AS09_4')

    assert isinstance(ldf.plot('AS09_4', cmap=sls.noas04_4_cmap, legend=True),
                      plt.Axes)

    # the exact type matters here (hence no `isinstance`): selecting both
    # coordinate columns must yield a land data frame, whereas dropping one of
    # them must yield a plain pandas object
    assert type(ldf[[ldf.x_column, ldf.y_column,
                     'AS09_4']]) is sls.LandDataFrame
    assert type(ldf[[ldf.x_column, 'AS09_4']]) is pd.DataFrame
    assert type(ldf['AS09_4']) is pd.Series

    # create dataframe with another dummy land statistics column, but with one
    # row less and test merge (should fill the missing row with a nan)
    ldf2 = ldf.copy()
    ldf2['AS85_4'] = pd.Series(1, index=ldf.index[:-1], name='AS85_4')
    ldf2 = ldf2.drop('AS09_4', axis=1)
    merged_ldf = ldf.merge(ldf2)

    assert 'AS85_4' in merged_ldf.columns.difference(ldf.columns)
    # to test for the presence of nan: merged_ldf['AS85_4'].isna().any()
    assert np.sum(merged_ldf['AS85_4'].isna()) == 1

    # test that `get_transform` returns a different transform if, e.g., we
    # change the min x or max y value
    assert ldf.get_transform() != ldf.iloc[:2].get_transform()

Beispiel #3

0

Datei anzeigen

def main(sls_filepath, gmb_filepath, agglomeration_slug, extract_filepath):
    """Dump the urban/non-urban land data extract of an agglomeration to CSV.

    The land data frame read from `sls_filepath` is clipped to the union of
    the boundaries (read from `gmb_filepath`) whose slugified 'ANAME' contains
    `agglomeration_slug`. Every 'AS*_4' column is then reclassified into a new
    'AS*_2' column, and the result (without the original 'AS*' columns) is
    written to `extract_filepath`.

    Parameters
    ----------
    sls_filepath
        Path to the CSV file passed to `sls.read_csv`.
    gmb_filepath
        Path to the boundaries file passed to `gpd.read_file`; it must have
        'ANAME' and 'geometry' columns.
    agglomeration_slug
        Slug matched (with `str.contains`) against the slugified 'ANAME'
        values.
    extract_filepath
        Destination of the CSV dump.
    """
    logger = logging.getLogger(__name__)
    logger.info(f'preparing agglomeration extracts for {agglomeration_slug}')

    # read the land dataframe from the SLS dataset
    ldf = sls.read_csv(sls_filepath)

    # get the municipal boundaries that configure the agglomeration
    # NOTE: we use contains because of `ANAME` codes such as "Basel (CH)"
    # and "Basel (CH/DE/FR)"
    gdf = gpd.read_file(gmb_filepath)
    agglomeration_geom = gdf[gdf['ANAME'].apply(slugify).str.contains(
        agglomeration_slug)]['geometry'].unary_union

    # crop the land dataframe to the extent of the agglomeration boundaries
    agglomeration_ldf = ldf.clip_by_geometry(agglomeration_geom,
                                             geometry_crs=gdf.crs)
    logger.info('cropped dataset to the agglomeration extent ('
                f'{len(agglomeration_ldf)} pixels)')

    # reclassify
    extracts_urban = settings.EXTRACTS_URBAN
    extracts_nonurban = settings.EXTRACTS_NONURBAN
    extracts_nodata = settings.EXTRACTS_NODATA

    def urban_reclassify_sls(class_val):
        # function to apply column-wise to a 4-class (urban, agricultural,
        # forest, unproductive) column of a `sls.LandDataFrame`, i.e.,
        # 'AS85R_4', 'AS97R_4', 'AS09R_4' or 'AS18_4'
        if class_val == 1:
            return extracts_urban
        elif class_val in [2, 3]:
            return extracts_nonurban
        else:  # nodata and unproductive use (e.g., water)
            return extracts_nodata

    main_domains_columns = ldf.columns[ldf.columns.str.startswith('AS')
                                       & ldf.columns.str.endswith('_4')]
    for main_domains_column in main_domains_columns:
        # replace the '4' by a '2' (two classes: urban and non-urban)
        urban_nonurban_column = main_domains_column[:-1] + '2'
        agglomeration_ldf[urban_nonurban_column] = agglomeration_ldf[
            main_domains_column].apply(urban_reclassify_sls)
    logger.info(
        f'reclassified columns {main_domains_columns} into urban/non-urban')

    # we will not dump (to the extract csv file) the more fine-grained land
    # use columns - we will just dump the urban/non-urban one (but we still
    # need to dump the other non-land use columns with the coordinates and
    # year data)
    # the urban/non-urban columns are matched by the exact '_2' suffix created
    # above: matching a bare '2' would also keep any other 'AS*' column whose
    # name merely happens to end in '2' (e.g., a '..._72' column, if the
    # dataset has one)
    extract_columns = agglomeration_ldf.columns
    is_land_use = extract_columns.str.startswith('AS')
    is_urban_nonurban = extract_columns.str.endswith('_2')
    agglomeration_ldf[extract_columns[~is_land_use
                                      | is_urban_nonurban]].to_csv(
                                          extract_filepath)
    logger.info('saved dump of land dataframe extract for '
                f'{agglomeration_slug} to {extract_filepath} ')

Beispiel #4

0

Datei anzeigen

def main(statpop_filepath, agglom_extent_filepath, dst_filepath,
         vulnerable_columns, buffer_dist):
    """Dump a raster with the row-wise sum of the vulnerable columns.

    The data read from `statpop_filepath` (with 'E_KOORD'/'N_KOORD' as the
    coordinate columns) is clipped to the first geometry of
    `agglom_extent_filepath`, buffered by `buffer_dist`. The columns listed in
    `vulnerable_columns` (or in `VULNERABLE_COLUMNS` when it is None) are then
    summed into a 'vulnerable' column, which is written to `dst_filepath`
    through `to_geotiff`.
    """
    logger = logging.getLogger(__name__)

    extent_gdf = gpd.read_file(agglom_extent_filepath)

    # read the whole dataset first, then clip it to the buffered extent
    statpop_ldf = sls.read_csv(statpop_filepath,
                               x_column='E_KOORD',
                               y_column='N_KOORD')
    buffered_extent = extent_gdf['geometry'].iloc[0].buffer(buffer_dist)
    ldf = statpop_ldf.clip_by_geometry(buffered_extent, extent_gdf.crs)

    # fall back to the module-level default selection of columns
    columns_to_sum = (VULNERABLE_COLUMNS
                      if vulnerable_columns is None else vulnerable_columns)
    ldf['vulnerable'] = ldf[columns_to_sum].sum(axis=1)

    ldf.to_geotiff(dst_filepath, 'vulnerable')
    logger.info("dumped vulnerable population raster to %s", dst_filepath)

Beispiel #5

0

Datei anzeigen

def test_slsdataframe():
    """Check instantiation, exports, plotting, typing and merging of a land data frame."""
    import tempfile

    import matplotlib.pyplot as plt
    import numpy as np
    import osmnx as ox  # NOTE(review): not used in this test - confirm whether it is needed
    import pandas as pd
    import xarray as xr
    from rasterio.crs import CRS

    plt.switch_backend("agg")  # only for testing purposes

    # test instantiation: whatever form the `crs` argument takes, the `crs`
    # attribute of the resulting land data frame must be a `CRS` instance.
    # The loop variable has to be forwarded, otherwise the very same default
    # call is just repeated three times
    for crs in [None, "epsg:2056", CRS.from_string("epsg:2056")]:
        assert isinstance(
            sls.read_csv("tests/input_data/dataset.csv", crs=crs).crs, CRS)

    # test basic features and pandas-like transformations
    ldf = sls.read_csv("tests/input_data/dataset.csv")
    assert np.all(
        ldf.to_ndarray("AS09_4") == np.arange(4, dtype=np.uint8).reshape(2, 2))
    # use a context manager so that the temporary file is explicitly closed
    # (and removed) instead of being left to the garbage collector
    with tempfile.TemporaryFile() as dst:
        ldf.to_geotiff(dst, "AS09_4")

    # test plots
    assert isinstance(ldf.plot("AS09_4", cmap=sls.noas04_4_cmap, legend=True),
                      plt.Axes)
    # test noas04_4_cmap. TODO: DRY this test??
    arr = ldf.to_ndarray("AS18_4")
    # if we do not use the `norm` arg and there is no "nodata" value in our land data
    # frame, the "nodata" color will actually be assigned to an actual valid color
    ax_no_norm = ldf.plot("AS18_4", cmap=sls.noas04_4_cmap)
    im_no_norm = ax_no_norm.get_images()[0]
    assert np.all(
        im_no_norm.cmap(im_no_norm.norm(np.unique(arr)))[0] ==
        sls.plotting._nodata_color)
    # instead, when we use the `norm` arg, the colors are properly assigned
    ax_norm = ldf.plot("AS18_4",
                       cmap=sls.noas04_4_cmap,
                       norm=sls.noas04_4_norm)
    im_norm = ax_norm.get_images()[0]
    assert np.all(
        im_norm.cmap(im_norm.norm(np.unique(arr)))[0] !=
        sls.plotting._nodata_color)

    # test data frame types. The exact type matters here (hence no
    # `isinstance`): selecting both coordinate columns must yield a land data
    # frame, whereas dropping one of them must yield a plain pandas object
    assert type(ldf[[ldf.x_column, ldf.y_column,
                     "AS09_4"]]) is sls.LandDataFrame
    assert type(ldf[[ldf.x_column, "AS09_4"]]) is pd.DataFrame
    assert type(ldf["AS09_4"]) is pd.Series

    # create dataframe with another dummy land statistics column, but with one
    # row less and test merge (should fill the missing row with a nan)
    ldf2 = ldf.copy()
    ldf2["AS85_4"] = pd.Series(1, index=ldf.index[:-1], name="AS85_4")
    ldf2 = ldf2.drop("AS09_4", axis=1)
    merged_ldf = ldf.merge(ldf2)

    assert "AS85_4" in merged_ldf.columns.difference(ldf.columns)
    # to test for the presence of nan: merged_ldf['AS85_4'].isna().any()
    assert np.sum(merged_ldf["AS85_4"].isna()) == 1

    # test that `get_transform` returns a different transform if, e.g., we
    # change the min x or max y value
    assert ldf.get_transform() != ldf.iloc[:2].get_transform()

    # test export to xarray
    ldf = sls.read_csv("tests/input_data/dataset.csv")
    ldf["AS85_4"] = pd.Series(1, index=ldf.index[:-1], name="AS85_4")
    columns = ["AS85_4", "AS09_4"]
    assert isinstance(ldf.to_xarray(columns), xr.DataArray)
    # test that columns are the outermost dimension
    assert ldf.to_xarray(columns).shape[0] == len(columns)
    num_cols = 1
    assert ldf.to_xarray(columns[:num_cols]).shape[0] == num_cols
    # test that the name of the outermost dimension is set
    dim_name = "survey"
    da = ldf.to_xarray(columns, dim_name=dim_name)
    assert dim_name in da.dims
    assert np.all(da.coords[dim_name] == columns)
    # test that the data array metadata is set
    nodata = 255
    attrs = ldf.to_xarray(columns, nodata=nodata).attrs
    assert attrs["nodata"] == nodata
    assert "pyproj_srs" in attrs
    # test that the data array has the proper dtype
    dtype = "uint16"
    assert ldf.to_xarray(columns, dtype=dtype).dtype == dtype