Example #1
def add_spatial_joins(df):
    """Add spatial joins of data provided by API, but not needed for network analysis.

    Parameters
    ----------
    df : GeoDataFrame

    Returns
    -------
    GeoDataFrame
        has fields added by spatial joins to other datasets
    """

    ### Protected lands
    print("Joining to protected areas")
    protected = from_geofeather(boundaries_dir / "protected_areas.feather")
    df = spatial_join(df, protected)
    df.OwnerType = df.OwnerType.fillna(-1).astype("int8")
    df.ProtectedLand = df.ProtectedLand.fillna(False).astype("bool")

    ### Priority layers
    print("Joining to priority watersheds")
    priorities = (
        deserialize_df(boundaries_dir / "priorities.feather")
        .set_index("HUC8")
        .rename(columns={"usfs": "HUC8_USFS", "coa": "HUC8_COA", "sgcn": "HUC8_SGCN"})
    )
    df = df.join(priorities, on="HUC8")
    df[priorities.columns] = df[priorities.columns].fillna(0).astype("uint8")

    return df
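
A hypothetical call site for add_spatial_joins, as a minimal sketch; the from_geofeather load, barriers_dir, and the file name are assumptions borrowed from the other examples on this page:

from geofeather import from_geofeather

# Hypothetical usage sketch: load a previously saved point layer and attach
# the protected-area and priority-watershed fields (the path is an assumption)
barriers = from_geofeather(barriers_dir / "dams.feather")
barriers = add_spatial_joins(barriers)
print(barriers[["OwnerType", "ProtectedLand", "HUC8_USFS"]].head())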
Example #2
def test_points_geofeather_wkt(tmpdir, points_albers_conus_wkt):
    """Confirm that we can round-trip points to / from feather file with a wkt defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_wkt, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_wkt)
    assert df.crs == points_albers_conus_wkt.crs
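
Outside the test fixtures, a minimal round-trip with geofeather might look like the sketch below; the toy GeoDataFrame and output path are assumptions:

import geopandas as gpd
from shapely.geometry import Point
from geofeather import to_geofeather, from_geofeather

# Build a tiny point GeoDataFrame, write it to feather, and read it back
points = gpd.GeoDataFrame(
    {"name": ["a", "b"]},
    geometry=[Point(0, 0), Point(1, 1)],
    crs="EPSG:4326",
)
to_geofeather(points, "points.feather")
roundtripped = from_geofeather("points.feather")
assert roundtripped.crs == points.crs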
Example #3
def main(tile, input_pt_feather):
    # Data Directories
    source_dir = '/Users/arbailey/natcap/idb/data/source/'
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')

    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir,
                                    "gliht_srtm_{}.feather".format(tile))

    #--- Load the G-LiHT points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source,
                                          time_elapsed(start_time)))
    print(gliht_pts.dtypes)
    print(gliht_pts)

    #--- SRTM elevation data
    srtm_source = os.path.join(source_dir, 'srtm/nasa', ".".join(
        (tile, 'SRTMGL1', 'hgt', 'zip')))

    # Clip the points to SRTM raster extent (1 degree tile)
    # gliht_pts_clip = clip_pts_with_raster(gliht_pts[1:100], srtm_source)  # subset for testing
    gliht_pts_clip = clip_pts_with_raster(gliht_pts, srtm_source)

    # Sample the SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_source, 'srtm_m')
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Create unique index value for SRTM raster
    srtm_unique_source = os.path.join(
        work_dir, "{}_{}_{}.{}".format(tile, 'srtm', 'uniqueid', 'tif'))
    make_unique_raster(srtm_source, srtm_unique_source)
    # Sample Unique ID SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_unique_source,
                                   'srtm_idx')
    gliht_pts_clip.reset_index(inplace=True)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)
    # Add columns to show the tile and unique index plus tile
    gliht_pts_clip['tile'] = tile
    gliht_pts_clip['tile_srtmidx'] = gliht_pts_clip[
        'tile'] + '_' + gliht_pts_clip['srtm_idx'].astype(str)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(
        out_feather_path, time_elapsed(start_time)))
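
clip_pts_with_raster, sample_raster, and make_unique_raster are project helpers that are not shown in this example. A rough sketch of how the raster-sampling step might be implemented with rasterio, assuming the points and the raster share a CRS, is:

import rasterio

def sample_raster(points, raster_path, column):
    """Sample the first band of a raster at each point and store the values in a new column (sketch)."""
    coords = [(geom.x, geom.y) for geom in points.geometry]
    with rasterio.open(raster_path) as src:
        # src.sample yields one array of band values per coordinate
        points[column] = [values[0] for values in src.sample(coords)]
    return points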
Example #4
def test_polygons_geofeather(tmpdir, polygons_wgs84):
    """Confirm that we can round-trip polygons to / from feather file"""

    filename = tmpdir / "polygons_wgs84.feather"
    to_geofeather(polygons_wgs84, filename)

    assert os.path.exists(filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, polygons_wgs84)
    assert df.crs == polygons_wgs84.crs
Example #5
def cut_flowlines_at_barriers(region, barriers):
    """Read in flowlines and joins between segments, cut flowlines at barriers, and return updated
    flowlines, joins, and joins at each of the barriers

    NOTE: loops are dropped from the analysis.

    Parameters
    ----------
    region : str
        ID of region group
    barriers : GeoDataFrame
        Barriers to cut the network

    Returns
    -------
    (GeoDataFrame, DataFrame, DataFrame)
        cut flowlines, updated joins, barrier joins
    """

    ### Read NHD flowlines and joins
    print("Reading flowlines...")
    flowline_start = time()
    flowlines = (from_geofeather(
        nhd_dir / region / "flowlines.feather").set_index(
            "lineID", drop=False).drop(columns=["HUC2"], errors="ignore"))
    joins = deserialize_df(nhd_dir / region / "flowline_joins.feather")

    # Fix data issue; remove on next full run of prepare_flowlines_waterbodies.py
    ix = flowlines.loc[flowlines.loop].index
    joins.loc[joins.upstream_id.isin(ix) | joins.downstream_id.isin(ix),
              "loop"] = True
    joins.loop = joins.loop.fillna(False)

    ix = flowlines.loop == True
    print("Found {:,} loops, dropping...".format(ix.sum()))
    flowlines = flowlines.loc[~ix].copy()
    joins = joins.loc[~joins.loop].copy()

    print("Read {:,} flowlines in {:.2f}s".format(len(flowlines),
                                                  time() - flowline_start))

    ### Cut flowlines at barriers
    cut_start = time()

    # since all other lineIDs use HUC4 prefixes, this should be unique
    # Use the first HUC2 for the region group
    next_segment_id = int(REGION_GROUPS[region][0]) * 1000000 + 1
    flowlines, joins, barrier_joins = cut_flowlines(
        flowlines, barriers, joins, next_segment_id=next_segment_id)

    print("Done cutting flowlines in {:.2f}".format(time() - cut_start))

    return flowlines, joins, barrier_joins
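
A hypothetical call, assuming barriers for the same region have already been read (read_barriers is shown in Example #15 below; the region code is an assumption):

barriers = read_barriers("02", "dams")
flowlines, joins, barrier_joins = cut_flowlines_at_barriers("02", barriers)
print("{:,} flowlines after cutting".format(len(flowlines)))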
Example #6
def test_missing_crs_warning(tmpdir, points_wgs84):
    """Confirm that a warning is raised if the crs file is missing"""

    filename = tmpdir / "points_wgs84.feather"
    to_geofeather(points_wgs84, filename)

    os.remove("{}.crs".format(filename))

    with pytest.warns(UserWarning) as warning:
        df = from_geofeather(filename)
        assert ("coordinate reference system file is missing"
                in warning[0].message.args[0])

        assert df.crs is None
Example #7
def test_points_geofeather_proj4(tmpdir, points_albers_conus_proj4):
    """Confirm that we can round-trip points to / from feather file with a proj4 defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_proj4, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_proj4)

    # equality comparison fails for CRS objects constructed from proj4, even though they represent the same CRS
    if hasattr(df.crs, "to_proj4"):
        assert df.crs.to_proj4() == points_albers_conus_proj4.crs.to_proj4()
    else:
        assert df.crs == points_albers_conus_proj4.crs
def main(raster_source, uniqueid_file, work_dir, input_pt_feather,
         out_feather):

    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir, out_feather)

    #--- Load the points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    in_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source,
                                          time_elapsed(start_time)))
    print(in_pts.dtypes)
    print(in_pts)

    # Clip the points to raster extent
    # in_pts_clip = clip_pts_with_raster(in_pts[1:100], raster_source)  # subset for testing
    in_pts_clip = clip_pts_with_raster(in_pts, raster_source)

    # Sample the raster
    in_pts_clip = sample_raster(in_pts_clip, raster_source, 'tncdep_m')
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Create unique index value for the raster
    raster_unique_source = os.path.join(work_dir, uniqueid_file)
    make_unique_raster(raster_source, raster_unique_source)
    # Sample the unique ID raster
    in_pts_clip = sample_raster(in_pts_clip, raster_unique_source,
                                'tncdep_idx')
    in_pts_clip.reset_index(inplace=True)
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(in_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(
        out_feather_path, time_elapsed(start_time)))
            columns=["miles", "free_miles"])

    serialize_df(barrier_networks.reset_index(drop=False),
                 out_dir / "barriers_network.feather")

### Update network geometries and barrier networks
# Cut networks from upstream regions and paste them into downstream regions
cut_networks = None
for region in cross_region.from_region.unique():
    out_dir = data_dir / "networks" / region / network_type

    ### Update network geometries
    print(
        "Cutting downstream network from upstream region {}...".format(region))

    network = from_geofeather(data_dir / "networks" / region / network_type /
                              "raw/network.feather")

    # select the affected networks
    idx = network.networkID.isin(cross_region.upstream_network)
    cut = network.loc[idx].copy()

    if cut_networks is None:
        cut_networks = cut

    else:
        cut_networks = cut_networks.append(cut, ignore_index=True, sort=False)

    # write the updated network back out
    network = network.loc[~idx].copy()

    print("Serializing updated network...")

### Read in master
print("Reading master...")
df = (
    from_geofeather(barriers_dir / "small_barriers.feather")
    .set_index("id")
    .drop(
        columns=[
            "level_0",
            "index",
            "dup_group",
            "dup_count",
            "dup_log",
            "snap_dist",
            "snap_tolerance",
            "snap_ref_id",
            "snap_log",
            "snapped",
            "ProtectedLand",
            "log",
            "lineID",
            "wbID",
        ],
        errors="ignore",
    )
    .rename(columns={"streamorder": "StreamOrder", "excluded": "Excluded"})
)


# drop any that should be DROPPED (dropped or duplicate) from the analysis
# NOTE: excluded ones are retained but don't have networks
def main(tile, input_pt_feather):

    # Data Directories
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')
    pt_data_source = os.path.join(work_dir, input_pt_feather)

    out_feather_path = os.path.join(work_dir, "gliht_srtm_mangroves_{}.feather".format(tile))

    # --- Mangrove canopy height rasters (max height and weighted-average height)
    hmax_source = os.path.join(data_dir, 'gmc_hmax95_bahamas_MAR.tif')
    hba_source = os.path.join(data_dir, 'gmc_hba95_bahamas_MAR.tif')

    #--- Load the G-LiHT/SRTM points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source, time_elapsed(start_time)))
    gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Sample the Canopy Height rasters
    # Max Height - hmax95
    # gliht_pts = sample_raster(gliht_pts[0:100], hmax_source, 'hmax95')
    gliht_pts = sample_raster(gliht_pts, hmax_source, 'hmax95')
    print(gliht_pts.dtypes)
    print(gliht_pts)
    # Weighted Average Height - hba95
    gliht_pts = sample_raster(gliht_pts, hba_source, 'hba95')
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Create unique index value for Canopy raster
    gmc_unique_source = os.path.join(work_dir, "gmc_uniqueid.tif")
    # make_unique_raster(hmax_source, gmc_unique_source)  # Takes 1:55:14.79
    # Sample Unique ID raster
    gliht_pts = sample_raster(gliht_pts, gmc_unique_source, 'hmax_idx')
    # gliht_pts.reset_index(inplace=True)
    # gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Add columns to show the tile and unique index plus tile
    gliht_pts['tile'] = tile
    gliht_pts['tile_hmaxidx'] = gliht_pts['tile'] + '_' + gliht_pts['hmax_idx'].astype(str)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Mangrove Extent Vector shapefile paths to join to Points
    #-- World Atlas of Mangroves
    wam_path = os.path.join(data_dir, 'wam_Bahamas_MAR.shp')
    wam_att = 'wam'
    wam = mangrove_poly_to_gdf(wam_path, wam_att)
    print(wam)
    gliht_pts = mangrove_join(gliht_pts, wam)
    print(gliht_pts)

    #-- Global Mangrove Watch
    gmw2016_path = os.path.join(data_dir, 'gmw2016_Bahamas_MAR.shp')
    gmw2016_att = 'gmw2016'
    gmw2016 = mangrove_poly_to_gdf(gmw2016_path, gmw2016_att)
    print(gmw2016)
    gliht_pts = mangrove_join(gliht_pts, gmw2016)
    print(gliht_pts)

    # Global Mangrove Forests
    gmf_path = os.path.join(data_dir, 'gmf_bahamas_MAR.shp')
    gmf_att = 'gmf'
    gmf = mangrove_poly_to_gdf(gmf_path, gmf_att)
    print(gmf)
    gliht_pts = mangrove_join(gliht_pts, gmf)
    print(gliht_pts)

    # NatCap mangrove compilation for the MAR region (Mexico, Belize, Guatemala, Honduras)
    ncmar_path = os.path.join(data_dir, 'natcap_mangrovesV4_MAR.shp')
    ncmar_att = 'ncMAR'
    ncmar = mangrove_poly_to_gdf(ncmar_path, ncmar_att)
    print(ncmar)
    gliht_pts = mangrove_join(gliht_pts, ncmar)
    print(gliht_pts)

    print(gliht_pts.dtypes)
    print(gliht_pts.describe())

    # Export to GeoFeather format
    gliht_pts.reset_index(inplace=True)  # feather export raises an error if we don't do this:
    # ValueError: feather does not support serializing a non-default index for the index; you can .reset_index() to make the index into column(s)
    print(gliht_pts.dtypes)
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts, out_feather_path)
    print("Export execution time for {0}: {1}".format(out_feather_path, time_elapsed(start_time)))
Example #12
import json

import geopandas as gpd
import geofeather as gf
from dash.dependencies import Input, Output, State
import dash_core_components as dcc
import dash_html_components as html
import dash_table
import plotly
import plotly.express as px
from flask import Flask

# API keys and datasets
mb_token = 'pk.eyJ1IjoiamF2aS1hbGZhcm8iLCJhIjoiY2tiMnR0cm5zMDBoejJ4cWNxb3Bzcno5aiJ9.Zh0OEJmyiH27YG4Yw_KLyg'
map_shape = gpd.read_file('./data/slv_adm2/SLV_adm2.shp')
map_shape.columns = map(str.lower, map_shape.columns)
map_shape['codigomunic'] = map_shape.name_2
map_shape['depto'] = map_shape.name_1

gdf = gf.from_geofeather('./data/ehpm19_merged_sample.feather')
gdf.crs = "EPSG:4326"
map_data = gdf.copy()
map_data["lon"] = gdf.centroid.x
map_data["lat"] = gdf.centroid.y

del gdf

# Preparing geojson
map_shape.to_file("./data/esa.json", driver="GeoJSON")

with open('./data/esa.json') as response:
    esa_geoj = json.load(response)

px.set_mapbox_access_token(mb_token)
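
The snippet ends after registering the Mapbox token; a hedged sketch of turning the prepared map_data into a Plotly figure (column choices and zoom level are assumptions) might continue like this:

# Hypothetical continuation: plot the survey points on a Mapbox basemap
fig = px.scatter_mapbox(map_data, lat="lat", lon="lon", zoom=7)
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))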
Example #13
### Read in master
print("Reading master...")
df = (from_geofeather(barriers_dir / "dams.feather").set_index("id").drop(
    columns=[
        "level_0",
        "index",
        "dup_group",
        "dup_count",
        "dup_sort",
        "dup_log",
        "snap_dist",
        "snap_tolerance",
        "snap_ref_id",
        "snap_log",
        "snapped",
        "ProtectedLand",
        "NHDPlusID",
        "SourceState",
        "lineID",
        "wbID",
        "waterbody",
        "src",
        "kind",
        "log",
    ],
    errors="ignore",
).rename(columns={
    "streamorder": "StreamOrder",
    "excluded": "Excluded"
}))

# drop any that should be DROPPED (dropped or duplicate) from the analysis
print("{:,} duplicate road crossings".format(len(df) - len(keep_ids)))

df = df.loc[keep_ids].copy()

### Remove crossings that are very close
print("Removing nearby road crossings...")
# consider 5 m nearby
df = mark_duplicates(df, 5)
print("{:,} very close road crossings dropped".format(len(
    df.loc[df.duplicate])))
df = df.loc[~df.duplicate].drop(
    columns=["duplicate", "dup_count", "dup_group"])

### Remove those that otherwise duplicate existing small barriers
print("Removing crossings that duplicate existing barriers")
barriers = from_geofeather(barriers_dir / "master/small_barriers.feather")
barriers = barriers.loc[~barriers.duplicate]
barriers["kind"] = "barrier"

df["joinID"] = (df.index * 1e6).astype("uint32")
df["kind"] = "crossing"

merged = barriers[["kind",
                   "geometry"]].append(df[["joinID", "kind", "geometry"]],
                                       sort=False,
                                       ignore_index=True)
merged = mark_duplicates(merged, tolerance=DUPLICATE_TOLERANCE)

dup_groups = merged.loc[(merged.dup_count > 1)
                        & (merged.kind == "barrier")].dup_group.unique()
remove_ids = merged.loc[merged.dup_group.isin(dup_groups)
Example #15
def read_barriers(region, mode):
    """Read files created by prep_dams.py, prep_waterfalls.py, prep_small_barriers.py
    Merge together and assign uniqueID for internal use in network analysis

    NOTE: barriers on loops are dropped

    Parameters
    ----------
    region : str
        region group identifier, e.g., "02"
    mode : str
        One of "natural", "dams", "small_barriers"

    Returns
    -------
    GeoDataFrame
        Merged barriers file
    """

    start = time()

    print("Reading waterfalls")
    wf = from_geofeather(barriers_dir / "waterfalls.feather")
    wf = wf.loc[wf.HUC2.isin(REGION_GROUPS[region])].copy()
    print("Selected {:,} waterfalls".format(len(wf)))

    wf["barrierID"] = WATERFALLS_ID + wf.id
    wf["kind"] = "waterfall"

    barriers = wf

    if mode != "natural":
        print("Reading dams")
        dams = from_geofeather(barriers_dir / "dams.feather")
        dams = dams.loc[dams.HUC2.isin(REGION_GROUPS[region])].copy()
        print("Selected {:,} dams".format(len(dams)))

        dams["barrierID"] = DAMS_ID + dams.id
        dams["kind"] = "dam"

        if len(dams):
            barriers = barriers.append(dams, ignore_index=True, sort=False)

    if mode == "small_barriers":
        print("Reading small barriers")
        sb = from_geofeather(barriers_dir / "small_barriers.feather")
        sb = sb.loc[sb.HUC2.isin(REGION_GROUPS[region])].copy()
        print("Selected {:,} small barriers".format(len(sb)))

        sb["barrierID"] = SB_ID + sb.id
        sb["kind"] = "small_barrier"

        if len(sb):
            barriers = barriers.append(sb, ignore_index=True, sort=False)

    # Update dtypes
    # TODO: not needed after rerun of prep_*.py scripts
    barriers.id = barriers.id.astype("uint32")
    barriers.lineID = barriers.lineID.astype("uint32")
    barriers.NHDPlusID = barriers.NHDPlusID.astype("uint64")

    barriers.barrierID = barriers.barrierID.astype("uint64")

    ix = barriers.loop == True
    print("Found {:,} barriers on loops, dropping".format(ix.sum()))
    barriers = barriers.loc[~ix].copy()

    print("Extracted {:,} barriers in {:.2f}s".format(len(barriers), time() - start))

    return barriers[
        ["geometry", "id", "lineID", "NHDPlusID", "barrierID", "kind"]
    ].set_index("barrierID", drop=False)
    os.makedirs(tile_dir)

### Read in master
print("Reading master...")
df = (from_geofeather(barriers_dir /
                      "waterfalls.feather").set_index("id").drop(
                          columns=[
                              "level_0",
                              "index",
                              "dup_group",
                              "dup_count",
                              "dup_log",
                              "snap_dist",
                              "snap_tolerance",
                              "snap_log",
                              "snapped",
                              "log",
                              "lineID",
                              "wbID",
                          ],
                          errors="ignore",
                      ).rename(
                          columns={
                              "streamorder": "StreamOrder",
                              "name": "Name",
                              "watercours": "Stream",
                              "gnis_name_": "GNIS_Name",
                          }))

### Fix data type issues
# TODO: move to prep script
df.Name = df.Name.fillna("").str.strip()
Example #17
                    parallel_runner, enumerate(chunks)),
                                     total=chunk_count):
                partial_results[index] = chunk

    df_join = pd.concat(partial_results)
    return df_join


if __name__ == "__main__":
    with open(PROCESSED_DATA_SOURCES + TEMP + "TEMP_2019.pkl", 'rb') as f:
        data_TEMP = pickle.load(f)

    crs = {'init': 'EPSG:4326'}
    geometry = [Point(xy) for xy in zip(data_TEMP['x'], data_TEMP['y'])]
    df_points = gpd.GeoDataFrame(data_TEMP, crs=crs, geometry=geometry)
    df_zones = from_geofeather(PROCESSED_DATA_SOURCES + 'Shape_Joined.feather')

    df_joined = run(df_zones, df_points, use_parallel=True, processes=10)
    df_joined = df_joined.reset_index(drop=True)
    aggregations = {
        'NAME_0': 'first',
        'value': 'mean',
        'TYPE_1': 'first',
        'ENGTYPE_1': 'first',
        'GID_0': 'first',
        'GID_1': 'first'
    }
    temp_grouped = df_joined.groupby(['date_range',
                                      'NAME_1']).agg(aggregations)
    temp_grouped.to_csv(PROCESSED_DATA_SOURCES + 'temp_19.csv')
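
The top of the run helper is cut off in this example. A self-contained sketch of a chunked, parallel point-in-polygon join in the same spirit (all names and the chunking strategy are assumptions, not the original implementation):

from multiprocessing import Pool

import geopandas as gpd
import pandas as pd

def _join_chunk(args):
    # Join one chunk of points against the zone polygons
    zones, points_chunk = args
    # predicate= requires geopandas >= 0.10; older versions use op= instead
    return gpd.sjoin(points_chunk, zones, how="left", predicate="intersects")

def parallel_sjoin(zones, points, processes=4):
    """Split the points into row chunks and join each chunk in a worker process (sketch)."""
    step = -(-len(points) // processes)  # ceiling division
    chunks = [points.iloc[i:i + step] for i in range(0, len(points), step)]
    with Pool(processes) as pool:
        results = pool.map(_join_chunk, [(zones, chunk) for chunk in chunks])
    return pd.concat(results)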