Example 1
def main(in_dir, in_file, out_dir, out_layer):
    # Main Script to convert raster to points and export to Geofeather

    # Define output columns
    out_columns = {
        'x': 'x_utm16n',
        'y': 'y_utm16n',
        'z': 'depth_m',
    }
    # Create point geodataframe from raster
    raster_file_path = os.path.join(in_dir, in_file)
    print("Processing {}".format(raster_file_path))

    start_time = time.time()
    out_gdf = raster_to_points(raster_file_path, out_columns, type='gdf')
    print(out_gdf)
    print("Raster to points conversion time {0}: {1}".format(
        in_file, time_elapsed(start_time)))

    #  Export final Geodataframe to Geofeather format
    print("Exporting to Geofeather format")
    geofeather_path = os.path.join(out_dir, "{}.feather".format(out_layer))
    start_time = time.time()
    to_geofeather(out_gdf, geofeather_path)
    print("Export execution time for {0}: {1}".format(
        geofeather_path, time_elapsed(start_time)))
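All of the snippets collected here revolve around two calls from the geofeather package: to_geofeather(gdf, path) to write a GeoDataFrame (plus a sidecar CRS file) and from_geofeather(path) to read it back. A minimal, self-contained round trip might look like the following sketch; the sample data and file name are illustrative only:

import geopandas as gpd
from shapely.geometry import Point
from geofeather import to_geofeather, from_geofeather

# Build a tiny GeoDataFrame of points with a WGS84 CRS
gdf = gpd.GeoDataFrame(
    {"depth_m": [1.5, 2.0]},
    geometry=[Point(-88.1, 19.6), Point(-88.2, 19.7)],
    crs="EPSG:4326",
)

# Write the data file (and its .crs sidecar), then read it back
to_geofeather(gdf, "points.feather")
df = from_geofeather("points.feather")
assert df.crs == gdf.crs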
Example 2
def test_points_read_benchmark(tmpdir, points_wgs84, benchmark):
    """Test performance of reading feather files"""

    filename = tmpdir / "points_wgs84.feather"
    to_geofeather(points_wgs84, filename)

    benchmark(from_geofeather, filename)
Example 3
def save_cut_flowlines(out_dir, flowlines, joins, barrier_joins):
    """Save cut flowline data frames to disk.

    Parameters
    ----------
    out_dir : str
    flowlines : GeoDataFrame
        cut flowlines
    joins : DataFrame
        updated joins
    barrier_joins : DataFrame
        barrier joins
    """

    print("serializing {:,} cut flowlines...".format(len(flowlines)))
    start = time()

    to_geofeather(flowlines.reset_index(drop=True),
                  out_dir / "flowlines.feather")
    serialize_df(joins, out_dir / "flowline_joins.feather", index=False)
    serialize_df(
        barrier_joins.reset_index(drop=True),
        out_dir / "barrier_joins.feather",
        index=False,
    )

    print("Done serializing cut flowlines in {:.2f}s".format(time() - start))
Example 4
def test_points_geofeather_wkt(tmpdir, points_albers_conus_wkt):
    """Confirm that we can round-trip points to / from feather file with a wkt defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_wkt, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_wkt)
    assert df.crs == points_albers_conus_wkt.crs
Example 5
def process_files(in_chmdir, in_chmfiles, in_dtmdir, in_dtmfiles, out_dir, out_layer):

    ## -------- DTM ------------
    # Create point geodataframes from DTM tif files in file list
    dtm_columns = {
        'x': 'x_utm16n',
        'y': 'y_utm16n',
        'z': 'z_dtm_m',
    }
    out_dtm_gdfs = []
    for file in in_dtmfiles:
        print("Processing {}".format(file))
        dtm_file_path = os.path.join(in_dtmdir, file)
        dtm_gdf = raster_to_points(dtm_file_path, dtm_columns, type='gdf')
        out_dtm_gdfs.append(dtm_gdf)
    print("{} DTM data frames combined".format(len(out_dtm_gdfs)))

    # Concat all the point DTM gdfs into a single gdf
    print("Concatenating DTM geodataframes")
    concat_dtm_gdf = pd.concat(out_dtm_gdfs, axis=0, ignore_index=True)
    # Add a unique ID for each point
    concat_dtm_gdf['gliht_ptidx'] = concat_dtm_gdf.index + 1
    concat_dtm_gdf['gliht_ptid'] = concat_dtm_gdf[dtm_columns['x']].astype(str) + "_" + concat_dtm_gdf[dtm_columns['y']].astype(str)
    print(concat_dtm_gdf)

    ## --------- CHM ------------
    # Create point geodataframes from CHM tif files in file list
    chm_columns = {
        'x': 'x_utm16n',
        'y': 'y_utm16n',
        'z': 'z_chm_m',
    }
    out_chm_dfs = []
    for file in in_chmfiles:
        print("Processing {}".format(file))
        chm_file_path = os.path.join(in_chmdir, file)
        chm_df = raster_to_points(chm_file_path, chm_columns, type='df')
        out_chm_dfs.append(chm_df)
    print("{} CHM data frames combined".format(len(out_chm_dfs)))

    # Concat all the point CHM dfs into a single df
    print("Concatenating CHM dataframes")
    concat_chm_df = pd.concat(out_chm_dfs, axis=0, ignore_index=True)
    print(concat_chm_df)

    #----------- Join and Export -------------
    # Join the CHM and the DTM GeoDataframes
    print("Joining CHM and DTM Geodataframes")
    chm_dtm_gdf = pd.merge(concat_dtm_gdf, concat_chm_df, how='left', on=[dtm_columns['x'], dtm_columns['y']])
    print(chm_dtm_gdf)

    #  Export final Geodataframe
    print("Exporting to Geofeather format")
    geofeather_path = os.path.join(out_dir, "{}.feather".format(out_layer))
    to_geofeather(chm_dtm_gdf, geofeather_path)
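raster_to_points above is another project helper. Assuming it turns every valid cell of a single-band raster into a point record, naming the coordinate and value columns from the supplied mapping, a rough, hypothetical sketch might be:

import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.transform import xy

def raster_to_points(raster_path, columns, type='gdf'):
    # Hypothetical sketch: one row per valid cell, with cell-center
    # coordinates and the cell value stored under the requested column names.
    with rasterio.open(raster_path) as src:
        band = src.read(1)
        mask = band != src.nodata
        rows, cols = np.where(mask)
        xs, ys = xy(src.transform, rows, cols)
        crs = src.crs
    df = pd.DataFrame({columns['x']: xs, columns['y']: ys, columns['z']: band[mask]})
    if type == 'df':
        return df
    return gpd.GeoDataFrame(
        df, geometry=gpd.points_from_xy(df[columns['x']], df[columns['y']]), crs=crs)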
Example 6
def main(tile, input_pt_feather):
    # Data Directories
    source_dir = '/Users/arbailey/natcap/idb/data/source/'
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')

    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir,
                                    "gliht_srtm_{}.feather".format(tile))

    #--- Load the G-LiHT points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source,
                                          time_elapsed(start_time)))
    print(gliht_pts.dtypes)
    print(gliht_pts)

    #--- SRTM elevation data
    srtm_source = os.path.join(source_dir, 'srtm/nasa', ".".join(
        (tile, 'SRTMGL1', 'hgt', 'zip')))

    # Clip the points to SRTM raster extent (1 degree tile)
    # gliht_pts_clip = clip_pts_with_raster(gliht_pts[1:100], srtm_source)  # subset for testing
    gliht_pts_clip = clip_pts_with_raster(gliht_pts, srtm_source)

    # Sample the SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_source, 'srtm_m')
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Create unique index value for SRTM raster
    srtm_unique_source = os.path.join(
        work_dir, "{}_{}_{}.{}".format(tile, 'srtm', 'uniqueid', 'tif'))
    make_unique_raster(srtm_source, srtm_unique_source)
    # Sample Unique ID SRTM raster
    gliht_pts_clip = sample_raster(gliht_pts_clip, srtm_unique_source,
                                   'srtm_idx')
    gliht_pts_clip.reset_index(inplace=True)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)
    # Add columns to show the tile and unique index plus tile
    gliht_pts_clip['tile'] = tile
    gliht_pts_clip['tile_srtmidx'] = gliht_pts_clip[
        'tile'] + '_' + gliht_pts_clip['srtm_idx'].astype(str)
    print(gliht_pts_clip.dtypes)
    print(gliht_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(
        out_feather_path, time_elapsed(start_time)))
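sample_raster and clip_pts_with_raster are helpers from the same project. For sample_raster, assuming it reads the raster value under each point and appends it as a new column, a sketch could be:

import rasterio

def sample_raster(gdf, raster_path, column):
    # Hypothetical sketch: sample the raster at each point location and
    # store the band-1 value in a new column on the GeoDataFrame.
    coords = [(geom.x, geom.y) for geom in gdf.geometry]
    with rasterio.open(raster_path) as src:
        gdf[column] = [val[0] for val in src.sample(coords)]
    return gdf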
Example 7
def test_polygons_geofeather(tmpdir, polygons_wgs84):
    """Confirm that we can round-trip polygons to / from feather file"""

    filename = tmpdir / "polygons_wgs84.feather"
    to_geofeather(polygons_wgs84, filename)

    assert os.path.exists(filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, polygons_wgs84)
    assert df.crs == polygons_wgs84.crs
Example 8
def test_points_geofeather_proj4(tmpdir, points_albers_conus_proj4):
    """Confirm that we can round-trip points to / from feather file with a proj4 defined CRS"""

    filename = tmpdir / "points_albers_conus.feather"
    to_geofeather(points_albers_conus_proj4, filename)

    df = from_geofeather(filename)
    assert_frame_equal(df, points_albers_conus_proj4)

    # equality comparison fails for CRS objects constructed from proj4, even though they are still the same
    if hasattr(df.crs, "to_proj4"):
        assert df.crs.to_proj4() == points_albers_conus_proj4.crs.to_proj4()
    else:
        assert df.crs == points_albers_conus_proj4.crs
Example 9
def test_missing_crs_warning(tmpdir, points_wgs84):
    """Confirm that a warning is raised if the crs file is missing"""

    filename = tmpdir / "points_wgs84.feather"
    to_geofeather(points_wgs84, filename)

    os.remove("{}.crs".format(filename))

    with pytest.warns(UserWarning) as warning:
        df = from_geofeather(filename)
        assert ("coordinate reference system file is missing"
                in warning[0].message.args[0])

        assert df.crs is None
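The warning is possible because geofeather keeps the coordinate reference system in a sidecar file next to the data file, which is what the os.remove call above deletes. Using the same points_wgs84 fixture, the sidecar convention can be seen directly (an illustrative check, not part of the test suite):

filename = tmpdir / "points_wgs84.feather"
to_geofeather(points_wgs84, filename)

# geofeather writes the CRS to a sidecar file alongside the feather file
assert os.path.exists(str(filename))
assert os.path.exists("{}.crs".format(filename))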
Example 10
def save_barriers(out_dir, barriers):
    """Save consolidated barriers to disk for QA.

    Parameters
    ----------
    out_dir : str
    barriers : GeoDataFrame
    """

    print("Serializing {:,} barriers...".format(len(barriers)))
    start = time()

    tmp = barriers.reset_index(drop=True)
    to_geofeather(tmp, out_dir / "barriers.feather")
    to_shp(tmp, out_dir / "barriers.shp")

    print("Done serializing barriers in {:.2f}s".format(time() - start))
Example 11
def main(raster_source, uniqueid_file, work_dir, input_pt_feather,
         out_feather):

    pt_data_source = os.path.join(work_dir, input_pt_feather)
    out_feather_path = os.path.join(work_dir, out_feather)

    #--- Load the points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    in_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source,
                                          time_elapsed(start_time)))
    print(in_pts.dtypes)
    print(in_pts)

    # Clip the points to raster extent
    # in_pts_clip = clip_pts_with_raster(in_pts[1:100], raster_source)  # subset for testing
    in_pts_clip = clip_pts_with_raster(in_pts, raster_source)

    # Sample the raster
    in_pts_clip = sample_raster(in_pts_clip, raster_source, 'tncdep_m')
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Create unique index value for the raster
    raster_unique_source = os.path.join(work_dir, uniqueid_file)
    make_unique_raster(raster_source, raster_unique_source)
    # Sample the unique ID raster
    in_pts_clip = sample_raster(in_pts_clip, raster_unique_source,
                                'tncdep_idx')
    in_pts_clip.reset_index(inplace=True)
    print(in_pts_clip.dtypes)
    print(in_pts_clip)

    # Export to Feather format
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(in_pts_clip, out_feather_path)
    print("Export execution time for {0}: {1}".format(
        out_feather_path, time_elapsed(start_time)))
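make_unique_raster is a helper that materializes a raster whose cells can act as join keys. Assuming it writes a copy of the source grid where each cell holds its own sequential ID, a hypothetical sketch could be:

import numpy as np
import rasterio

def make_unique_raster(src_path, out_path):
    # Hypothetical sketch: same extent and resolution as the source raster,
    # but every cell value is a unique sequential index.
    with rasterio.open(src_path) as src:
        profile = src.profile.copy()
        ids = np.arange(src.height * src.width, dtype="uint32").reshape(src.height, src.width)
    profile.update(driver="GTiff", dtype="uint32", count=1, nodata=None)
    with rasterio.open(out_path, "w", **profile) as dst:
        dst.write(ids, 1)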
Example 12
out_dir = boundaries_dir

bnd = gp.read_file(boundaries_dir / "SARP_boundary_prj.shp")
bnd.sindex

### Process watershed boundaries
### HUC4s that overlap with the SARP region form the outer boundary for analysis
huc4 = gp.read_file(boundaries_dir / "HUC4_prj.shp")
huc4.sindex

### Watersheds
### HUC6s - used for basin names
df = gp.read_file(intermediate_dir /
                  "HUC6_prj.shp")[["geometry", "HUC6", "NAME"]]
df.sindex
to_geofeather(df, out_dir / "HUC6.feather")

# Select out within the SARP boundary
in_sarp = gp.sjoin(df, bnd)
df = df.loc[df.HUC6.isin(in_sarp.HUC6)]
to_shp(
    df.reset_index().rename(columns={
        "HUC6": "id",
        "NAME": "name"
    }),
    boundaries_dir / "HUC6_prj.shp",
)

### HUC12s - primary for all spatial joins (other codes can be derived from HUC12)
df = gp.read_file(intermediate_dir /
                  "HUC12_prj.shp")[["geometry", "HUC12", "NAME"]]
Example 13
    # select the affected networks
    idx = network.networkID.isin(cross_region.upstream_network)
    cut = network.loc[idx].copy()

    if cut_networks is None:
        cut_networks = cut

    else:
        cut_networks = cut_networks.append(cut, ignore_index=True, sort=False)

    # write the updated network back out
    network = network.loc[~idx].copy()

    print("Serializing updated network...")
    to_geofeather(network.reset_index(drop=True), out_dir / "network.feather")
    to_shp(network, out_dir / "network.shp")

### Update new networkID and stats into cut networks
cut_networks = (cut_networks[[
    "geometry", "networkID"
]].set_index("networkID").join(
    cross_region.set_index("upstream_network").downstream_network).reset_index(
    ).rename(columns={"index": "networkID"}))
cut_networks.networkID = cut_networks.downstream_network
cut_networks = (cut_networks.drop(
    columns=["downstream_network"]).set_index("networkID").join(network_stats))

### Read in downstream networks, remove the original networks that are merged above, and append in merged ones
for region in cross_region.region.unique():
    print(
    new_upstreams = (cross_huc_joins.join(
        joins.set_index("downstream").downstream_id.rename("new_upstream"),
        on="upstream",
    ).new_upstream.fillna(0).astype("uint32"))
    joins.loc[new_upstreams.index, "upstream_id"] = new_upstreams

    # update new internal joins
    joins.loc[(joins.type == "huc_in") & (joins.upstream_id != 0),
              "type"] = "internal"

    # remove the duplicate downstreams that used to be terminals for their respective HUCs
    joins = joins.loc[~(joins.upstream.isin(cross_huc_joins.upstream) &
                        (joins.type == "terminal"))]

    # remove dead ends
    joins = joins.loc[~((joins.downstream == 0) &
                        (joins.upstream == 0))].copy()

    print("serializing {:,} flowlines to feather".format(len(flowlines)))
    to_geofeather(flowlines, region_dir / "flowlines.feather")
    serialize_df(joins, region_dir / "flowline_joins.feather", index=False)

    print("serializing {:,} waterbodies to feather".format(len(waterbodies)))
    to_geofeather(waterbodies, region_dir / "waterbodies.feather")
    serialize_df(wb_joins,
                 region_dir / "waterbody_flowline_joins.feather",
                 index=False)
    print("Region done in {:.0f}s".format(time() - region_start))

print("Done in {:.2f}s\n============================".format(time() - start))
    "FULLNAME": "Road",
    "GNIS_NAME": "Stream",
    "RDXID": "SARPID"
})
df.SARPID = df.SARPID.astype("uint")
df["id"] = df.index.astype("uint")

# Cleanup fields
df.Stream = df.Stream.str.strip().fillna("")
df.Road = df.Road.str.strip().fillna("")

df.loc[(df.Stream.str.strip().str.len() > 0) &
       (df.Road.str.strip().str.len() > 0),
       "Name"] = (df.Stream + " / " + df.Road)

df.Name = df.Name.fillna("")

### Spatial joins to boundary layers
# NOTE: these are used for summary stats, but not used in most of the rest of the stack
df = add_spatial_joins(df)

### Spatial joins to protected lands and priority watersheds
df = add_protectedland_priorities(df)

print("Adding lat / lon fields")
df = add_lat_lon(df)

to_geofeather(df.reset_index(drop=True), out_dir / "road_crossings.feather")

print("Done in {:.2f}".format(time() - start))

### Sanity check
if df.groupby(level=0).size().max() > 1:
    raise ValueError(
        "Error - there are duplicate barriers in the results.  Check uniqueness of IDs and joins."
    )


### Output results
print("Writing to output files...")


# Full results for SARP
print("Saving full results to feather")
to_geofeather(df.reset_index(), qa_dir / "small_barriers_network_results.feather")

# drop geometry, not needed from here on out
df = df.drop(columns=["geometry"])

print("Saving full results to CSV")
df.to_csv(
    qa_dir / "small_barriers_network_results.csv",
    index_label="id",
    quoting=csv.QUOTE_NONNUMERIC,
)


# Drop any fields we don't need for API or tippecanoe
# save for API
serialize_df(df[SB_API_FIELDS].reset_index(), api_dir / "small_barriers.feather")
Example 17
    PRECIP,
    POP_DEN,
)

def convert_to_df(file_name):
    with ZipFile(file_name, 'r') as zipObj:
        listOfFileNames = zipObj.namelist()
        strip_end = re.search("(.*)_shp.zip", file_name)
        raw_name = strip_end.group(1)
        for fileName in listOfFileNames:
            if "_1" in fileName:
                zipObj.extract(fileName, 'temp_shp')
    read_fname = pathlib.Path("temp_shp/" + raw_name + "_1.shp")
    if read_fname.exists():
        shp = gpd.read_file(read_fname)
        return shp

def concat_function(file_zip):
    for variable_name in tqdm(file_zip):
        shp = convert_to_df(variable_name)
        appended_data.append(shp)

if __name__ == "__main__":
    dem = Helper.read_files_ini_dir(PROCESSED_DATA_SOURCE + SHAPE)
    var_name = set(dem)
    var_name = [item for item in var_name if item.endswith('.zip')]
    appended_data = []
    concat_function(var_name)
    shp_concat = pd.concat(appended_data).reset_index(drop=True)
    to_geofeather(shp_concat, PROCESSED_DATA_SOURCES + 'Shape_Joined.feather')
Example 18
def main(tile, input_pt_feather):

    # Data Directories
    data_dir = '/Users/arbailey/natcap/idb/data/work/mangroves'
    work_dir = os.path.join(data_dir, 'yucatan')
    pt_data_source = os.path.join(work_dir, input_pt_feather)

    out_feather_path = os.path.join(work_dir, "gliht_srtm_mangroves_{}.feather".format(tile))

    # --- Mangrove Max Height raster
    hmax_source = os.path.join(data_dir, 'gmc_hmax95_bahamas_MAR.tif')
    hba_source = os.path.join(data_dir, 'gmc_hba95_bahamas_MAR.tif')

    #--- Load the G-LiHT/SRTM points
    print("Loading data from: {}".format(pt_data_source))
    start_time = time.time()
    gliht_pts = from_geofeather(os.path.join(work_dir, pt_data_source))
    print("Load time for {0}: {1}".format(pt_data_source, time_elapsed(start_time)))
    gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Sample the Canopy Height rasters
    # Max Height - hmax95
    # gliht_pts = sample_raster(gliht_pts[0:100], hmax_source, 'hmax95')
    gliht_pts = sample_raster(gliht_pts, hmax_source, 'hmax95')
    print(gliht_pts.dtypes)
    print(gliht_pts)
    # Weighted Average Height - hba95
    gliht_pts = sample_raster(gliht_pts, hba_source, 'hba95')
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Create unique index value for Canopy raster
    gmc_unique_source = os.path.join(work_dir, "gmc_uniqueid.tif")
    # make_unique_raster(hmax_source, gmc_unique_source)  # Takes 1:55:14.79
    #
    # Sample Unique ID raster
    gliht_pts = sample_raster(gliht_pts, gmc_unique_source, 'hmax_idx')
    # gliht_pts.reset_index(inplace=True)
    # gliht_pts.drop(columns=['index'], inplace=True)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Add columns to show the tile and unique index plus tile
    gliht_pts['tile'] = tile
    gliht_pts['tile_hmaxidx'] = gliht_pts['tile'] + '_' + gliht_pts['hmax_idx'].astype(str)
    print(gliht_pts.dtypes)
    print(gliht_pts)

    # Mangrove Extent Vector shapefile paths to join to Points
    #-- World Atlas of Mangroves
    wam_path = os.path.join(data_dir, 'wam_Bahamas_MAR.shp')
    wam_att = 'wam'
    wam = mangrove_poly_to_gdf(wam_path, wam_att)
    print(wam)
    gliht_pts = mangrove_join(gliht_pts, wam)
    print(gliht_pts)

    #-- Global Mangrove Watch
    gmw2016_path = os.path.join(data_dir, 'gmw2016_Bahamas_MAR.shp')
    gmw2016_att = 'gmw2016'
    gmw2016 = mangrove_poly_to_gdf(gmw2016_path, gmw2016_att)
    print(gmw2016)
    gliht_pts = mangrove_join(gliht_pts, gmw2016)
    print(gliht_pts)

    # Global Mangrove Forests
    gmf_path = os.path.join(data_dir, 'gmf_bahamas_MAR.shp')
    gmf_att = 'gmf'
    gmf = mangrove_poly_to_gdf(gmf_path, gmf_att)
    print(gmf)
    gliht_pts = mangrove_join(gliht_pts, gmf)
    print(gliht_pts)

    # NatCap Mangrove compilation for MAR region (Mexico, Belize, Guatemala, Honduras)
    ncmar_path = os.path.join(data_dir, 'natcap_mangrovesV4_MAR.shp')
    ncmar_att = 'ncMAR'
    ncmar = mangrove_poly_to_gdf(ncmar_path, ncmar_att)
    print(ncmar)
    gliht_pts = mangrove_join(gliht_pts, ncmar)
    print(gliht_pts)

    print(gliht_pts.dtypes)
    print(gliht_pts.describe())

    # Export to GeoFeather format
    gliht_pts.reset_index(inplace=True)  # feather export raises an error if we don't do this:
    # ValueError: feather does not support serializing a non-default index for the index; you can .reset_index() to make the index into column(s)
    print(gliht_pts.dtypes)
    print("Exporting to Geofeather format")
    start_time = time.time()
    to_geofeather(gliht_pts, out_feather_path)
    print("Export execution time for {0}: {1}".format(out_feather_path, time_elapsed(start_time)))
Example 19
            # convert to LineString from MultiLineString
            idx = df.loc[df.geometry.type == "MultiLineString"].index
            df.loc[idx,
                   "geometry"] = df.loc[idx].geometry.apply(lambda g: g[0])

            df.geometry = df.geometry.apply(to2D)
            df = df.to_crs(CRS)

            df.FType = df.FType.astype("uint16")
            df.FCode = df.FCode.astype("uint16")
            df["HUC2"] = HUC2

            if merged is None:
                merged = df
            else:
                merged = merged.append(df, ignore_index=True, sort=False)

print("Extracted {:,} NHD lines".format(len(merged)))
df = merged.reset_index(drop=True)

# add our own ID,
df["id"] = df.index.values.copy()
df.id = (df.id + 1).astype("uint32")

print("Serializing {:,} lines...".format(len(df)))
to_geofeather(df, out_dir / "nhd_lines.feather")
to_shp(df, out_dir / "nhd_lines.shp")

print("Done in {:.2f}s\n============================".format(time() - start))
Example 20
df = merged

print("Projecting dams...")
# Drop dams without locations and project
df = df.loc[df.geometry.notnull()].copy().to_crs(CRS)

print("Merged {:,} dams in SARP states".format(len(df)))

missing_sarpid = df.loc[df.SARPID.isnull()]
if len(missing_sarpid):
    print(
        "--------------------------\nWARNING: {:,} dams are missing SARPID\n----------------------------"
        .format(len(missing_sarpid)))

to_geofeather(df, out_dir / "sarp_dams.feather")

### Download manually snapped dams
download_start = time()
print("---- Downloading Snapped Dams ----")
df = download_fs(
    SNAPPED_URL,
    fields=["SARPID", "ManualReview"],
    token=token,
    target_wkid=TARGET_WKID,
)

print("Projecting manually snapped dams...")
df = df.loc[df.geometry.notnull()].to_crs(CRS)

print("Downloaded {:,} snapped dams in {:.2f}s".format(len(df),
Example 21
### Calculate tiers for the region and by state
df = calculate_tiers(df, prefix="SE")
df = calculate_tiers(df, group_field="State", prefix="State")

### Sanity check
if df.groupby(level=0).size().max() > 1:
    raise ValueError(
        "Error - there are duplicate barriers in the results.  Check uniqueness of IDs and joins."
    )

### Output results
print("Writing to output files...")

# Full results for SARP
print("Saving full results to feather")
to_geofeather(df.reset_index(), qa_dir / "dams_network_results.feather")

# drop geometry, not needed from here on out
df = df.drop(columns=["geometry"])

print("Saving full results to CSV")
df.to_csv(qa_dir / "dams_network_results.csv",
          index_label="id",
          quoting=csv.QUOTE_NONNUMERIC)

# save for API
serialize_df(df[DAM_API_FIELDS].reset_index(), api_dir / "dams.feather")

# Drop fields that can be calculated on frontend
keep_fields = [
    c for c in DAM_API_FIELDS if not c in {"GainMiles", "TotalNetworkMiles"}
            df = gp.read_file(gdb, layer="NHDPoint")
            df.NHDPlusID = df.NHDPlusID.astype("uint64")

            df = df.loc[df.FType.isin(KEEP_FTYPES)][KEEP_COLS].copy()

            df.geometry = df.geometry.apply(to2D)
            df = df.to_crs(CRS)

            df.FType = df.FType.astype("uint16")
            df.FCode = df.FCode.astype("uint16")
            df["HUC2"] = HUC2

            if merged is None:
                merged = df
            else:
                merged = merged.append(df, ignore_index=True, sort=False)

print("Extracted {:,} NHD Points".format(len(merged)))
df = merged.reset_index(drop=True)

# add our own ID,
df["id"] = df.index.values.copy()
df.id = (df.id + 1).astype("uint32")

print("Serializing {:,} points...".format(len(df)))
to_geofeather(df, out_dir / "nhd_points.feather")
to_shp(df, out_dir / "nhd_points.shp")

print("Done in {:.2f}s\n============================".format(time() - start))
            nhd_dir / "clean" / region / "waterbodies.feather"
            for region in REGION_GROUPS
        ],
        src=[region for region in REGION_GROUPS],
    )
    .rename(columns={"src": "region"})
    .reset_index(drop=True)
)
print("Read {:,} waterbodies".format(len(wb)))

# TEMP: can remove on next full run of prepare_flowlines_waterbodies.feather
wb.wbID = wb.wbID.astype("uint32")


print("Serializing waterbodies...")
to_geofeather(wb, out_dir / "waterbodies.feather")


print("Reading waterbody drain points...")
drains = deserialize_gdfs(
    [
        nhd_dir / "clean" / region / "waterbody_drain_points.feather"
        for region in REGION_GROUPS
    ],
    src=[region for region in REGION_GROUPS],
).reset_index(drop=True)

print("Read {:,} waterbody drain points".format(len(drains)))

### Deduplicate and assign to the next segment downstream where there are multiple segments intersecting
joins = deserialize_dfs(
Example 24
            df = gp.read_file(gdb, layer="NHDArea")
            df.NHDPlusID = df.NHDPlusID.astype("uint64")

            df = df.loc[df.FType.isin(KEEP_FTYPES)][KEEP_COLS].copy()

            df.geometry = df.geometry.apply(to2D)
            df = df.to_crs(CRS)

            df.FType = df.FType.astype("uint16")
            df.FCode = df.FCode.astype("uint16")
            df["HUC2"] = HUC2

            if merged is None:
                merged = df
            else:
                merged = merged.append(df, ignore_index=True, sort=False)

print("Extracted {:,} NHD areas".format(len(merged)))
df = merged.reset_index(drop=True)

# add our own ID,
df["id"] = df.index.values.copy()
df.id = (df.id + 1).astype("uint32")

print("Serializing {:,} areas...".format(len(df)))
to_geofeather(df, out_dir / "nhd_areas.feather")
to_shp(df, out_dir / "nhd_areas.shp")

print("Done in {:.2f}s\n============================".format(time() - start))