Example #1
def update_original_database(new_data, new_data_dups):
    '''
    Adds the last 24 hours of data
    to the complete NRT fire database
    '''

    # Read the original database from its feather files
    gdf = gpd.read_feather(
        f"{PROJECT_ROOT}/output/feathers/tilesets/bd_completo.feather")
    gdf_dups = gpd.read_feather(
        f"{PROJECT_ROOT}/output/feathers/tilesets/bd_completo_com_duplicatas.feather"
    )

    for datapoints, updates, label in zip([gdf, gdf_dups],
                                          [new_data, new_data_dups],
                                          ["clean", "duplicated"]):

        # Concatenate with the new data
        datapoints = pd.concat((datapoints, updates))

        # Keep only the current year's data
        year = datetime.datetime.now().year

        datapoints["datetime"] = pd.to_datetime(datapoints.data)
        datapoints = datapoints[datapoints.datetime.dt.year == year]
        datapoints = datapoints.drop("datetime", axis=1)

        # Reset the index
        datapoints = datapoints.reset_index(drop=True)

        # Save the results to disk
        if label == "clean":

            datapoints = sanitize_api_duplicates(datapoints, "bd_completo")

            save_csv(datapoints,
                     f"{PROJECT_ROOT}/output/csvs/tilesets/bd_completo.csv")
            save_feather(
                datapoints,
                f"{PROJECT_ROOT}/output/feathers/tilesets/bd_completo.feather")
            save_geojson(
                datapoints,
                f"{PROJECT_ROOT}/output/jsons/tilesets/bd_completo.json")

            # Store the deduplicated data in a variable
            gdf = datapoints.copy()

        elif label == "duplicated":

            #save_csv(datapoints, f"{PROJECT_ROOT}/output/csvs/tilesets/bd_completo_com_duplicatas.csv")
            save_feather(
                datapoints,
                f"{PROJECT_ROOT}/output/feathers/tilesets/bd_completo_com_duplicatas.feather"
            )
            #save_geojson(datapoints, f"{PROJECT_ROOT}/output/csvs/tilesets/bd_completo_com_duplicatas.geojson")

            gdf_dups = datapoints.copy()

    return gdf, gdf_dups
def main():

    df_24h = gpd.read_feather(
        f"{PROJECT_ROOT}/output/feathers/tilesets/24h.feather")
    df_7d = gpd.read_feather(
        f"{PROJECT_ROOT}/output/feathers/tilesets/7d.feather")
    full_db = gpd.read_feather(
        f"{PROJECT_ROOT}/output/feathers/tilesets/bd_completo.feather")

    # Create files with the static data on indigenous lands and conservation units
    print("> Creating land databases")
    update_land_datasets(df_24h, df_7d, full_db)
Example #3
    def get_slr(self):
        """Extract SLR for any geometries that overlap bounds where SLR is available

        Returns
        -------
        dict
            {"slr_acres": <acres>, "slr": [<slr_0ft>, <slr_1ft>, ..., <slr_6ft>]}
        """
        slr_bounds = gp.read_feather(
            slr_bounds_filename).geometry.values.data[0]
        ix = pg.intersects(self.geometry, slr_bounds)

        if not ix.sum():
            # No overlap
            return None

        # only extract SLR where there are overlaps
        slr_results = extract_slr_by_geometry(self.shapes[ix],
                                              bounds=pg.total_bounds(
                                                  self.geometry[ix]))
        # None only if no shape mask
        if slr_results is None:
            return None

        slr = [slr_results[i] for i in range(7)]

        return {"slr_acres": slr_results["shape_mask"], "slr": slr}
Example #4
def read_file(fpath):

    gdf = gpd.read_feather(fpath)

    #gdf = gdf[ [ "Cod_setor", "populacao_residente", "geometry"] ]

    return gdf
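A possible call of this helper, sketched here for context; the file path is a placeholder and the column names come from the commented-out selection above:

# hypothetical usage sketch
gdf = read_file("setores_censitarios.feather")
gdf = gdf[["Cod_setor", "populacao_residente", "geometry"]]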
Example #5
def summarize_by_huc12(units_df):
    print("Calculating overlap with land ownership and protection")

    ownership = gp.read_feather(
        ownership_filename, columns=["geometry", "FEE_ORGTYP", "GAP_STATUS"])

    index_name = units_df.index.name

    df = intersection(units_df, ownership)

    if not len(df):
        return

    df["acres"] = pg.area(df.geometry_right.values.data) * M2_ACRES

    # drop areas that touch but have no overlap
    df = df.loc[df.acres > 0].copy()

    by_owner = (df[["FEE_ORGTYP", "acres"]].groupby(
        [index_name,
         "FEE_ORGTYP"]).acres.sum().astype("float32").round().reset_index())

    by_protection = (df[["GAP_STATUS", "acres"]].groupby(
        [index_name,
         "GAP_STATUS"]).acres.sum().astype("float32").round().reset_index())

    by_owner.to_feather(ownership_results_filename)
    by_protection.to_feather(protection_results_filename)
def import_df(list_df, list_prop):
    data_dir = file_fct.get_parent_dir(2, 'data')
    
    list_final_df = []
    for df_name, a_prop in zip(list_df, list_prop):
        source_df = read_db_list(a_prop)
        import_path = os.path.normcase(f'{data_dir}/{a_prop}/{source_df.loc[df_name, "sub_dir"]}/{source_df.loc[df_name, "file_name"]}')
        export_format = source_df.loc[df_name, "file_name"].split('.')[-1]
        if source_df.loc[df_name, 'type'] == 'Pandas':
            if export_format == 'csv':
                importing_df = pandas.read_csv(import_path, 
                                             sep=source_df.loc[df_name, 'sep'],
                                             encoding=source_df.loc[df_name, 'encoding'])
            elif export_format == 'json':
                importing_df = pandas.read_json(import_path, orient = "table")
                
        elif source_df.loc[df_name, 'type'] == 'GeoPandas':
            if export_format == 'csv' or export_format == 'shp':
                importing_df = gpd.read_file(import_path)
            elif export_format == 'json' or export_format == 'geojson':
                # geopandas has no read_json; GeoJSON files are read with read_file
                importing_df = gpd.read_file(import_path)
            elif export_format == 'feather':
                importing_df = gpd.read_feather(import_path)
                
      
        list_final_df.append(importing_df)
        #print(df_name)
        #print(importing_df)

    return list_final_df
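A sketch of how this loader might be called, assuming read_db_list and file_fct are importable in this module; the dataset names and property keys below are invented for illustration:

# hypothetical usage sketch
list_final_df = import_df(['cases_by_city', 'city_boundaries'],   # invented dataset names
                          ['processed', 'raw'])                   # invented property keys
cases_df, boundaries_gdf = list_final_df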
Example #7
def test_write_read_feather_expand_user():
    gdf = geopandas.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="epsg:4326")
    test_file = "~/test_file.feather"
    gdf.to_feather(test_file)
    f_df = geopandas.read_feather(test_file)
    assert_geodataframe_equal(gdf, f_df, check_crs=True)
    os.remove(os.path.expanduser(test_file))
Example #8
def aerial_buffer(sample_gdf, layer_dir):
    """
    Aggregate data from layers to sample_gdf according to defined radii
    :param sample_gdf:
    :param layer_dir:
    :return:
    """
    # Test if sample_gdf has a CRS
    assert sample_gdf.crs is not None, AttributeError('Assign a coordinate reference system to sample_gdf')

    # Test if the expected layers and columns exist in the layer directory
    gdfs={layer: gpd.read_feather(f'{layer_dir}/{layer}.feather') for layer, cols in tqdm(network_layers.items())}
    for layer, cols in tqdm(network_layers.items()):
        for column in cols:
            assert column in gdfs[layer].columns, ValueError (f'{column} column not found in {layer} layer')

    for layer, cols in tqdm(network_layers.items()):
        for column in cols:
            right_gdf = gdfs[layer]

            # Test if right_gdf has a CRS
            assert right_gdf.crs is not None, AttributeError(f'Assign a coordinate reference system to the {layer} layer')

            # Test if column type is categorical or numerical
            if is_numeric_dtype(right_gdf[column]):
                for radius in radii:
                    sample_gdf = Analyst(sample_gdf).buffer_join(right_gdf.loc[:, [column, 'geometry']], radius)
            else:
                for category in right_gdf[column].unique():
                    # copy to avoid SettingWithCopyWarning when adding the indicator column
                    filtered = right_gdf[right_gdf[column] == category].copy()
                    filtered[category] = 1
                    for radius in radii:
                        sample_gdf = Analyst(sample_gdf).buffer_join(filtered.loc[:, [category, 'geometry']], radius)
    return sample_gdf
Example #9
def summarize_by_huc12(units_df):
    """Calculate spatial join with counties

    Parameters
    ----------
    df : GeoDataFrame
        summary units
    """

    print("Calculating overlap with PARCAs")
    parca = gp.read_feather(parca_filename)

    df = intersection(units_df, parca)
    df["acres"] = pg.area(df.geometry_right.values.data) * M2_ACRES

    # drop areas that touch but have no overlap
    df = df.loc[df.acres > 0].copy()

    # aggregate these back up by ID
    by_parca = (
        df[["parca_id", "name", "description", "acres"]]
        .groupby(by=[df.index.get_level_values(0), "parca_id"])
        .agg({"name": "first", "description": "first", "acres": "sum"})
        .reset_index()
        .rename(columns={"level_0": "id"})
    )
    by_parca.acres = by_parca.acres.astype("float32").round()

    by_parca.to_feather(results_filename)
Example #10
def get_input_area_boundary(input_area):
    """Extract and union polygons associated with input area into a single
    boundary (Multi)Polygon.

    Parameters
    ----------
    input_area : str
        id of input area

    Returns
    -------
    (Multi)Polygon
    """
    # have to make valid or we get errors during union for FL
    values = [
        e["value"] for e in INPUT_AREA_VALUES
        if input_area in set(e["id"].split(","))
    ]

    inputs_df = gp.read_feather(bnd_dir / "input_areas.feather")

    bnd = pg.union_all(
        pg.make_valid(
            inputs_df.loc[inputs_df.value.isin(values)].geometry.values.data))

    return bnd
    def get_parca(self):
        parca = gp.read_feather(parca_filename)
        df = intersection(pd.DataFrame({"geometry": self.geometry}), parca)

        if not len(df):
            return None

        df["acres"] = pg.area(df.geometry_right.values.data) * M2_ACRES
        df = df.loc[df.acres > 0].copy()

        # aggregate these back up by ID
        by_parca = (
            df[["parca_id", "name", "description", "acres"]]
            .groupby(by=[df.index.get_level_values(0), "parca_id"])
            .agg({"name": "first", "description": "first", "acres": "sum"})
            .reset_index()
            .rename(columns={"level_0": "id"})
        )
        by_parca.acres = by_parca.acres.astype("float32").round()

        return {
            "parca": by_parca[["name", "description", "acres"]].to_dict(orient="records")
        }
Example #12
def get_city_count(data, fpath):

    centroids = gpd.read_feather("../output/city_info.feather")

    centroids = centroids[["code_muni", "geometry"]]

    # We need the 6-digit IBGE code
    data.city_ibge_code = data.city_ibge_code.str.extract(r"(\d{6})")

    # Fix for Vitória
    centroids.loc[centroids.code_muni == "320530",
                  "geometry"] = Point([-40.297984, -20.277465])

    centroids = centroids.merge(data,
                                left_on="code_muni",
                                right_on="city_ibge_code")

    centroids = centroids[["geometry", "deaths"]]

    centroids = centroids[centroids.deaths > 0]

    # Round point size
    centroids.geometry = centroids.geometry.apply(
        lambda x: Point([round(coord, 2) for coord in x.coords[0]]))

    centroids.to_file(f"{fpath}/deaths.json", driver='GeoJSON')
Example #13
    def _read_to_geodf(
        self,
        path: Union[str, os.PathLike],
    ) -> gpd.GeoDataFrame:

        gdf = gpd.read_feather(path)

        return gdf
    def read_feather(self):
        """市区町村ポリゴンを読み込んでGeoDataFrameを作成する

        Returns:
            GeoDataFrame: 市区町村ポリゴンのGeoDataFrame

        """
        return gpd.read_feather(str(self.city_features_path.resolve()))
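For context, the feather round trip that these thin wrappers rely on is just the GeoDataFrame.to_feather / geopandas.read_feather pair (a minimal sketch with an assumed file name; requires pyarrow):

import geopandas as gpd
from shapely.geometry import box

gdf = gpd.GeoDataFrame(geometry=[box(0, 0, 10, 10)], crs="EPSG:4326")
gdf.to_feather("city_features.feather")            # assumed path
same_gdf = gpd.read_feather("city_features.feather")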
Example #15
    def _read_multipolygon(self,
                           path: Union[str, os.PathLike],
                           fix: bool = True) -> MultiPolygon:

        multipolygon = MultiPolygon(list(gpd.read_feather(path).geometry))

        if fix:
            multipolygon = self._get_valid_multipolygon(multipolygon)

        return multipolygon
Example #16
def read_gdfs(f_path, files):
    gdfs = {}
    for file in files:
        if file is not None:
            file_type = file.split('.')[-1]
            if file_type == 'feather':
                gdf = gpd.read_feather(f'{f_path}/{file}').to_crs(26910)
            else:
                gdf = gpd.read_file(f'{f_path}/{file}').to_crs(26910)
            gdfs[file] = gdf
    return gdfs
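A hedged usage sketch; the directory and file names are placeholders, and every returned layer has already been reprojected to EPSG:26910 by the function:

# hypothetical usage sketch
gdfs = read_gdfs("data/layers", ["parcels.feather", "streets.shp", None])
parcels = gdfs["parcels.feather"]
streets = gdfs["streets.shp"]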
    def get_counties(self):
        counties = gp.read_feather(county_filename)[[
            "geometry", "FIPS", "state", "county"
        ]]

        df = (sjoin(pd.DataFrame({"geometry": self.geometry}), counties)[[
            "FIPS", "state", "county"
        ]].reset_index(drop=True).sort_values(by=["state", "county"]))

        if not len(df):
            return None

        return {"counties": df.to_dict(orient="records")}
Example #18
def main():
	
	# Read the required data
	points_24h = gpd.read_feather(f"{PROJECT_ROOT}/output/feathers/tilesets/24h.feather")

	# Build the subsets from the 24h data
	inside_ucs = points_24h[~points_24h.cod_uc.isna()]
	inside_tis = points_24h[~points_24h.cod_ti.isna()]

	ti_most_fire_id = find_place_with_most_fire(points_24h, "cod_ti", position=1)
	ti_most_fire = points_24h[points_24h.cod_ti == ti_most_fire_id]

	uc_most_fire_id = find_place_with_most_fire(points_24h, "cod_uc", position=1)
	uc_most_fire = points_24h[points_24h.cod_uc == uc_most_fire_id]

	# Build the subsets from the 7d data
	grid = gpd.read_feather(f"{PROJECT_ROOT}/output/feathers/land_info/grid_20km.feather")
	points_7d = gpd.read_feather(f"{PROJECT_ROOT}/output/feathers/tilesets/7d.feather")

	grid_most_fire_1_id = find_grid_with_most_fire(grid, time="7d", position=1)
	grid_most_fire_1 = points_7d[points_7d.cod_box == grid_most_fire_1_id]

	grid_most_fire_2_id = find_grid_with_most_fire(grid, time="7d", position=2)
	grid_most_fire_2 = points_7d[points_7d.cod_box == grid_most_fire_2_id]

	grid_most_fire_3_id = find_grid_with_most_fire(grid, time="7d", position=3)
	grid_most_fire_3 = points_7d[points_7d.cod_box == grid_most_fire_3_id]

	# Save the 24h subsets
	inside_tis.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/24h_tis.json", driver="GeoJSON")
	inside_ucs.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/24h_ucs.json", driver="GeoJSON")
	uc_most_fire.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/24h_uc_most_fire.json", driver="GeoJSON")
	ti_most_fire.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/24h_ti_most_fire.json", driver="GeoJSON")


	# Save the 7d subsets
	grid_most_fire_1.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/7d_grid_1.json", driver="GeoJSON")
	grid_most_fire_2.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/7d_grid_2.json", driver="GeoJSON")
	grid_most_fire_3.to_file(f"{PROJECT_ROOT}/output/jsons/tilesets/7d_grid_3.json", driver="GeoJSON")
Example #19
def run_query(point):

    
    # Gets information from the user input
    point = parse_input(point)

    # Opens the file with the current count of covid-19 deaths
    target = get_covid_count(measure='deaths')

    cities_info = gpd.read_feather("../output/city_info.feather")

    # Gets the parts of the census tracts with the user data that we need to load
    gdf = find_user_area(point, target)
        
    # Uses a buffer to avoid self-intercepting shapes
    gdf["geometry"] = gdf.geometry.buffer(0)
        
    # Creates a sindex to improve search
    spatial_index = gdf.sindex
        
    # Finds the area that we will need to highlight along with the respective population
    radius_data = find_radius(point, gdf, spatial_index, target)

    # Finds information about the user's city
    city_data = find_user_city(point, target, cities_info)

    # If the user's city has a smaller population than the covid death count,
    # the closest city that would vanish is the city itself
    if city_data["pop_2019"] <= target:
        neighbor_data = city_data.copy()

    # Else, finds the closest city with population smaller to the total deaths
    else:
        neighbor_data = find_neighboring_city(point, target, cities_info)

    # Selects two random capitals to highlight
    capitals_data = choose_capitals(point, city_data["code_muni"], cities_info)

    output = {
        "radius": radius_data,
        "user_city": city_data,
        "neighboring_city": neighbor_data,
        "capitals_to_highlight": capitals_data
    }

    return output
Example #20
def test_feather_compression(compression, tmpdir):
    """Using compression options should not raise errors, and should
    return identical GeoDataFrame.
    """

    test_dataset = "naturalearth_lowres"
    df = read_file(get_path(test_dataset))

    filename = os.path.join(str(tmpdir), "test.feather")
    df.to_feather(filename, compression=compression)
    pq_df = read_feather(filename)

    assert isinstance(pq_df, GeoDataFrame)
    assert_geodataframe_equal(df, pq_df)
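The test above is parametrized over compression codecs; geopandas forwards the compression argument to pyarrow's feather writer, which, to the best of my knowledge, accepts "zstd", "lz4" and "uncompressed" (availability depends on the pyarrow build). A sketch reusing the df GeoDataFrame from the test:

# sketch of explicit compression choices (codec availability depends on pyarrow)
df.to_feather("test_zstd.feather", compression="zstd")
df.to_feather("test_plain.feather", compression="uncompressed")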
def summarize_by_huc12(units_df):
    """Calculate spatial join with counties

    Parameters
    ----------
    units_df : GeoDataFrame
        summary units
    """

    print("Calculating spatial join with counties")
    counties = gp.read_feather(county_filename)
    df = (sjoin(units_df, counties,
                how="inner")[["FIPS", "state",
                              "county"]].reset_index().round())
    df.to_feather(results_filename)
    def get_ownership(self):
        ownership = gp.read_feather(ownership_filename)
        df = intersection(pd.DataFrame({"geometry": self.geometry}), ownership)

        if not len(df):
            return None

        df["acres"] = pg.area(df.geometry_right.values.data) * M2_ACRES
        df = df.loc[df.acres > 0].copy()

        if not len(df):
            return None

        results = dict()

        by_owner = (df[[
            "FEE_ORGTYP", "acres"
        ]].groupby(by="FEE_ORGTYP").acres.sum().astype("float32").to_dict())
        # use the native order of OWNERSHIP to drive order of results
        results["ownership"] = [{
            "label": value["label"],
            "acres": by_owner[key]
        } for key, value in OWNERSHIP.items() if key in by_owner]

        by_protection = (df[[
            "GAP_STATUS", "acres"
        ]].groupby(by="GAP_STATUS").acres.sum().astype("float32").to_dict())
        # use the native order of PROTECTION to drive order of results
        results["protection"] = [{
            "label": value["label"],
            "acres": by_protection[key]
        } for key, value in PROTECTION.items() if key in by_protection]

        by_area = (df[["AREA_NAME", "FEE_OWNER", "acres"]].groupby(
            by=[df.index.get_level_values(0), "AREA_NAME", "FEE_OWNER"
                ]).acres.sum().astype("float32").round().reset_index().rename(
                    columns={
                        "level_0": "id",
                        "AREA_NAME": "name",
                        "FEE_OWNER": "owner"
                    }).sort_values(by="acres", ascending=False))
        # drop very small areas, these are not helpful
        by_area = by_area.loc[by_area.acres >= 1].copy()

        results["protected_areas"] = by_area.head(25).to_dict(orient="records")
        results["num_protected_areas"] = len(by_area)

        return results
Example #23
def test_write(tmp_path):
    df = geopandas.read_file(
        geopandas.datasets.get_path("naturalearth_lowres"))
    ddf = dask_geopandas.from_geopandas(df, npartitions=4)

    basedir = tmp_path / "dataset"
    ddf.to_feather(basedir)

    # each partition (4) is written as a feather file
    paths = list(basedir.glob("*.feather"))
    assert len(paths) == 4

    # each individual file is a valid feather file
    result_part0 = geopandas.read_feather(basedir / "part.0.feather")
    result_part0.index.name = None
    assert_geodataframe_equal(result_part0, df.iloc[:45])
def summarize_by_aoi(df, analysis_acres, total_acres):
    """Calculate ranks and areas of overlap within Caribbean Priority Watersheds.

    Parameters
    ----------
    df : GeoDataframe
        area of interest
    analysis_acres : float
        area in acres of area of interest less any area outside SE Blueprint
    total_acres : float
        area in acres of area of interest

    Returns
    -------
    dict
        {
            "priorities": [...],
            "legend": [...],
            "analysis_notes": <analysis_notes>
        }
    """

    car_df = gp.read_feather(caribbean_filename,
                             columns=["geometry", "carrank"])
    df = intersection(df, car_df)
    df["acres"] = pg.area(df.geometry_right.values.data) * M2_ACRES

    # aggregate totals by rank
    by_rank = (df[["carrank", "acres"]].groupby(
        by="carrank").acres.sum().astype("float32").reset_index().sort_values(
            by="carrank"))

    priorities = []
    for ix, row in by_rank.iterrows():
        value = get_rank_value(row.carrank)
        value["acres"] = row.acres
        value["percent"] = 100 * row.acres / analysis_acres

        priorities.append(value)

    # Note: input area remainder deliberately omitted, since all
    # areas outside but close to this input are outside SE Blueprint
    return {
        "priorities": priorities,
        "legend": LEGEND,
        "analysis_notes": get_analysis_notes(),
        "analysis_acres": analysis_acres,
        "total_acres": total_acres,
    }
    def get_slr(self):
        slr_bounds = gp.read_feather(
            slr_bounds_filename).geometry.values.data[0]
        ix = pg.intersects(self.geometry, slr_bounds)

        if not ix.sum():
            # No overlap
            return None

        # only extract SLR where there are overlaps
        slr_results = extract_slr_by_geometry(self.shapes[ix],
                                              bounds=pg.total_bounds(
                                                  self.geometry[ix]))
        # None only if no shape mask
        if slr_results is None:
            return None

        slr = [slr_results[i] for i in range(7)]

        return {"slr_acres": slr_results["shape_mask"], "slr": slr}
Example #26
def summarize_by_huc12(geometries):
    """Summarize by HUC12

    Parameters
    ----------
    geometries : Series of pygeos geometries, indexed by HUC12 id
    """

    # find the indexes of the geometries that overlap with SLR bounds; these are the only
    # ones that need to be analyzed for SLR impacts
    slr_bounds = gp.read_feather(slr_bounds_filename).geometry
    tree = pg.STRtree(geometries)
    ix = tree.query(slr_bounds.geometry.values.data[0], predicate="intersects")
    geometries = geometries.iloc[ix].copy()

    if not len(geometries):
        return

    results = []
    index = []
    for huc12, geometry in Bar(
        "Calculating SLR counts for HUC12", max=len(geometries)
    ).iter(geometries.iteritems()):
        zone_results = extract_by_geometry(
            [to_dict(geometry)], bounds=pg.total_bounds(geometry)
        )
        if zone_results is None:
            continue

        index.append(huc12)
        results.append(zone_results)

    df = pd.DataFrame(results, index=index)

    # reorder columns
    df = df[["shape_mask"] + list(df.columns.difference(["shape_mask"]))]
    # extract only areas that actually had SLR pixels
    df = df[df[df.columns[1:]].sum(axis=1) > 0]
    df.columns = [str(c) for c in df.columns]
    df = df.reset_index().rename(columns={"index": "id"}).round()
    df.to_feather(results_filename)
    def get_results(self):
        sa_bnd = gp.read_feather(boundary_filename)

        # if area of interest does not intersect SA boundary, there will be no results
        if not pg.intersects(self.geometry, sa_bnd.geometry.values.data).max():
            return None

        results = {
            "type": "",
            "acres": pg.area(self.geometry).sum() * M2_ACRES,
            "name": self.name,
        }

        blueprint_results = self.get_blueprint()
        if blueprint_results is None:
            return None

        results.update(blueprint_results)

        urban_results = self.get_urban()
        if urban_results is not None:
            results.update(urban_results)

        slr_results = self.get_slr()
        if slr_results is not None:
            results.update(slr_results)

        ownership_results = self.get_ownership()
        if ownership_results is not None:
            results.update(ownership_results)

        county_results = self.get_counties()
        if county_results is not None:
            results.update(county_results)

        parca_results = self.get_parca()
        if parca_results is not None:
            results.update(parca_results)

        return results
Example #28
    def get_counties(self):
        """Get county and state names that overlap this area.

        Returns
        -------
        dict
            {"counties": [
                {"FIPS": <FIPS>, "state": <state name>, "county": <county_name>},
                ...
            ]
        """
        counties = gp.read_feather(county_filename)[[
            "geometry", "FIPS", "state", "county"
        ]]

        df = (sjoin(self.gdf, counties)[[
            "FIPS", "state", "county"
        ]].reset_index(drop=True).sort_values(by=["state", "county"]))

        if not len(df):
            return None

        return {"counties": df.to_dict(orient="records")}
nhd_dams["damID"] = nhd_dams.index.copy()
nhd_dams.damID = nhd_dams.damID.astype("uint32")

nhd_dams = nhd_dams.set_index("damID")

merged = None
for huc2 in huc2s:
    region_start = time()

    print(f"----- {huc2} ------")

    dams = nhd_dams.loc[nhd_dams.HUC2 == huc2, ["geometry"]].copy()

    print("Reading flowlines...")
    flowlines = gp.read_feather(
        clean_dir / huc2 / "flowlines.feather",
        columns=["lineID", "loop", "geometry", "sizeclass"],
    ).set_index("lineID")
    joins = pd.read_feather(
        clean_dir / huc2 / "flowline_joins.feather",
        columns=["downstream_id", "upstream_id"],
    )

    ### Find all intersection points with flowlines
    # we do this before looking for adjacent drain points, since there may be
    # multiple flowlines of different networks associated with a given dam

    print(f"Joining {len(dams):,} NHD dams to {len(flowlines):,} flowlines")
    join_start = time()
    dams = (
        pd.DataFrame(
            sjoin_geometry(
data_dir = Path("data")
boundaries_dir = data_dir / "boundaries"
nhd_dir = data_dir / "nhd"
barriers_dir = data_dir / "barriers"
src_dir = barriers_dir / "source"
master_dir = barriers_dir / "master"
snapped_dir = barriers_dir / "snapped"
qa_dir = barriers_dir / "qa"


start = time()


### Read in SARP states and merge
print("Reading dams in SARP states")
df = gp.read_feather(src_dir / "sarp_dams.feather")
print(f"Read {len(df):,} dams in region states")

### Read in non-SARP states and join in
# these are for states that overlap with HUC4s that overlap with SARP states
print(
    "Reading dams that fall outside region states, but within HUC4s that overlap with region states..."
)

outside_df = gp.read_feather(src_dir / "dams_outer_huc4.feather")
# drop any that are in the main dataset, since there are several dams at state lines
outside_df = outside_df.loc[~outside_df.SARPID.isin(df.SARPID.unique())].copy()
print(f"Read {len(outside_df):,} dams outer HUC4s")

# DataFrame.append was removed in pandas 2.0; concat is the equivalent here (assumes pandas is imported as pd)
df = pd.concat([df, outside_df], ignore_index=True, sort=False)