def sliver_line():
    """Create a line that will create a point when clipped."""
    linea = LineString([(10, 5), (13, 5), (15, 5)])
    lineb = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
    return GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:4326")
class TestSpatialJoin:
    @pytest.mark.parametrize(
        "how, lsuffix, rsuffix, expected_cols",
        [
            ("left", "left", "right", {"col_left", "col_right", "index_right"}),
            ("inner", "left", "right", {"col_left", "col_right", "index_right"}),
            ("right", "left", "right", {"col_left", "col_right", "index_left"}),
            ("left", "lft", "rgt", {"col_lft", "col_rgt", "index_rgt"}),
            ("inner", "lft", "rgt", {"col_lft", "col_rgt", "index_rgt"}),
            ("right", "lft", "rgt", {"col_lft", "col_rgt", "index_lft"}),
        ],
    )
    def test_suffixes(self, how: str, lsuffix: str, rsuffix: str, expected_cols):
        left = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
        right = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
        joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
        assert set(joined.columns) == expected_cols | {"geometry"}

    @pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
    def test_crs_mismatch(self, dfs):
        index, df1, df2, expected = dfs
        df1.crs = "epsg:4326"
        with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
            sjoin(df1, df2)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
    def test_deprecated_op_param(self, dfs, op):
        _, df1, df2, _ = dfs
        with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
            sjoin(df1, df2, op=op)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
    @pytest.mark.parametrize("predicate", ["contains", "within"])
    def test_deprecated_op_param_nondefault_predicate(self, dfs, op, predicate):
        _, df1, df2, _ = dfs
        match = "use the `predicate` parameter instead"
        if op != predicate:
            warntype = UserWarning
            match = (
                "`predicate` will be overriden by the value of `op`"
                + r"(.|\s)*"
                + match
            )
        else:
            warntype = FutureWarning
        with pytest.warns(warntype, match=match):
            sjoin(df1, df2, predicate=predicate, op=op)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    def test_unknown_kwargs(self, dfs):
        _, df1, df2, _ = dfs
        with pytest.raises(
            TypeError,
            match=r"sjoin\(\) got an unexpected keyword argument 'extra_param'",
        ):
            sjoin(df1, df2, extra_param="test")

    @pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
    @pytest.mark.parametrize(
        "dfs",
        [
            "default-index",
            "string-index",
            "named-index",
            "multi-index",
            "named-multi-index",
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    @pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
    def test_inner(self, predicate, predicate_kw, dfs):
        index, df1, df2, expected = dfs
        res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})
        exp = expected[predicate].dropna().copy()
        exp = exp.drop("geometry_y", axis=1).rename(
            columns={"geometry_x": "geometry"}
        )
        exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
        if index == "default-index":
            exp[["index_left", "index_right"]] = exp[
                ["index_left", "index_right"]
            ].astype("int64")
        if index == "named-index":
            exp[["df1_ix", "df2_ix"]] = exp[["df1_ix", "df2_ix"]].astype("int64")
            exp = exp.set_index("df1_ix").rename(columns={"df2_ix": "index_right"})
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
        if index == "multi-index":
            exp = exp.set_index(["level_0_x", "level_1_x"]).rename(
                columns={"level_0_y": "index_right0", "level_1_y": "index_right1"}
            )
            exp.index.names = df1.index.names
        if index == "named-multi-index":
"df1_ix2"]).rename(columns={ "df2_ix1": "index_right0", "df2_ix2": "index_right1" }) exp.index.names = df1.index.names assert_frame_equal(res, exp) @pytest.mark.parametrize( "dfs", [ "default-index", "string-index", "named-index", "multi-index", "named-multi-index", ], indirect=True, ) @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"]) def test_left(self, predicate, dfs): index, df1, df2, expected = dfs res = sjoin(df1, df2, how="left", predicate=predicate) if index in ["default-index", "string-index"]: exp = expected[predicate].dropna(subset=["index_left"]).copy() elif index == "named-index": exp = expected[predicate].dropna(subset=["df1_ix"]).copy() elif index == "multi-index": exp = expected[predicate].dropna(subset=["level_0_x"]).copy() elif index == "named-multi-index": exp = expected[predicate].dropna(subset=["df1_ix1"]).copy() exp = exp.drop("geometry_y", axis=1).rename(columns={"geometry_x": "geometry"}) exp["df1"] = exp["df1"].astype("int64") if index == "default-index": exp["index_left"] = exp["index_left"].astype("int64") # TODO: in result the dtype is object res["index_right"] = res["index_right"].astype(float) elif index == "named-index": exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64") exp = exp.set_index("df1_ix").rename( columns={"df2_ix": "index_right"}) if index in ["default-index", "string-index"]: exp = exp.set_index("index_left") exp.index.name = None if index == "multi-index": exp = exp.set_index(["level_0_x", "level_1_x" ]).rename(columns={ "level_0_y": "index_right0", "level_1_y": "index_right1" }) exp.index.names = df1.index.names if index == "named-multi-index": exp = exp.set_index(["df1_ix1", "df1_ix2"]).rename(columns={ "df2_ix1": "index_right0", "df2_ix2": "index_right1" }) exp.index.names = df1.index.names assert_frame_equal(res, exp) def test_empty_join(self): # Check joins resulting in empty gdfs. polygons = geopandas.GeoDataFrame({ "col2": [1, 2], "geometry": [ Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]), Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]), ], }) not_in = geopandas.GeoDataFrame({ "col1": [1], "geometry": [Point(-0.5, 0.5)] }) empty = sjoin(not_in, polygons, how="left", predicate="intersects") assert empty.index_right.isnull().all() empty = sjoin(not_in, polygons, how="right", predicate="intersects") assert empty.index_left.isnull().all() empty = sjoin(not_in, polygons, how="inner", predicate="intersects") assert empty.empty @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"]) @pytest.mark.parametrize( "empty", [ GeoDataFrame(geometry=[GeometryCollection(), GeometryCollection()]), GeoDataFrame(geometry=GeoSeries()), ], ) def test_join_with_empty(self, predicate, empty): # Check joins with empty geometry columns/dataframes. 
        polygons = geopandas.GeoDataFrame(
            {
                "col2": [1, 2],
                "geometry": [
                    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                    Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
                ],
            }
        )
        result = sjoin(empty, polygons, how="left", predicate=predicate)
        assert result.index_right.isnull().all()
        result = sjoin(empty, polygons, how="right", predicate=predicate)
        assert result.index_left.isnull().all()
        result = sjoin(empty, polygons, how="inner", predicate=predicate)
        assert result.empty

    @pytest.mark.parametrize("dfs", ["default-index", "string-index"], indirect=True)
    def test_sjoin_invalid_args(self, dfs):
        index, df1, df2, expected = dfs
        with pytest.raises(ValueError, match="'left_df' should be GeoDataFrame"):
            sjoin(df1.geometry, df2)
        with pytest.raises(ValueError, match="'right_df' should be GeoDataFrame"):
            sjoin(df1, df2.geometry)

    @pytest.mark.parametrize(
        "dfs",
        [
            "default-index",
            "string-index",
            "named-index",
            "multi-index",
            "named-multi-index",
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    def test_right(self, predicate, dfs):
        index, df1, df2, expected = dfs
        res = sjoin(df1, df2, how="right", predicate=predicate)
        if index in ["default-index", "string-index"]:
            exp = expected[predicate].dropna(subset=["index_right"]).copy()
        elif index == "named-index":
            exp = expected[predicate].dropna(subset=["df2_ix"]).copy()
        elif index == "multi-index":
            exp = expected[predicate].dropna(subset=["level_0_y"]).copy()
        elif index == "named-multi-index":
            exp = expected[predicate].dropna(subset=["df2_ix1"]).copy()
        exp = exp.drop("geometry_x", axis=1).rename(
            columns={"geometry_y": "geometry"}
        )
        exp["df2"] = exp["df2"].astype("int64")
        if index == "default-index":
            exp["index_right"] = exp["index_right"].astype("int64")
            res["index_left"] = res["index_left"].astype(float)
        elif index == "named-index":
            exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
            exp = exp.set_index("df2_ix").rename(columns={"df1_ix": "index_left"})
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_right")
            exp = exp.reindex(columns=res.columns)
            exp.index.name = None
        if index == "multi-index":
            exp = exp.set_index(["level_0_y", "level_1_y"]).rename(
                columns={"level_0_x": "index_left0", "level_1_x": "index_left1"}
            )
            exp.index.names = df2.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df2_ix1", "df2_ix2"]).rename(
                columns={"df1_ix1": "index_left0", "df1_ix2": "index_left1"}
            )
            exp.index.names = df2.index.names
        # GH 1364: fix of behaviour was done in pandas 1.1.0
        if predicate == "within" and LooseVersion(str(pd.__version__)) >= LooseVersion("1.1.0"):
            exp = exp.sort_index()
        assert_frame_equal(res, exp, check_index_type=False)
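# A minimal standalone sketch (not part of the test class, assuming the same
# imports as the surrounding test module) of the suffix behaviour exercised by
# test_suffixes above: overlapping non-geometry column names pick up
# lsuffix/rsuffix, and the right frame's index comes back as "index_<rsuffix>".
if __name__ == "__main__":
    left = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
    right = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
    demo = sjoin(left, right, how="inner", lsuffix="lft", rsuffix="rgt")
    print(sorted(demo.columns))  # ['col_lft', 'col_rgt', 'geometry', 'index_rgt']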
def rd_sql(server, database, table=None, col_names=None, where_col=None,
           where_val=None, where_op='AND', geo_col=False, from_date=None,
           to_date=None, date_col=None, rename_cols=None, stmt=None,
           export_path=None):
    """
    Function to import data from an MSSQL database. Requires the pymssql package.

    Parameters
    ----------
    server : str
        The server name. e.g.: 'SQL2012PROD03'
    database : str
        The specific database within the server. e.g.: 'LowFlows'
    table : str
        The specific table within the database. e.g.: 'LowFlowSiteRestrictionDaily'
    col_names : list of str
        The column names that should be retrieved. e.g.: ['SiteID', 'BandNo', 'RecordNo']
    where_col : str or dict
        Either a string with an associated where_val list, or a dictionary
        mapping column names to lists of values.
        e.g.: 'SnapshotType' or {'SnapshotType': ['value1', 'value2']}
    where_val : list
        The WHERE query values for the where_col. e.g. ['value1', 'value2']
    where_op : str
        If where_col is a dictionary with more than one key, the operator that
        connects the where statements. Must be either 'AND' or 'OR'.
    geo_col : bool
        Is there a geometry column in the table?
    from_date : str
        The start date in the form '2010-01-01'.
    to_date : str
        The end date in the form '2010-01-01'.
    date_col : str
        The SQL table column that contains the dates.
    rename_cols : list of str
        New names for the returned columns, applied positionally.
    stmt : str
        Custom SQL statement to be directly passed to the database table.
        This will ignore all prior arguments except server and database.
    export_path : str
        The export path for a csv file if desired. If None, then nothing is exported.

    Returns
    -------
    DataFrame
    """
    ## Create where statements
    if stmt is None:
        if table is None:
            raise ValueError(
                'Must at least provide input for server, database, and table.')
        if col_names is not None:
            if isinstance(col_names, str):
                col_names = [col_names]
            # Bracket the column names for T-SQL
            col_names1 = ['[' + str(i) + ']' for i in col_names]
            col_stmt = ', '.join(col_names1)
        else:
            col_stmt = '*'
        where_lst = sql_where_stmts(where_col=where_col, where_val=where_val,
                                    where_op=where_op, from_date=from_date,
                                    to_date=to_date, date_col=date_col)
        if isinstance(where_lst, list):
            stmt1 = ("SELECT " + col_stmt + " FROM " + table + " where "
                     + " and ".join(where_lst))
        else:
            stmt1 = "SELECT " + col_stmt + " FROM " + table
    elif isinstance(stmt, str):
        stmt1 = stmt
    else:
        raise ValueError('stmt must either be an SQL string or None.')

    ## Create connection to database and execute sql statement
    conn = connect(server, database=database)
    df = read_sql(stmt1, conn)
    conn.close()
    if rename_cols is not None:
        df.columns = rename_cols

    ## Read in geometry if required
    # Note: where_lst only exists when stmt is None, so the geometry lookup is
    # restricted to that case (testing `stmt is not None` here would raise a
    # NameError on where_lst).
    if geo_col and (stmt is None):
        geometry, proj = rd_sql_geo(server=server, database=database,
                                    table=table, where_lst=where_lst)
        df = GeoDataFrame(df, geometry=geometry, crs=proj)

    ## save and return
    if export_path is not None:
        save_df(df, export_path, index=False)

    return df
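# A hypothetical invocation sketch for rd_sql. The server, database, table and
# column names are the docstring's own examples; the WHERE values and the date
# column name are placeholders, not known schema.
if __name__ == "__main__":
    restrictions = rd_sql(
        server='SQL2012PROD03',
        database='LowFlows',
        table='LowFlowSiteRestrictionDaily',
        col_names=['SiteID', 'BandNo', 'RecordNo'],
        where_col={'SnapshotType': ['value1', 'value2']},  # placeholder values
        from_date='2010-01-01',
        to_date='2010-12-31',
        date_col='Date',  # placeholder date column
    )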
        'geometry': {
            'type': 'LineString',
            'coordinates': [list(point) for point in path]
        }
    }

print("Finding all disasters within 0.5 degrees of path")

# Load my file of ufo sightings, earthquakes, etc.
disasters_DF = GeoDataFrame.from_file(
    "./Assignments/A05/assets/api/data/vols_eq_ufo/vols_eq_ufo1.shp"
).to_crs(crs="epsg:4326")

# Here is a sample path from Wichita Falls to Amarillo that I'm loading as a GeoDataFrame
path_df = GeoDataFrame.from_features([path_geojson], crs="epsg:4326")

# Create a buffer from the path, where the buffer is a polygon 0.5 degrees
# in radius around the path
buffered_path = (path_df.buffer(0.5)).to_crs(crs="epsg:4326")

# Create a dataframe from the buffered path
buffered_path_df = GeoDataFrame(buffered_path, geometry=buffered_path.geometry)
buffered_path_df[0] = None

# Perform a spatial join of the buffered path and the ufo sightings,
# earthquakes, etc. dataframe. This returns all disasters within 0.5 degrees
# of the path.
join_results = GeoDataFrame(
    sjoin(disasters_DF, buffered_path_df, lsuffix="left"))

# From here, dump the path, the buffered path, and the disasters within
# 0.5 degrees of the path to files
print("Creating files")
dump(
    path_geojson,
    open(
        './Assignments/A05/assets/api/data/shortest_paths/'
        + str(target_city_name) + '.geojson', 'w'))
dump(
    loads(buffered_path.to_json()),
    open(
def test_overlay_strict(how, keep_geom_type, geom_types):
    """
    Test of mixed geometry types on input and output. Expected results
    initially generated using the following snippet.

        polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})
        polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})
        lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
                                LineString([(0, 3), (6, 3)])])
        df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
        points1 = gpd.GeoSeries([Point((2, 2)), Point((3, 3))])
        df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})
        params = ["union", "intersection", "difference",
                  "symmetric_difference", "identity"]
        stricts = [True, False]
        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
    """
    polys1 = GeoSeries([
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ])
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})
    polys2 = GeoSeries([
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ])
    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
    lines1 = GeoSeries([
        LineString([(2, 0), (2, 4), (6, 4)]),
        LineString([(0, 3), (6, 3)]),
    ])
    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})

    if geom_types == "polys":
        result = overlay(df1, df2, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_line":
        result = overlay(df1, df3, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_point":
        result = overlay(df1, df4, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "line_poly":
        result = overlay(df3, df1, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "point_poly":
        result = overlay(df4, df1, how=how, keep_geom_type=keep_geom_type)

    try:
        expected = read_file(
            os.path.join(
                DATA, "strict",
                "{t}_{h}_{s}.geojson".format(t=geom_types, h=how, s=keep_geom_type),
            ))
        # The order depends on the spatial index used, so we sort the resulting
        # dataframes to get a consistent order independently of the spatial
        # index implementation.
        assert all(expected.columns == result.columns), "Column name mismatch"
        cols = list(set(result.columns) - set(["geometry"]))
        expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
        result = result.sort_values(cols, axis=0).reset_index(drop=True)
        assert_geodataframe_equal(
            result,
            expected,
            normalize=True,
            check_column_type=False,
            check_less_precise=True,
            check_crs=False,
            check_dtype=False,
        )
    except DriverError:  # fiona >= 1.8
        assert result.empty
    except OSError:  # fiona < 1.8
        assert result.empty
# LAND COVER NEW
FILE_NAME = "/Users/mmusleh/git/flash_demo/ignore/land_cover_data_test_15807.csv"
df = pd.read_csv(FILE_NAME)

geometry = [load_wkt(geom.split(";")[1]).centroid for geom in df.st_asewkt]
df["lon"] = [g.x for g in geometry]  # geometries are already centroids
df["lat"] = [g.y for g in geometry]

# 0: crops, 1: forest, 2: others
values = [0 if v == 82 else (1 if v in [41, 42, 43] else 2) for v in df.val]
colors = ['#A9AE85' if v == 0 else ('#729565' if v == 1 else "#73A7CC")
          for v in values]
df['value'] = values
df["color"] = colors
gdf = GeoDataFrame(df, geometry=geometry)

# Split the total bounds into four quadrants
minx, miny, maxx, maxy = gdf.total_bounds
midx = minx + (maxx - minx) / 2
midy = miny + (maxy - miny) / 2
q1 = box(minx, midy, midx, maxy)
q2 = box(midx, midy, maxx, maxy)
q3 = box(minx, miny, midx, midy)
q4 = box(midx, miny, maxx, midy)
for n, q in enumerate([q1, q2, q3, q4]):
    with open(f'q{n}', 'w+') as w:
        w.write(str(q.centroid))
        w.write(str(q))
def test_no_geometries(self):
    # keeps GeoDataFrame class (no DataFrame)
    data = {"A": range(3), "B": np.arange(3.0)}
    df = GeoDataFrame(data)
    assert type(df) == GeoDataFrame
def test_empty(self):
    df = GeoDataFrame()
    assert type(df) == GeoDataFrame
    df = GeoDataFrame({'A': [], 'B': []}, geometry=[])
    assert type(df) == GeoDataFrame
def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame."""
    multi_poly = donut_geometry.unary_union
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:4326")
    out_df["attr"] = ["pool"]
    return out_df
def test_empty_to_file(self):
    input_empty_df = GeoDataFrame()
    tempfilename = os.path.join(self.tempdir, 'test.shp')
    with pytest.raises(ValueError,
                       match="Cannot write empty DataFrame to file."):
        input_empty_df.to_file(tempfilename)
def two_line_gdf():
    """Create line objects for testing."""
    linea = LineString([(1, 1), (2, 2), (3, 2), (5, 3)])
    lineb = LineString([(3, 4), (5, 7), (12, 2), (10, 5), (9, 7.5)])
    return GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:4326")
def single_rectangle_gdf():
    """Create a single rectangle for clipping."""
    poly_inters = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:4326")
    gdf["attr2"] = "site-boundary"
    return gdf
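# A short sketch (assuming geopandas >= 0.7 for geopandas.clip) tying the two
# fixtures above together: the first sliver line only touches the 10x10
# rectangle at (10, 5), so clipping collapses that row to a Point, while the
# second line is cut down to the portion inside the rectangle.
if __name__ == "__main__":
    import geopandas
    clipped = geopandas.clip(sliver_line(), single_rectangle_gdf())
    print(clipped.geom_type.tolist())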
def point_gdf():
    """Create a point GeoDataFrame."""
    pts = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
    return GeoDataFrame(
        [Point(xy) for xy in pts], columns=["geometry"], crs="EPSG:4326"
    )
def snap_points_to_near_line(lineShp, pointShp, epsg, workGrass, outPoints,
                             location='overlap_pnts', api='grass',
                             movesShp=None):
    """
    Move points to overlap the nearest line.

    APIs available:

    * grass;
    * saga.
    """
    if api == 'grass':
        # Uses GRASS GIS to find near lines.
        import os
        import numpy
        from geopandas import GeoDataFrame
        from gasp.pyt.oss import fprop
        from gasp.gt.wenv.grs import run_grass
        from gasp.gt.fmshp import shp_to_obj
        from gasp.gt.toshp import df_to_shp

        # Create GRASS GIS Location
        grassBase = run_grass(workGrass, location=location, srs=epsg)

        import grass.script as grass
        import grass.script.setup as gsetup
        gsetup.init(grassBase, workGrass, location, 'PERMANENT')

        # Import some GRASS GIS tools
        from gasp.gt.prox import grs_near as near
        from gasp.gt.tbl.attr import geomattr_to_db
        from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp

        # Import data into GRASS GIS
        grsLines = shp_to_grs(lineShp, fprop(lineShp, 'fn', forceLower=True))
        grsPoint = shp_to_grs(pointShp, fprop(pointShp, 'fn', forceLower=True))

        # Get distance from points to near line
        near(grsPoint, grsLines, nearCatCol="tocat", nearDistCol="todistance")

        # Get coord of start/end points of polylines
        geomattr_to_db(grsLines, ['sta_pnt_x', 'sta_pnt_y'], 'start', 'line')
        geomattr_to_db(grsLines, ['end_pnt_x', 'end_pnt_y'], 'end', 'line')

        # Export data from GRASS GIS. The output path is the second positional
        # argument; the geometry type and asMultiPart flag belong to
        # grs_to_shp, not os.path.join.
        ogrPoint = grs_to_shp(
            grsPoint, os.path.join(workGrass, grsPoint + '.shp'),
            'point', asMultiPart=True)
        ogrLine = grs_to_shp(
            grsLines, os.path.join(workGrass, grsLines + '.shp'),
            'line', asMultiPart=True)

        # Points to GeoDataFrame
        pntDf = shp_to_obj(ogrPoint)
        # Lines to GeoDataFrame
        lnhDf = shp_to_obj(ogrLine)

        # Erase unnecessary fields
        pntDf.drop(["todistance"], axis=1, inplace=True)
        keep_cols = ['geometry', 'cat', 'sta_pnt_x', 'sta_pnt_y',
                     'end_pnt_x', 'end_pnt_y']
        lnhDf.drop([c for c in lnhDf.columns.values if c not in keep_cols],
                   axis=1, inplace=True)

        # Join geometries - table with point geometry and geometry of the
        # nearest line
        resultDf = pntDf.merge(lnhDf, how='inner',
                               left_on='tocat', right_on='cat')

        # Move points
        resultDf['geometry'] = [
            geoms[0].interpolate(geoms[0].project(geoms[1]))
            for geoms in zip(resultDf.geometry_y, resultDf.geometry_x)
        ]
        resultDf.drop(["geometry_x", "geometry_y", "cat_x", "cat_y"],
                      axis=1, inplace=True)
        resultDf = GeoDataFrame(resultDf,
                                crs={"init": 'epsg:{}'.format(epsg)},
                                geometry="geometry")

        # Check if points are equal to any start/end points. The two
        # conditions are OR'd so the start-point flag is not overwritten by
        # the end-point test.
        resultDf["x"] = resultDf.geometry.x
        resultDf["y"] = resultDf.geometry.y
        resultDf["check"] = numpy.where(
            ((resultDf["x"] == resultDf["sta_pnt_x"]) &
             (resultDf["y"] == resultDf["sta_pnt_y"])) |
            ((resultDf["x"] == resultDf["end_pnt_x"]) &
             (resultDf["y"] == resultDf["end_pnt_y"])),
            1, 0)

        # To file
        df_to_shp(resultDf, outPoints)

    elif api == 'saga':
        # Snap points to lines using SAGA GIS
        from gasp import exec_cmd

        cmd = ("saga_cmd shapes_points 19 -INPUT {pnt} -SNAP {lnh} "
               "-OUTPUT {out}{mv}").format(
                   pnt=pointShp, lnh=lineShp, out=outPoints,
                   mv="" if not movesShp else " -MOVES {}".format(movesShp))
        outcmd = exec_cmd(cmd)

    else:
        raise ValueError("{} is not available!".format(api))

    return outPoints
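# A hypothetical invocation; all paths and the EPSG code are placeholders, and
# api='grass' assumes a working GRASS GIS installation.
if __name__ == "__main__":
    snap_points_to_near_line(
        '/data/roads.shp',          # placeholder line shapefile
        '/data/stops.shp',          # placeholder point shapefile
        3763,                       # placeholder EPSG code
        '/tmp/grassdata',           # GRASS working directory
        '/data/stops_snapped.shp',  # output shapefile
        api='saga',                 # or 'grass'
    )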
def setup_method(self):
    self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
    self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
    self.t3 = Polygon([(2, 0), (3, 0), (3, 1)])
    self.tz = Polygon([(1, 1, 1), (2, 2, 2), (3, 3, 3)])
    self.tz1 = Polygon([(2, 2, 2), (1, 1, 1), (3, 3, 3)])
    self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
    self.sqz = Polygon([(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4)])
    self.t4 = Polygon([(0, 0), (3, 0), (3, 3), (0, 2)])
    self.t5 = Polygon([(2, 0), (3, 0), (3, 3), (2, 3)])
    self.inner_sq = Polygon(
        [(0.25, 0.25), (0.75, 0.25), (0.75, 0.75), (0.25, 0.75)]
    )
    self.nested_squares = Polygon(self.sq.boundary, [self.inner_sq.boundary])
    self.p0 = Point(5, 5)
    self.p3d = Point(5, 5, 5)
    self.g0 = GeoSeries(
        [
            self.t1,
            self.t2,
            self.sq,
            self.inner_sq,
            self.nested_squares,
            self.p0,
            None,
        ]
    )
    self.g1 = GeoSeries([self.t1, self.sq])
    self.g2 = GeoSeries([self.sq, self.t1])
    self.g3 = GeoSeries([self.t1, self.t2])
    self.gz = GeoSeries([self.tz, self.sqz, self.tz1])
    self.g3.crs = "epsg:4326"
    self.g4 = GeoSeries([self.t2, self.t1])
    self.g4.crs = "epsg:4326"
    self.g_3d = GeoSeries([self.p0, self.p3d])
    self.na = GeoSeries([self.t1, self.t2, Polygon()])
    self.na_none = GeoSeries([self.t1, None])
    self.a1 = self.g1.copy()
    self.a1.index = ["A", "B"]
    self.a2 = self.g2.copy()
    self.a2.index = ["B", "C"]
    self.esb = Point(-73.9847, 40.7484, 30.3244)
    self.sol = Point(-74.0446, 40.6893, 31.2344)
    self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
    self.pt2d = Point(-73.9847, 40.7484)
    self.landmarks_mixed = GeoSeries([self.esb, self.sol, self.pt2d], crs=4326)
    self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
    self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
    self.g5 = GeoSeries([self.l1, self.l2])
    self.g6 = GeoSeries([self.p0, self.t3])
    self.g7 = GeoSeries([self.sq, self.t4])
    self.g8 = GeoSeries([self.t1, self.t5])
    self.empty = GeoSeries([])
    self.all_none = GeoSeries([None, None])
    self.empty_poly = Polygon()
    self.g9 = GeoSeries(self.g0, index=range(1, 8))
    # Crossed lines
    self.l3 = LineString([(0, 0), (1, 1)])
    self.l4 = LineString([(0, 1), (1, 0)])
    self.crossed_lines = GeoSeries([self.l3, self.l4])
    # Placeholder for testing, will just drop in different geometries
    # when needed
    self.gdf1 = GeoDataFrame(
        {"geometry": self.g1, "col0": [1.0, 2.0], "col1": ["geo", "pandas"]}
    )
    self.gdf2 = GeoDataFrame(
        {"geometry": self.g1, "col3": [4, 5], "col4": ["rand", "string"]}
    )
    self.gdf3 = GeoDataFrame(
        {"geometry": self.g3, "col3": [4, 5], "col4": ["rand", "string"]}
    )
    self.gdfz = GeoDataFrame(
        {"geometry": self.gz, "col3": [4, 5, 6], "col4": ["rand", "string", "geo"]}
    )
def _split_exposure_highlow(exp_sub, mode, High_Value_Area_gdf):
    """
    Divide LitPop exposure into high-value exposure and low-value exposure
    according to the area queried in OSM, and re-assign all low values to
    high-value centroids.

    Parameters:
        exp_sub (exposure)
        mode (str)
        High_Value_Area_gdf (GeoDataFrame) : one-row frame whose geometry
            delimits the high-value area

    Returns:
        exp_sub_high (exposure)
    """
    exp_sub_high = pd.DataFrame(columns=exp_sub.columns)
    exp_sub_low = pd.DataFrame(columns=exp_sub.columns)
    for i, pt in enumerate(exp_sub.geometry):
        if pt.within(High_Value_Area_gdf.loc[0]['geometry']):
            exp_sub_high = exp_sub_high.append(exp_sub.iloc[i])
        else:
            exp_sub_low = exp_sub_low.append(exp_sub.iloc[i])

    exp_sub_high = GeoDataFrame(exp_sub_high, crs=exp_sub.crs,
                                geometry=exp_sub_high.geometry)
    exp_sub_low = GeoDataFrame(exp_sub_low, crs=exp_sub.crs,
                               geometry=exp_sub_low.geometry)

    if mode == "nearest":
        # Assign asset values of low-value points to the nearest point in the
        # high-value df.
        pointsToAssign = exp_sub_high.geometry.unary_union
        exp_sub_high["addedValNN"] = 0
        for i in range(0, len(exp_sub_low)):
            nearest = exp_sub_high.geometry == nearest_points(
                exp_sub_low.iloc[i].geometry, pointsToAssign)[1]  # point
            exp_sub_high.loc[nearest, "addedValNN"] = exp_sub_low.iloc[i].value
        exp_sub_high["combinedValNN"] = exp_sub_high[
            ['addedValNN', 'value']].sum(axis=1)
        exp_sub_high.rename(
            columns={'value': 'value_old', 'combinedValNN': 'value'},
            inplace=True)
    elif mode == "even":
        # Assign asset values of low-value points evenly to points in the
        # high-value df.
        exp_sub_high['addedValeven'] = sum(exp_sub_low.value) / len(exp_sub_high)
        exp_sub_high["combinedValeven"] = exp_sub_high[
            ['addedValeven', 'value']].sum(axis=1)
        exp_sub_high.rename(
            columns={'value': 'value_old', 'combinedValeven': 'value'},
            inplace=True)
    elif mode == "proportional":
        # Assign asset values of low-value points proportionally to the value
        # of points in the high-value df.
        exp_sub_high['addedValprop'] = 0
        for i in range(0, len(exp_sub_high)):
            asset_factor = exp_sub_high.iloc[i].value / sum(exp_sub_high.value)
            exp_sub_high.loc[exp_sub_high.index[i], 'addedValprop'] = (
                asset_factor * sum(exp_sub_low.value))
        exp_sub_high["combinedValprop"] = exp_sub_high[
            ['addedValprop', 'value']].sum(axis=1)
        exp_sub_high.rename(
            columns={'value': 'value_old', 'combinedValprop': 'value'},
            inplace=True)
    else:
        print("No proper re-assignment mode set. "
              "Please choose either nearest, even or proportional.")

    return exp_sub_high
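# A tiny self-contained check of the "even" branch (assuming pandas < 2.0,
# since the function relies on DataFrame.append). The geometries and values
# here are made up for illustration.
if __name__ == "__main__":
    from shapely.geometry import Point, box
    high_area = GeoDataFrame({"geometry": [box(0, 0, 1, 1)]})
    exp = GeoDataFrame({
        "value": [10.0, 20.0],
        "geometry": [Point(0.2, 0.2), Point(2, 2)],  # one inside, one outside
    })
    out = _split_exposure_highlow(exp, "even", high_area)
    # The single low point's value (20) is spread over the one high point:
    print(out[["value_old", "value"]])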
def make_traj(nodes, crs=CRS_METRIC, id=1, parent=None):
    nodes = [node.to_dict() for node in nodes]
    df = pd.DataFrame(nodes).set_index('t')
    geo_df = GeoDataFrame(df, crs=crs)
    return Trajectory(geo_df, id, parent=parent)
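# A minimal sketch of the node objects make_traj expects: anything exposing
# to_dict() with a 't' timestamp and a 'geometry' entry works. Node here is
# hypothetical, and CRS_METRIC is assumed to be defined next to make_traj.
class Node:
    def __init__(self, x, y, t):
        self.x, self.y, self.t = x, y, t

    def to_dict(self):
        return {'t': self.t, 'geometry': Point(self.x, self.y)}


# e.g. make_traj([Node(0, 0, datetime(2020, 1, 1, 0, 0)),
#                 Node(0, 10, datetime(2020, 1, 1, 0, 1))])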
def get_gdf(self):
    crs = {'init': 'epsg:4326'}
    return GeoDataFrame(self.get_names(), crs=crs, geometry=self.get_geo())
# `source` (an open fiona collection) and `pops` (a DataFrame with a GEOID
# column) are assumed to be loaded earlier in the script.
p2 = Proj({'proj': 'aea', 'datum': 'WGS84', 'lon_0': '-96'})

tract_polygons = []
geoids = []
for shape in source:
    if shape['properties']['GEOID'] in pops['GEOID'].values:
        if shape['geometry']['type'] == 'MultiPolygon':
            continue
        subshape = shape['geometry']['coordinates'][0]
        # project from lat/long into the Albers equal-area projection
        p1_points = np.array(subshape)
        p2_points = transform(p1, p2, p1_points[:, 0], p1_points[:, 1])
        p2_points = np.array(p2_points).T
        # create polygon
        tract_polygons.append(Polygon(p2_points))
        geoids.append(shape['properties']['GEOID'])

# initialize data
tracts = GeoDataFrame(index=range(len(tract_polygons)))
tracts['region'] = tract_polygons
tracts['geoid'] = geoids
tracts['population'] = np.tile(np.nan, len(tracts))
tracts['area'] = np.tile(np.nan, len(tracts))
tracts.index = range(len(tracts))

# trim tracts to nyc: read in nyc boundary
nyc = nyc_boundary()
areas = []
print('Trimming tracts...')
for i in range(len(tracts)):
    if i % 100 == 0:
def photos_location(buffer_shp, epsg_in, keyword=None, epsg_out=4326,
                    onlySearchAreaContained=True, keyToUse=None):
    """
    Search for data in Flickr and return an array with the same data.

    buffer_shp could be a shapefile with a single buffer feature, or a dict
    like:

        buffer_shp = {
            x: x_value,
            y: y_value,
            r: dist (in meters)
        }

    or a list or a tuple:

        buffer_shp = [x, y, radius]
    """
    import pandas
    from shapely.geometry import Polygon, Point
    from shapely.wkt import loads
    from geopandas import GeoDataFrame
    from glass.g.gp.prox.bfing.obj import xy_to_buffer
    from glass.g.prop.feat.bf import getBufferParam
    from glass.g.prj.obj import prj_ogrgeom

    x_center, y_center, dist = getBufferParam(buffer_shp, epsg_in, outSRS=4326)

    # Retrieve data from Flickr
    photos = search_photos(lat=y_center, lng=x_center,
                           radius=float(dist) / 1000, keyword=keyword,
                           apiKey=keyToUse)

    try:
        if not photos:
            # Return NoData
            return 0
    except:
        pass

    photos['longitude'] = photos['longitude'].astype(float)
    photos['latitude'] = photos['latitude'].astype(float)

    geoms = [Point(xy) for xy in zip(photos.longitude, photos.latitude)]
    gdata = GeoDataFrame(photos, crs='EPSG:4326', geometry=geoms)

    if onlySearchAreaContained:
        _x_center, _y_center, _dist = getBufferParam(buffer_shp, epsg_in,
                                                     outSRS=3857)
        # Check if all retrieved points are within the search area
        search_area = xy_to_buffer(float(_x_center), float(_y_center),
                                   float(_dist))
        search_area = prj_ogrgeom(search_area, 3857, 4326)
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"] == True]
        gdata.reset_index(drop=True, inplace=True)

    gdata["fid"] = gdata["id"]
    if "url_l" in gdata.columns.values:
        gdata["url"] = gdata["url_l"]
    else:
        gdata["url"] = 'None'
    gdata["description"] = gdata["_content"]

    # Drop irrelevant fields
    cols = list(gdata.columns.values)
    delCols = []
    for col in cols:
        if col != 'geometry' and col != 'description' and \
           col != 'fid' and col != 'url' and col != 'datetaken' \
           and col != 'dateupload' and col != 'title':
            delCols.append(col)
        else:
            continue
    gdata.drop(delCols, axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs('EPSG:{}'.format(str(epsg_out)))

    return gdata
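# A hypothetical call, assuming string keys for the buffer dict. The centre
# coordinates, radius, keyword and the API key are all placeholders.
if __name__ == "__main__":
    pics = photos_location(
        {"x": -8.6, "y": 40.6, "r": 5000},  # placeholder centre and radius (m)
        epsg_in=4326,
        keyword="flood",                    # placeholder keyword
        epsg_out=4326,
        keyToUse="YOUR_FLICKR_API_KEY",     # placeholder API key
    )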
import geopandas as gpd
import os
import pandas as pd

os.chdir("path to working directory")

# ### Create GeoDataFrames
from geopandas import GeoDataFrame
from shapely.geometry import Point, LineString

# shipping, noShipping and hq are assumed to be DataFrames with Lat/Long
# columns loaded earlier in the notebook.
shipping_gdf = GeoDataFrame(
    shipping, geometry=[Point(xy) for xy in zip(shipping.Long, shipping.Lat)])
noShipping_gdf = GeoDataFrame(
    noShipping,
    geometry=[Point(xy) for xy in zip(noShipping.Long, noShipping.Lat)])
hq_gdf = GeoDataFrame(hq, geometry=[Point(xy) for xy in zip(hq.Long, hq.Lat)])
hq_gdf.head()

# ### Get adjusted lat/long coordinates
# https://stackoverflow.com/questions/30740046/calculate-distance-to-nearest-feature-with-geopandas


def nearest_poly(point, polygons):
    min_dist = polygons.distance(point).min()
    index = polygons.distance(point)[polygons.distance(point) == min_dist].index[0]
    return polygons.iat[index, 0]


def getXY(pt):
def __geo_interface__(self):
    """Returns a GeoSeries as a python feature collection."""
    from geopandas import GeoDataFrame
    return GeoDataFrame({'geometry': self}).__geo_interface__
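# A small usage sketch: the property above makes a GeoSeries expose the
# __geo_interface__ protocol as a GeoJSON-like FeatureCollection mapping.
if __name__ == "__main__":
    from geopandas import GeoSeries
    from shapely.geometry import Point
    fc = GeoSeries([Point(0, 0), Point(1, 1)]).__geo_interface__
    print(fc["type"], len(fc["features"]))  # FeatureCollection 2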
def setup_method(self):
    self.N = 10
    self.points = GeoSeries(Point(i, i, i) for i in range(self.N))
    values = np.arange(self.N)
    self.df = GeoDataFrame({'geometry': self.points, 'values': values})
def test_to_file_empty(tmpdir):
    input_empty_df = GeoDataFrame()
    tempfilename = os.path.join(str(tmpdir), "test.shp")
    with pytest.raises(ValueError,
                       match="Cannot write empty DataFrame to file."):
        input_empty_df.to_file(tempfilename)
# Import police beats
ch_pbeats = gpd.read_file(os.path.join(data_dir, 'BeatsPolice.shp'))

# Project CRS WGS84 EPSG:4326
crs = {'init': 'epsg:4326'}

# Rename the latitude and longitude columns
calls311_2017 = calls311_2017.rename(columns={
    'Latitude': 'Lat',
    'Longitude': 'Lon'
})

# Create Point data from Lon and Lat
g_calls311_2017 = [
    Point(xy) for xy in zip(calls311_2017.Lon, calls311_2017.Lat)
]

# Create the GeoDataFrame
gdf_calls311_2017 = GeoDataFrame(calls311_2017, crs=crs,
                                 geometry=g_calls311_2017)

# Assign each call its corresponding beat
id_calls311_2017 = gpd.sjoin(ch_pbeats, gdf_calls311_2017, how="inner",
                             op='intersects')

# Filter dataframes per type of call
alley_2017 = id_calls311_2017[
    id_calls311_2017['Type of Service Request'] == 'Alley Light Out']
all_lights_2017 = id_calls311_2017[
    id_calls311_2017['Type of Service Request'] == 'Street Lights - All/Out']
one_light_2017 = id_calls311_2017[
    id_calls311_2017['Type of Service Request'] == 'Street Light Out']
def dfs(request):
    polys1 = GeoSeries([
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
    ])
    polys2 = GeoSeries([
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
    ])
    df1 = GeoDataFrame({"geometry": polys1, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": polys2, "df2": [3, 4, 5]})
    if request.param == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]
    if request.param == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"
    if request.param == "multi-index":
        i1 = ["a", "b", "c"]
        i2 = ["d", "e", "f"]
        df1 = df1.set_index([i1, i2])
        df2 = df2.set_index([i2, i1])
    if request.param == "named-multi-index":
        i1 = ["a", "b", "c"]
        i2 = ["d", "e", "f"]
        df1 = df1.set_index([i1, i2])
        df2 = df2.set_index([i2, i1])
        df1.index.names = ["df1_ix1", "df1_ix2"]
        df2.index.names = ["df2_ix1", "df2_ix2"]

    # Construct expected frames
    expected = {}
    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = (
        df2.copy()
        .iloc[[0, 1, 1, 2]]
        .reset_index()
        .rename(columns={"index": "index_right"})
    )
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["intersects"] = exp.drop("_merge", axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = df2.copy().reset_index().rename(columns={"index": "index_right"})
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 3, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["contains"] = exp.drop("_merge", axis=1).copy()

    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [3, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["within"] = exp.drop("_merge", axis=1).copy()

    return [request.param, df1, df2, expected]
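# The "_merge" columns above are just integer join keys: rows of df1 and df2
# that should pair up in the expected sjoin result share a key, and the outer
# merge materialises both the pairs and the unmatched rows (key 3 has no match
# on the other side, so those rows surface with NaNs, mirroring left/right
# joins). A tiny illustration:
if __name__ == "__main__":
    a = pd.DataFrame({"L": ["x", "y"], "_merge": [0, 1]})
    b = pd.DataFrame({"R": ["u", "v", "w"], "_merge": [0, 0, 3]})
    print(pd.merge(a, b, on="_merge", how="outer"))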
def HAST_dataPrep():
    gsTERRAINID = 'TERRAINID'
    gsWBID = 'WBID'

    # The input files and full paths will be read from the GUI.
    # Fetch file names from the XML settings.
    tree = ET.parse('settings.xml')
    sLUTPath = tree.find('.//LUTPath').text
    sGeoJsonFileName = tree.find('.//CBGeoJson').text
    sInputFileName = tree.find('.//InputFileName').text
    sTerrainIDFileName = tree.find('.//TerrainIDFName').text
    sSurfaceRoughnessFileName = tree.find('.//SurfaceRoughNess').text
    sDfltWbIdFileName = tree.find('.//WbIdFName').text
    sInputPath = tree.find('.//InputPath').text
    sSateID = tree.find('.//stateID').text

    sCBFldName = 'CENSUSBLOCK'
    sSBTFldName = 'SBTNAME'
    sTractIDFieldName = 'TRACT_ID_GEN'
    sPreProcessedDataFileName = (
        os.path.splitext(sInputFileName)[0] + "_pre_processed.csv")

    # Logging setup
    LogFileName = tree.find('.//LogFileName').text
    Level = tree.find('.//Level').text
    if Level == 'INFO':
        logging.basicConfig(filename=LogFileName, filemode='w',
                            level=logging.INFO)
    else:
        logging.basicConfig(filename=LogFileName, filemode='w',
                            level=logging.DEBUG)
    logging.info(str(datetime.datetime.now()) + ' Pre-Processing Begin... ')
    # utility.popupmsg(sPreProcessedDataFileName)

    # Fetch field names of the selected input
    for item in tree.find('.//PreProcessingFields'):
        logging.debug(str(datetime.datetime.now())
                      + ' PreProcessingFields: ' + item.tag)
        if item.tag == 'SOID':
            sSoccIdFieldName = item.attrib['inputFieldName']
        elif item.tag == 'WBID':
            sWbIDFieldName = item.attrib['inputFieldName']
        elif item.tag == 'TerrainID':
            sTerrainIDFldName = item.attrib['inputFieldName']
        elif item.tag == 'HUSBT':
            sHuSBTFldName = item.attrib['inputFieldName']
        elif item.tag == 'Longitude':
            sLongitude = item.attrib['inputFieldName']
        elif item.tag == 'Latitude':
            sLatitude = item.attrib['inputFieldName']

    if sTerrainIDFldName == '':
        sTerrainIDFldName = gsTERRAINID
    if sWbIDFieldName == '':
        sWbIDFieldName = gsWBID

    # Read the input UDF dataset named in the XML
    df_Input = pd.read_csv(sInputFileName, delimiter=None,
                           encoding="ISO-8859-1")
    df_Input.columns = [x.upper() for x in df_Input.columns]

    # Check if TerrainID is part of the input data (df_Input). If not, perform
    # the following joins. If the user has provided the TerrainID, check
    # whether WbID is provided.
#print("Validating input data set...") logging.info( str(datetime.datetime.now()) + " Validating inputs for required fields...") logging.debug(str(datetime.datetime.now()) + " Validating started...") if sTerrainIDFldName in df_Input.columns: #print("Yes" , sLUTPath + sTerrainIDFileName) logging.debug( str(datetime.datetime.now()) + " Inside checking TerrainID" + str(sLUTPath) + str(sTerrainIDFileName)) #Check the data if the entries are valid df_TerrainID = pd.read_csv(sLUTPath + sTerrainIDFileName, delimiter=None) logging.debug(str(datetime.datetime.now()) + ' Check 2: df_TerrainID ') #print(2) df_TerrainID.columns = [x.upper() for x in df_TerrainID.columns] logging.debug( str(datetime.datetime.now()) + ' Check 3: f_TerrainID.columns ' + str(df_TerrainID.columns)) #print(3) df_ValidateTr = pd.merge(df_Input.astype(str), df_TerrainID.astype(str), left_on=sTerrainIDFldName, right_on=gsTERRAINID, how="inner", suffixes=('_left', '_right')) logging.debug( str(datetime.datetime.now()) + ' Check 4: df_ValidateTr ') #print(4) numOfRowsInput = len(df_Input.index) numOfRowsmatched = len(df_ValidateTr.index) #print(str(numOfRowsmatched)) logging.debug( str(datetime.datetime.now()) + ' Number of Rows Matched: ' + str(numOfRowsmatched)) if numOfRowsmatched != numOfRowsInput: utility.popupmsg( "Please check TerrainIDs so that they match with the " + sSurfaceRoughnessFileName + " looktup table.") logging.info( str(datetime.datetime.now()) + " Please check TerrainIDs so that they match with the " + sSurfaceRoughnessFileName + " looktup table.") #print(df_TerrainID) sys.exit() elif sWbIDFieldName in df_Input.columns: #print("Checking field WbId") #print("Yes") logging.info( str(datetime.datetime.now()) + ' All TerrainIDs match! ') logging.debug( str(datetime.datetime.now()) + ' Checking field WbId: ' + str(sWbIDFieldName)) #Check the data if the entries are valid df_WbID = pd.read_csv(sLUTPath + sDfltWbIdFileName, delimiter=None) logging.debug(str(datetime.datetime.now()) + ' Check 5: df_WbID ') #print(5) df_WbID.columns = [x.upper() for x in df_WbID.columns] logging.debug( str(datetime.datetime.now()) + ' Check 6: df_WbID.columns ' + str(df_WbID.columns)) #print(6) df_ValidateWb = pd.merge(df_Input.astype(str), df_WbID.astype(str), left_on=sWbIDFieldName, right_on=gsWBID, how="inner", suffixes=('_left', '_right')) logging.debug( str(datetime.datetime.now()) + ' Check 7: df_ValidateWb ') #print(7) numOfRowsInput = len(df_Input.index) numOfRowsmatched = len(df_ValidateWb.index) logging.info( str(datetime.datetime.now()) + ' Num of Rows Matched: ' + str(numOfRowsmatched)) #print(str(numOfRowsmatched)) if numOfRowsmatched != numOfRowsInput: logging.debug( str(datetime.datetime.now()) + " Please check WbIds so that they match with the " + sDfltWbIdFileName[1:1 + len(sDfltWbIdFileName)] + " looktup table.") popupmsg("Please check WbIds so that they match with the " + sDfltWbIdFileName[1:1 + len(sDfltWbIdFileName)] + " looktup table.") sys.exit() logging.info( str(datetime.datetime.now()) + " TerrainIds and WbIds match. Please proceed to perform the analyses." ) #utility.popupmsg("TerrainIds and WbIds match. 
            # sys.exit()

    # else:
    print("Pre-Processing the input to assign HU attributes...")
    logging.info(str(datetime.datetime.now())
                 + ' Pre-Processing input dataset to add the HU attributes...')
    # print("No")

    # CB data
    df_CB = gpd.read_file(sLUTPath + sGeoJsonFileName)
    # setting to lower for the spatial join
    df_CB.columns = [x.lower() for x in df_CB.columns]
    logging.debug(str(datetime.datetime.now())
                  + ' Check 8: df_CB.columns ' + str(df_CB.columns))

    # SR LUT
    df_SuRCB = pd.read_csv(sLUTPath + sSurfaceRoughnessFileName,
                           delimiter=None)
    df_SuRCB.columns = [x.upper() for x in df_SuRCB.columns]
    logging.debug(str(datetime.datetime.now())
                  + ' Check 9: df_SuRCB.columns ' + str(df_SuRCB.columns))

    # WbId LUT
    df_WbId = pd.read_csv(sLUTPath + sDfltWbIdFileName, delimiter=None)
    df_WbId.columns = [x.upper() for x in df_WbId.columns]
    logging.debug(str(datetime.datetime.now())
                  + ' Check 10: df_WbId.columns ' + str(df_WbId.columns))

    # Latitude and longitude validation for the future
    # df_CheckLatLong = df_Input.apply(lambda row: (df_input['Longitude'].astype(str) == '' | df_input['Latitude'].astype(str) == ''), axis=1)

    # Longitude/Latitude field names now referenced from settings.xml
    geometry = [Point(xy) for xy in zip(df_Input[sLongitude],
                                        df_Input[sLatitude])]
    crs = {'init': 'epsg:4326'}
    # logging.debug(str(datetime.datetime.now()) + ' Check 11: geometry ' + str(geometry))

    # Join between structure-level data and Census block to fetch the CBID.
    # Check if any geometries are NULL.
    df_Input = GeoDataFrame(df_Input, geometry=geometry, crs=crs)
    logging.debug(str(datetime.datetime.now()) + ' Check 12: df_Input ')

    # Join the structure-level input points to hzCensusblock_TIGER to fetch
    # the CBID
    if sCBFldName in df_Input.columns:
        df_Input.rename(columns={sCBFldName: sCBFldName + '_OLD'},
                        inplace=True)
    points_CBId = gpd.sjoin(df_Input, df_CB, how="inner", op='intersects')
    points_CBId.columns = [x.upper() for x in points_CBId.columns]
    logging.debug(str(datetime.datetime.now())
                  + ' Check 13: points_CBId.columns '
                  + str(points_CBId.columns))

    # Fetch Surface Roughness from huTerrainB in the respective state
    if sSateID != 'VI':
        if gsTERRAINID not in df_Input.columns:
            points_CBSR = pd.merge(points_CBId.astype(str),
                                   df_SuRCB.astype(str),
                                   on=sCBFldName, how="inner",
                                   suffixes=('_left', '_right'))
            points_CBSR.columns = [x.upper() for x in points_CBSR.columns]
            logging.debug(str(datetime.datetime.now())
                          + ' Check 14: points_CBSR.columns '
                          + str(points_CBSR.columns))
            # TerrainID assignment from Surface Roughness values
            # if sTerrainIDFldName == '':
            #     sTerrainIDFldName = gsTERRAINID
            points_CBSR[sTerrainIDFldName] = points_CBSR.apply(
                lambda row: get_terrainId(row), axis=1)
            logging.debug(str(datetime.datetime.now())
                          + ' Check 15: points_CBSR[sTerrainIDFldName] ')
        else:
            points_CBSR = points_CBId
    else:
        # if sTerrainIDFldName == '':
        #     sTerrainIDFldName = gsTERRAINID
        points_CBSR = points_CBId
        # df_Input.rename(columns={sCBFldName + '_OLD': sCBFldName}, inplace=True)

    if gsTERRAINID not in points_CBSR.columns:
        points_CBSR[sTerrainIDFldName] = 1

    # Assign the WbID on the basis of the HUSBT in the input
    if 'WBID' in points_CBSR.columns:
        points_CBSR = points_CBSR.drop(['WBID'], axis=1)
    points_CBSR = pd.merge(points_CBSR.astype(str), df_WbId.astype(str),
                           left_on=sHuSBTFldName, right_on=sSBTFldName,
                           how="inner", suffixes=('_left', '_right'))
    logging.debug(str(datetime.datetime.now()) + ' Check 16: points_CBSR ')
    logging.info(str(datetime.datetime.now())
                 + ' TERRAINID and WBID assigned... ')
    # TRACTID added - avoid fetching for each record
    points_CBSR[sTractIDFieldName] = points_CBSR[sCBFldName].str[:11]

    # Delete columns that are no longer needed
    for col in ['', 'GEOMETRY', 'INDEX_RIGHT', 'OBJECTID_RIGHT', sSBTFldName,
                'SURFACEROUGHNESS', 'SRINDEX', 'CHARDESCRIPTION', 'CASEID',
                'NWINDCHAR', 'CENSUSBLOCK_OLD', 'OBJECTID']:
        if col in points_CBSR.columns:
            points_CBSR = points_CBSR.drop([col], axis=1)
    points_CBSR = points_CBSR.loc[:, ~points_CBSR.columns.str.contains('^UNNAMED')]
    points_CBSR = points_CBSR[points_CBSR.columns.dropna()]
    # print(points_CBSR)

    # XML assignments if TERRAINID, WBID not in base data
    item = tree.getroot().find('.//TerrainID')
    item.attrib['inputFieldName'] = sTerrainIDFldName  # gsTERRAINID
    item = tree.getroot().find('.//WBID')
    item.attrib['inputFieldName'] = sWbIDFieldName  # gsWBID
    item = tree.getroot().find('.//CensusBlockID')
    item.attrib['inputFieldName'] = sCBFldName
    tree.getroot().find('.//PreProcessedDataFileName').text = sPreProcessedDataFileName
    tree.write('settings.xml')

    # Make sure all column names are caps
    points_CBId.columns = [x.upper() for x in points_CBId.columns]
    points_CBSR.to_csv(sPreProcessedDataFileName)
    logging.info(str(datetime.datetime.now()) + ' Pre-Processing Complete...')
    print("Pre-Processing Complete...")
def df():
    return GeoDataFrame({
        "geometry": [Point(x, x) for x in range(3)],
        "value1": np.arange(3, dtype="int64"),
        "value2": np.array([1, 2, 1], dtype="int64"),
    })
def geomcol_gdf():
    """Create a mixed Polygon and LineString GeoDataFrame for testing."""
    # A shapely Point cannot be built from five coordinate pairs; per the
    # docstring, the intended second geometry is a LineString.
    line = LineString([(2, 3), (11, 4), (7, 2), (8, 9), (1, 13)])
    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
    coll = GeometryCollection([line, poly])
    return GeoDataFrame([1], geometry=[coll], crs="EPSG:4326")