Example #1
def sliver_line():
    """Create a line that will create a point when clipped."""
    linea = LineString([(10, 5), (13, 5), (15, 5)])
    lineb = LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])
    return GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:4326")
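The docstring above is the whole point of the fixture: clipping the first line against a polygon leaves only a point. A minimal sketch of that behaviour (not part of the test suite; assumes geopandas >= 0.7 for geopandas.clip):

import geopandas
from geopandas import GeoDataFrame
from shapely.geometry import LineString, Polygon

lines = GeoDataFrame(
    [1, 2],
    geometry=[LineString([(10, 5), (13, 5), (15, 5)]),
              LineString([(1, 1), (2, 2), (3, 2), (5, 3), (12, 1)])],
    crs="EPSG:4326")
# clip mask: the same 0-10 square used by the single_rectangle_gdf fixture further down
mask = GeoDataFrame(
    geometry=[Polygon([(0, 0), (0, 10), (10, 10), (10, 0)])], crs="EPSG:4326")
clipped = geopandas.clip(lines, mask)
print(clipped.geom_type.tolist())  # the sliver line survives only as the point (10, 5)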
Example #2
 def test_suffixes(self, how: str, lsuffix: str, rsuffix: str,
                   expected_cols):
     left = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
     right = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
     joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
     assert set(joined.columns) == expected_cols | set(("geometry", ))
Example #3
class TestSpatialJoin:
    @pytest.mark.parametrize(
        "how, lsuffix, rsuffix, expected_cols",
        [
            ("left", "left", "right", {"col_left", "col_right", "index_right"
                                       }),
            ("inner", "left", "right",
             {"col_left", "col_right", "index_right"}),
            ("right", "left", "right", {"col_left", "col_right", "index_left"
                                        }),
            ("left", "lft", "rgt", {"col_lft", "col_rgt", "index_rgt"}),
            ("inner", "lft", "rgt", {"col_lft", "col_rgt", "index_rgt"}),
            ("right", "lft", "rgt", {"col_lft", "col_rgt", "index_lft"}),
        ],
    )
    def test_suffixes(self, how: str, lsuffix: str, rsuffix: str,
                      expected_cols):
        left = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
        right = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
        joined = sjoin(left, right, how=how, lsuffix=lsuffix, rsuffix=rsuffix)
        assert set(joined.columns) == expected_cols | set(("geometry", ))

    @pytest.mark.parametrize("dfs", ["default-index", "string-index"],
                             indirect=True)
    def test_crs_mismatch(self, dfs):
        index, df1, df2, expected = dfs
        df1.crs = "epsg:4326"
        with pytest.warns(UserWarning, match="CRS mismatch between the CRS"):
            sjoin(df1, df2)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
    def test_deprecated_op_param(self, dfs, op):
        _, df1, df2, _ = dfs
        with pytest.warns(FutureWarning, match="`op` parameter is deprecated"):
            sjoin(df1, df2, op=op)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    @pytest.mark.parametrize("op", ["intersects", "contains", "within"])
    @pytest.mark.parametrize("predicate", ["contains", "within"])
    def test_deprecated_op_param_nondefault_predicate(self, dfs, op,
                                                      predicate):
        _, df1, df2, _ = dfs
        match = "use the `predicate` parameter instead"
        if op != predicate:
            warntype = UserWarning
            match = ("`predicate` will be overriden by the value of `op`" +
                     r"(.|\s)*" + match)
        else:
            warntype = FutureWarning
        with pytest.warns(warntype, match=match):
            sjoin(df1, df2, predicate=predicate, op=op)

    @pytest.mark.parametrize("dfs", ["default-index"], indirect=True)
    def test_unknown_kwargs(self, dfs):
        _, df1, df2, _ = dfs
        with pytest.raises(
                TypeError,
                match=
                r"sjoin\(\) got an unexpected keyword argument 'extra_param'",
        ):
            sjoin(df1, df2, extra_param="test")

    @pytest.mark.filterwarnings("ignore:The `op` parameter:FutureWarning")
    @pytest.mark.parametrize(
        "dfs",
        [
            "default-index",
            "string-index",
            "named-index",
            "multi-index",
            "named-multi-index",
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    @pytest.mark.parametrize("predicate_kw", ["predicate", "op"])
    def test_inner(self, predicate, predicate_kw, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how="inner", **{predicate_kw: predicate})

        exp = expected[predicate].dropna().copy()
        exp = exp.drop("geometry_y",
                       axis=1).rename(columns={"geometry_x": "geometry"})
        exp[["df1", "df2"]] = exp[["df1", "df2"]].astype("int64")
        if index == "default-index":
            exp[["index_left",
                 "index_right"]] = exp[["index_left",
                                        "index_right"]].astype("int64")
        if index == "named-index":
            exp[["df1_ix", "df2_ix"]] = exp[["df1_ix",
                                             "df2_ix"]].astype("int64")
            exp = exp.set_index("df1_ix").rename(
                columns={"df2_ix": "index_right"})
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
        if index == "multi-index":
            exp = exp.set_index(["level_0_x", "level_1_x"
                                 ]).rename(columns={
                                     "level_0_y": "index_right0",
                                     "level_1_y": "index_right1"
                                 })
            exp.index.names = df1.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df1_ix1",
                                 "df1_ix2"]).rename(columns={
                                     "df2_ix1": "index_right0",
                                     "df2_ix2": "index_right1"
                                 })
            exp.index.names = df1.index.names

        assert_frame_equal(res, exp)

    @pytest.mark.parametrize(
        "dfs",
        [
            "default-index",
            "string-index",
            "named-index",
            "multi-index",
            "named-multi-index",
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    def test_left(self, predicate, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how="left", predicate=predicate)

        if index in ["default-index", "string-index"]:
            exp = expected[predicate].dropna(subset=["index_left"]).copy()
        elif index == "named-index":
            exp = expected[predicate].dropna(subset=["df1_ix"]).copy()
        elif index == "multi-index":
            exp = expected[predicate].dropna(subset=["level_0_x"]).copy()
        elif index == "named-multi-index":
            exp = expected[predicate].dropna(subset=["df1_ix1"]).copy()
        exp = exp.drop("geometry_y",
                       axis=1).rename(columns={"geometry_x": "geometry"})
        exp["df1"] = exp["df1"].astype("int64")
        if index == "default-index":
            exp["index_left"] = exp["index_left"].astype("int64")
            # TODO: in result the dtype is object
            res["index_right"] = res["index_right"].astype(float)
        elif index == "named-index":
            exp[["df1_ix"]] = exp[["df1_ix"]].astype("int64")
            exp = exp.set_index("df1_ix").rename(
                columns={"df2_ix": "index_right"})
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_left")
            exp.index.name = None
        if index == "multi-index":
            exp = exp.set_index(["level_0_x", "level_1_x"
                                 ]).rename(columns={
                                     "level_0_y": "index_right0",
                                     "level_1_y": "index_right1"
                                 })
            exp.index.names = df1.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df1_ix1",
                                 "df1_ix2"]).rename(columns={
                                     "df2_ix1": "index_right0",
                                     "df2_ix2": "index_right1"
                                 })
            exp.index.names = df1.index.names

        assert_frame_equal(res, exp)

    def test_empty_join(self):
        # Check joins resulting in empty gdfs.
        polygons = geopandas.GeoDataFrame({
            "col2": [1, 2],
            "geometry": [
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
            ],
        })
        not_in = geopandas.GeoDataFrame({
            "col1": [1],
            "geometry": [Point(-0.5, 0.5)]
        })
        empty = sjoin(not_in, polygons, how="left", predicate="intersects")
        assert empty.index_right.isnull().all()
        empty = sjoin(not_in, polygons, how="right", predicate="intersects")
        assert empty.index_left.isnull().all()
        empty = sjoin(not_in, polygons, how="inner", predicate="intersects")
        assert empty.empty

    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    @pytest.mark.parametrize(
        "empty",
        [
            GeoDataFrame(geometry=[GeometryCollection(),
                                   GeometryCollection()]),
            GeoDataFrame(geometry=GeoSeries()),
        ],
    )
    def test_join_with_empty(self, predicate, empty):
        # Check joins with empty geometry columns/dataframes.
        polygons = geopandas.GeoDataFrame({
            "col2": [1, 2],
            "geometry": [
                Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
            ],
        })
        result = sjoin(empty, polygons, how="left", predicate=predicate)
        assert result.index_right.isnull().all()
        result = sjoin(empty, polygons, how="right", predicate=predicate)
        assert result.index_left.isnull().all()
        result = sjoin(empty, polygons, how="inner", predicate=predicate)
        assert result.empty

    @pytest.mark.parametrize("dfs", ["default-index", "string-index"],
                             indirect=True)
    def test_sjoin_invalid_args(self, dfs):
        index, df1, df2, expected = dfs

        with pytest.raises(ValueError,
                           match="'left_df' should be GeoDataFrame"):
            sjoin(df1.geometry, df2)

        with pytest.raises(ValueError,
                           match="'right_df' should be GeoDataFrame"):
            sjoin(df1, df2.geometry)

    @pytest.mark.parametrize(
        "dfs",
        [
            "default-index",
            "string-index",
            "named-index",
            "multi-index",
            "named-multi-index",
        ],
        indirect=True,
    )
    @pytest.mark.parametrize("predicate", ["intersects", "contains", "within"])
    def test_right(self, predicate, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how="right", predicate=predicate)

        if index in ["default-index", "string-index"]:
            exp = expected[predicate].dropna(subset=["index_right"]).copy()
        elif index == "named-index":
            exp = expected[predicate].dropna(subset=["df2_ix"]).copy()
        elif index == "multi-index":
            exp = expected[predicate].dropna(subset=["level_0_y"]).copy()
        elif index == "named-multi-index":
            exp = expected[predicate].dropna(subset=["df2_ix1"]).copy()
        exp = exp.drop("geometry_x",
                       axis=1).rename(columns={"geometry_y": "geometry"})
        exp["df2"] = exp["df2"].astype("int64")
        if index == "default-index":
            exp["index_right"] = exp["index_right"].astype("int64")
            res["index_left"] = res["index_left"].astype(float)
        elif index == "named-index":
            exp[["df2_ix"]] = exp[["df2_ix"]].astype("int64")
            exp = exp.set_index("df2_ix").rename(
                columns={"df1_ix": "index_left"})
        if index in ["default-index", "string-index"]:
            exp = exp.set_index("index_right")
            exp = exp.reindex(columns=res.columns)
            exp.index.name = None
        if index == "multi-index":
            exp = exp.set_index(["level_0_y", "level_1_y"
                                 ]).rename(columns={
                                     "level_0_x": "index_left0",
                                     "level_1_x": "index_left1"
                                 })
            exp.index.names = df2.index.names
        if index == "named-multi-index":
            exp = exp.set_index(["df2_ix1",
                                 "df2_ix2"]).rename(columns={
                                     "df1_ix1": "index_left0",
                                     "df1_ix2": "index_left1"
                                 })
            exp.index.names = df2.index.names

        # GH 1364 fix of behaviour was done in pandas 1.1.0
        if predicate == "within" and str(
                pd.__version__) >= LooseVersion("1.1.0"):
            exp = exp.sort_index()

        assert_frame_equal(res, exp, check_index_type=False)
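The parametrized suffix cases above can be reproduced outside pytest. A standalone sketch, assuming a geopandas version in which the suffixes are also applied to the joined index column (as the expected column sets above imply):

from geopandas import GeoDataFrame, sjoin
from shapely.geometry import Point

left = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
right = GeoDataFrame({"col": [1], "geometry": [Point(0, 0)]})
joined = sjoin(left, right, how="inner", lsuffix="lft", rsuffix="rgt")
print(sorted(joined.columns))  # ['col_lft', 'col_rgt', 'geometry', 'index_rgt']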
Example #4
def rd_sql(server,
           database,
           table=None,
           col_names=None,
           where_col=None,
           where_val=None,
           where_op='AND',
           geo_col=False,
           from_date=None,
           to_date=None,
           date_col=None,
           rename_cols=None,
           stmt=None,
           export_path=None):
    """
    Function to import data from an MSSQL database. Requires the pymssql package.

    Parameters
    ----------
    server : str
        The server name. e.g.: 'SQL2012PROD03'
    database : str
        The specific database within the server. e.g.: 'LowFlows'
    table : str
        The specific table within the database. e.g.: 'LowFlowSiteRestrictionDaily'
    col_names : list of str
        The column names that should be retrieved. e.g.: ['SiteID', 'BandNo', 'RecordNo']
    where_col : str or dict
        Must be either a string with an associated where_val list or a dictionary of strings to lists. e.g.: 'SnapshotType' or {'SnapshotType': ['value1', 'value2']}
    where_val : list
        The WHERE query values for the where_col. e.g. ['value1', 'value2']
    where_op : str
        If where_col is a dictionary with more than one key, the operator that connects the where statements must be either 'AND' or 'OR'.
    geo_col : bool
        Is there a geometry column in the table?
    from_date : str
        The start date in the form '2010-01-01'.
    to_date : str
        The end date in the form '2010-01-01'.
    date_col : str
        The SQL table column that contains the dates.
    rename_cols : list of str
        Optional replacement column names to assign to the output DataFrame.
    stmt : str
        Custom SQL statement to be directly passed to the database table. This will ignore all prior arguments except server and database.
    export_path : str
        The export path for a csv file if desired. If None, then nothing is exported.

    Returns
    -------
    DataFrame
    """

    ## Create where statements
    if stmt is None:

        if table is None:
            raise ValueError(
                'Must at least provide input for server, database, and table.')

        if col_names is not None:
            if isinstance(col_names, str):
                col_names = [col_names]
            col_names1 = [
                '[' + i.encode('ascii', 'ignore').decode() + ']'
                for i in col_names
            ]
            col_stmt = ', '.join(col_names1)
        else:
            col_stmt = '*'

        where_lst = sql_where_stmts(where_col=where_col,
                                    where_val=where_val,
                                    where_op=where_op,
                                    from_date=from_date,
                                    to_date=to_date,
                                    date_col=date_col)

        if isinstance(where_lst, list):
            stmt1 = "SELECT " + col_stmt + " FROM " + table + " where " + " and ".join(
                where_lst)
        else:
            stmt1 = "SELECT " + col_stmt + " FROM " + table

    elif isinstance(stmt, str):
        stmt1 = stmt

    else:
        raise ValueError('stmt must either be an SQL string or None.')

    ## Create connection to database and execute sql statement
    conn = connect(server, database=database)
    df = read_sql(stmt1, conn)
    conn.close()
    if rename_cols is not None:
        df.columns = rename_cols

    ## Read in geometry if required
    if geo_col and (stmt is None):
        # geometry can only be retrieved when the query was built above
        # (where_lst is defined in that branch)
        geometry, proj = rd_sql_geo(server=server,
                                    database=database,
                                    table=table,
                                    where_lst=where_lst)
        df = GeoDataFrame(df, geometry=geometry, crs=proj)

    ## save and return
    if export_path is not None:
        save_df(df, export_path, index=False)

    return df
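A hypothetical call sketch: the server, database, table and column names are taken from the docstring examples, while the WHERE value and date column are placeholders.

restrictions = rd_sql(server='SQL2012PROD03',
                      database='LowFlows',
                      table='LowFlowSiteRestrictionDaily',
                      col_names=['SiteID', 'BandNo', 'RecordNo'],
                      where_col={'SnapshotType': ['Live']},  # placeholder value
                      from_date='2018-01-01',
                      to_date='2018-12-31',
                      date_col='RestrictionDate')  # placeholder column name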
Example #5
     'geometry': {
         'type': 'LineString',
         'coordinates': [list(point) for point in path]
     }
 }
 print("Finding all disasters within 0.5 degrees of path")
 # loads in my file of ufo sightings, earthquakes, etc
 disasters_DF = GeoDataFrame.from_file(
     "./Assignments/A05/assets/api/data/vols_eq_ufo/vols_eq_ufo1.shp"
 ).to_crs(crs="epsg:4326")
 # here is a sample path from Wichita Falls to Amarillo that I'm loading as a GeoDataFrame
 path_df = GeoDataFrame.from_features([path_geojson], crs="epsg:4326")
 # I create a buffer from the path, where a buffer is a polygon 0.5 degrees in radius from the path
 buffered_path = (path_df.buffer(0.5)).to_crs(crs="epsg:4326")
 # create a dataframe from the buffered path
 buffered_path_df = GeoDataFrame(buffered_path,
                                 geometry=buffered_path.geometry)
 buffered_path_df[0] = None
 # perform a spatial join of the buffered path and the ufo sighting, earthquakes, etc dataframe.
 #   This will return all disasters within 0.5 degrees of the path
 join_results = GeoDataFrame(
     sjoin(disasters_DF, buffered_path_df, lsuffix="left"))
 # from here, dump the path, the buffered path, and the disasters within 0.5 degrees of the path to files
 print("Creating files")
 dump(
     path_geojson,
     open(
         './Assignments/A05/assets/api/data/shortest_paths/' +
         str(target_city_name) + '.geojson', 'w'))
 dump(
     loads(buffered_path.to_json()),
     open(
Example #6
def test_overlay_strict(how, keep_geom_type, geom_types):
    """
    Test of mixed geometry types on input and output. Expected results were
    initially generated using the following snippet.

        polys1 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df1 = gpd.GeoDataFrame({'col1': [1, 2], 'geometry': polys1})

        polys2 = gpd.GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                                Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
                                Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
        df2 = gpd.GeoDataFrame({'geometry': polys2, 'col2': [1, 2, 3]})

        lines1 = gpd.GeoSeries([LineString([(2, 0), (2, 4), (6, 4)]),
                                LineString([(0, 3), (6, 3)])])
        df3 = gpd.GeoDataFrame({'col3': [1, 2], 'geometry': lines1})
        points1 = gpd.GeoSeries([Point((2, 2)),
                                 Point((3, 3))])
        df4 = gpd.GeoDataFrame({'col4': [1, 2], 'geometry': points1})

        params=["union", "intersection", "difference", "symmetric_difference",
                "identity"]
        stricts = [True, False]

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df2, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('polys_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')

        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df3, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_line_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
        for p in params:
            for s in stricts:
                exp = gpd.overlay(df1, df4, how=p, keep_geom_type=s)
                if not exp.empty:
                    exp.to_file('poly_point_{p}_{s}.geojson'.format(p=p, s=s),
                                driver='GeoJSON')
    """
    polys1 = GeoSeries([
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ])
    df1 = GeoDataFrame({"col1": [1, 2], "geometry": polys1})

    polys2 = GeoSeries([
        Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
        Polygon([(-1, 1), (1, 1), (1, 3), (-1, 3)]),
        Polygon([(3, 3), (5, 3), (5, 5), (3, 5)]),
    ])
    df2 = GeoDataFrame({"geometry": polys2, "col2": [1, 2, 3]})
    lines1 = GeoSeries(
        [LineString([(2, 0), (2, 4), (6, 4)]),
         LineString([(0, 3), (6, 3)])])
    df3 = GeoDataFrame({"col3": [1, 2], "geometry": lines1})
    points1 = GeoSeries([Point((2, 2)), Point((3, 3))])
    df4 = GeoDataFrame({"col4": [1, 2], "geometry": points1})

    if geom_types == "polys":
        result = overlay(df1, df2, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_line":
        result = overlay(df1, df3, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "poly_point":
        result = overlay(df1, df4, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "line_poly":
        result = overlay(df3, df1, how=how, keep_geom_type=keep_geom_type)
    elif geom_types == "point_poly":
        result = overlay(df4, df1, how=how, keep_geom_type=keep_geom_type)

    try:
        expected = read_file(
            os.path.join(
                DATA,
                "strict",
                "{t}_{h}_{s}.geojson".format(t=geom_types,
                                             h=how,
                                             s=keep_geom_type),
            ))

        # the order depends on the spatial index used
        # so we sort the resultant dataframes to get a consistent order
        # independently of the spatial index implementation
        assert all(expected.columns == result.columns), "Column name mismatch"
        cols = list(set(result.columns) - set(["geometry"]))
        expected = expected.sort_values(cols, axis=0).reset_index(drop=True)
        result = result.sort_values(cols, axis=0).reset_index(drop=True)

        assert_geodataframe_equal(
            result,
            expected,
            normalize=True,
            check_column_type=False,
            check_less_precise=True,
            check_crs=False,
            check_dtype=False,
        )

    except DriverError:  # fiona >= 1.8
        assert result.empty

    except OSError:  # fiona < 1.8
        assert result.empty
Example #7
# LAND COVER NEW

# imports assumed by this snippet
import pandas as pd
from geopandas import GeoDataFrame
from shapely.geometry import box
from shapely.wkt import loads as load_wkt

FILE_NAME = "/Users/mmusleh/git/flash_demo/ignore/land_cover_data_test_15807.csv"
df = pd.read_csv(FILE_NAME)
# centroid of each EWKT geometry (the text after the "SRID=...;" prefix)
geometry = [load_wkt(geom.split(";")[1]).centroid for geom in df.st_asewkt]
df["lon"] = [g.x for g in geometry]
df["lat"] = [g.y for g in geometry]
values = [0 if v == 82 else (1 if v in [41, 42, 43] else 2) for v in df.val]

# 0: crops, 1: forest, 2: others
colors = ['#A9AE85' if v == 0 else ('#729565' if v == 1 else "#73A7CC") for v in values]
df['value'] = values
df["color"] = colors

gdf = GeoDataFrame(df, geometry=geometry)

minx, miny, maxx, maxy = gdf.total_bounds
midx = minx + (maxx - minx) / 2
midy = miny + (maxy - miny) / 2
q1 = box(minx, midy, midx, maxy)
q2 = box(midx, midy, maxx, maxy)
q3 = box(minx, miny, midx, midy)
q4 = box(midx, miny, maxx, midy)

# write the centroid and the WKT of each quadrant box to a file
for n, q in enumerate([q1, q2, q3, q4]):
    with open(f'q{n}', 'w+') as w:
        w.write(str(q.centroid))
        w.write(str(q))
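A possible follow-up, not in the original script: the same quadrant boxes could be used to tag every point with the quadrant that contains it.

# sketch: label each point with the index of the quadrant box containing it
for n, q in enumerate([q1, q2, q3, q4]):
    gdf.loc[gdf.within(q), "quadrant"] = n
print(gdf["quadrant"].value_counts())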
Example #8
 def test_no_geometries(self):
     # keeps GeoDataFrame class (no DataFrame)
     data = {"A": range(3), "B": np.arange(3.0)}
     df = GeoDataFrame(data)
     assert type(df) == GeoDataFrame
Example #9
    def test_empty(self):
        df = GeoDataFrame()
        assert type(df) == GeoDataFrame

        df = GeoDataFrame({'A': [], 'B': []}, geometry=[])
        assert type(df) == GeoDataFrame
Example #10
def multi_poly_gdf(donut_geometry):
    """Create a multi-polygon GeoDataFrame."""
    multi_poly = donut_geometry.unary_union
    out_df = GeoDataFrame(geometry=GeoSeries(multi_poly), crs="EPSG:4326")
    out_df["attr"] = ["pool"]
    return out_df
Example #11
 def test_empty_to_file(self):
     input_empty_df = GeoDataFrame()
     tempfilename = os.path.join(self.tempdir, 'test.shp')
     with pytest.raises(
         ValueError, match="Cannot write empty DataFrame to file."):
         input_empty_df.to_file(tempfilename)
Example #12
def two_line_gdf():
    """Create Line Objects For Testing"""
    linea = LineString([(1, 1), (2, 2), (3, 2), (5, 3)])
    lineb = LineString([(3, 4), (5, 7), (12, 2), (10, 5), (9, 7.5)])
    return GeoDataFrame([1, 2], geometry=[linea, lineb], crs="EPSG:4326")
Example #13
def single_rectangle_gdf():
    """Create a single rectangle for clipping."""
    poly_inters = Polygon([(0, 0), (0, 10), (10, 10), (10, 0), (0, 0)])
    gdf = GeoDataFrame([1], geometry=[poly_inters], crs="EPSG:4326")
    gdf["attr2"] = "site-boundary"
    return gdf
Example #14
def point_gdf():
    """Create a point GeoDataFrame."""
    pts = np.array([[2, 2], [3, 4], [9, 8], [-12, -15]])
    return GeoDataFrame([Point(xy) for xy in pts],
                        columns=["geometry"],
                        crs="EPSG:4326")
Example #15
def snap_points_to_near_line(lineShp,
                             pointShp,
                             epsg,
                             workGrass,
                             outPoints,
                             location='overlap_pnts',
                             api='grass',
                             movesShp=None):
    """
    Move points so that they overlap the nearest line.

    APIs available:
    * grass;
    * saga.
    """

    if api == 'grass':
        """
        Uses GRASS GIS to find near lines.
        """

        import os
        import numpy
        from geopandas import GeoDataFrame
        from gasp.pyt.oss import fprop
        from gasp.gt.wenv.grs import run_grass
        from gasp.gt.fmshp import shp_to_obj
        from gasp.gt.toshp import df_to_shp

        # Create GRASS GIS Location
        grassBase = run_grass(workGrass, location=location, srs=epsg)

        import grass.script as grass
        import grass.script.setup as gsetup
        gsetup.init(grassBase, workGrass, location, 'PERMANENT')

        # Import some GRASS GIS tools
        from gasp.gt.prox import grs_near as near
        from gasp.gt.tbl.attr import geomattr_to_db
        from gasp.gt.toshp.cff import shp_to_grs, grs_to_shp

        # Import data into GRASS GIS
        grsLines = shp_to_grs(lineShp, fprop(lineShp, 'fn', forceLower=True))

        grsPoint = shp_to_grs(pointShp, fprop(pointShp, 'fn', forceLower=True))

        # Get distance from points to near line
        near(grsPoint, grsLines, nearCatCol="tocat", nearDistCol="todistance")

        # Get coord of start/end points of polylines
        geomattr_to_db(grsLines, ['sta_pnt_x', 'sta_pnt_y'], 'start', 'line')
        geomattr_to_db(grsLines, ['end_pnt_x', 'end_pnt_y'], 'end', 'line')

        # Export data from GRASS GIS
        ogrPoint = grs_to_shp(grsPoint,
                              os.path.join(workGrass, grsPoint + '.shp'),
                              'point',
                              asMultiPart=True)

        ogrLine = grs_to_shp(grsLines,
                             os.path.join(workGrass, grsLines + '.shp'),
                             'point',
                             asMultiPart=True)

        # Points to GeoDataFrame
        pntDf = shp_to_obj(ogrPoint)
        # Lines to GeoDataFrame
        lnhDf = shp_to_obj(ogrLine)

        # Erase unnecessary fields
        pntDf.drop(["todistance"], axis=1, inplace=True)
        lnhDf.drop([
            c for c in lnhDf.columns.values
            if c not in ('geometry', 'cat', 'sta_pnt_x', 'sta_pnt_y',
                         'end_pnt_x', 'end_pnt_y')
        ], axis=1, inplace=True)

        # Join Geometries - Table with Point Geometry and Geometry of the
        # nearest line
        resultDf = pntDf.merge(lnhDf,
                               how='inner',
                               left_on='tocat',
                               right_on='cat')

        # Move points
        resultDf['geometry'] = [
            geoms[0].interpolate(geoms[0].project(geoms[1]))
            for geoms in zip(resultDf.geometry_y, resultDf.geometry_x)
        ]

        resultDf.drop(["geometry_x", "geometry_y", "cat_x", "cat_y"],
                      axis=1,
                      inplace=True)

        resultDf = GeoDataFrame(resultDf,
                                crs={"init": 'epsg:{}'.format(epsg)},
                                geometry="geometry")

        # Check if points are equal to any start/end points
        resultDf["x"] = resultDf.geometry.x
        resultDf["y"] = resultDf.geometry.y

        resultDf["check"] = numpy.where(
            ((resultDf["x"] == resultDf["sta_pnt_x"]) &
             (resultDf["y"] == resultDf["sta_pnt_y"])) |
            ((resultDf["x"] == resultDf["end_pnt_x"]) &
             (resultDf["y"] == resultDf["end_pnt_y"])), 1, 0)

        # To file
        df_to_shp(resultDf, outPoints)

    elif api == 'saga':
        """
        Snap Points to Lines using SAGA GIS
        """

        from gasp import exec_cmd

        cmd = ("saga_cmd shapes_points 19 -INPUT {pnt} -SNAP {lnh} "
               "-OUTPUT {out}{mv}").format(
                   pnt=pointShp,
                   lnh=lineShp,
                   out=outPoints,
                   mv="" if not movesShp else " -MOVES {}".format(movesShp))

        outcmd = exec_cmd(cmd)

    else:
        raise ValueError("{} is not available!".format(api))

    return outPoints
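A hypothetical call using the SAGA backend; the paths and EPSG code are placeholders, and the GRASS workspace argument is unused in that branch.

snap_points_to_near_line('/tmp/streets.shp', '/tmp/hydrants.shp', 3763,
                         None, '/tmp/hydrants_snapped.shp', api='saga')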
Example #16
    def setup_method(self):
        self.t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        self.t2 = Polygon([(0, 0), (1, 1), (0, 1)])
        self.t3 = Polygon([(2, 0), (3, 0), (3, 1)])
        self.tz = Polygon([(1, 1, 1), (2, 2, 2), (3, 3, 3)])
        self.tz1 = Polygon([(2, 2, 2), (1, 1, 1), (3, 3, 3)])
        self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.sqz = Polygon([(1, 1, 1), (2, 2, 2), (3, 3, 3), (4, 4, 4)])
        self.t4 = Polygon([(0, 0), (3, 0), (3, 3), (0, 2)])
        self.t5 = Polygon([(2, 0), (3, 0), (3, 3), (2, 3)])
        self.inner_sq = Polygon(
            [(0.25, 0.25), (0.75, 0.25), (0.75, 0.75), (0.25, 0.75)]
        )
        self.nested_squares = Polygon(self.sq.boundary, [self.inner_sq.boundary])
        self.p0 = Point(5, 5)
        self.p3d = Point(5, 5, 5)
        self.g0 = GeoSeries(
            [
                self.t1,
                self.t2,
                self.sq,
                self.inner_sq,
                self.nested_squares,
                self.p0,
                None,
            ]
        )
        self.g1 = GeoSeries([self.t1, self.sq])
        self.g2 = GeoSeries([self.sq, self.t1])
        self.g3 = GeoSeries([self.t1, self.t2])
        self.gz = GeoSeries([self.tz, self.sqz, self.tz1])
        self.g3.crs = "epsg:4326"
        self.g4 = GeoSeries([self.t2, self.t1])
        self.g4.crs = "epsg:4326"
        self.g_3d = GeoSeries([self.p0, self.p3d])
        self.na = GeoSeries([self.t1, self.t2, Polygon()])
        self.na_none = GeoSeries([self.t1, None])
        self.a1 = self.g1.copy()
        self.a1.index = ["A", "B"]
        self.a2 = self.g2.copy()
        self.a2.index = ["B", "C"]
        self.esb = Point(-73.9847, 40.7484, 30.3244)
        self.sol = Point(-74.0446, 40.6893, 31.2344)
        self.landmarks = GeoSeries([self.esb, self.sol], crs="epsg:4326")
        self.pt2d = Point(-73.9847, 40.7484)
        self.landmarks_mixed = GeoSeries([self.esb, self.sol, self.pt2d], crs=4326)
        self.l1 = LineString([(0, 0), (0, 1), (1, 1)])
        self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)])
        self.g5 = GeoSeries([self.l1, self.l2])
        self.g6 = GeoSeries([self.p0, self.t3])
        self.g7 = GeoSeries([self.sq, self.t4])
        self.g8 = GeoSeries([self.t1, self.t5])
        self.empty = GeoSeries([])
        self.all_none = GeoSeries([None, None])
        self.empty_poly = Polygon()
        self.g9 = GeoSeries(self.g0, index=range(1, 8))

        # Crossed lines
        self.l3 = LineString([(0, 0), (1, 1)])
        self.l4 = LineString([(0, 1), (1, 0)])
        self.crossed_lines = GeoSeries([self.l3, self.l4])

        # Placeholder for testing, will just drop in different geometries
        # when needed
        self.gdf1 = GeoDataFrame(
            {"geometry": self.g1, "col0": [1.0, 2.0], "col1": ["geo", "pandas"]}
        )
        self.gdf2 = GeoDataFrame(
            {"geometry": self.g1, "col3": [4, 5], "col4": ["rand", "string"]}
        )
        self.gdf3 = GeoDataFrame(
            {"geometry": self.g3, "col3": [4, 5], "col4": ["rand", "string"]}
        )
        self.gdfz = GeoDataFrame(
            {"geometry": self.gz, "col3": [4, 5, 6], "col4": ["rand", "string", "geo"]}
        )
Example #17
def _split_exposure_highlow(exp_sub, mode, High_Value_Area_gdf):
    """ Divide LitPop exposure into high-value and low-value exposure
    according to the area queried in OSM; re-assign all low values to high-value centroids.
    Parameters:
        exp_sub (exposure)
        mode (str)
        High_Value_Area_gdf (GeoDataFrame)
    Returns:
        exp_sub_high (exposure)
    """

    exp_sub_high = pd.DataFrame(columns=exp_sub.columns)
    exp_sub_low = pd.DataFrame(columns=exp_sub.columns)
    for i, pt in enumerate(exp_sub.geometry):
        if pt.within(High_Value_Area_gdf.loc[0]['geometry']):
            exp_sub_high = exp_sub_high.append(exp_sub.iloc[i])
        else:
            exp_sub_low = exp_sub_low.append(exp_sub.iloc[i])

    exp_sub_high = GeoDataFrame(exp_sub_high,
                                crs=exp_sub.crs,
                                geometry=exp_sub_high.geometry)
    exp_sub_low = GeoDataFrame(exp_sub_low,
                               crs=exp_sub.crs,
                               geometry=exp_sub_low.geometry)

    if mode == "nearest":
        # assign asset values of low-value points to nearest point in high-value df.
        pointsToAssign = exp_sub_high.geometry.unary_union
        exp_sub_high["addedValNN"] = 0
        for i in range(0, len(exp_sub_low)):
            nearest = exp_sub_high.geometry == nearest_points(exp_sub_low.iloc[i].geometry, \
                                                              pointsToAssign)[1] #point
            exp_sub_high.addedValNN.loc[nearest] = exp_sub_low.iloc[i].value
        exp_sub_high["combinedValNN"] = exp_sub_high[['addedValNN',
                                                      'value']].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old', 'combinedValNN': 'value'},\
                            inplace=True)

    elif mode == "even":
        # assign asset values of low-value points evenly to points in high-value df.
        exp_sub_high['addedValeven'] = sum(
            exp_sub_low.value) / len(exp_sub_high)
        exp_sub_high["combinedValeven"] = exp_sub_high[[
            'addedValeven', 'value'
        ]].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old', 'combinedValeven': 'value'},\
                            inplace=True)

    elif mode == "proportional":
        #assign asset values of low-value points proportionally to value of points in high-value df.
        exp_sub_high['addedValprop'] = 0
        for i in range(0, len(exp_sub_high)):
            asset_factor = exp_sub_high.iloc[i].value / sum(exp_sub_high.value)
            exp_sub_high.addedValprop.iloc[i] = asset_factor * sum(
                exp_sub_low.value)
        exp_sub_high["combinedValprop"] = exp_sub_high[[
            'addedValprop', 'value'
        ]].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old', 'combinedValprop': 'value'},\
                            inplace=True)

    else:
        print(
            "No proper re-assignment mode set. Please choose either nearest, even or proportional."
        )

    return exp_sub_high
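A toy usage sketch with made-up data (assumes the function above and its module imports are available, plus a pandas version that still provides DataFrame.append): three exposure points, one of which lies outside the high-value polygon, re-assigned with mode "even".

from geopandas import GeoDataFrame
from shapely.geometry import Point, Polygon

exp_sub = GeoDataFrame({"value": [10.0, 20.0, 30.0],
                        "geometry": [Point(0.5, 0.5), Point(1.5, 0.5), Point(5, 5)]},
                       crs="EPSG:4326")
high_area = GeoDataFrame({"geometry": [Polygon([(0, 0), (2, 0), (2, 1), (0, 1)])]},
                         crs="EPSG:4326")
exp_high = _split_exposure_highlow(exp_sub, "even", high_area)
# the 30.0 outside the polygon is spread evenly (15.0 each) over the two inside points
print(exp_high[["value_old", "value"]])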
Example #18
def make_traj(nodes, crs=CRS_METRIC, id=1, parent=None):
    nodes = [node.to_dict() for node in nodes]
    df = pd.DataFrame(nodes).set_index('t')
    geo_df = GeoDataFrame(df, crs=crs)
    return Trajectory(geo_df, id, parent=parent)
Example #19
 def get_gdf(self):
     crs = {'init': 'epsg:4326'}
     return GeoDataFrame(self.get_names(), crs=crs, geometry=self.get_geo())
Example #20
    p2 = Proj({'proj': 'aea', 'datum': 'WGS84', 'lon_0': '-96'})
    for shape in source:
        if shape['properties']['GEOID'] in pops['GEOID'].values:
            if shape['geometry']['type'] == 'MultiPolygon':
                continue
            subshape = shape['geometry']['coordinates'][0]
            # project the coordinates from p1 to the Albers equal-area projection (p2)
            p1_points = np.array(subshape)
            p2_points = transform(p1, p2, p1_points[:, 0], p1_points[:, 1])
            p2_points = np.array(p2_points).T

            # create polygon
            tract_polygons.append(Polygon(p2_points))
            geoids.append(shape['properties']['GEOID'])

tracts = GeoDataFrame(index=range(len(tract_polygons)))
# initialize data
tracts['region'] = tract_polygons
tracts['geoid'] = geoids
tracts['population'] = np.tile(np.nan, len(tracts))
tracts['area'] = np.tile(np.nan, len(tracts))
tracts.index = range(len(tracts))

# # trim tracts to nyc
# read in nyc boundary
nyc = nyc_boundary()

areas = []
print('Trimming tracts...')
for i in range(len(tracts)):
    if i % 100 == 0:
Example #21
def photos_location(buffer_shp,
                    epsg_in,
                    keyword=None,
                    epsg_out=4326,
                    onlySearchAreaContained=True,
                    keyToUse=None):
    """
    Search for data in Flickr and return an array with the same data

    buffer_shp could be a shapefile with a single buffer feature or a dict
    like:
    buffer_shp = {
        x: x_value,
        y: y_value,
        r: dist (in meters)
    }
    or a list or a tuple:
    buffer_shp = [x, y, radius]
    """

    import pandas
    from shapely.geometry import Polygon, Point
    from shapely.wkt import loads
    from geopandas import GeoDataFrame
    from glass.g.gp.prox.bfing.obj import xy_to_buffer
    from glass.g.prop.feat.bf import getBufferParam
    from glass.g.prj.obj import prj_ogrgeom

    x_center, y_center, dist = getBufferParam(buffer_shp, epsg_in, outSRS=4326)

    # Retrive data from Flickr
    photos = search_photos(lat=y_center,
                           lng=x_center,
                           radius=float(dist) / 1000,
                           keyword=keyword,
                           apiKey=keyToUse)

    try:
        if not photos:
            # Return noData
            return 0
    except Exception:
        # photos is a DataFrame here and its truth value is ambiguous; continue
        pass

    photos['longitude'] = photos['longitude'].astype(float)
    photos['latitude'] = photos['latitude'].astype(float)

    geoms = [Point(xy) for xy in zip(photos.longitude, photos.latitude)]
    gdata = GeoDataFrame(photos, crs='EPSG:4326', geometry=geoms)

    if onlySearchAreaContained:
        _x_center, _y_center, _dist = getBufferParam(buffer_shp,
                                                     epsg_in,
                                                     outSRS=3857)
        # Check if all retrieve points are within the search area
        search_area = xy_to_buffer(float(_x_center), float(_y_center),
                                   float(_dist))
        search_area = prj_ogrgeom(search_area, 3857, 4326)
        search_area = loads(search_area.ExportToWkt())

        gdata["tst_geom"] = gdata["geometry"].intersects(search_area)
        gdata = gdata[gdata["tst_geom"]]

        gdata.reset_index(drop=True, inplace=True)

    gdata["fid"] = gdata["id"]

    if "url_l" in gdata.columns.values:
        gdata["url"] = gdata["url_l"]
    else:
        gdata["url"] = 'None'

    gdata["description"] = gdata["_content"]

    # Drop irrelevant fields
    cols = list(gdata.columns.values)
    delCols = []

    keep_cols = ('geometry', 'description', 'fid', 'url', 'datetaken',
                 'dateupload', 'title')
    for col in cols:
        if col not in keep_cols:
            delCols.append(col)

    gdata.drop(delCols, axis=1, inplace=True)

    if epsg_out != 4326:
        gdata = gdata.to_crs('EPSG:{}'.format(str(epsg_out)))

    return gdata
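A hypothetical call using the dict form of buffer_shp described in the docstring; the coordinates, keyword and API key are placeholders.

photos_gdf = photos_location({"x": -8.6291, "y": 41.1579, "r": 500},
                             epsg_in=4326, keyword="bridge",
                             epsg_out=4326, keyToUse="MY_FLICKR_API_KEY")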
Example #22
import geopandas as gpd
import os
import pandas as pd

os.chdir("path to working directory")

# ### Create GeoDataFrames

from geopandas import GeoDataFrame
from shapely.geometry import Point, LineString

shipping_gdf = GeoDataFrame(
    shipping, geometry=[Point(xy) for xy in zip(shipping.Long, shipping.Lat)])
noShipping_gdf = GeoDataFrame(
    noShipping,
    geometry=[Point(xy) for xy in zip(noShipping.Long, noShipping.Lat)])
hq_gdf = GeoDataFrame(hq, geometry=[Point(xy) for xy in zip(hq.Long, hq.Lat)])
hq_gdf.head()

# ### Get adjusted lat/long coordinates
# https://stackoverflow.com/questions/30740046/calculate-distance-to-nearest-feature-with-geopandas


def nearest_poly(point, polygons):
    min_dist = polygons.distance(point).min()
    index = polygons.distance(point)[polygons.distance(point) ==
                                     min_dist].index[0]
    return polygons.iat[index, 0]


def getXY(pt):
Example #23
 def __geo_interface__(self):
     """Returns a GeoSeries as a python feature collection
     """
     from geopandas import GeoDataFrame
     return GeoDataFrame({'geometry': self}).__geo_interface__
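A quick illustration of what the property exposes, a GeoJSON-like feature collection mapping:

from geopandas import GeoSeries
from shapely.geometry import Point

fc = GeoSeries([Point(0, 0)]).__geo_interface__
print(fc["type"])  # 'FeatureCollection'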
Example #24
 def setup_method(self):
     self.N = 10
     self.points = GeoSeries(Point(i, i, i) for i in range(self.N))
     values = np.arange(self.N)
     self.df = GeoDataFrame({'geometry': self.points, 'values': values})
Example #25
def test_to_file_empty(tmpdir):
    input_empty_df = GeoDataFrame()
    tempfilename = os.path.join(str(tmpdir), "test.shp")
    with pytest.raises(ValueError,
                       match="Cannot write empty DataFrame to file."):
        input_empty_df.to_file(tempfilename)
Example #26
#Import Police Beats
ch_pbeats = gpd.read_file(os.path.join(data_dir, 'BeatsPolice.shp'))
#Project CRS WGS84 EPSG:4326
crs = {'init': 'epsg:4326'}
#Change Latitude and Longitude names
calls311_2017 = calls311_2017.rename(columns={
    'Latitude': 'Lat',
    'Longitude': 'Lon'
})
#Create a Point data with Lon and Lat
g_calls311_2017 = [
    Point(xy) for xy in zip(calls311_2017.Lon, calls311_2017.Lat)
]
#Create the GeoDataframe
gdf_calls311_2017 = GeoDataFrame(calls311_2017,
                                 crs=crs,
                                 geometry=g_calls311_2017)
#Assign each call its correspondent Beat
id_calls311_2017 = gpd.sjoin(ch_pbeats,
                             gdf_calls311_2017,
                             how="inner",
                             op='intersects')

#Filter Dataframes per type of call
alley_2017 = id_calls311_2017[id_calls311_2017['Type of Service Request'] ==
                              'Alley Light Out']
all_lights_2017 = id_calls311_2017[id_calls311_2017['Type of Service Request']
                                   == 'Street Lights - All/Out']
one_light_2017 = id_calls311_2017[id_calls311_2017['Type of Service Request']
                                  == 'Street Light Out']
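Once every call carries its beat attributes, per-beat counts follow from a plain groupby. A sketch; 'beat_num' is an assumed identifier column of BeatsPolice.shp, not confirmed by the snippet above.

calls_per_beat = id_calls311_2017.groupby('beat_num').size().sort_values(ascending=False)
print(calls_per_beat.head())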
Example #27
def dfs(request):
    polys1 = GeoSeries([
        Polygon([(0, 0), (5, 0), (5, 5), (0, 5)]),
        Polygon([(5, 5), (6, 5), (6, 6), (5, 6)]),
        Polygon([(6, 0), (9, 0), (9, 3), (6, 3)]),
    ])

    polys2 = GeoSeries([
        Polygon([(1, 1), (4, 1), (4, 4), (1, 4)]),
        Polygon([(4, 4), (7, 4), (7, 7), (4, 7)]),
        Polygon([(7, 7), (10, 7), (10, 10), (7, 10)]),
    ])

    df1 = GeoDataFrame({"geometry": polys1, "df1": [0, 1, 2]})
    df2 = GeoDataFrame({"geometry": polys2, "df2": [3, 4, 5]})

    if request.param == "string-index":
        df1.index = ["a", "b", "c"]
        df2.index = ["d", "e", "f"]

    if request.param == "named-index":
        df1.index.name = "df1_ix"
        df2.index.name = "df2_ix"

    if request.param == "multi-index":
        i1 = ["a", "b", "c"]
        i2 = ["d", "e", "f"]
        df1 = df1.set_index([i1, i2])
        df2 = df2.set_index([i2, i1])

    if request.param == "named-multi-index":
        i1 = ["a", "b", "c"]
        i2 = ["d", "e", "f"]
        df1 = df1.set_index([i1, i2])
        df2 = df2.set_index([i2, i1])
        df1.index.names = ["df1_ix1", "df1_ix2"]
        df2.index.names = ["df2_ix1", "df2_ix2"]

    # construct the expected frames
    expected = {}

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = (df2.copy().iloc[[
        0, 1, 1, 2
    ]].reset_index().rename(columns={"index": "index_right"}))
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 0, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["intersects"] = exp.drop("_merge", axis=1).copy()

    part1 = df1.copy().reset_index().rename(columns={"index": "index_left"})
    part2 = df2.copy().reset_index().rename(columns={"index": "index_right"})
    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [0, 3, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["contains"] = exp.drop("_merge", axis=1).copy()

    part1["_merge"] = [0, 1, 2]
    part2["_merge"] = [3, 1, 3]
    exp = pd.merge(part1, part2, on="_merge", how="outer")
    expected["within"] = exp.drop("_merge", axis=1).copy()

    return [request.param, df1, df2, expected]
Example #28
def HAST_dataPrep():

    gsTERRAINID = 'TERRAINID'
    gsWBID = 'WBID'

    #The input files and full path will be read from the GUI
    #Fetch file names from XML
    tree = ET.parse('settings.xml')
    sLUTPath = tree.find('.//LUTPath').text
    sGeoJsonFileName = tree.find('.//CBGeoJson').text
    sInputFileName = tree.find('.//InputFileName').text
    sTerrainIDFileName = tree.find('.//TerrainIDFName').text
    sSurfaceRoughnessFileName = tree.find('.//SurfaceRoughNess').text
    sDfltWbIdFileName = tree.find('.//WbIdFName').text
    sInputPath = tree.find('.//InputPath').text
    sSateID = tree.find('.//stateID').text
    sCBFldName = 'CENSUSBLOCK'
    sSBTFldName = 'SBTNAME'
    sTractIDFieldName = 'TRACT_ID_GEN'
    sPreProcessedDataFileName = os.path.splitext(
        sInputFileName)[0] + "_pre_processed.csv"

    #Logging setup
    LogFileName = tree.find('.//LogFileName').text
    Level = tree.find('.//Level').text
    if Level == 'INFO':
        logging.basicConfig(filename=LogFileName,
                            filemode='w',
                            level=logging.INFO)
    else:
        logging.basicConfig(filename=LogFileName,
                            filemode='w',
                            level=logging.DEBUG)

    logging.info(str(datetime.datetime.now()) + ' Pre-Processing Begin... ')

    #utility.popupmsg(sPreProcessedDataFileName)

    #Fetch field names of the selected input
    for item in tree.find('.//PreProcessingFields'):
        logging.debug(
            str(datetime.datetime.now()) + ' PreProcessingFields: ' + item.tag)
        if item.tag == 'SOID':
            sSoccIdFieldName = item.attrib['inputFieldName']
        elif item.tag == 'WBID':
            sWbIDFieldName = item.attrib['inputFieldName']
        elif item.tag == 'TerrainID':
            sTerrainIDFldName = item.attrib['inputFieldName']
        elif item.tag == 'HUSBT':
            sHuSBTFldName = item.attrib['inputFieldName']
        elif item.tag == 'Longitude':
            sLongitude = item.attrib['inputFieldName']
        elif item.tag == 'Latitude':
            sLatitude = item.attrib['inputFieldName']

    if sTerrainIDFldName == '':
        sTerrainIDFldName = gsTERRAINID

    if sWbIDFieldName == '':
        sWbIDFieldName = gsWBID
    #Read the input UDF dataset from the XML
    df_Input = pd.read_csv(sInputFileName,
                           delimiter=None,
                           encoding="ISO-8859-1")
    df_Input.columns = [x.upper() for x in df_Input.columns]

    #Check if TerrainID is a part of the input data (df_Input). If not then perform the following joins
    #If the user has provided the TerraID check if wbID is provided.
    #print("Validating input data set...")
    logging.info(
        str(datetime.datetime.now()) +
        " Validating inputs for required fields...")
    logging.debug(str(datetime.datetime.now()) + " Validating started...")
    if sTerrainIDFldName in df_Input.columns:
        #print("Yes" , sLUTPath + sTerrainIDFileName)
        logging.debug(
            str(datetime.datetime.now()) + " Inside checking TerrainID" +
            str(sLUTPath) + str(sTerrainIDFileName))

        #Check the data if the entries are valid
        df_TerrainID = pd.read_csv(sLUTPath + sTerrainIDFileName,
                                   delimiter=None)
        logging.debug(str(datetime.datetime.now()) + ' Check 2: df_TerrainID ')
        #print(2)

        df_TerrainID.columns = [x.upper() for x in df_TerrainID.columns]
        logging.debug(
            str(datetime.datetime.now()) + ' Check 3: f_TerrainID.columns ' +
            str(df_TerrainID.columns))
        #print(3)

        df_ValidateTr = pd.merge(df_Input.astype(str),
                                 df_TerrainID.astype(str),
                                 left_on=sTerrainIDFldName,
                                 right_on=gsTERRAINID,
                                 how="inner",
                                 suffixes=('_left', '_right'))
        logging.debug(
            str(datetime.datetime.now()) + ' Check 4: df_ValidateTr ')
        #print(4)

        numOfRowsInput = len(df_Input.index)
        numOfRowsmatched = len(df_ValidateTr.index)

        #print(str(numOfRowsmatched))
        logging.debug(
            str(datetime.datetime.now()) + ' Number of Rows Matched: ' +
            str(numOfRowsmatched))
        if numOfRowsmatched != numOfRowsInput:
            utility.popupmsg(
                "Please check TerrainIDs so that they match with the " +
                sSurfaceRoughnessFileName + " lookup table.")
            logging.info(
                str(datetime.datetime.now()) +
                " Please check TerrainIDs so that they match with the " +
                sSurfaceRoughnessFileName + " lookup table.")
            #print(df_TerrainID)
            sys.exit()
        elif sWbIDFieldName in df_Input.columns:
            #print("Checking field WbId")
            #print("Yes")
            logging.info(
                str(datetime.datetime.now()) + ' All TerrainIDs match! ')

            logging.debug(
                str(datetime.datetime.now()) + ' Checking field WbId: ' +
                str(sWbIDFieldName))

            #Check the data if the entries are valid
            df_WbID = pd.read_csv(sLUTPath + sDfltWbIdFileName, delimiter=None)
            logging.debug(str(datetime.datetime.now()) + ' Check 5: df_WbID ')
            #print(5)

            df_WbID.columns = [x.upper() for x in df_WbID.columns]
            logging.debug(
                str(datetime.datetime.now()) + ' Check 6: df_WbID.columns ' +
                str(df_WbID.columns))
            #print(6)

            df_ValidateWb = pd.merge(df_Input.astype(str),
                                     df_WbID.astype(str),
                                     left_on=sWbIDFieldName,
                                     right_on=gsWBID,
                                     how="inner",
                                     suffixes=('_left', '_right'))
            logging.debug(
                str(datetime.datetime.now()) + ' Check 7: df_ValidateWb ')
            #print(7)

            numOfRowsInput = len(df_Input.index)
            numOfRowsmatched = len(df_ValidateWb.index)
            logging.info(
                str(datetime.datetime.now()) + ' Num of Rows Matched: ' +
                str(numOfRowsmatched))
            #print(str(numOfRowsmatched))

            if numOfRowsmatched != numOfRowsInput:
                logging.debug(
                    str(datetime.datetime.now()) +
                    " Please check WbIds so that they match with the " +
                    sDfltWbIdFileName[1:1 + len(sDfltWbIdFileName)] +
                    " lookup table.")
                utility.popupmsg(
                    "Please check WbIds so that they match with the " +
                    sDfltWbIdFileName[1:1 + len(sDfltWbIdFileName)] +
                    " lookup table.")
                sys.exit()
        logging.info(
            str(datetime.datetime.now()) +
            " TerrainIds and WbIds match. Please proceed to perform the analyses."
        )
        #utility.popupmsg("TerrainIds and WbIds match. Please proceed to perform the analyses.")
        #sys.exit()
    #else:
    print("Pre-Processing the input to assign HU attributes...")
    logging.info(
        str(datetime.datetime.now()) +
        ' Pre-Processing input dataset to add the HU attributes...')
    #print("No")

    #CB data
    df_CB = gpd.read_file(sLUTPath + sGeoJsonFileName)
    df_CB.columns = [x.lower() for x in df_CB.columns
                     ]  #setting to lower for the spatial join
    logging.debug(
        str(datetime.datetime.now()) + ' Check 8: df_CB.columns ' +
        str(df_CB.columns))
    #print(8)

    #SR LUT
    df_SuRCB = pd.read_csv(sLUTPath + sSurfaceRoughnessFileName,
                           delimiter=None)
    df_SuRCB.columns = [x.upper() for x in df_SuRCB.columns]
    logging.debug(
        str(datetime.datetime.now()) + ' Check 9: df_SuRCB.columns ' +
        str(df_SuRCB.columns))
    #print(9)

    #WbId LUT
    df_WbId = pd.read_csv(sLUTPath + sDfltWbIdFileName, delimiter=None)
    df_WbId.columns = [x.upper() for x in df_WbId.columns]
    logging.debug(
        str(datetime.datetime.now()) + ' Check 10: df_WbId.columns ' +
        str(df_WbId.columns))
    #print(10)

    #Latitude and Longitude validation for the future
    #df_CheckLatLong = df_Input.apply(lambda row: (df_input['Longitude'].astype(str)=='' | df_input['Latitude'].astype(str)=='') , axis=1)

    #Longitude,Latitude field names now referenced from settings.xml
    geometry = [
        Point(xy) for xy in zip(df_Input[sLongitude], df_Input[sLatitude])
    ]
    crs = {'init': 'epsg:4326'}
    #logging.debug(str(datetime.datetime.now())+' Check 11: geometry ' + str(geometry))
    #print(11)

    #Join between structure level data and Census block to fetch the CBID
    #Check if any geometries are NULL
    df_Input = GeoDataFrame(df_Input, geometry=geometry, crs=crs)
    logging.debug(str(datetime.datetime.now()) + ' Check 12: df_Input ')
    #print(12)

    #Join the structure level input points to the hzCensusblock_TIGER to fetch the CBID
    if sCBFldName in df_Input.columns:
        df_Input.rename(columns={sCBFldName: sCBFldName + '_OLD'},
                        inplace=True)
    points_CBId = gpd.sjoin(df_Input, df_CB, how="inner", op='intersects')
    points_CBId.columns = [x.upper() for x in points_CBId.columns]
    logging.debug(
        str(datetime.datetime.now()) + ' Check 13: points_CBId.columns ' +
        str(points_CBId.columns))

    #Fetch Surface Roughness from huTerrainB in the respective state
    if sSateID != 'VI':
        if gsTERRAINID not in df_Input.columns:
            points_CBSR = pd.merge(points_CBId.astype(str),
                                   df_SuRCB.astype(str),
                                   on=sCBFldName,
                                   how="inner",
                                   suffixes=('_left', '_right'))
            points_CBSR.columns = [x.upper() for x in points_CBSR.columns]
            logging.debug(
                str(datetime.datetime.now()) +
                ' Check 14: points_CBSR.columns ' + str(points_CBSR.columns))
            #TerrainID assignment from Surface Roughness Values
            #if sTerrainIDFldName == '':
            #    sTerrainIDFldName = gsTERRAINID
            points_CBSR[sTerrainIDFldName] = points_CBSR.apply(
                lambda row: get_terrainId(row), axis=1)
            logging.debug(
                str(datetime.datetime.now()) +
                ' Check 15: points_CBSR[sTerrainIDFldName] ')
        else:
            points_CBSR = points_CBId
    else:
        #if sTerrainIDFldName == '':
        #    sTerrainIDFldName = gsTERRAINID
        points_CBSR = points_CBId
        #df_Input.rename(columns={sCBFldName + '_OLD':sCBFldName}, inplace=True)
        if gsTERRAINID not in points_CBSR.columns:
            points_CBSR[sTerrainIDFldName] = 1

    #Assign the WbID on the basis of the HUSBT in the input
    if 'WBID' in points_CBSR.columns:
        points_CBSR = points_CBSR.drop(['WBID'], axis=1)
    points_CBSR = pd.merge(points_CBSR.astype(str),
                           df_WbId.astype(str),
                           left_on=sHuSBTFldName,
                           right_on=sSBTFldName,
                           how="inner",
                           suffixes=('_left', '_right'))
    logging.debug(str(datetime.datetime.now()) + ' Check 16: points_CBSR ')
    logging.info(
        str(datetime.datetime.now()) + ' TERRAINID and WBID assigned... ')

    #TRACTID added - avoid fetching for each record
    points_CBSR[sTractIDFieldName] = points_CBSR[sCBFldName].str[:11]

    #del cols -- drop helper/duplicate columns if present
    for col in ['', 'GEOMETRY', 'INDEX_RIGHT', 'OBJECTID_RIGHT', sSBTFldName,
                'SURFACEROUGHNESS', 'SRINDEX', 'CHARDESCRIPTION', 'CASEID',
                'NWINDCHAR', 'CENSUSBLOCK_OLD', 'OBJECTID']:
        if col in points_CBSR.columns:
            points_CBSR = points_CBSR.drop([col], axis=1)
    points_CBSR = points_CBSR.loc[:, ~points_CBSR.columns.str.
                                  contains('^UNNAMED')]
    points_CBSR = points_CBSR[points_CBSR.columns.dropna()]

    #print(points_CBSR)
    #XML assignments if TERRAINID, WBID not in base data
    item = tree.getroot().find('.//TerrainID')
    item.attrib['inputFieldName'] = sTerrainIDFldName  #gsTERRAINID

    item = tree.getroot().find('.//WBID')
    item.attrib['inputFieldName'] = sWbIDFieldName  #gsWBID

    item = tree.getroot().find('.//CensusBlockID')
    item.attrib['inputFieldName'] = sCBFldName

    tree.getroot().find(
        './/PreProcessedDataFileName').text = sPreProcessedDataFileName

    tree.write('settings.xml')

    #Making sure all column names are caps
    points_CBId.columns = [x.upper() for x in points_CBId.columns]
    points_CBSR.to_csv(sPreProcessedDataFileName)

    logging.info(str(datetime.datetime.now()) + ' Pre-Processing Complete...')
    print("Pre-Processing Complete...")
Example #29
def df():
    return GeoDataFrame({
        "geometry": [Point(x, x) for x in range(3)],
        "value1": np.arange(3, dtype="int64"),
        "value2": np.array([1, 2, 1], dtype="int64"),
    })
Example #30
def geomcol_gdf():
    """Create a mixed Point and Polygon GeometryCollection for testing."""
    point = Point(2, 3)
    poly = Polygon([(3, 4), (5, 2), (12, 2), (10, 5), (9, 7.5)])
    coll = GeometryCollection([point, poly])
    return GeoDataFrame([1], geometry=[coll], crs="EPSG:4326")