Example #1
 def test_sjoin_values(self):
     # GH190
     self.polydf.index = [1, 3, 4, 5, 6]
     df = sjoin(self.pointdf, self.polydf, how='left')
     self.assertEquals(df.shape, (21,8))
     df = sjoin(self.polydf, self.pointdf, how='left')
     self.assertEquals(df.shape, (12,8))
Example #2
 def test_sjoin_values(self):
     # GH190
     self.polydf.index = [1, 3, 4, 5, 6]
     df = sjoin(self.pointdf, self.polydf, how='left')
     assert df.shape == (21, 8)
     df = sjoin(self.polydf, self.pointdf, how='left')
     assert df.shape == (12, 8)
Example #3
    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        assert df.shape == (21, 8)
        assert df.loc[1]['BoroName'] == 'Staten Island'

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        assert df.shape == (21, 8)
        assert np.isnan(df.loc[1]['Shape_Area'])
Example #4
 def test_sjoin_right(self):
     # the inverse of left
     df = sjoin(self.pointdf, self.polydf, how="right")
     df2 = sjoin(self.polydf, self.pointdf, how="left")
     assert df.shape == (12, 8)
     assert df.shape == df2.shape
     for i, row in df.iterrows():
         assert row.geometry.type == 'MultiPolygon'
     for i, row in df2.iterrows():
         assert row.geometry.type == 'MultiPolygon'
Example #5
 def test_sjoin_right(self):
     # the inverse of left
     df = sjoin(self.pointdf, self.polydf, how="right")
     df2 = sjoin(self.polydf, self.pointdf, how="left")
     self.assertEquals(df.shape, (12, 8))
     self.assertEquals(df.shape, df2.shape)
     for i, row in df.iterrows():
         self.assertEquals(row.geometry.type, 'MultiPolygon')
     for i, row in df2.iterrows():
         self.assertEquals(row.geometry.type, 'MultiPolygon')
Example #6
    def test_sjoin_op(self):
        # points within polygons
        df = sjoin(self.pointdf, self.polydf, how="left", op="within")
        self.assertEquals(df.shape, (21,8))
        self.assertEquals(df.ix[1]['BoroName'], 'Staten Island')

        # points contain polygons? never happens so we should have nulls
        df = sjoin(self.pointdf, self.polydf, how="left", op="contains")
        self.assertEquals(df.shape, (21, 8))
        self.assertTrue(np.isnan(df.ix[1]['Shape_Area']))
Example #7
    def test_sjoin_invalid_args(self, dfs):
        index, df1, df2, expected = dfs

        with pytest.raises(ValueError,
                           match="'left_df' should be GeoDataFrame"):
            res = sjoin(df1.geometry, df2)

        with pytest.raises(ValueError,
                           match="'right_df' should be GeoDataFrame"):
            res = sjoin(df1, df2.geometry)
Example #8
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how='inner')
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how='left')
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how='right')

        # Recent pandas development introduced a new way of handling merges;
        # this change altered the output when there are no overlapping geometries
        if LooseVersion(str(pd.__version__)) > LooseVersion('0.18.1'):
            right_idxs = pd.Series(range(0, 5), name='index_right',
                                   dtype='int64')
        else:
            right_idxs = pd.Series(name='index_right', dtype='int64')

        expected_inner_df = pd.concat(
            [self.pointdf.iloc[:0],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.drop('geometry', axis=1).iloc[:0]],
            axis=1)

        expected_inner = GeoDataFrame(
            expected_inner_df, crs={'init': 'epsg:4326', 'no_defs': True})

        expected_right_df = pd.concat(
            [self.pointdf.drop('geometry', axis=1).iloc[:0],
             pd.concat([pd.Series(name='index_left', dtype='int64'),
                        right_idxs],
                       axis=1),
             self.polydf],
            axis=1)

        expected_right = GeoDataFrame(
            expected_right_df, crs={'init': 'epsg:4326', 'no_defs': True})\
            .set_index('index_right')

        expected_left_df = pd.concat(
            [self.pointdf.iloc[17:],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.iloc[:0].drop('geometry', axis=1)],
            axis=1)

        expected_left = GeoDataFrame(
            expected_left_df, crs={'init': 'epsg:4326', 'no_defs': True})

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)
Example #9
 def test_empty_join(self):
     # Check empty joins
     polygons = geopandas.GeoDataFrame({'col2': [1, 2], 
                                        'geometry':  [Polygon([(0, 0), (1, 0), 
                                                               (1, 1), (0, 1)]), 
                                                      Polygon([(1, 0), (2, 0), 
                                                               (2, 1), (1, 1)])
                                                     ]})
     not_in = geopandas.GeoDataFrame({'col1': [1], 
                          'geometry': [Point(-0.5, 0.5)]})
     empty = sjoin(not_in, polygons, how='left', op='intersects')
     assert empty.index_right.isnull().all()
     empty = sjoin(not_in, polygons, how='right', op='intersects')
     assert empty.index_left.isnull().all()
     empty = sjoin(not_in, polygons, how='inner', op='intersects')
     assert empty.empty
Example #10
def assign_taxi_zones(df, lon_var, lat_var, locid_var):
    """Joins DataFrame with Taxi Zones shapefile.
    This function takes longitude values provided by `lon_var`, and latitude
    values provided by `lat_var` in DataFrame `df`, and performs a spatial join
    with the NYC taxi_zones shapefile. 
    The shapefile path is hard coded, as this function makes a hard assumption
    that the coordinates are latitudes and longitudes. It also assumes that
    latitude=0, longitude=0 cannot occur as a real data point, which is
    reasonable for a New York dataset but not for a global one.
    Only rows where `df.lon_var` and `df.lat_var` are reasonably near New York
    and `df.locid_var` is np.nan are updated.
    Parameters
    ----------
    df : pandas.DataFrame or dask.DataFrame
        DataFrame containing latitudes, longitudes, and location_id columns.
    lon_var : string
        Name of column in `df` containing longitude values. Invalid values 
        should be np.nan.
    lat_var : string
        Name of column in `df` containing latitude values. Invalid values 
        should be np.nan
    locid_var : string
        Name of column in `df` containing taxi_zone location ids. Rows with
        valid, nonzero values are not overwritten. 
    """

    import traceback

    import geopandas
    import numpy as np
    from shapely.geometry import Point


    localdf = df[[lon_var, lat_var, locid_var]].copy()
    # localdf = localdf.reset_index()
    localdf[lon_var] = localdf[lon_var].fillna(value=0.)
    localdf[lat_var] = localdf[lat_var].fillna(value=0.)
    localdf['replace_locid'] = (localdf[locid_var].isnull()
                                & (localdf[lon_var] != 0.)
                                & (localdf[lat_var] != 0.))

    if (np.any(localdf['replace_locid'])):
        shape_df = geopandas.read_file('../shapefiles/taxi_zones.shp')
        shape_df.drop(['OBJECTID', "Shape_Area", "Shape_Leng", "borough", "zone"],
                      axis=1, inplace=True)
        shape_df = shape_df.to_crs({'init': 'epsg:4326'})

        try:
            local_gdf = geopandas.GeoDataFrame(
                localdf, crs={'init': 'epsg:4326'},
                geometry=[Point(xy) for xy in
                          zip(localdf[lon_var], localdf[lat_var])])

            local_gdf = geopandas.sjoin(
                local_gdf, shape_df, how='left', op='within')

            return local_gdf.LocationID.rename(locid_var)
        except ValueError as ve:
            print(ve)
            traceback.print_exc()
            return df[locid_var]
    else:
        return df[locid_var]
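A rough usage sketch follows; the trip columns below are hypothetical, and it assumes the relative taxi_zones shapefile path used inside the function actually resolves on disk.

import numpy as np
import pandas as pd

# Hypothetical trip records; column names are illustrative only.
trips = pd.DataFrame({
    'pickup_longitude': [-73.99, -73.78, np.nan],
    'pickup_latitude': [40.73, 40.64, np.nan],
    'pickup_locid': [np.nan, np.nan, np.nan],
})

# The function returns a Series of taxi-zone ids aligned with `trips`.
trips['pickup_locid'] = assign_taxi_zones(
    trips, 'pickup_longitude', 'pickup_latitude', 'pickup_locid')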
Example #11
 def test_sjoin_left(self):
     df = sjoin(self.pointdf, self.polydf, how='left')
     assert df.shape == (21, 8)
     for i, row in df.iterrows():
         assert row.geometry.type == 'Point'
     assert 'pointattr1' in df.columns
     assert 'BoroCode' in df.columns
Example #12
 def test_sjoin_left(self):
     df = sjoin(self.pointdf, self.polydf, how='left')
     self.assertEquals(df.shape, (21,8))
     for i, row in df.iterrows():
         self.assertEquals(row.geometry.type, 'Point')
     self.assertTrue('pointattr1' in df.columns)
     self.assertTrue('BoroCode' in df.columns)
Example #13
 def test_sjoin_named_index(self, how):
     #original index names should be unchanged
     pointdf2 = self.pointdf.copy()
     pointdf2.index.name = 'pointid'
     df = sjoin(pointdf2, self.polydf, how=how)
     assert pointdf2.index.name == 'pointid'
     assert self.polydf.index.name == None
Example #14
 def test_sjoin_inner(self):
     # GH637
     countries = self.world[["geometry", "name"]]
     countries = countries.rename(columns={"name": "country"})
     cities_with_country = sjoin(self.cities, countries, how="inner",
                                 op="intersects")
     assert cities_with_country.shape == (172, 4)
Example #15
 def test_geometry_name(self):
     # test sjoin is working with other geometry name
     polydf_original_geom_name = self.polydf.geometry.name
     self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                               .set_geometry('new_geom'))
     assert polydf_original_geom_name != self.polydf.geometry.name
     res = sjoin(self.polydf, self.pointdf, how="left")
     assert self.polydf.geometry.name == res.geometry.name
Example #16
 def test_geometry_name(self):
     # test sjoin is working with other geometry name
     polydf_original_geom_name = self.polydf.geometry.name
     self.polydf = (self.polydf.rename(columns={'geometry': 'new_geom'})
                               .set_geometry('new_geom'))
     self.assertNotEqual(polydf_original_geom_name, self.polydf.geometry.name)
     res = sjoin(self.polydf, self.pointdf, how="left")
     self.assertEqual(self.polydf.geometry.name, res.geometry.name)
Example #17
    def test_no_overlapping_geometry(self):
        # Note: these tests are for correctly returning GeoDataFrame
        # when result of the join is empty

        df_inner = sjoin(self.pointdf.iloc[17:], self.polydf, how='inner')
        df_left = sjoin(self.pointdf.iloc[17:], self.polydf, how='left')
        df_right = sjoin(self.pointdf.iloc[17:], self.polydf, how='right')

        expected_inner_df = pd.concat(
            [self.pointdf.iloc[:0],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.drop('geometry', axis=1).iloc[:0]],
            axis=1)

        expected_inner = GeoDataFrame(
            expected_inner_df, crs={'init': 'epsg:4326', 'no_defs': True})

        expected_right_df = pd.concat(
            [self.pointdf.drop('geometry', axis=1).iloc[:0],
             pd.concat([pd.Series(name='index_left', dtype='int64'),
                        pd.Series(name='index_right', dtype='int64')],
                       axis=1),
             self.polydf],
            axis=1)

        expected_right = GeoDataFrame(
            expected_right_df, crs={'init': 'epsg:4326', 'no_defs': True})\
            .set_index('index_right')

        expected_left_df = pd.concat(
            [self.pointdf.iloc[17:],
             pd.Series(name='index_right', dtype='int64'),
             self.polydf.iloc[:0].drop('geometry', axis=1)],
            axis=1)

        expected_left = GeoDataFrame(
            expected_left_df, crs={'init': 'epsg:4326', 'no_defs': True})

        assert expected_inner.equals(df_inner)
        assert expected_right.equals(df_right)
        assert expected_left.equals(df_left)
Example #18
    def test_inner(self, op, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how='inner', op=op)

        exp = expected[op].dropna().copy()
        exp = exp.drop('geometry_y', axis=1).rename(
            columns={'geometry_x': 'geometry'})
        exp[['df1', 'df2']] = exp[['df1', 'df2']].astype('int64')
        if index == 'default-index':
            exp[['index_left', 'index_right']] = \
                exp[['index_left', 'index_right']].astype('int64')
        exp = exp.set_index('index_left')
        exp.index.name = None

        assert_frame_equal(res, exp)
Example #19
    def test_right(self, op, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how='right', op=op)

        exp = expected[op].dropna(subset=['index_right']).copy()
        exp = exp.drop('geometry_x', axis=1).rename(
            columns={'geometry_y': 'geometry'})
        exp['df2'] = exp['df2'].astype('int64')
        if index == 'default-index':
            exp['index_right'] = exp['index_right'].astype('int64')
            res['index_left'] = res['index_left'].astype(float)
        exp = exp.set_index('index_right')
        exp = exp.reindex(columns=res.columns)

        assert_frame_equal(res, exp, check_index_type=False)
Example #20
    def test_left(self, op, dfs):
        index, df1, df2, expected = dfs

        res = sjoin(df1, df2, how='left', op=op)

        exp = expected[op].dropna(subset=['index_left']).copy()
        exp = exp.drop('geometry_y', axis=1).rename(
            columns={'geometry_x': 'geometry'})
        exp['df1'] = exp['df1'].astype('int64')
        if index == 'default-index':
            exp['index_left'] = exp['index_left'].astype('int64')
            # TODO: in result the dtype is object
            res['index_right'] = res['index_right'].astype(float)
        exp = exp.set_index('index_left')
        exp.index.name = None

        assert_frame_equal(res, exp)
Example #21
 def test_crs_mismatch(self, dfs):
     index, df1, df2, expected = dfs
     df1.crs = {'init': 'epsg:4326', 'no_defs': True}
     with pytest.warns(UserWarning):
         sjoin(df1, df2)
Example #22
 def test_sjoin_outer(self):
     df = sjoin(self.pointdf, self.polydf, how="outer")
     assert df.shape == (21, 8)
Example #23
def city_directions():
    """
    Purpose:    To create a path between any two USA cities using the NA roads dataset
    Input:      None
    Output:     The path distance in miles OR -1 if no path exists
    """
    # data looks like this: [ "cityname1", "cityname2" ]
    citySource, cityDest = request.args.get("cityArgs", None).split(',')
    citySource = (unquote(citySource)).title()
    cityDest = (unquote(cityDest)).title()
    source_target_list = []
    # only run if both source and destination cities are populated with text
    if citySource and cityDest:
        source_target_list = load_city_docs(citySource, cityDest)
        # identify the source city
        source_city = source_target_list[0]
        source_city_coords = tuple(source_city['geometry']['coordinates'])
        source_city_name = source_city['properties']['name']
        # locate the nearest road segment(s) to source city
        nearest_roads_to_source = [
            list(linestring.coords)
            for linestring in list(usrails_and_roads_DF.iloc[list(
                usrail_and_roads_SI.nearest((source_city_coords[0],
                                             source_city_coords[1],
                                             source_city_coords[0],
                                             source_city_coords[1]),
                                            num_results=1))].geometry)
        ]

        target_city = source_target_list[1]
        target_city_coords = tuple(target_city['geometry']['coordinates'])
        target_city_name = target_city['properties']['name']
        # find the closest road segment(s) to the target city
        nearest_roads_to_target = [
            list(linestring.coords)
            for linestring in list(usrails_and_roads_DF.iloc[list(
                usrail_and_roads_SI.nearest((target_city_coords[0],
                                             target_city_coords[1],
                                             target_city_coords[0],
                                             target_city_coords[1]),
                                            num_results=1))].geometry)
        ]

        # adds the source city to the NA roads graph
        if source_city_coords not in US_road_graph:
            add_city_to_graph(source_city_coords, source_city_name,
                              nearest_roads_to_source, US_road_graph)

        # adds the target city to the NA roads graph
        if target_city_coords not in US_road_graph:
            add_city_to_graph(target_city_coords, target_city_name,
                              nearest_roads_to_target, US_road_graph)

        # if a path exists between the two cities, create a geojson feature of it
        if has_path(US_road_graph, source_city_coords, target_city_coords):
            total_distance, path = single_source_dijkstra(US_road_graph,
                                                          source_city_coords,
                                                          target_city_coords,
                                                          weight=distance)
            path_geojson = {
                'type': 'Feature',
                'properties': {
                    'source': source_city_name,
                    'destination': target_city_name,
                    'distance': total_distance
                },
                'geometry': {
                    'type': 'LineString',
                    'coordinates': [list(point) for point in path]
                }
            }
            # load the path into a GeoDataFrame for processing
            path_df = GeoDataFrame.from_features([path_geojson],
                                                 crs="epsg:4326")
            # use the buffer method to produce a Polygon of 0.2 degrees thickness surrounding the path
            buffered_path = (path_df.buffer(0.2)).to_crs(crs="epsg:4326")
            # create a dataframe from the buffered path
            buffered_path_df = GeoDataFrame(buffered_path,
                                            geometry=buffered_path.geometry)
            buffered_path_df[0] = None
            # perform a spatial join of the buffered path and the ufo sightings, earthquakes, etc dataframe.
            #   This will return all disasters within 0.2 degrees of the path
            join_results = GeoDataFrame(
                sjoin(disasters_DF, buffered_path_df, lsuffix="left"))
            # from here, dump the path, the buffered path, and the disasters 0.2 degrees from the path to files
            #   for the front end to visualize
            dump(
                path_geojson,
                open(
                    './Assignments/A05/assets/api/data/shortest_paths/' +
                    source_city_name + '_' + target_city_name + '.geojson',
                    'w'))
            dump(
                loads(buffered_path.to_json()),
                open(
                    './Assignments/A05/assets/api/data/shortest_paths/buffered.geojson',
                    'w'))
            dump(
                loads(join_results.to_json(show_bbox=False)),
                open(
                    './Assignments/A05/assets/api/data/shortest_paths/closest_points.geojson',
                    'w'))
            return str(total_distance)
        else:
            return "-1"
Example #24
 def test_sjoin_bad_op(self):
     # AttributeError: 'Point' object has no attribute 'spandex'
     with pytest.raises(ValueError):
         sjoin(self.pointdf, self.polydf, how="left", op="spandex")
Example #25
def flows_catchs():
    ## Identfy catchments and flowlines to HUCs

    ## Find the HUC12s that intersect with the input polygon
    #shapefile = 'data/TX-Counties/Young/TX-County-Young.shp'
    shape = gpd.read_file(args.input)

    if args.huc12:
        #huc12 = 'data/WBD_National_GDB/WBD_National_GDB.shp/WBDHU12.shp'
        ## TODO: Extend to any HUC input
        hucs = gpd.read_file(args.huc12, mask=shape)
        ## TODO: Make separate flowline and catchment input optional
        if args.nhd:
            hucs = hucs[['HUC12', 'geometry']]
    elif args.hydrobasins:
        hucs = gpd.read_file(args.hydrobasins, mask=shape)
        ## TODO: Make separate flowline and catchment input optional
        if args.hydrosheds_basins:
            hucs = hucs[['HYBAS_ID', 'geometry']]
    ## TODO: Make separate flowline and catchment input optional
    #elif args.hydrobasins_basins:
    else:
        raise (ValueError("Missing basin data"))

    #nhd = 'data/NFIEGeo_12.gdb'
    ## Find the flowlines whose representative points are in these HUC12s
    if args.nhd:
        flows = gpd.read_file(args.nhd, layer='Flowline', mask=hucs)
    elif args.hydrosheds_rivers:
        flows = gpd.read_file(args.hydrosheds_rivers, mask=hucs)
    ## TODO: Make separate flowline and catchment input optional
    else:
        raise (ValueError("Missing flowline data"))

    flows.drop(columns=[
        'Shape_Length', 'Shape_Area', 'AreaSqKM', 'index_left', 'index_right'
    ],
               inplace=True,
               errors='ignore')
    flows.reset_index(inplace=True)
    flows.set_index('COMID', inplace=True)
    flows.sort_index(inplace=True)

    flows_rep = flows.copy()
    flows_rep['geometry'] = flows.representative_point()

    if flows_rep.crs != hucs.crs:
        flows_rep = gpd.sjoin(flows_rep,
                              hucs.to_crs(flows_rep.crs),
                              op='intersects',
                              how='inner')
    else:
        flows_rep = gpd.sjoin(flows_rep, hucs, op='intersects', how='inner')
    flows_rep.drop(columns=['index_left', 'index_right'],
                   inplace=True,
                   errors='ignore')

    ## Find the catchments corresponding with these flowlines
    catchs = gpd.read_file(args.nhd, layer='Catchment')
    catchs.drop(columns=['index_left', 'index_right'],
                inplace=True,
                errors='ignore')
    catchs.reset_index(inplace=True)
    catchs.set_index('FEATUREID', inplace=True)
    catchs.sort_index(inplace=True)
    catchs = catchs[catchs.index.isin(flows_rep.index)]
    ## Find the flowlines corresponding with these cactchments
    ##  (Note: this line is optional.
    ##  Commenting it out will result in non-COMID-identified flowlines)
    flows = flows[flows.index.isin(catchs.index)]

    ## Determine which HUC12s each of the flowlines and catchments belong to
    flows.loc[flows.index, 'HUC12'] = flows_rep.loc[flows.index, 'HUC12']
    catchs.loc[catchs.index, 'HUC12'] = flows.loc[catchs.index, 'HUC12']

    flows.loc[flows['StreamOrde'] == 0, 'Roughness'] = .99
    flows.loc[flows['StreamOrde'] == 1, 'Roughness'] = .2
    flows.loc[flows['StreamOrde'] == 2, 'Roughness'] = .1
    flows.loc[flows['StreamOrde'] == 3, 'Roughness'] = .065
    flows.loc[flows['StreamOrde'] == 4, 'Roughness'] = .045
    flows.loc[flows['StreamOrde'] == 5, 'Roughness'] = .03
    flows.loc[flows['StreamOrde'] == 6, 'Roughness'] = .01
    flows.loc[flows['StreamOrde'] == 7, 'Roughness'] = .025

    flows = flows[flows.is_valid]
    catchs = catchs[catchs.is_valid]
    catchs = catchs[catchs.index.isin(flows.index)]
    flows = flows[flows.index.isin(catchs.index)]

    return (flows, catchs)
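The representative-point trick above (give each flowline a point that is guaranteed to lie on it, then do a point-in-polygon join to find its basin) can be sketched on its own with invented layers:

import geopandas as gpd
from shapely.geometry import LineString, Polygon

# Invented layers: one line and one basin polygon.
lines = gpd.GeoDataFrame({'line_id': [1]},
                         geometry=[LineString([(0.2, 0.5), (0.8, 0.5)])])
basins = gpd.GeoDataFrame({'basin_id': ['A']},
                          geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])])

# Swap each line's geometry for a point on that line, then join point-in-polygon.
lines_rep = lines.copy()
lines_rep['geometry'] = lines.representative_point()
assigned = gpd.sjoin(lines_rep, basins, op='within', how='inner')
print(assigned[['line_id', 'basin_id']])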
Example #26
 def test_sjoin_duplicate_column_name(self):
     pointdf2 = self.pointdf.rename(columns={'pointattr1': 'Shape_Area'})
     df = sjoin(pointdf2, self.polydf, how="left")
     self.assertTrue('Shape_Area_left' in df.columns)
     self.assertTrue('Shape_Area_right' in df.columns)
Example #27
import numpy as np
import pandas as pd
import os
import geopandas as gpd

#This script appends information on the ADM1 region that contains each Cities4Forest city and saves it to a new file.

#Change to directory
working_dir = '/Users/kristine/WRI/Cities4Forests/Defra_Watersheds'
os.chdir(working_dir)

#Load city locations
city_locations_file = '/Users/kristine/WRI/Cities4Forests/Defra_Watersheds/Cities4Forests Watersheds/Cities4Forests Watersheds.shp'
city_locations = gpd.read_file(city_locations_file)

#Load adm1 polygons
adm1_file = '/Users/kristine/WRI/gadm36_levels_shp/gadm36_1.shp'
adm1 = gpd.read_file(adm1_file)

#Spatially join files, which preserves the geometry of the first dataset, while adding the attributes of the right dataset
merged_files = gpd.sjoin(city_locations, adm1[['NAME_0','NAME_1', 'geometry']], how='left', op='intersects')

#Select desired columns
merged_files = merged_files[['City', 'City Simpl', 'Country', 'Watersheds', 'Latitude', 'Longitude', 'Tree Cover', 'Biomass Lo', 
    'Restoratio', 'Carbon Seq', 'Aqueduct W', 'Overlaps w', 'Watershed','NAME_0', 'NAME_1']]

#Save to a CSV file
merged_files.to_csv('Cities4Forests Watersheds with ADM1.csv',index=False)
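As a tiny illustration of the comment above (a left spatial join keeps every left-hand geometry and attaches the containing polygon's attributes), with toy data rather than the Cities4Forests layers:

import geopandas as gpd
from shapely.geometry import Point, Polygon

cities = gpd.GeoDataFrame({'city': ['a', 'b']},
                          geometry=[Point(0.5, 0.5), Point(3, 3)])
regions = gpd.GeoDataFrame({'region': ['R1']},
                           geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])])

joined = gpd.sjoin(cities, regions, how='left', op='intersects')
# 'a' picks up region 'R1'; 'b' matches nothing and gets NaN,
# but both rows keep their point geometry.
print(joined[['city', 'region']])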

Example #28
            Polygon([(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                     (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
        Ytop = Ytop - height
        Ybottom = Ybottom - height
    XleftOrigin = XleftOrigin + width
    XrightOrigin = XrightOrigin + width

# Create a polygon-type GeoDataFrame from the grid cells
grid = gpd.GeoDataFrame({'geometry': polygons})
# Declare a projection system (4326 = WGS 84)
grid.set_crs(epsg=4326, inplace=True)

# This coefficient is used to create bars that remain visible at a large scale on the map
# ADJUSTABLE
coef_exageration = 500
# Spatial join between the points and the grid cells we created.
# In other words: each point is associated with the cell that contains it, so for
# each point we get one additional cell row.
dfsjoin = gpd.sjoin(grid, points)  # Spatial join points to polygons

# Give an id to each matched cell
dfsjoin["id"] = dfsjoin.index + 1
# Dissolve the cells by id and count the merged rows, which gives the number of points in each cell.
dataFinal = dfsjoin.dissolve(by='id', aggfunc='count')
# Add a height variable based on the count field (used to extrude the polygons on the map)
#dataFinal['height'] = dataFinal['name']*coef_exageration
# Rename the 'name' field to 'value'; the 'name' field holds the point count per cell.
dataFinal = dataFinal.rename(columns={'name': 'value'})
# TO BE MODIFIED
dataFinal.to_file("./departements-france-2020-12-21.geojson", driver="GeoJSON")
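Reduced to its essentials, the join-then-dissolve counting used above looks like this (toy grid and points; counting any right-hand column yields the number of points per cell):

import geopandas as gpd
from shapely.geometry import Point, Polygon

cells = gpd.GeoDataFrame({'geometry': [
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
]})
pts = gpd.GeoDataFrame({'name': ['p1', 'p2', 'p3']},
                       geometry=[Point(0.2, 0.5), Point(0.7, 0.5), Point(1.5, 0.5)])

joined = gpd.sjoin(cells, pts)            # one row per (cell, contained point) pair
joined['id'] = joined.index + 1           # stable cell id
counts = joined.dissolve(by='id', aggfunc='count')
print(counts['name'])                     # 2 points in the first cell, 1 in the second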
Example #29
empty_grid.reset_index(drop=True, inplace=True)

grid = empty_grid

###

#adm

#coords_geom = [Point(xy) for xy in zip(grid.lon, grid.lat)]
#coords_df = gpd.GeoDataFrame(grid['cell_id'], crs='epsg:4326', geometry=coords_geom)
#del coords_geom

adm = gpd.read_file(path + "/gadm36_AFG_2.geojson")
adm = adm[["GID_1", "NAME_1", "GID_2", "NAME_2", "geometry"]]

grid = gpd.sjoin(grid, adm, how="left")
grid = grid[~grid.index.duplicated(keep="first")]

grid.drop(["index_right"], axis=1, inplace=True)

#temp = temp[["cell_id", "GID_1", "NAME_1", "GID_2", "NAME_2"]]

#idx = temp["cell_id"].duplicated()*1
#temp = temp.loc[idx==0, : ]

#grid = grid.merge(temp, how="left", on="cell_id")

#grid.drop(grid[grid.NAME_2.isnull()].index, inplace=True)

#grid.reset_index(drop=True, inplace=True)
Example #30
def osm_for_region(i, ret_df, key, tags, dist, area):
    _addresses = addresses[addresses.Reg_Code == i]

    print("Grabbing data in region {} of 5".format(i + 1))
    pbar = '--------------------------------------------------'
    prog = '| {0:.0%}'.format(0)

    # 2. Load building data for the region
    if len(_addresses) > 0:

        # merge all building data from the South/West
        if key == 'building':
            if i == 3:
                path = USA[key][i]
                gdf = gdf_from_json(path)
                gdf1 = gdf_from_json(USA[key][4])
                gdf2 = gdf_from_json(USA[key][5])
                gdf3 = gdf_from_json(USA[key][6])
                gdf = gdf.append([gdf1, gdf2, gdf3], ignore_index=True)
            elif i == 4:
                path = USA[key][7]
                gdf = gdf_from_json(path)
                gdf1 = gdf_from_json(USA[key][8])
                gdf2 = gdf_from_json(USA[key][9])
                gdf3 = gdf_from_json(USA[key][10])
                gdf = gdf.append([gdf1, gdf2, gdf3], ignore_index=True)
            else:
                path = USA[key][i]
                gdf = gdf_from_json(path)
        else:
            path = USA[key][i]
            gdf = gdf_from_json(path)

        print(pbar + prog, end='\r')
    else:
        print("No addresses in region {}".format(i + 1))

    progress = 0
    for k in _addresses.index:
        progress += 1

        # Create new vector to append at end of iteration
        coords = _addresses['Lat_Lon'][k]
        ID = _addresses['ID'][k]
        Add = _addresses['Address'][k]
        Shi = _addresses['Shipping'][k]
        df = pd.DataFrame({
            'ID': [ID],
            'Address': [Add],
            'Lat_Lon': [coords],
            'Shipping': [Shi]
        })

        # 3. Create bounding box as GeoDataFrame
        b = fp.bbox_from_point(coords, dist)

        # 4. Filter gdf by bounding box
        if key == 'highway':
            # To get intersecting highways, we cannot use standard cx filtering
            bb = box(b[3], b[1], b[2], b[0])
            bb = Polygon(bb)
            bbox = gpd.GeoSeries([bb])
            bbox = gpd.GeoDataFrame({'geometry': bbox})
            # Filter gdf for all data intersecting Polygon bbox
            osm_in_bbox = gpd.sjoin(bbox, gdf, how='left', op='intersects')
        else:
            osm_in_bbox = gdf.cx[b[3]:b[2], b[1]:b[0]]

        for tag in tags:
            x = osm_in_bbox[osm_in_bbox[key] == tag]
            y = tag + '_' + key[0].upper()
            df[y] = [len(x)]
            if area == True:
                t = y + "_area"
                df[t] = [sum(x['geometry'].area)]

        # 6. Store as new columns in DataFrame
        ret_df = ret_df.append(df)

    return ret_df
Example #31
file2 = drive.CreateFile({'id': '1tkhDiyW4eHp9IGyFmH4iO9_F1lrj701h'})
file2.GetContentFile('landkreise.shp')
file3 = drive.CreateFile({'id': '1RE5rCRsw_hRH7nZkNLYljEGbPiN63_eW'})
file3.GetContentFile('landkreise.shx')
file4 = drive.CreateFile({'id': '1vh5JNv8UvmIKWXRecHrvzWnY8qSrlbj6'})
file4.GetContentFile('landkreise.cpg')
file5 = drive.CreateFile({'id': '156d84fK3IuC_comNOnO7f4nPt9b-P1Zm'})
file5.GetContentFile('landkreise.dbf')
file6 = drive.CreateFile({'id': '1YQh84mF2qiiHX0j_fcuZa-xkWaLOtPZK'})
file6.GetContentFile('landkreise.prj')

df_kreise = gpd.read_file('landkreise.shp')
df_kreise.crs = {'init': 'epsg:4326'}

# match the two
matched_kreise = gpd.sjoin(geo_google_id_df, df_kreise, how='right')

# treatment
test_kreise = matched_kreise.groupby(
    'WARNCELLID').index_left.count().value_counts().sort_index()
treat_kreise = matched_kreise.groupby('WARNCELLID').apply(
    lambda x: x.sample(frac=0.5, random_state=1))['index_left'].to_frame()
treat_kreise['treatment'] = 1

matched_kreise = matched_kreise.merge(treat_kreise,
                                      on='index_left',
                                      how='left')
matched_kreise['treatment'] = matched_kreise.treatment.fillna(value=0)
#matched_kreise.groupby('treatment').count()
#points_kreise = matched_kreise[['Longitude','Latitude','treatment','Name']].dropna()
#geo_points_kreise = gpd.GeoDataFrame(points_kreise, geometry=gpd.points_from_xy(points_kreise.Longitude, points_kreise.Latitude))
Example #32
def output_files(arguments):
    #def output(flow_key,flowshu12shape,catchshu12shape,hu12catchs,avail_hu12catchs_group,args,prefix,dst_crs,mem_estimates):
    ## Output catchments, flowlines, roughnesses, and rasters

    try:

        def output_nhd(flows, catchs, hu):
            ## For each HUC, write the flowlines, catchments, and roughnesses corresponding to it

            out_path = os.path.join(subdirectory, 'Flowlines.shp')
            my_file = Path(out_path)
            #if my_file.is_file() and not arguments[1].args.overwrite and not arguments[1].args.overwrite_flowlines:
            if (my_file.is_file() and not arguments[5].overwrite
                    and not arguments[5].overwrite_flowlines):
                #if my_file.is_file() and not args.overwrite and not args.overwrite_flowlines:
                pass
            else:
                my_file.unlink(missing_ok=True)
                #flowshu12shape[flowshu12shape['HUC12']==hu].reset_index().to_file(out_path)
                flows.reset_index().to_file(out_path)

            out_path = os.path.join(subdirectory, 'Roughness.csv')
            my_file = Path(out_path)
            #if my_file.is_file() and not arguments[1].args.overwrite and not arguments[1].args.overwrite_roughnesses:
            if (my_file.is_file() and not arguments[5].overwrite
                    and not arguments[5].overwrite_roughnesses):
                #if my_file.is_file() and not args.overwrite and not args.overwrite_roughnesses:
                pass
            else:
                my_file.unlink(missing_ok=True)
                with open(out_path, 'w', newline='') as outcsv:
                    writer = csv.writer(outcsv)
                    writer.writerow(['COMID', 'StreamOrde', 'Roughness'])
                    for comid in np.sort(flows.index.unique()):
                        writer.writerow([
                            comid, flows.loc[comid, 'StreamOrde'],
                            flows.loc[comid, 'Roughness']
                        ])

            out_path = os.path.join(subdirectory, 'Catchments.shp')
            my_file = Path(out_path)
            #if my_file.is_file() and not arguments[1].args.overwrite and not arguments[1].args.overwrite_catchments:
            if (my_file.is_file() and not arguments[5].overwrite
                    and not arguments[5].overwrite_catchments):
                #if my_file.is_file() and not args.overwrite and not args.overwrite_catchments:
                pass
            else:
                my_file.unlink(missing_ok=True)
                #catchshu12shape[catchshu12shape['HUC12']==hu].reset_index().to_file(out_path)
                catchs.reset_index().to_file(out_path)

        def get_mosaic(avail_hucscatchs_group, hu, break_hu, dst_crs):
            ## Get mosaic of DEMs for each HUC

            def append_check(src_files_to_mosaic, var, subdirectory, hu):
                ## Check each raster's resolution in this HUC

                if any(np.float16(i) > 1. for i in var.res):
                    out_path = os.path.join(subdirectory, "gt1m.err")
                    Path(out_path).touch()
                    print('WARNING: >1m raster input for HUC12: ' + str(hu))
                    sys.stdout.flush()
                else:
                    src_res_min_to_mosaic.append(min(var.res))
                    src_res_max_to_mosaic.append(max(var.res))
                    src_x_to_mosaic.append(var.res[0])
                    src_y_to_mosaic.append(var.res[1])
                    src_files_to_mosaic.append(var)

                return (src_files_to_mosaic, src_res_min_to_mosaic,
                        src_res_max_to_mosaic, src_x_to_mosaic,
                        src_y_to_mosaic)

            ## Reproject the mosaic to DEM tiles pertaining to each HUC
            dem_fps = list(avail_hucscatchs_group['stampede2name'])
            src_files_to_mosaic = []
            src_res_min_to_mosaic = []
            src_res_max_to_mosaic = []
            src_x_to_mosaic = []
            src_y_to_mosaic = []
            memfile = {}
            for fp in dem_fps:
                memfile[fp] = MemoryFile()
            for fp in dem_fps:
                with rasterio.open(fp) as src:
                    transform, width, height = calculate_default_transform(
                        src.crs, dst_crs, src.width, src.height, *src.bounds)
                    out_meta = src.meta.copy()
                    out_meta.update({
                        'crs': dst_crs,
                        'transform': transform,
                        'width': width,
                        'height': height
                    })

                    ## Don't do an expensive reprojection if projection
                    ##  already correct
                    ## TODO: This with statement may need to be changed
                    ##  back to an equals
                    with memfile[fp].open(**out_meta) as dst:
                        if src.meta == out_meta:
                            dst.write(src.read())
                        else:
                            for i in range(1, src.count + 1):
                                reproject(source=rasterio.band(src, i),
                                          destination=rasterio.band(dst, i),
                                          src_transform=src.transform,
                                          src_crs=src.crs,
                                          dst_transform=dst.transform,
                                          dst_crs=dst.crs,
                                          resampling=Resampling.nearest)
                        (src_files_to_mosaic,
                         src_res_min_to_mosaic,
                         src_res_max_to_mosaic,
                         src_x_to_mosaic,
                         src_y_to_mosaic) = append_check(src_files_to_mosaic,
                                                         dst, subdirectory, hu)

            if len(src_files_to_mosaic) == 0:

                out_path = os.path.join(subdirectory, "allGT1m.err")
                Path(out_path).touch()
                print('WARNING: Found no <=1m raster input data for HUC12: ' +
                      str(hu))
                sys.stdout.flush()

                break_hu = True
                mosaic_tuple = ()
                return (break_hu, mosaic_tuple)

            else:

                src_files_to_mosaic = pd.DataFrame(
                    data={
                        'Files': src_files_to_mosaic,
                        'min(resolution)': src_res_min_to_mosaic,
                        'max(resolution)': src_res_max_to_mosaic
                    })
                src_files_to_mosaic.sort_values(
                    by=['min(resolution)', 'max(resolution)'], inplace=True)
                mosaic, out_trans = merge(list(src_files_to_mosaic['Files']),
                                          res=(max(src_x_to_mosaic),
                                               max(src_y_to_mosaic)))
                for src in src_files_to_mosaic['Files']:
                    src.close()
                out_meta = src.meta.copy()
                out_meta.update({
                    "driver": 'GTiff',
                    "height": mosaic.shape[1],
                    "width": mosaic.shape[2],
                    "transform": out_trans,
                    "crs": dst_crs
                })
                for keyvalue in memfile.items():
                    keyvalue[1].close()

                mosaic_tuple = (mosaic, out_meta)
                return (break_hu, mosaic_tuple)

        def output_raster(hu_buff_geom, mosaic, out_meta, path_elevation):
            ## Crop and output the mosaic to the buffered catchments of each HUC

            with MemoryFile() as memfile:
                with memfile.open(**out_meta) as dataset:
                    dataset.write(mosaic)
                with memfile.open(**out_meta) as dataset:
                    out_image, out_trans = rasterio.mask.mask(dataset,
                                                              hu_buff_geom,
                                                              crop=True)

            out_meta.update({
                "height": out_image.shape[1],
                "width": out_image.shape[2],
                "transform": out_trans
            })

            with rasterio.open(path_elevation, "w", **out_meta) as dst:
                dst.write(out_image)

        #subdirectory = os.path.join(arguments[1].args.directory, arguments[1].prefix+'-'+str(arguments[0]))
        subdirectory = os.path.join(arguments[5].directory,
                                    arguments[6] + '-' + str(arguments[0]))
        #subdirectory = os.path.join(args.directory, prefix+'-'+str(flow_key))
        Path(subdirectory).mkdir(parents=True, exist_ok=True)

        path_notime = os.path.join(subdirectory,
                                   "jobNoTimeLeftWhileProcessing.err")
        Path(path_notime).touch()

        path_gt1m = os.path.join(subdirectory, "allGT1m.err")
        file_gt1m = Path(path_gt1m)
        path_enclose = os.path.join(subdirectory,
                                    "rasterDataDoesNotEnclose.err")
        file_enclose = Path(path_enclose)

        if file_gt1m.is_file() or file_enclose.is_file():

            pass

        else:

            #output_nhd(arguments[1].flowshu12shape,arguments[1].catchshu12shape,arguments[0])
            output_nhd(arguments[1], arguments[2], arguments[0])
            #output_nhd(flowshu12shape,catchshu12shape,flow_key)

            path_elevation = os.path.join(subdirectory, 'Elevation.tif')
            file_elevation = Path(path_elevation)
            #if file_elevation.is_file() and not arguments[1].args.overwrite and not arguments[1].args.overwrite_rasters:
            if (file_elevation.is_file() and not arguments[5].overwrite
                    and not arguments[5].overwrite_rasters):
                #if file_elevation.is_file() and not args.overwrite and not args.overwrite_rasters:

                pass

            else:

                file_elevation.unlink(missing_ok=True)

                #avail_hu12catchs_group = arguments[1].avail_hu12catchs_grouped.get_group(arguments[0])
                break_hu = False

                #break_hu, mosaic_tuple = get_mosaic(avail_hu12catchs_group,arguments[0],break_hu,arguments[1].dst_crs)
                break_hu, mosaic_tuple = get_mosaic(arguments[4], arguments[0],
                                                    break_hu, arguments[7])
                #break_hu, mosaic_tuple = get_mosaic(avail_hu12catchs_group,flow_key,break_hu,dst_crs)

                if break_hu != True:

                    with rasterio.Env():
                        results = ({
                            'properties': {
                                'Elevation': v
                            },
                            'geometry': s
                        } for i, (s, v) in enumerate(
                            shapes((mosaic_tuple[0] == mosaic_tuple[1]
                                    ['nodata']).astype(np.int16),
                                   mask=mosaic_tuple[0] != mosaic_tuple[1]
                                   ['nodata'],
                                   transform=mosaic_tuple[1]['transform'])))
                    geoms = list(results)
                    raster = gpd.GeoDataFrame.from_features(
                        geoms, crs=mosaic_tuple[1]['crs'])

                    #hu_buff = arguments[1].hu12catchs.loc[[arguments[0]]].drop(columns=['index_left','index_right'],errors='ignore').to_crs(mosaic_tuple[1]['crs'])
                    hu_buff = arguments[3].to_crs(mosaic_tuple[1]['crs'])
                    #hu_buff = hu12catchs.to_crs(mosaic_tuple[1]['crs'])
                    hu_buff_geom = list(hu_buff['geometry'])

                    if len(
                            gpd.sjoin(hu_buff,
                                      raster,
                                      op='within',
                                      how='inner').index) == 0:
                        out_path = os.path.join(
                            subdirectory, "rasterDataDoesNotEnclose.err")
                        Path(out_path).touch()
                        print(
                            'WARNING: <=1m raster input data does not enclose HUC12: '
                            + str(arguments[0]))
                        #print('WARNING: <=1m raster input data does not enclose HUC12: '+str(flow_key))
                        sys.stdout.flush()
                    else:
                        #print('GOING IN OUTPUT RASTER\t',arguments[0])
                        #print('GOING IN OUTPUT RASTER\t',flows_key)
                        output_raster(hu_buff_geom, mosaic_tuple[0],
                                      mosaic_tuple[1], path_elevation)

        #mem_estimates[flows_key] = 0.
        Path(path_notime).unlink()

    except OSError as e:
        Path(path_notime).unlink()
        out_path = os.path.join(subdirectory, "OS.err")
        Path(out_path).touch()
        with open(out_path, 'w') as f:
            #f.write("{}".format(e))
            f.write(str(e))
        print('[ERROR] OSError on HUC12: ' + str(arguments[0]))
        print(e)
        sys.stdout.flush()
        #if arguments[1].args.log:
        if arguments[5].log:
            #if args.log:
            logging.debug('[ERROR] OSError on HUC ' + str(arguments[0]))
            #logging.debug('HUC '+str(flow_key))

    except Exception as e:

        #if arguments[1].args.log:
        if arguments[5].log:
            #if args.log:
            logging.debug('[EXCEPTION] on HUC ' + str(arguments[0]))
            #logging.debug('HUC '+str(flow_key))
        return (ExceptionWrapper(e))
Example #33
def available(hucscatchs):
    ## Identify each DEM tile file for our study area

    ## Find the DEM tiles that intersect with these buffered HUC12 catchments
    #availibility = 'data/TNRIS-LIDAR-Availability-20191213.shp/TNRIS-LIDAR-Availability-20191213.shp'
    avail = gpd.read_file(args.availability)
    avail_hucscatchs = gpd.sjoin(avail,
                                 hucscatchs.to_crs(avail.crs),
                                 op='intersects',
                                 how='inner')
    ## Construct an exact path for each DEM tile
    fnexts = ['.dem', '.img']
    for fnext in fnexts:
        avail_hucscatchs['demname'] = avail_hucscatchs['demname'].str.replace(
            fnext + '$', '')
    for dirname in avail_hucscatchs['dirname'].unique():
        stampede2names = []
        #raster = '/scratch/projects/tnris/tnris-lidardata'
        basename = os.path.join(args.lidar, dirname, 'dem') + os.sep
        for fnext in fnexts:
            avail_hucscatchs['demname'] = avail_hucscatchs[
                'demname'].str.replace(fnext + '$', '')
            stampede2names.extend(glob.glob(basename + '*' + fnext))
        direxts = set([
            os.path.splitext(os.path.basename(name))[1]
            for name in stampede2names
        ])
        ## If more than one vector image extension found in a DEM project,
        ##  then figure out each file's extension individually
        ## TODO: Test this against stratmap-2013-50cm-ellis-henderson-hill-johnson-navarro
        if len(direxts) > 1:
            for demname in avail_hucscatchs.loc[avail_hucscatchs['dirname'] ==
                                                dirname, 'demname'].unique():
                truth_dirname = avail_hucscatchs['dirname'] == dirname
                truth_demname = avail_hucscatchs['demname'] == demname
                truth = np.logical_and(truth_dirname, truth_demname)
                for fnext in fnexts:
                    stampede2name = avail_hucscatchs.loc[
                        truth, 'demname'].apply(
                            lambda x: os.path.join(basename, x + fnext))
                    if glob.glob(stampede2name.iloc[0]):
                        break
                    #else:
                avail_hucscatchs.loc[truth, 'stampede2name'] = stampede2name
        ## Else do all the files in a DEM project at once
        elif len(direxts) == 1:
            stampede2name = avail_hucscatchs.loc[
                avail_hucscatchs['dirname'] == dirname, 'demname'].apply(
                    lambda x: os.path.join(basename, x + list(direxts)[0]))
            stampede2name.drop_duplicates(inplace=True)
            p = Path(basename)
            for subp in p.rglob('*'):
                if len(stampede2name[stampede2name.str.lower() == str(
                        subp).lower()].index) > 0:
                    stampede2name.loc[stampede2name[stampede2name.str.lower(
                    ) == subp.as_posix().lower()].index[0]] = subp.as_posix()
            stampede2name = stampede2name[stampede2name.isin(
                [subp.as_posix() for subp in list(p.rglob('*'))])]
            avail_hucscatchs.loc[avail_hucscatchs['dirname'] == dirname,
                                 'stampede2name'] = stampede2name
        else:
            continue
    avail_hucscatchs.dropna(subset=['stampede2name'], inplace=True)
    avail_hucscatchs_grouped = avail_hucscatchs.groupby('index_right')

    return (avail_hucscatchs_grouped)
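The groupby('index_right') at the end works because sjoin records, for every matched tile, the index of the catchment it intersected; a stripped-down sketch with invented tile and catchment layers:

import geopandas as gpd
from shapely.geometry import Polygon

tiles = gpd.GeoDataFrame({'demname': ['t1', 't2']}, geometry=[
    Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
    Polygon([(0.5, 0), (1.5, 0), (1.5, 1), (0.5, 1)]),
])
catchments = gpd.GeoDataFrame({'HUC12': ['120100']}, geometry=[
    Polygon([(0.6, 0.2), (0.9, 0.2), (0.9, 0.8), (0.6, 0.8)]),
])

# Every tile intersecting a catchment, keyed back to that catchment's index.
matches = gpd.sjoin(tiles, catchments, op='intersects', how='inner')
for catch_idx, group in matches.groupby('index_right'):
    print(catch_idx, group['demname'].tolist())   # 0 ['t1', 't2']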
Example #34
def travelshedwt(arrt):
    bk=bkpt.copy()
    if destination.loc[i,'direction']=='in':
        url=doserver+'otp/routers/default/isochrone?batch=true&mode=WALK,TRANSIT'
        url+='&fromPlace='+destination.loc[i,'latlong']+'&toPlace='+destination.loc[i,'latlong']
        url+='&arriveBy=true&date='+typicaldate+'&time='+arrt+'&maxTransfers='+str(maxTransfers)
        url+='&maxWalkDistance='+str(maxWalkDistance)+'&clampInitialWait=-1'+cutoff
        headers={'Accept':'application/json'}  
        req=requests.get(url=url,headers=headers)
        js=req.json()
        iso=gpd.GeoDataFrame.from_features(js,crs={'init': 'epsg:4326'})
        bk['T'+arrt[0:2]+arrt[3:5]]=999
        cut=range(cutoffend,cutoffstart,-cutoffinterval)
        if (iso.loc[iso['time']==cut[0]*60,'geometry'].notna()).bool():
            try:
                bkiso=gpd.sjoin(bk,iso.loc[iso['time']==cut[0]*60],how='left',op='within')
                bkiso=bkiso.loc[pd.notnull(bkiso['time']),'blockid']
                bk.loc[bk['blockid'].isin(bkiso),'T'+arrt[0:2]+arrt[3:5]]=cut[0]-cutoffinterval/2
            except ValueError:
                print(destination.loc[i,'id']+' '+arrt+' '+
                      str(cut[0])+'-minute isochrone has no Census Block in it!')
            for k in range(0,(len(cut)-1)):
                if (iso.loc[iso['time']==cut[k+1]*60,'geometry'].notna()).bool():
                    if len(bk.loc[bk['T'+arrt[0:2]+arrt[3:5]]==cut[k]-cutoffinterval/2])!=0:
                        try:
                            bkiso=gpd.sjoin(bk.loc[bk['T'+arrt[0:2]+arrt[3:5]]==cut[k]-cutoffinterval/2],
                                            iso.loc[iso['time']==cut[k+1]*60],how='left',op='within')
                            bkiso=bkiso.loc[pd.notnull(bkiso['time']),'blockid']
                            bk.loc[bk['blockid'].isin(bkiso),'T'+arrt[0:2]+arrt[3:5]]=cut[k+1]-cutoffinterval/2
                        except ValueError:
                            print(destination.loc[i,'id']+' '+arrt+' '+
                                  str(cut[k+1])+'-minute isochrone has no Census Block in it!')
                    else:
                        print(destination.loc[i,'id']+' '+arrt+' '+
                              str(cut[k])+'-minute isochrone has no Census Block in it!')
                else:
                    print(destination.loc[i,'id']+' '+arrt+' '+
                          str(cut[k+1])+'-minute isochrone has no geometry!')
        else:
            print(destination.loc[i,'id']+' '+arrt+' '+
                  str(cut[0])+'-minute isochrone has no geometry!')
        bk['T'+arrt[0:2]+arrt[3:5]]=bk['T'+arrt[0:2]+arrt[3:5]].replace(999,np.nan)
        bk=bk.drop(['lat','long','geometry'],axis=1)
        bk=bk.set_index('blockid')
        return bk
    elif destination.loc[i,'direction']=='out':
        url=doserver+'otp/routers/default/isochrone?batch=true&mode=WALK,TRANSIT'
        url+='&fromPlace='+destination.loc[i,'latlong']
        url+='&date='+typicaldate+'&time='+arrt+'&maxTransfers='+str(maxTransfers)
        url+='&maxWalkDistance='+str(maxWalkDistance)+'&clampInitialWait=0'+cutoff
        headers={'Accept':'application/json'}  
        req=requests.get(url=url,headers=headers)
        js=req.json()
        iso=gpd.GeoDataFrame.from_features(js,crs={'init': 'epsg:4326'})
        bk['T'+arrt[0:2]+arrt[3:5]]=999
        cut=range(cutoffend,cutoffstart,-cutoffinterval)
        if (iso.loc[iso['time']==cut[0]*60,'geometry'].notna()).bool():
            try:     
                bkiso=gpd.sjoin(bk,iso.loc[iso['time']==cut[0]*60],how='left',op='within')
                bkiso=bkiso.loc[pd.notnull(bkiso['time']),'blockid']
                bk.loc[bk['blockid'].isin(bkiso),'T'+arrt[0:2]+arrt[3:5]]=cut[0]-cutoffinterval/2
            except ValueError:
                print(destination.loc[i,'id']+' '+arrt+' '+
                      str(cut[0])+'-minute isochrone has no Census Block in it!')            
            for k in range(0,(len(cut)-1)):
                if (iso.loc[iso['time']==cut[k+1]*60,'geometry'].notna()).bool():
                    if len(bk.loc[bk['T'+arrt[0:2]+arrt[3:5]]==cut[k]-cutoffinterval/2])!=0:
                        try:
                            bkiso=gpd.sjoin(bk.loc[bk['T'+arrt[0:2]+arrt[3:5]]==cut[k]-cutoffinterval/2],
                                            iso.loc[iso['time']==cut[k+1]*60],how='left',op='within')
                            bkiso=bkiso.loc[pd.notnull(bkiso['time']),'blockid']
                            bk.loc[bk['blockid'].isin(bkiso),'T'+arrt[0:2]+arrt[3:5]]=cut[k+1]-cutoffinterval/2
                        except ValueError:
                            print(destination.loc[i,'id']+' '+arrt+' '+
                                  str(cut[k+1])+'-minute isochrone has no Census Block in it!')
                    else:
                        print(destination.loc[i,'id']+' '+arrt+' '+
                              str(cut[k])+'-minute isochrone has no Census Block in it!')
                else:
                    print(destination.loc[i,'id']+' '+arrt+' '+
                          str(cut[k+1])+'-minute isochrone has no geometry!')
        else:
            print(destination.loc[i,'id']+' '+arrt+' '+
                  str(cut[0])+'-minute isochrone has no geometry!')        
        bk['T'+arrt[0:2]+arrt[3:5]]=bk['T'+arrt[0:2]+arrt[3:5]].replace(999,np.nan)
        bk=bk.drop(['lat','long','geometry'],axis=1)
        bk=bk.set_index('blockid')
        return bk
Example #35
 def test_sjoin_inner(self):
     df = sjoin(self.pointdf, self.polydf, how="inner")
     self.assertEquals(df.shape, (11, 8))
Example #36
def go(acs_zips_csv, fac_path, shp_file, zip_to_zta_csv):

    df_acs = pd.read_csv(acs_zips_csv)
    df_acs['zip code tabulation area'] = df_acs[
        'zip code tabulation area'].apply(lambda x: '{0:0>5}'.format(x))

    race_cat = {
        'B02001_002E': 'white alone',
        'B02001_003E': 'black alone',
        'B02001_004E': 'native alone',
        'B02001_005E': 'asian alone',
        'B02001_006E': 'pacific alone',
        'B02001_007E': 'other alone',
        'B02001_008E': 'two or more',
        'B02001_009E': 'two or more some other'
    }

    inc_cat = {
        'B19001_002E': 'less10k',
        'B19001_003E': '10kto15k',
        'B19001_004E': '15kto20k',
        'B19001_005E': '20kto25k',
        'B19001_006E': '25kto30k',
        'B19001_007E': '30kto35k',
        'B19001_008E': '35kto40k',
        'B19001_009E': '40kto45k',
        'B19001_010E': '45kto50k',
        'B19001_011E': '50kto55k',
        'B19001_012E': '60kto75k',
        'B19001_013E': '75kto100k',
        'B19001_014E': '100kto125k',
        'B19001_015E': '125kto145k',
        'B19001_016E': '150kto200k',
        'B19001_017E': '200kmore'
    }

    family_cat = {
        'B11016_003E': '2 person',
        'B11016_004E': '3 person',
        'B11016_005E': '4 person',
        'B11016_006E': '5 person',
        'B11016_007E': '6 person',
        'B11016_008E': '7plusperson'
    }

    ratio_pov_cat_fam = {
        'B17026_002E': 'under_p5',
        'B17026_003E': 'p5top74',
        'B17026_004E': 'p75top99',
        'B17026_005E': '1to1p24',
        'B17026_006E': '1p25to1p49',
        'B17026_007E': '1p50to1p74',
        'B17026_008E': '1p75to1p84',
        'B17026_009E': '1p85to1p99',
        'B17026_010E': '2to2p99',
        'B17026_011E': '3to3p99',
        'B17026_012E': '4to4p99',
        'B17026_013E': '5andover'
    }

    ratio_pov_cat_peop = {
        'C17002_002E': 'under_p5',
        'C17002_003E': 'p5top99',
        'C17002_004E': '1to1p24',
        'C17002_005E': '1p25to1p49',
        'C17002_006E': '1p50to1p84',
        'C17002_007E': '1p85to1p99',
        'C17002_008E': '2andover'
    }

    pop_cat = {'B01003_001E': 'population'}
    med_inc_cat = {'B19013_001E': 'median income'}

    educ_cat = {
        'B15003_002E': 'no school',
        'B15003_003E': 'nursery',
        'B15003_004E': 'kindergarten',
        'B15003_005E': '1stgrade',
        'B15003_006E': '2ndgrade',
        'B15003_007E': '3rdgrade',
        'B15003_008E': '4thgrade',
        'B15003_009E': '5thgrade',
        'B15003_010E': '6thgrade',
        'B15003_011E': '7thgrade',
        'B15003_012E': '8thgrade',
        'B15003_013E': '9thgrade',
        'B15003_014E': '10thgrade',
        'B15003_015E': '11thgrade',
        'B15003_016E': '12thgrade',
        'B15003_017E': 'regular_hsd',
        'B15003_018E': 'ged',
        'B15003_019E': 'some college',
        'B15003_020E': 'some college no degree',
        'B15003_021E': 'associate degree',
        'B15003_022E': 'bachelor',
        'B15003_023E': 'master',
        'B15003_024E': 'professional school',
        'B15003_025E': 'doctorate'
    }

    dcat = {
        'B02001_00': race_cat,
        'B19001_00': inc_cat,
        'B11016_00': family_cat,
        'B17026_00': ratio_pov_cat_fam,
        'C17002_00': ratio_pov_cat_peop
    }

    unique_cat = {'B01003_00': pop_cat, 'B19013_00': med_inc_cat}

    fac = exp.read_data(fac_path)

    geo_zcta = gpd.read_file(shp_file)

    fac = fac.astype({"ZIP_CODE": str})
    fac['ZIP_CODE'] = fac['ZIP_CODE'].str.split('-').str[0]

    pref = {
        'Race': 'B02001_00',
        'Income': 'B19001_00',
        'Family Size': 'B11016_00',
        'Income Poverty': 'B17026_00'
    }

    zip_to_zta = pd.read_excel(zip_to_zta_csv,
                               converters={
                                   'ZIP_CODE': '{:0>5}'.format,
                                   'ZCTA': '{:0>5}'.format
                               })

    drop_fac = [
        'ALAND10', 'AWATER10', 'INTPTLAT10', 'INTPTLON10', 'LATITUDE83',
        'LONGITUDE83', 'MTFCC10', 'PO_NAME', 'STATE', 'ZIP_TYPE',
        'Zip_join_type', 'index_right'
    ]

    fac_miss, fac_nonmiss = af.keep_miss_nonmiss(fac,
                                                 ['LONGITUDE83', 'LATITUDE83'])
    fac_nonmiss_geom = fac_nonmiss.apply(
        lambda x: Point([x['LONGITUDE83'], x['LATITUDE83']]), axis=1)
    geo_fac_nonmiss = gpd.GeoDataFrame(fac_nonmiss, geometry=fac_nonmiss_geom)
    geo_fac_nonmiss_zcta = gpd.sjoin(geo_fac_nonmiss,
                                     geo_zcta,
                                     how="left",
                                     op='intersects')
    geo_fac_nonmiss_acs = pd.merge(geo_fac_nonmiss_zcta,
                                   df_acs,
                                   how='inner',
                                   right_on=['zip code tabulation area'],
                                   left_on=['ZCTA5CE10'])

    fac_miss_zta = pd.merge(fac_miss, zip_to_zta, how='left', on="ZIP_CODE")
    geo_fac_miss_acs = pd.merge(fac_miss_zta,
                                df_acs,
                                how='inner',
                                right_on=['zip code tabulation area'],
                                left_on=['ZCTA'])
    entire_fac = pd.concat([geo_fac_nonmiss_acs, geo_fac_miss_acs])
    af.drop_features(entire_fac, drop_fac)

    for key, val in dcat.items():
        entire_fac = af.unite_the_perc(key, entire_fac, val)

    for _, val in unique_cat.items():
        af.change_cat(entire_fac, val)

    entire_fac.to_csv('acs_joined.csv')

    return entire_fac
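# Hedged usage sketch for go() above; the file names are hypothetical placeholders,
# not paths from the original project. Expected inputs: an ACS extract keyed by
# 'zip code tabulation area', a facilities table with ZIP_CODE and lat/lon columns,
# a ZCTA shapefile, and a ZIP-to-ZCTA crosswalk workbook.
# entire_fac = go('acs_zcta_extract.csv',
#                 'facilities.csv',
#                 'tl_2019_us_zcta510.shp',
#                 'zip_to_zcta_crosswalk.xlsx')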
Ejemplo n.º 37
0
 def test_sjoin_outer(self):
     df = sjoin(self.pointdf, self.polydf, how="outer")
     self.assertEquals(df.shape, (21,8))
Ejemplo n.º 38
0
geofences = gpd.read_file(zipfile)

## load gps points and convert to geopandas dataframe
gps_points = pd.read_csv("Nestle_March_Kodigo_GPS-Formatted-v2.csv")
gps_points = gpd.GeoDataFrame(
    gps_points,
    geometry=gpd.points_from_xy(gps_points["longitude"],
                                gps_points["latitude"]),
)

## add crs to gps_points data
gps_points.set_crs(epsg=4326, inplace=True)

### Spatial join
points_inside_geofence = gpd.sjoin(gps_points,
                                   geofences,
                                   how="inner",
                                   op="within")

# select columns and rename
points_inside_geofence = points_inside_geofence.loc[:, [
    "plateno", "name", "created_left"
]]
points_inside_geofence = points_inside_geofence.rename(
    columns={
        "name": "geofence_name",
        "created_left": "datestamp"
    })

# convert datestamp to datetime, then sort
points_inside_geofence["datestamp"] = pd.to_datetime(
    points_inside_geofence["datestamp"])
data = points_inside_geofence.sort_values(by="datestamp")
Ejemplo n.º 39
0
                                                             miny, maxy)
logger.debug('Where clause for query: {}'.format(where))

count = count_table(danco_lyr, where=where, table=True, noh=noh)
logger.debug('Count for table with where clause: {:,}'.format(count))

#%%
usfs_fps = gpd.GeoDataFrame()
offset = 0
while offset < count:
    logger.debug('Loading records: {:,} - {:,}'.format(offset, offset+limit))
    # Load footprints
    fps = query_footprint(danco_lyr, where=where, limit=limit, offset=offset, noh=noh)
    # Intersect to find USFS footprints
    logger.debug('Identifying records on USFS land...')
    slice_usfs_fps = gpd.sjoin(fps, usfs, op='within')
    logger.debug('USFS records found: {}'.format(len(slice_usfs_fps)))
    # Merge to master dataframe
    usfs_fps = pd.concat([usfs_fps, slice_usfs_fps])
    logger.debug('Total USFS records found: {}'.format(len(usfs_fps)))
    # Increase offset
    offset += limit

usfs_fps_catids = set(usfs_fps['catalogid'])
#%%
# Remove onhand IDs
# oh = onhand_ids()
# mfp_ids = pgc_ids()
# usfs_noh = [x for x in usfs_fps_catids if x not in oh]
# usfs_nmfp = [x for x in usfs_fps_catids if x not in mfp_ids]
Ejemplo n.º 40
0
def main(argv):

    parser = argparse.ArgumentParser('Foursquare mapping to a spatial grid.')

    parser.add_argument('-i', '--input',
                        help='POIs file with relative coordinates.',
                        action='store',
                        dest='input',
                        required=True,
                        type=str)

    parser.add_argument('-p', '--prefix',
                        action='store',
                        dest='prefix',
                        help='Prefix for the filename specifying the city name.',
                        required=True,
                        type=str)

    parser.add_argument('-g', '--grid',
                        help='Input grid for the mapping. If crs is not WGS84, specify it with the param -c',
                        action='store',
                        dest='grid',
                        required=True,
                        type=str)

    parser.add_argument('-c', '--crs',
                        help='Coordinate Reference System for the input grid. It is requested only if it is different from WGS84.',
                        action='store',
                        dest='crs',
                        default='epsg:4326',
                        type=str)

    parser.add_argument('-o', '--outputfolder',
                        help='Output folder where to save the mapped file.',
                        action='store',
                        dest='outputfolder',
                        required=True,
                        type=str)

    parser.add_argument('-lat', '--latitude',
                        help='Latitude name.',
                        action='store',
                        dest='latitude',
                        default='latitude',
                        type=str)

    parser.add_argument('-long', '--longitude',
                        help='Longitude name.',
                        action='store',
                        dest='longitude',
                        default='longitude',
                        type=str)

    parser.add_argument('-v', '--verbose',
                        help='Level of output verbosity.',
                        action='store',
                        dest='verbosity',
                        default=0,
                        type=int,
                        nargs="?")

    args = parser.parse_args()

    latitude = args.latitude
    longitude = args.longitude

    if(args.verbosity == 1):
        logger.setLevel(logging.INFO)

    elif(args.verbosity == 2):
        logger.setLevel(logging.DEBUG)

    # Load the grid
    logger.info("Load the grid")
    gdf = gpd.GeoDataFrame.from_file(args.grid)
    gdf.crs = {'init': args.crs}

    if args.crs != 'epsg:4326':
        gdf = gdf.to_crs({'init': 'epsg:4326'})

    # Load POIs
    logger.info("Load POIs")
    df = pd.DataFrame(pd.read_csv(args.input, sep=",", low_memory=False))

    # Create Point from latitude, longitude pairs and build a GeoDataFrame
    logger.info("Build geometry")
    geometry = [Point(xy) for xy in zip(df[longitude], df[latitude])]
    data = gpd.GeoDataFrame(df, crs={'init': 'epsg:4326'}, geometry=geometry)
    data.to_crs(gdf.crs, inplace=True)

    # Check Geometry Validity
    ans = data.geometry.is_valid
    invalid = ans[~ans]
    data.drop(invalid.index, axis=0, inplace=True)

    # Spatial Join with the grid to associate each entry to the related cell ('within') - LEFT
    join = gpd.sjoin(gdf[['cellID', 'geometry']], data, how='left', op='within')

    # Remove additional columns
    join.drop(['index_right', 'geometry'], axis=1, inplace=True)

    # Save output
    logger.info("Save output file")
    outputfile = os.path.abspath(os.path.join(args.outputfolder, args.prefix + "_mapped_foursquare_pois.csv"))
    join.to_csv(outputfile, index=False, sep='\t', float_format='%.6f')
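# Hedged usage sketch: one possible invocation of the CLI defined above (the script
# and file names are hypothetical). The grid file must contain a 'cellID' column;
# pass -c only if the grid CRS is not WGS84.
#
#   python map_foursquare_pois.py \
#       -i foursquare_pois.csv \
#       -p london \
#       -g grid_500m.geojson \
#       -c epsg:32630 \
#       -o data/processed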
Ejemplo n.º 41
0
# In[16]:

#read shape file
state_shp = gpd.read_file('./states_21basic/states.shp')

# In[17]:

#setting the same coordinate system as the states shapefile. This ensures the points in the dataset align with the polygons
#in the shapefile, so that the locations are pinned properly on the map
USA_covid_19_point = USA_covid_19_point.to_crs(state_shp.crs)

# In[18]:

#spatial join of state shapefile and covid point data
USA_covid_19_join = gpd.sjoin(USA_covid_19_point,
                              state_shp,
                              how="inner",
                              op='intersects')

# In[19]:

#see output
print(USA_covid_19_join.head())

# In[20]:

#group states
confirmed = pd.DataFrame(
    USA_covid_19_join[['state', 'index_right',
                       'confirmed']].groupby('index_right')['confirmed'].sum())
deaths = pd.DataFrame(
    USA_covid_19_join[['state', 'index_right',
Ejemplo n.º 42
0
def rasterize(tif,
              geojson,
              out,
              size=DEFAULT_IMG_WINDOW,
              stride=DEFAULT_STRIDE_WINDOW):
    """
	start rasterizing tif file. here we have window with (args.size) and chunking tif image
	with size. this image are numpy array which is not stored in disk at this time. Geojson
	will be applyed on these image chunks with proper transformation for each polygone.

	chunks and masked images will be used as training dataset for Deep Learning model.
	tif image can be extra large, tools is able to handle it.

	Arg:
		tif - tif image path
		geojson - geojson file
		out - output directory
		size - size of window that user wants, default (h=512, w=512)  
		stride - stride size for sliding window, default same as window
	"""

    # read tif
    rst = rasterio.open(tif)

    # read geojson
    scene_labels_gdf = gpd.read_file(geojson)

    out_tif = os.path.join(out, 'tif')  # make tif dir
    out_mask = os.path.join(out, 'mask')  # make mask dir

    os.makedirs(out_tif, exist_ok=True)
    os.makedirs(out_mask, exist_ok=True)

    total = imageUtils.total_chunks_in_image(tif, window=size, stride=stride)

    with tqdm(total=total, desc='Masking progress') as pbar:
        # get image from sliding window
        for each in imageUtils.get_image_chunks(tif,
                                                window_size=size,
                                                stride=stride):

            # for win, arr in get_image_chunks(rst, window_size=(win_sz, win_sz)):
            img_window, img_arr, index = each[0], each[1], each[2]
            pbar.n = index
            pbar.refresh()

            # window bounds as (minx, miny, maxx, maxy), e.g.
            # (807592.8560103609, 620885.095643373, 807611.1959577247, 620903.4357975163)
            bounds = rasterio.windows.bounds(img_window, rst.meta['transform'])

            # shapely.geometry.polygon.Polygon for full image chunk (512, 512) in tiff
            win_box = box(*bounds)
            win_box_gdf = gpd.GeoDataFrame(geometry=[win_box],
                                           crs=rst.meta['crs'])
            win_box_gdf = win_box_gdf.to_crs(CRS.from_epsg(4326))

            try:
                # clip the geojson labels to the window bounding box (EPSG:4326, WGS84)
                gdf_chip = gpd.sjoin(scene_labels_gdf,
                                     win_box_gdf,
                                     how='inner',
                                     op='intersects')
            except AttributeError:
                # skip this window if the join fails; otherwise gdf_chip below
                # would be undefined or carry over from the previous window
                continue

            # check if chip has data
            if not gdf_chip.empty:
                burn_val = 255
                shapes = [(geom, burn_val) for geom in gdf_chip.geometry]

                # transform
                chip_tfm = rasterio.transform.from_bounds(
                    *win_box_gdf.bounds.values[0], *size)
                label_arr = rasterio.features.rasterize(shapes,
                                                        out_shape=size,
                                                        transform=chip_tfm)

                img_tif_name = os.path.join(out_tif,
                                            str(index))  # make tif dir
                img_mask_name = os.path.join(out_mask,
                                             str(index))  # make mask dir

                # change dimension of chip image to save as tif
                win_arr = np.moveaxis(img_arr, 0, 2)
                tiffUtils.save_as_tif(win_arr,
                                      chip_tfm=chip_tfm,
                                      name=img_tif_name)

                # save mask image as png
                imageUtils.save_as_png(label_arr, img_mask_name)
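# Hedged usage sketch for rasterize() above (paths are hypothetical). It chips a
# large GeoTIFF with a sliding window, intersects each window box with the GeoJSON
# labels via sjoin, and writes matching image/mask pairs under out/tif and out/mask.
# rasterize(tif='scene.tif',
#           geojson='building_footprints.geojson',
#           out='training_chips',
#           size=(512, 512),
#           stride=(512, 512))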
Ejemplo n.º 43
0
def aggregateByGrid(df, field, summary, gridSize):
    """
    Aggregates the specified field with chosen summary type and user
        defined grid size. returns aggregated grids with summary

    Parameters
    ----------
    df : geopandas dataframe
    field : string
        field to be summarized.
    summary : string
        type of summary to be computed, e.g. min, max, sum, median
    gridSize : float
        the size of grid on same unit as geodataframe coordinates.

    Returns
    -------
    geodataframe
        Aggregated grids with summary on it

    """
    def round_down(num, divisor):
        return floor(num / divisor) * divisor

    def round_up(num, divisor):
        return ceil(num / divisor) * divisor

    # Get crs from data
    sourceCRS = df.crs
    targetCRS = "EPSG:3857"
    # Reproject to Web Mercator
    df = df.to_crs(targetCRS)
    # Get bounds
    xmin, ymin, xmax, ymax = df.total_bounds
    print(xmin, ymin, xmax, ymax)
    height, width = gridSize, gridSize
    top, left = round_up(ymax, height), round_down(xmin, width)
    bottom, right = round_down(ymin, height), round_up(xmax, width)

    rows = int((top - bottom) / height)+1
    cols = int((right - left) / width)+1

    XleftOrigin = left
    XrightOrigin = left + width
    YtopOrigin = top
    YbottomOrigin = top - height
    polygons = []
    for i in range(cols):
        Ytop = YtopOrigin
        Ybottom = YbottomOrigin
        for j in range(rows):
            polygons.append(Polygon([(XleftOrigin, Ytop),
                                     (XrightOrigin, Ytop),
                                     (XrightOrigin, Ybottom),
                                     (XleftOrigin, Ybottom)]))
            Ytop = Ytop - height
            Ybottom = Ybottom - height
        XleftOrigin = XleftOrigin + width
        XrightOrigin = XrightOrigin + width

    grid = gpd.GeoDataFrame({'geometry': polygons})
    grid.crs = df.crs

    # Assign gridid
    numGrid = len(grid)
    grid['gridId'] = list(range(numGrid))

    # Identify gridId for each point
    points_identified = gpd.sjoin(df, grid, op='within')

    # group points by gridid and calculate mean Easting,
    # store it as dataframe
    # delete if field already exists
    if field in grid.columns:
        del grid[field]
    grouped = points_identified.groupby('gridId')[field].agg(summary)
    grouped_df = pd.DataFrame(grouped)

    new_grid = grid.join(grouped_df, on='gridId').fillna(0)
    grid = new_grid.to_crs(sourceCRS)
    summarized_field = summary+"_"+field
    final_grid = grid.rename(columns={field: summarized_field})
    final_grid = final_grid[final_grid[summarized_field] > 0].sort_values(
        by=summarized_field, ascending=False)
    final_grid[summarized_field] = round(final_grid[summarized_field], 1)
    final_grid['x_centroid'], final_grid['y_centroid'] = \
        final_grid.geometry.centroid.x, final_grid.geometry.centroid.y
    return final_grid
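# Hedged usage sketch for aggregateByGrid() (data and field names are hypothetical).
# gridSize is in the units of EPSG:3857, i.e. metres, and the input GeoDataFrame
# must have a CRS set so it can be reprojected.
# import geopandas as gpd
# points = gpd.read_file('gps_points.geojson')
# grid_summary = aggregateByGrid(points, field='speed', summary='mean', gridSize=500)
# grid_summary.plot(column='mean_speed', legend=True)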
Ejemplo n.º 44
0
    locations_df = gpd.GeoDataFrame(locations_df, geometry=geom)
    locations_df = locations_df[['location_id', 'geometry']]
    locations_df.crs = {'init': 'epsg:4326'}
    print(' Creating locations data frame [DONE]')
    
    # Read in the County boundaries
    print(' Reading county shapefile', end="\r", flush=True)
    counties_df = gpd.read_file('gz_2010_us_050_00_500k.shp')
    counties_df = counties_df.to_crs(locations_df.crs)
    counties_df['county_fips'] = counties_df['STATE'] + counties_df['COUNTY']
    counties_df['state_fips'] = counties_df['STATE']
    counties_df = counties_df[['county_fips', 'state_fips', 'geometry']]
    print(' Reading county shapefile [DONE]')

    print(' Performing spatial join', end="\r", flush=True)
    gdf = gpd.sjoin(locations_df, counties_df, op='within')
    gdf.reset_index(inplace=True)
    gdf = gdf[['location_id', 'state_fips', 'county_fips']]
    gdf.to_sql('location', con=db, index=False)
    print(' Performing spatial join [DONE]')

locations = gdf.set_index('location_id').to_dict('index')

# Get the list of location ids we need to search for
location_ids = list(gdf.location_id)

try:
    c.execute('SELECT * FROM patent_location LIMIT 1')
except:
    c.execute('CREATE TABLE patent_location (patent_id text, location_id text)')
    values_to_insert = list()
Ejemplo n.º 45
0
def spatioTemporalAggregation(df, field, summary, gridSize):
    """
    Aggregates the given field on hour and weekday basis.
    Prepares data for mosaic plot
    FOR THIS TO WORK YOU NEED TO INSTALL RTree or Rtree-linux!!!
    # TODO This function is poorly performing
    Parameters
    ----------
    df : geopandas dataframe
    field : string
        field to be summarized.
    summary : string
        type of summary to be computed, e.g. min, max, sum, median
    gridSize : float
        the size of grid on same unit as geodataframe coordinates.

    Returns
    -------
    geodataframes: one each for larger grid and other for subgrids
        (for visualization purpose only)
        Aggregated grids with summary on it

    """
    def round_down(num, divisor):
        return floor(num / divisor) * divisor

    def round_up(num, divisor):
        return ceil(num / divisor) * divisor

    # Get crs from data
    sourceCRS = df.crs
    targetCRS = "epsg:3857"
    # Reproject to Web Mercator
    df = df.to_crs(targetCRS)

    # Get bounds
    xmin, ymin, xmax, ymax = df.total_bounds
    height, width = gridSize, gridSize
    top, left = round_up(ymax, height), round_down(xmin, width)
    bottom, right = round_down(ymin, height), round_up(xmax, width)

    rows = int((top - bottom) / height)+1
    cols = int((right - left) / width)+1

    XleftOrigin = left
    XrightOrigin = left + width
    YtopOrigin = top
    YbottomOrigin = top - height
    polygons = []

    for i in range(cols):
        Ytop = YtopOrigin
        Ybottom = YbottomOrigin
        for j in range(rows):
            polygons.append(Polygon(
                [(XleftOrigin, Ytop), (XrightOrigin, Ytop),
                 (XrightOrigin, Ybottom), (XleftOrigin, Ybottom)]))
            Ytop = Ytop - height
            Ybottom = Ybottom - height
        XleftOrigin = XleftOrigin + width
        XrightOrigin = XrightOrigin + width

    grid = gpd.GeoDataFrame({'geometry': polygons})
    grid.crs = targetCRS

    # Assign gridid
    numGrid = len(grid)
    grid['gridId'] = list(range(numGrid))

    # Identify gridId for each point

    df['hour'] = df['time'].apply(
        lambda x: datetime.datetime.strptime(
            x, '%Y-%m-%dT%H:%M:%S+00:00')).dt.hour
    df['weekday'] = df['time'].apply(
        lambda x: datetime.datetime.strptime(
            x, '%Y-%m-%dT%H:%M:%S+00:00')).dt.dayofweek

    # df['hour'] = pd.to_datetime(df['time']).dt.hour
    # df['weekday'] = pd.to_datetime(df['time']).dt.dayofweek

    points_identified = gpd.sjoin(df, grid, op='within')

    # group points by gridid and calculate mean Easting,
    # store it as dataframe
    # delete if field already exists
    if field in grid.columns:
        del grid[field]

    # Aggregate by weekday, hour and grid
    grouped = points_identified.groupby(
        ['gridId', 'weekday', 'hour']).agg({field: [summary]})
    grouped = grouped.reset_index()
    grouped.columns = grouped.columns.map("_".join)
    modified_fieldname = field+"_"+summary

    # Create Subgrids
    subgrid, mainGrid, rowNum, columnNum, value = [], [], [], [], []
    unikGrid = grouped['gridId_'].unique()
    print('running; wait till you see "finished"')
    for currentGrid in unikGrid:
        dataframe = grid[grid['gridId'] == currentGrid]
        xmin, ymin, xmax, ymax = dataframe.total_bounds
        xminn, xmaxx, yminn, ymaxx = xmin + \
            (xmax-xmin)*0.05, xmax-(xmax-xmin)*0.05, ymin + \
            (ymax-ymin)*0.05, ymax-(ymax-ymin)*0.05
        rowOffset = (ymaxx-yminn)/24.0
        colOffset = (xmaxx - xminn)/7.0
        tmp = (grouped['gridId_'] == currentGrid)
        for i in range(7):
            tmp2 = (grouped['weekday_'] == i)
            for j in range(24):
                topy, bottomy, leftx, rightx = ymaxx-j*rowOffset, ymaxx - \
                    (j+1)*rowOffset, xminn+i * \
                    colOffset, xminn+(i+1)*colOffset
                subgrid.append(
                    Polygon([(leftx, topy), (rightx, topy),
                             (rightx, bottomy), (leftx, bottomy)]))
                mainGrid.append(currentGrid)
                rowNum.append(j)
                columnNum.append(i)
                if len(grouped[tmp
                       & tmp2
                       & (grouped['hour_'] == j)]) != 0:
                    this_value = grouped[
                        tmp
                        & tmp2
                        & (grouped['hour_'] == j)].iloc[0][
                            modified_fieldname]
                    value.append(this_value)
                else:
                    value.append(np.nan)
    subgrid_gpd = gpd.GeoDataFrame({'geometry': subgrid})
    subgrid_gpd.crs = targetCRS
    # Reproject back to the source CRS
    subgrid_gpd = subgrid_gpd.to_crs(sourceCRS)
    subgrid_gpd['gridId'] = mainGrid
    subgrid_gpd['Weekday'] = columnNum
    subgrid_gpd['hour'] = rowNum
    subgrid_gpd['gridId'] = subgrid_gpd.apply(lambda x: str(
        x['gridId'])+"_"+str(x['Weekday'])+"_"+str(x['hour']), axis=1)
    subgrid_gpd[modified_fieldname] = value
    subgrid_gpd = subgrid_gpd.dropna()
    grid = grid.to_crs(sourceCRS)
    grid = grid[grid['gridId'].isin(unikGrid)]
    print('finished')
    return grid, subgrid_gpd
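# Hedged usage sketch for spatioTemporalAggregation() (data and field names are
# hypothetical). The input needs a 'time' column formatted like
# '2021-03-01T08:15:00+00:00' and an Rtree-backed spatial index for the sjoin.
# grid, subgrids = spatioTemporalAggregation(points, field='speed',
#                                            summary='mean', gridSize=2000)
# subgrids.plot(column='speed_mean', legend=True)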
Ejemplo n.º 46
0
def main():

    logging.config.fileConfig(fname=os.path.join('config', 'log.config'),
                              disable_existing_loggers=False)

    # Get the logger specified in the file
    f_handler = logging.FileHandler(os.path.join('logs',
                                                 'generate_fusiun.log'))
    f_handler.setLevel(logging.DEBUG)
    log = logging.getLogger(__name__)
    f_format = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
    f_handler.setFormatter(f_format)
    log.addHandler(f_handler)

    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-p',
        '--prob-minimum',
        default=0.4,
        dest='prob_minimum',
        help='Minimum probability to be considered as fire grids')
    parser.add_argument('-s',
                        '--start-date',
                        dest='start_date_str',
                        help='Start date')
    parser.add_argument('-e',
                        '--end-date',
                        dest='end_date_str',
                        help='End date')
    parser.add_argument('--ahi-hotspot-folder',
                        dest='ahi_folder',
                        default=os.path.join('..', 'data', 'raw', 'hotspots',
                                             'ahi'),
                        help='AHI hotspot folder')
    parser.add_argument('--viirs-hotspot-folder',
                        dest='viirs_folder',
                        default=os.path.join('..', 'data', 'raw', 'hotspots',
                                             'viirs'),
                        help='VIIRS hotspot folder')
    parser.add_argument('--modis-hotspot-folder',
                        dest='modis_folder',
                        default=os.path.join('..', 'data', 'raw', 'hotspots',
                                             'modis'),
                        help='MODIS hotspot folder')
    parser.add_argument('-o',
                        '--output-folder',
                        dest="out_file_path",
                        default=os.path.join('..', 'data'),
                        help="Specify output folder")
    parser.add_argument('-g',
                        '--grid',
                        dest="grid_shp",
                        default=os.path.join(
                            '..', 'references', 'shapefile',
                            '2km_grid_ASEAN_land_clipped.shp'),
                        help="Specify grid .shp file")
    parser.add_argument('-n',
                        '--name',
                        dest="prefix_name",
                        default='FUSIUN_NRT_2km_',
                        help="Prefix for output file names")

    args = parser.parse_args()
    log.debug(args)
    prob_minimum = args.prob_minimum

    with open(os.path.join('config', 'config.json'), "r") as read_file:
        json_config = json.load(read_file)

    clipping_box = json_config['parameters']['clipping_box']
    sat_resolution_meter = json_config['sat_resolution_meter']
    shapefile_path = json_config['shapefile']['path']
    bounding_box = json_config['plotting']['bounding_box']
    dpi = json_config['plotting']['dpi']
    fusiun_ml_model_fpath = json_config['fusiun_ml_model']['path']
    fusiun_predict_features = json_config['fusiun_ml_model'][
        'predict_features']
    h8_ml_model_fpath = json_config['h8_ml_model']['path']
    h8_predict_features = json_config['h8_ml_model']['predict_features']
    low_prob_thres = json_config['parameters']['low_prob_thres']
    med_prob_thres = json_config['parameters']['med_prob_thres']

    #read in grid shapefile
    try:
        df_grid = geopandas.read_file(args.grid_shp)
        df_grid.crs = {'init': 'epsg:3857'}
        log.debug(args.grid_shp + ' loaded successfully!')
    except Exception as e:
        log.error(args.grid_shp + ' cannot be loaded !')
        exit()

    start_date = datetime.strptime(args.start_date_str, "%Y-%m-%d %H:%M")
    date_process = start_date
    end_date = datetime.strptime(args.end_date_str, "%Y-%m-%d %H:%M")

    geo_hs = geohotspot.GeoHotspot()

    log.info('Reading hotspot .txt files')

    while date_process <= (end_date + timedelta(days=1)):
        h8_files = os.path.join(
            args.ahi_folder, "H08_*" + date_process.strftime('%Y%m%d_') +
            "*_L2WLFbet_FLDK.06001_06001.csv")
        geo_hs.parse_jaxa_hotspot_txt(file_path=h8_files)

        jp1_files = os.path.join(args.viirs_folder,
                                 date_process.strftime('%Y%m%d') + "*JP1*.txt")
        geo_hs.parse_viirs_afedr_txt(file_path=jp1_files, sat_name='NOAA20')

        npp_files = os.path.join(args.viirs_folder,
                                 date_process.strftime('%Y%m%d') + "*NPP*.txt")
        geo_hs.parse_viirs_afedr_txt(file_path=npp_files, sat_name='NPP')

        modis_files = os.path.join(
            args.modis_folder,
            "*14." + date_process.strftime('%y%j') + "*.txt")
        geo_hs.parse_modis_mod14_txt(file_path=modis_files)

        date_process = date_process + timedelta(days=1)

    # remove hotspots outside of clipping area
    geo_hs.clip_hotspot(clipping_box)
    # reject hotspots due to sun glint
    #geo_hs.reject_sunglint_hs('Himawari-8/9', max_sunglint_angle)

    geo_df = geo_hs.hs_df.copy()
    geo_df['aqua_weight'] = 0.0
    geo_df['terra_weight'] = 0.0
    geo_df['n20_weight'] = 0.0
    geo_df['npp_weight'] = 0.0
    geo_df['geo_weight'] = 0.0
    geo_df['confidence'] = geo_df['confidence'].fillna(0)
    geo_df = geo_df.astype({'geo_weight': 'float64', 'confidence': 'float64'})

    geo_df['date'] = pd.to_datetime(geo_df['date'], format="%d/%m/%Y %H:%M:%S")
    # selects period of interest
    geo_df = geo_df[(geo_df['date'] >= start_date)
                    & (geo_df['date'] <= end_date)]
    log.debug(geo_df['date'].unique())
    log.debug(geo_df[['satellite', 'date']].groupby(['satellite']).count())

    try:
        h8_ml_model = load(h8_ml_model_fpath)
        log.debug('Loaded trained H8 ML model from ' + h8_ml_model_fpath)
        log.debug(f'Model pipeline: {h8_ml_model}')
        geo_df.loc[geo_df['satellite'] == 'Himawari-8/9',
                   'geo_weight'] = h8_ml_model.predict_proba(
                       geo_df.loc[geo_df['satellite'] == 'Himawari-8/9',
                                  h8_predict_features])[:, 1]
        log.info('Added in probabilities using H8 Gradient Boosting Model.')
    except Exception as e:
        log.exception(e)

    geo_df.loc[geo_df['satellite'] == 'TERRA', 'terra_weight'] = \
        geo_df.loc[geo_df['satellite'] == 'TERRA', 'confidence'] / 100.0
    geo_df.loc[geo_df['satellite'] == 'AQUA', 'aqua_weight'] = \
        geo_df.loc[geo_df['satellite'] == 'AQUA', 'confidence'] / 100.0
    geo_df.loc[geo_df['satellite'] == 'JP1_LATE', 'n20_weight'] = \
        geo_df.loc[geo_df['satellite'] == 'JP1_LATE', 'confidence'] / 100.0
    geo_df.loc[geo_df['satellite'] == 'NPP_LATE', 'npp_weight'] = \
        geo_df.loc[geo_df['satellite'] == 'NPP_LATE', 'confidence'] / 100.0

    # count number of Himawari observations
    geo_obs_count = int((end_date - start_date).total_seconds() / 600)
    # normalize the weight for Himawari
    geo_df['geo_weight'] = geo_df['geo_weight'] / geo_obs_count

    # round to 8 decimals to save storage
    geo_df = geo_df.round(8)

    try:
        gdf = geopandas.GeoDataFrame(geo_df,
                                     geometry=geopandas.points_from_xy(
                                         geo_df.lon, geo_df.lat))
        log.debug('Created geopandas DataFrame')
    except Exception as e:
        log.exception(e)

    # transform to mercator epsg 3857
    gdf.crs = {'init': 'epsg:4326'}
    gdf_merc = gdf.to_crs({'init': 'epsg:3857'})

    gdf_merc.reset_index(inplace=True, drop=True)

    gdf_merc['x'] = gdf_merc['geometry'].x
    gdf_merc['y'] = gdf_merc['geometry'].y

    for key, value in sat_resolution_meter.items():
        gdf_merc.loc[gdf_merc['satellite'] == key, 'resolution_meter'] = value

    try:
        interim_file_path = os.path.join(args.out_file_path, 'interim')
        os.makedirs(interim_file_path, exist_ok=True)
    except Exception as e:
        log.exception(e)
        log.warning(interim_file_path + ' directory cannot be created!')

    try:
        processed_file_path = os.path.join(args.out_file_path, 'processed')
        os.makedirs(processed_file_path, exist_ok=True)
    except Exception as e:
        log.exception(e)
        log.warning(processed_file_path + ' directory cannot be created!')

    try:
        hotspot_json = os.path.join(
            interim_file_path, args.prefix_name + 'hotspot_' +
            end_date.strftime('%Y%m%d') + '.geojson')
        gdf_merc.to_file(hotspot_json, driver='GeoJSON')
        log.info(hotspot_json + ' is saved successfully.')
    except Exception as e:
        log.exception(e)
        log.warning(hotspot_json + ' export warning!')

    # create polygon
    for index, row in gdf_merc.iterrows():
        gdf_merc.loc[index, 'geometry'] = get_poly_box(
            row['x'], row['y'], row['resolution_meter'])

    try:
        hotspot_polygon_json = os.path.join(
            interim_file_path, args.prefix_name + 'hotspot_polygon_' +
            end_date.strftime('%Y%m%d') + '.geojson')
        gdf_merc = gdf_merc.round(4)
        gdf_merc.to_file(hotspot_polygon_json, driver='GeoJSON')
        log.info(hotspot_polygon_json + ' is saved successfully.')
    except Exception as e:
        log.exception(e)
        log.warning(hotspot_polygon_json + ' export warning!')

    # for debugging
    # hotspot_polygon_json = os.path.join(interim_file_path, args.prefix_name +
    #                                     'hotspot_polygon_' + end_date.strftime('%Y%m%d') + '.geojson')
    # gdf_merc = geopandas.read_file(hotspot_polygon_json)

    try:
        log.debug('Processing grid sjoin...')
        df_grid_joined = geopandas.sjoin(df_grid, gdf_merc, op='intersects')
        grid_weight_total = df_grid_joined[[
            'id', 'geo_weight', 'terra_weight', 'aqua_weight', 'n20_weight',
            'npp_weight'
        ]].groupby(['id']).sum()
        grid_geometry = df_grid_joined[['id',
                                        'geometry']].groupby(['id']).first()
        processed_grid = pd.merge(grid_weight_total, grid_geometry, on='id')
        processed_grid_gpd = geopandas.GeoDataFrame(processed_grid)
        processed_grid_gpd.crs = {'init': 'epsg:3857'}
        log.debug('Processing grid completed.')
    except Exception as e:
        log.exception(e)
        log.error('Unable to process grid sjoin!')

    try:
        fusiun_ml_model = load(open(fusiun_ml_model_fpath, 'rb'))
        log.debug('Loaded trained model from ' + fusiun_ml_model_fpath)
        log.debug(f'Model pipeline: {fusiun_ml_model}')
        processed_grid_gpd['prob'] = fusiun_ml_model.predict_proba(
            processed_grid_gpd[fusiun_predict_features])[:, 1]
        log.info(
            'Probabilities filled using FUSIUN Logistic Regression model.')
    except Exception as e:
        log.exception(e)

    try:
        hotspot_grid_json = os.path.join(
            processed_file_path, args.prefix_name + 'hotspot_grid_' +
            end_date.strftime('%Y%m%d') + '.geojson')
        processed_grid_gpd.to_file(hotspot_grid_json, driver='GeoJSON')
        log.info(hotspot_grid_json + ' is saved successfully.')
    except Exception as e:
        log.warning(hotspot_grid_json + ' export warning!')

    try:
        ann_file = os.path.join(
            processed_file_path, args.prefix_name + 'hotspot_grid_' +
            end_date.strftime('%Y%m%d') + '.ann')
        save_fred_grid_meteor_ann(processed_grid_gpd, ann_file, low_prob_thres,
                                  med_prob_thres)
        log.info(ann_file + ' is saved successfully.')
    except Exception as e:
        log.exception(e)
        log.warning(ann_file + ' cannot be saved!')
Ejemplo n.º 47
0
    # iv: clean dataframe:
    weather_model_df = weather_model_df[
        weather_model_df["LATITUDE"] != "unknown"
    ]
    weather_model_df = weather_model_df[
        weather_model_df["LONGITUDE"] != "unknown"
    ]
    weather_model_df = weather_model_df[
        weather_model_df["LONGITUDE"].notnull()
    ]
    weather_model_df['LATITUDE'] = weather_model_df['LATITUDE'].apply(
        lambda x: float(x)
    )
    weather_model_df['LONGITUDE'] = weather_model_df['LONGITUDE'].apply(
        lambda x: float(x)
    )
    # ________________________________
    # v: map latitude, longtitude to county:
    gpd_file = gpd.read_file(
        "/Users/Hsieh/Desktop/persephone/Data/uscounties.geojson"
    )
    geo_series = weather_model_df.apply(get_county, axis=1)
    gpd_df = gpd.GeoDataFrame(geometry=geo_series)
    counties_df = gpd.sjoin(gpd_df, gpd_file, op="within")
    weather_model_df['COUNTY'] = counties_df['name']
    weather_model_df['STATE'] = counties_df['state_name']
    # vi: write cleaned df to csv file:
    weather_model_df.to_csv(
        'cleaned_master_weather_complete.csv'
    )
Ejemplo n.º 48
0
def dir_centerline(links, nodes, meshpolys, meshlines, Imask, gt, pixlen):
    """
    Guess flow directions of links in a braided river channel.

    Guesses the flow direction of links in a braided river channel network by
    exploiting a "valleyline" centerline. Two metrics are computed to help
    guess the correct direction. The first is the number of centerline
    transects (meshlines) that the link crosses. The second is the local angle
    of the centerline compared to the link's angle. These metrics are appended
    to the links dictionary as links['cldist'] and links['clangs'].

    Parameters
    ----------
    links : dict
        Network links and associated properties.
    nodes : dict
        Network nodes and associated properties.
    meshpolys : list
        List of shapely.geometry.Polygons that define the valleyline mesh.
    meshlines : list
        List of shapely.geometry.LineStrings that define the valleyline mesh.
    Imask : np.array
        Binary mask of the network.
    gt : tuple
        gdal-type GeoTransform of the original binary mask.
    pixlen : float
        Length resolution of each pixel.

    Returns
    -------
    links : dict
        Network links and associated properties with 'cldists' and 'clangs'
        attributes appended.

    """
    # alg = 20
    alg = dy.algmap('cl_dist_guess')

    # Create geodataframes for intersecting meshpolys with nodes
    mp_gdf = gpd.GeoDataFrame(geometry=[Polygon(mp) for mp in meshpolys])
    rc = np.unravel_index(nodes['idx'], Imask.shape)
    nodecoords = gu.xy_to_coords(rc[1], rc[0], gt)
    node_gdf = gpd.GeoDataFrame(
        geometry=[Point(x, y) for x, y in zip(nodecoords[0], nodecoords[1])],
        index=nodes['id'])

    # Determine which meshpoly each node lies within
    intersect = gpd.sjoin(node_gdf, mp_gdf, op='intersects', rsuffix='right')

    # Compute guess and certainty, where certainty is how many transects apart
    # the link endpoints are (longer=more certain)
    cldists = np.zeros((len(links['id']), 1))
    for i, lconn in enumerate(links['conn']):
        try:
            first = intersect.loc[lconn[0]].index_right
            second = intersect.loc[lconn[1]].index_right
            cldists[i] = second - first
        except KeyError:
            pass

    for i, c in enumerate(cldists):
        if c != 0:
            if c > 0:
                links['guess'][i].append(links['conn'][i][0])
                links['guess_alg'][i].append(alg)
            elif c < 0:
                links['guess'][i].append(links['conn'][i][-1])
                links['guess_alg'][i].append(alg)

    # Save the distances for certainty
    links['cldists'] = np.abs(cldists)

    # Compute guesses based on how the link aligns with the local centerline
    # direction
    # alg = 21
    alg = dy.algmap('cl_ang_guess')
    clangs = np.ones((len(links['id']), 1)) * np.nan
    for i, (lconn, lidx) in enumerate(zip(links['conn'], links['idx'])):
        # Get coordinates of link endpoints
        rc = np.unravel_index([lidx[0], lidx[-1]], Imask.shape)

        try:  # Try is because some points may not lie within the mesh polygons
            # Get coordinates of centerline midpoints
            first = intersect.loc[lconn[0]].index_right
            second = intersect.loc[lconn[1]].index_right
            if first > second:
                first, second = second, first
            first_mp = np.mean(np.array(meshlines[first]), axis=0)  # midpoint
            second_mp = np.mean(np.array(meshlines[second + 1]),
                                axis=0)  # midpoint
        except KeyError:
            continue

        # Centerline vector
        cl_vec = second_mp - first_mp
        cl_vec = cl_vec / np.sqrt(np.sum(cl_vec**2))

        # Link vectors - as-is and flipped (reversed)
        link_vec = dy.get_link_vector(links,
                                      nodes,
                                      links['id'][i],
                                      Imask.shape,
                                      pixlen=pixlen)
        link_vec_rev = -link_vec

        # Compute interior radians between centerline vector and link vector
        # (then again with link vector flipped)
        lva = np.math.atan2(np.linalg.det([cl_vec, link_vec]),
                            np.dot(cl_vec, link_vec))
        lvar = np.math.atan2(np.linalg.det([cl_vec, link_vec_rev]),
                             np.dot(cl_vec, link_vec_rev))

        # Save the smaller of the two interior angles
        clangs[i] = np.min(np.abs([lva, lvar]))

        # Make a guess; smaller interior angle (i.e. link direction that aligns
        # best with local centerline direction) guesses the link orientation
        if np.abs(lvar) < np.abs(lva):
            links['guess'][i].append(links['conn'][i][1])
            links['guess_alg'][i].append(alg)
        else:
            links['guess'][i].append(links['conn'][i][0])
            links['guess_alg'][i].append(alg)
    links['clangs'] = clangs

    return links
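# Minimal standalone sketch (synthetic data, not from the original project) of the
# sjoin lookup dir_centerline() relies on: join nodes to the mesh polygons they
# intersect, then read the polygon position back from 'index_right' by node id.
import geopandas as gpd
from shapely.geometry import Point, Polygon

mp_gdf = gpd.GeoDataFrame(
    geometry=[Polygon([(i, 0), (i + 1, 0), (i + 1, 1), (i, 1)]) for i in range(3)])
node_gdf = gpd.GeoDataFrame(geometry=[Point(0.5, 0.5), Point(2.5, 0.5)],
                            index=[10, 11])
hits = gpd.sjoin(node_gdf, mp_gdf, op='intersects', rsuffix='right')
print(hits.loc[10].index_right, hits.loc[11].index_right)  # 0 2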
Ejemplo n.º 49
0
    def __init__(self,
                 tessellation,
                 edges,
                 buildings,
                 id_name,
                 unique_id,
                 verbose=True):
        self.tessellation = tessellation
        self.edges = edges
        self.buildings = buildings
        self.id_name = id_name
        self.unique_id = unique_id

        if id_name in buildings.columns:
            raise ValueError(
                "'{}' column cannot be in the buildings GeoDataFrame".format(
                    id_name))

        cells_copy = tessellation[[unique_id, "geometry"]].copy()

        print("Buffering streets...") if verbose else None
        street_buff = edges.copy()
        street_buff["geometry"] = street_buff.buffer(0.1)

        print("Generating spatial index...") if verbose else None
        streets_index = street_buff.sindex

        print("Difference...") if verbose else None
        new_geom = []

        for ix, cell in tqdm(
                cells_copy.geometry.iteritems(),
                total=cells_copy.shape[0],
                disable=not verbose,
        ):
            possible_matches_index = streets_index.query(cell)
            possible_matches = street_buff.iloc[possible_matches_index]
            new_geom.append(cell.difference(possible_matches.unary_union))

        print("Defining adjacency...") if verbose else None
        blocks_gdf = gpd.GeoDataFrame(geometry=new_geom)
        blocks_gdf = blocks_gdf.explode().reset_index(drop=True)

        spatial_weights = libpysal.weights.Queen.from_dataframe(
            blocks_gdf, silence_warnings=True)

        patches = {}
        jID = 1
        for idx in tqdm(blocks_gdf.index,
                        total=blocks_gdf.shape[0],
                        disable=not verbose):

            # if the id is already present in courtyards, continue (avoid repetition)
            if idx in patches:
                continue
            else:
                to_join = [idx
                           ]  # list of indices which should be joined together
                neighbours = []  # list of neighbours
                neighbours += spatial_weights.neighbors[
                    idx]  # neighbours from spatial weights

                for n in neighbours:
                    while (
                            n not in to_join
                    ):  # until there is some neighbour which is not in to_join
                        to_join.append(n)
                        neighbours += spatial_weights.neighbors[
                            n]  # extend neighbours by neighbours of neighbours :)
                for b in to_join:
                    patches[b] = jID  # fill dict with values
                jID = jID + 1

        blocks_gdf["patch"] = blocks_gdf.index.map(patches)

        print("Defining street-based blocks...") if verbose else None
        blocks_single = blocks_gdf.dissolve(by="patch")
        blocks_single.crs = buildings.crs

        blocks_single["geometry"] = blocks_single.buffer(0.1)

        print("Defining block ID...") if verbose else None  # street based
        blocks_single[id_name] = range(len(blocks_single))

        print("Generating centroids...") if verbose else None
        buildings_c = buildings.copy()
        buildings_c["geometry"] = buildings_c.representative_point(
        )  # make points

        print("Spatial join...") if verbose else None
        centroids_tempID = gpd.sjoin(buildings_c,
                                     blocks_single,
                                     how="left",
                                     op="intersects")

        tempID_to_uID = centroids_tempID[[unique_id, id_name]]

        print("Attribute join (tesselation)...") if verbose else None
        cells_copy = cells_copy.merge(tempID_to_uID, on=unique_id, how="left")

        print("Generating blocks...") if verbose else None
        blocks = cells_copy.dissolve(by=id_name)

        print("Multipart to singlepart...") if verbose else None
        blocks = blocks.explode()
        blocks.reset_index(inplace=True, drop=True)

        blocks["geometry"] = blocks.exterior
        blocks[id_name] = range(len(blocks))

        blocks["geometry"] = blocks.apply(lambda row: Polygon(row.geometry),
                                          axis=1)

        # if polygon is within another one, delete it
        sindex = blocks.sindex
        inp, res = sindex.query_bulk(blocks.geometry, predicate="within")
        res = res[~(inp == res)]
        mask = np.ones(len(blocks.index), dtype=bool)
        mask[list(set(res))] = False
        blocks = blocks.loc[mask]

        self.blocks = blocks[[id_name, "geometry"]]

        centroids_w_bl_ID2 = gpd.sjoin(buildings_c,
                                       self.blocks,
                                       how="left",
                                       op="intersects")
        bl_ID_to_uID = centroids_w_bl_ID2[[unique_id, id_name]]

        print("Attribute join (buildings)...") if verbose else None
        buildings_m = buildings[[unique_id]].merge(bl_ID_to_uID,
                                                   on=unique_id,
                                                   how="left")
        self.buildings_id = buildings_m[id_name]

        print("Attribute join (tesselation)...") if verbose else None
        cells_m = tessellation[[unique_id]].merge(bl_ID_to_uID,
                                                  on=unique_id,
                                                  how="left")
        self.tessellation_id = cells_m[id_name]
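# Hedged usage sketch: the __init__ above reads like a street-block generator, but
# the class name is not shown in this snippet, so `Blocks` below is an assumption.
# blocks = Blocks(tessellation, edges, buildings, id_name='bID', unique_id='uID')
# buildings['bID'] = blocks.buildings_id
# tessellation['bID'] = blocks.tessellation_id
# blocks_gdf = blocks.blocks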
Ejemplo n.º 50
0
def split_on_poly_by_streetname(square_id):
    '''
    Split the square with the given SQUARE id into parts: a Voronoi breakdown
    dissolved by street name when at least 4 address points fall inside it,
    otherwise an equal-area split into 4 parts.
    '''
    logging.info(f"working on shape_id: {square_id}")
    try:
        select_square = squares[squares.loc[:, 'SQUARE'] == square_id].copy()
        if (select_square.geometry.type == 'MultiPolygon').any():
            select_square = select_square.explode()
        address_pts = addresses[addresses.loc[:, 'SQUARE'] == square_id].copy()
        square_part = 1
        for index, one_square in select_square.iterrows():
            one_square_shape = one_square['geometry']
            address_within = address_pts[address_pts.within(
                one_square_shape)].copy()
            if len(address_within) < 4:
                split_type = "equal_area"
                poly_shapes_df = split_poly_into_equal_parts(
                    one_square_shape, 4)
                poly_shapes_partial = gpd.sjoin(
                    poly_shapes_df,
                    address_within[['SQUARE', 'SSL', 'STNAME', 'geometry']],
                    how='left')
                poly_shapes_partial.loc[:, 'SQUARE_PART'] = square_part

                poly_shapes_partial = poly_shapes_partial[[
                    'group', 'geometry', 'SQUARE', 'SSL', 'STNAME',
                    'SQUARE_PART'
                ]]

            else:
                split_type = "streetname_breakdown"
                address_pts_array = np.array([
                    coords for coords in address_within.geometry.apply(
                        lambda x: (x.x, x.y))
                ])
                poly_shapes, pts, poly_to_pt_assignments = voronoi_regions_from_coords(
                    address_pts_array, one_square_shape)

                poly_shapes_df = gpd.GeoDataFrame(pd.DataFrame(
                    poly_to_pt_assignments, columns=['group']),
                                                  crs="EPSG:4326",
                                                  geometry=poly_shapes)

                poly_shapes_w_stname = gpd.sjoin(
                    poly_shapes_df,
                    address_within[['SQUARE', 'SSL', 'STNAME', 'geometry']])

                poly_shapes_partial = poly_shapes_w_stname.dissolve(
                    by='STNAME').reset_index()

                poly_shapes_partial.loc[:, 'SQUARE_PART'] = square_part
                poly_shapes_partial = poly_shapes_partial[[
                    'group', 'geometry', 'SQUARE', 'SSL', 'STNAME',
                    'SQUARE_PART'
                ]]

            if square_part == 1:
                full_poly_shape_df = poly_shapes_partial.copy()
            else:
                full_poly_shape_df = full_poly_shape_df.append(
                    poly_shapes_partial)
            square_part += 1
    except Exception:
        bad_shape_df = pd.DataFrame(
            [[0, Polygon([(0, 0), (1, 1), (0, 1)]), square_id, 0]],
            columns=['group', 'geometry', 'SQUARE', 'SQUARE_PART'])
        full_poly_shape_df = gpd.GeoDataFrame(bad_shape_df,
                                              crs="EPSG:4326",
                                              geometry='geometry')

    return full_poly_shape_df
Ejemplo n.º 51
0
    end = Point(float(end[1]), float(end[0]))  # lon, lat

    points = gpd.GeoSeries([start, end])

    # Here I need to converte the line to a geodataframe to make the
    # merge with attributes

    route = gpd.GeoDataFrame(interim)
    route['geometry'] = gpd.GeoSeries(LineString(lon_lat))[0]  ## adding
    ## geometry

    route.crs = {'init': 'epsg:4326'}  # define coords

    # Match route with attributes of the municipality

    route_mun = gpd.sjoin(route, slv, how="inner", op='intersects')
    route_mun['codmun'] = route_mun['COD_MUN4']

    # Then replace the interim database. Note that there could be more than
    # one match

    interim = route_mun[[
        'circuito', 'latlon0', 'latlon1', 'time', 'dist', 'codmun'
    ]]  # mun identifier

    # Append to the main dataframe

    output = output.append(interim)

    print(i)
Ejemplo n.º 52
0
def measure_network_density(streets_for_networkd_prj, gross_city_blocks_prj):
    """
    Adds network density (m/ha.) onto a gdf of gross urban blocks

    Requires a gdf of streets to overlay with the gross city blocks. Streets
    that lie within a gross urban block (i.e. do not coincide with its perimeter)
    have the block id added to them. The lengths of these streets are then
    aggregated by block id and the total added to the gross city blocks gdf.
    Half the length of the perimeter (i.e. the bounding roads) is then added to
    the gdf as well, and the network density is calculated as the sum of these
    two numbers divided by the gross area of the block.

    Parameters
    ----------
    streets_for_networkd_prj : geodataframe
        a projected gdf of streets
    gross_city_blocks_prj: geodataframe
        a projected gdf of gross city blocks

    Returns
    -------
    gross_city_blocks_prj
        GeoDataFrame
    """
    # OSMnx returns some highway values as lists, this converts them to strings
    streets_for_networkd_prj['highway'] = streets_for_networkd_prj[
        'highway'].apply(lambda x: ', '.join(x) if type(x) is list else x)

    # make a new gdf which only contains street fragments completely within a gross city block
    streets_in_gross_blocks = gpd.sjoin(streets_for_networkd_prj,
                                        gross_city_blocks_prj,
                                        how="inner",
                                        op="within")

    # Write the length of these inner streets into a new column 'inner_streets_m'
    streets_in_gross_blocks[
        'inner_streets_m'] = streets_in_gross_blocks.length.round(decimals=1)

    # aggregate the total length of inner streets for each block
    inner_streets_agg_by_block = streets_in_gross_blocks.groupby(
        ['city_block_id']).sum().round(decimals=2)

    # reindex to keep only the columns necessary
    keep_columns = ['inner_streets_m']
    inner_streets_agg_by_block = inner_streets_agg_by_block.reindex(
        columns=keep_columns)

    # merge the total inner street length onto the gross blocks
    gross_city_blocks_prj = gross_city_blocks_prj.merge(
        inner_streets_agg_by_block,
        how='outer',
        left_index=True,
        right_index=True)

    # Fill NaN with zeroes
    gross_city_blocks_prj.fillna(0, axis=1, inplace=True)

    gross_city_blocks_prj[
        'outer_streets_m'] = gross_city_blocks_prj.length.round(decimals=2)
    gross_city_blocks_prj['gross_area_ha'] = (gross_city_blocks_prj.area /
                                              10000).round(decimals=4)
    gross_city_blocks_prj['network_density_m_ha'] = (
        ((gross_city_blocks_prj['outer_streets_m'] / 2) +
         (gross_city_blocks_prj['inner_streets_m'])) /
        ((gross_city_blocks_prj.area / 10000))).round(decimals=2)

    return gross_city_blocks_prj
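# Hedged usage sketch for measure_network_density() (layer names are hypothetical).
# Both inputs must share a projected CRS in metres and the blocks gdf must carry a
# 'city_block_id' column.
# streets_prj = gpd.read_file('streets.gpkg').to_crs(epsg=32633)
# blocks_prj = gpd.read_file('gross_city_blocks.gpkg').to_crs(epsg=32633)
# blocks_density = measure_network_density(streets_prj, blocks_prj)
# blocks_density[['city_block_id', 'network_density_m_ha']].head()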
Ejemplo n.º 53
0
# %%
queens_top_plants = gf.df_queens_plants.sort_values("gwh",
                                                    ascending=False).iloc[:5]
queens_top_plants_gdf = queens_plants_gdf.rename(columns={
    "Plant_Code": "plant_id"
}).merge(
    right=queens_top_plants[["plant_id"]],
    on=["plant_id"],
    how="inner",
    validate="one_to_one",
)

queens_top_nbd_gdf = gpd.sjoin(queens_nbd_gdf,
                               queens_top_plants_gdf,
                               how="inner",
                               op="intersects")[["ntaname", "geometry"
                                                 ]].drop_duplicates()

queens_top_nbd_gdf.loc[:,
                       "centroid_lat"] = queens_top_nbd_gdf.geometry.centroid.y
queens_top_nbd_gdf.loc[:,
                       "centroid_long"] = queens_top_nbd_gdf.geometry.centroid.x
alt.Chart(queens_top_nbd_gdf).mark_geoshape(
    fill="lightgray",
    stroke="white") + alt.Chart(queens_top_plants_gdf).mark_geoshape(
    ) + alt.Chart(queens_top_plants_gdf).mark_text(
        align="left", baseline="middle").encode(
            longitude="Longitude",
            latitude="Latitude",
            text="Plant_Name",
Ejemplo n.º 54
0
def tract_links(resource, doc, env, *args, **kwargs):
    from metapack.rowgenerator import PandasDataframeSource
    from metapack import get_cache
    from shapely.geometry import Point
    import geopandas as gpd

    # First, geo join the tracts into the communities and cities.

    comm = doc.resource('cities_communities').geoframe()
    tracts = doc.resource('tracts').dataframe()

    tracts['intp'] = tracts.apply(
        lambda r: Point(float(r.intptlon), float(r.intptlat)), axis=1)

    tract_pt = gpd.GeoDataFrame(tracts, geometry='intp')

    tract_community = gpd.sjoin(comm, tract_pt, op='contains')

    columns = ['geoid', 'type', 'name', 'name_code', 'city', 'link_code']

    tc = tract_community.rename({'name_left': 'name'}, axis=1)[columns]

    # Now link everything together.

    acronyms = doc.reference('acronyms')
    acro_map = dict(list(acronyms)[1:])
    acro_map[''] = ''

    _1 = tc[['geoid']].drop_duplicates().set_index('geoid').join(
        tracts[['geoid', 'geometry']].set_index('geoid'))
    _2 = tc.set_index('geoid')

    _3 = _2[_2.type == 'city'][['name', 'name_code']]
    _4 = _2[_2.type == 'county'][['name', 'name_code']]
    _5 = _2[_2.type == 'sd_community'][['name', 'name_code']]
    _6 = _2[_2.type == 'county_community'][['name', 'name_code']]

    _7 = _1.join(_3, rsuffix='_city').join(_4, rsuffix='_county')\
           .join(_5, rsuffix='_sdc').join(_6, rsuffix='_cnc')

    _7.columns = [
        'geometry',
        'city_name',
        'city_code',
        'county_name',
        'county_code',
        'community_name',
        'community_cpcode',
        'cnc_name',
        'cnc_code',
    ]

    # Move the county name into the city columns, then drop it
    _7['city_name'] = _7.city_name.where(
        ~((_7.city_name.isnull()) & (_7.county_code == 'CN')), 'COUNTY')
    _7['city_code'] = _7.city_code.where(
        ~((_7.city_code.isnull()) & (_7.county_code == 'CN')), 'CN')

    _7.drop(['county_name', 'county_code'], axis=1, inplace=True)

    # Move the  county community names into the community columns
    _7['community_name'] = _7.community_name.where(_7.city_code != 'CN',
                                                   _7.cnc_name).fillna('')
    _7['community_cpcode'] = _7.community_cpcode.where(_7.city_code != 'CN',
                                                       _7.cnc_code).fillna('0')

    _7.drop(['cnc_name', 'cnc_code'], axis=1, inplace=True)

    _7 = _7.fillna('')

    _7['city_name'] = _7.city_name.apply(lambda v: v.title())
    _7['community_name'] = _7.community_name.apply(
        lambda v: str(v).title()).fillna('')

    _7['community_code'] = _7.community_name.apply(
        lambda v: acro_map[clean_comm_name(v)].upper())

    # move geometry to the end
    _7 = _7[list(_7.columns)[1:] + list(_7.columns)[:1]]

    yield from PandasDataframeSource('<df>', _7, get_cache())
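
As a rough illustration of the op='contains' join direction used in tract_links above (polygons on the left, one output row per contained point), with hypothetical toy frames:

import geopandas as gpd
from shapely.geometry import Point, Polygon

# hypothetical community polygons and tract interior points
comm = gpd.GeoDataFrame(
    {'name': ['A', 'B']},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
              Polygon([(1, 0), (2, 0), (2, 1), (1, 1)])])
tract_pt = gpd.GeoDataFrame(
    {'geoid': ['001', '002', '003']},
    geometry=[Point(0.5, 0.5), Point(1.5, 0.5), Point(0.2, 0.8)])

# one row per (community, contained tract point) pair
tract_community = gpd.sjoin(comm, tract_pt, op='contains')
print(tract_community[['name', 'geoid']])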
Ejemplo n.º 55
0
# use coordinates
strikes = gp.GeoDataFrame(
    df_strikes,
    geometry=gp.points_from_xy(df_strikes.longitude, df_strikes.latitude),
).set_crs(epsg=z_epsg_wgs84)

# single-strike sources
strikes_internet = gp.GeoDataFrame(
    df_internet,
    geometry=gp.points_from_xy(df_internet.longitude, df_internet.latitude),
).set_crs(epsg=z_epsg_wgs84)
strikes_cities = gp.GeoDataFrame(
    df_cities,
    geometry=gp.points_from_xy(df_cities.longitude, df_cities.latitude),
).set_crs(epsg=z_epsg_wgs84)
strikes_ordnungsamt = gp.GeoDataFrame(
    df_ordnungsamt,
    geometry=gp.points_from_xy(df_ordnungsamt.longitude, df_ordnungsamt.latitude),
).set_crs(epsg=z_epsg_wgs84)

# merge spatially with kreise & wahlkreise to know in which ags they lie
strikes_ags = gp.sjoin(strikes, kreise[['AGS', 'geometry']], how='left', op='within')
strikes_ags.drop('index_right', axis=1, inplace=True)


strikes_ags = gp.sjoin(strikes_ags, wahlkreise[['WKR_NR', 'geometry']],
                       how='left', op='within')
strikes_ags.drop('index_right', axis=1, inplace=True)


strikes_ags = gp.sjoin(strikes_ags, teralytics[['FID', 'geometry']])
strikes_ags.drop('index_right', axis=1, inplace=True)


strikes_ags.rename(columns={'AGS':'ags5',
                            'WKR_NR':'wkr_nr',
                            'FID':'teralytics_id'}, inplace=True)
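
The chained left joins above rely on dropping index_right between steps, because a second sjoin would otherwise collide with that column; a compressed sketch of the same pattern, with a hypothetical helper name:

import geopandas as gp

def tag_points(points, polygons_and_columns):
    """Left-join one attribute column from each polygon layer onto the points.

    polygons_and_columns: iterable of (GeoDataFrame, column_name) pairs;
    all frames are assumed to share one CRS.
    """
    tagged = points
    for polygons, column in polygons_and_columns:
        tagged = gp.sjoin(tagged, polygons[[column, 'geometry']],
                          how='left', op='within')
        # sjoin adds an 'index_right' column; drop it so the next join works
        tagged = tagged.drop(columns='index_right')
    return tagged

# e.g. tag_points(strikes, [(kreise, 'AGS'), (wahlkreise, 'WKR_NR')])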
Ejemplo n.º 56
0
 def test_sjoin_inner(self):
     df = sjoin(self.pointdf, self.polydf, how="inner")
     assert df.shape == (11, 8)
Ejemplo n.º 57
0
def _consolidate_intersections_rebuild_graph(G,
                                             tolerance=10,
                                             reconnect_edges=True):
    """
    Consolidate intersections comprising clusters of nearby nodes.

    Merge nodes and return a rebuilt graph with consolidated intersections and
    reconnected edge geometries.

    The tolerance argument should be adjusted to approximately match street
    design standards in the specific street network, and you should always use
    a projected graph to work in meaningful and consistent units like meters.

    Returned graph's node IDs represent clusters rather than osmids. Refer to
    nodes' osmid attributes for original osmids. If multiple nodes were merged
    together, the osmid attribute is a list of merged nodes' osmids.

    Parameters
    ----------
    G : networkx.MultiDiGraph
        a projected graph
    tolerance : float
        nodes are buffered to this distance (in graph's geometry's units) and
        subsequent overlaps are dissolved into a single node
    reconnect_edges : bool
        ignored if rebuild_graph is not True. if True, reconnect edges and
        their geometries in rebuilt graph to the consolidated nodes and update
        edge length attributes; if False, returned graph has no edges (which
        is faster if you just need topologically consolidated intersection
        counts).

    Returns
    -------
    H : networkx.MultiDiGraph
        a rebuilt graph with consolidated intersections and reconnected
        edge geometries
    """
    # STEP 1
    # buffer nodes to passed-in distance and merge overlaps. turn merged nodes
    # into gdf and get centroids of each cluster as x, y
    node_clusters = gpd.GeoDataFrame(
        geometry=_merge_nodes_geometric(G, tolerance))
    centroids = node_clusters.centroid
    node_clusters["x"] = centroids.x
    node_clusters["y"] = centroids.y

    # STEP 2
    # attach each node to its cluster of merged nodes. first get the original
    # graph's node points then spatial join to give each node the label of
    # cluster it's within
    node_points = utils_graph.graph_to_gdfs(G, edges=False)[["geometry"]]
    gdf = gpd.sjoin(node_points, node_clusters, how="left", op="within")
    gdf = gdf.drop(columns="geometry").rename(
        columns={"index_right": "cluster"})

    # STEP 3
    # if a cluster contains multiple components (i.e., it's not internally
    # connected), move each component to its own cluster (otherwise we would
    # connect nodes that are not truly connected, e.g., nearby dead-ends or
    # surface streets passing over a bridge).
    groups = gdf.groupby("cluster")
    for cluster_label, nodes_subset in groups:
        if len(nodes_subset) > 1:
            # identify all the (weakly connected) components in the cluster
            wccs = list(
                nx.weakly_connected_components(G.subgraph(nodes_subset.index)))
            if len(wccs) > 1:
                # if there are multiple components in this cluster
                suffix = 0
                for wcc in wccs:
                    # set subcluster xy to the centroid of just these nodes
                    subcluster_centroid = node_points.loc[
                        wcc].unary_union.centroid
                    gdf.loc[wcc, "x"] = subcluster_centroid.x
                    gdf.loc[wcc, "y"] = subcluster_centroid.y
                    # move to subcluster by appending suffix to cluster label
                    gdf.loc[wcc, "cluster"] = f"{cluster_label}-{suffix}"
                    suffix += 1

    # STEP 4
    # create new empty graph and copy over misc graph data
    H = nx.MultiDiGraph()
    H.graph = G.graph

    # STEP 5
    # create a new node for each cluster of merged nodes
    # regroup now that we potentially have new cluster labels from step 3
    groups = gdf.groupby("cluster")
    for cluster_label, nodes_subset in groups:

        osmids = nodes_subset.index.to_list()
        if len(osmids) == 1:
            # if cluster is a single node, add that node to new graph
            H.add_node(cluster_label, **G.nodes[osmids[0]])
        else:
            # if cluster is multiple merged nodes, create one new node to
            # represent them
            H.add_node(
                cluster_label,
                osmid=str(osmids),
                x=nodes_subset["x"].iloc[0],
                y=nodes_subset["y"].iloc[0],
            )

    if not G.edges or not reconnect_edges:
        # if reconnect_edges is False or there are no edges in the original
        # graph (e.g., after dead-ends were removed), skip the edges and
        # return the new graph as-is
        return H

    # STEP 6
    # create new edge from cluster to cluster for each edge in original graph
    gdf_edges = utils_graph.graph_to_gdfs(G, nodes=False)
    for u, v, k, data in G.edges(keys=True, data=True):
        u2 = gdf.loc[u, "cluster"]
        v2 = gdf.loc[v, "cluster"]

        # only create the edge if we're not connecting the cluster
        # to itself, but always add original self-loops
        if (u2 != v2) or (u == v):
            data["u_original"] = u
            data["v_original"] = v
            if "geometry" not in data:
                data["geometry"] = gdf_edges.loc[(u, v, k), "geometry"]
            H.add_edge(u2, v2, **data)

    # STEP 7
    # for every group of merged nodes with more than 1 node in it, extend the
    # edge geometries to reach the new node point
    for cluster_label, nodes_subset in groups:

        # but only if there were multiple nodes merged together,
        # otherwise it's the same old edge as in original graph
        if len(nodes_subset) > 1:

            # get coords of merged nodes point centroid to prepend or
            # append to the old edge geom's coords
            x = H.nodes[cluster_label]["x"]
            y = H.nodes[cluster_label]["y"]
            xy = [(x, y)]

            # for each edge incident to this new merged node, update its
            # geometry to extend to/from the new node's point coords
            in_edges = set(H.in_edges(cluster_label, keys=True))
            out_edges = set(H.out_edges(cluster_label, keys=True))
            for u, v, k in in_edges | out_edges:
                old_coords = list(H.edges[u, v, k]["geometry"].coords)
                new_coords = xy + old_coords if cluster_label == u else old_coords + xy
                new_geom = LineString(new_coords)
                H.edges[u, v, k]["geometry"] = new_geom

                # update the edge length attribute, given the new geometry
                H.edges[u, v, k]["length"] = new_geom.length

    return H
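
A minimal usage sketch for the helper above, assuming it is importable alongside OSMnx and that the graph is projected so that tolerance is in metres (the place name is only an example):

import osmnx as ox

# download a small drivable network and project it
G = ox.graph_from_place("Piedmont, California, USA", network_type="drive")
G_proj = ox.project_graph(G)

# merge clusters of nodes within 15 m of each other and rebuild the edges
H = _consolidate_intersections_rebuild_graph(G_proj, tolerance=15,
                                             reconnect_edges=True)
print(len(G_proj), "nodes before,", len(H), "nodes after consolidation")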
Ejemplo n.º 58
0
def write_source_models(version=0,
                        full=False,
                        use_recomputed=False,
                        prefix='nt2012'):
    '''
    Writes all source models.
    '''
    # compute some filenames
    if use_recomputed:
        smoothed_data_path = RECOMPUTED_DATA_PATH
        smoothed_prefix = 'recomputed'
    else:
        smoothed_data_path = ORIGINAL_DATA_PATH
        smoothed_prefix = prefix
    layers_df = pd.read_csv(LAYERS_FORMAT % version, index_col='layerid')

    # load electronic supplement for areal zones
    df_erroneous = pd.read_csv(StringIO('''\
    zoneid layerid strike dip rake
    14     1       228    69  330
    914    1       192    46  124
    '''),
                               sep=r'\s+',
                               index_col='zoneid')

    print('Reading areal polygons and seismicity statistics for each layer')
    areal_dfs = []
    for layer_id in layers_df.index:

        # read seismicity and polygons and join them
        seismicity_file = os.path.join(ORIGINAL_DATA_PATH,
                                       SEISMICITY_FORMAT % layer_id)
        print('Reading: ' + os.path.abspath(seismicity_file))
        seismicity_df = pd.read_csv(seismicity_file)
        seismicity_df.set_index('zoneid', inplace=True, verify_integrity=True)
        seismicity_df.rename(columns=SEISMICITY_ALIASES, inplace=True)

        # preserve errors in the electronic supplement in version v0
        if int(version) == 0:
            if layer_id == 4:
                (seismicity_df.loc[169],
                 seismicity_df.loc[170]) = \
                    (seismicity_df.loc[170].copy(),
                     seismicity_df.loc[169].copy())
                print('Swapped seismicity parameters for zones 169 and 170.')

            for zoneid, row in df_erroneous[df_erroneous.layerid ==
                                            layer_id].iterrows():
                row = row.drop('layerid')
                for column in row.keys():
                    seismicity_df.loc[zoneid, column] = row[column]
                print('Restored zone %d erroneous %s: %s' %
                      (zoneid, row.keys().values, row.values))

        polygon_file = os.path.join(ORIGINAL_DATA_PATH,
                                    POLYGON_FORMAT % layer_id)
        print('Reading: ' + os.path.abspath(polygon_file))
        polygon_df = read_polygons(polygon_file)
        polygon_df.set_index('zoneid', inplace=True, verify_integrity=True)

        df = seismicity_df.join(polygon_df, how='outer')

        # add layer info
        df.insert(0, 'layerid', layer_id)
        areal_dfs.append(df)

    # put it all together
    columns = list(
        unique_everseen([column for df in areal_dfs for column in df.columns]))
    areal_df = pd.concat(areal_dfs, sort=True)[columns].sort_index()

    # auxiliary information
    aux_file = AUX_FORMAT % int(version)
    print('\nReading: ' + os.path.abspath(aux_file))
    aux_df = pd.read_csv(aux_file, index_col='zoneid').sort_index()
    assert (areal_df.index == aux_df.index).all()
    if 'layerid' in aux_df:
        aux_df.drop(columns='layerid', inplace=True)
    areal_df = areal_df.join(aux_df)

    # assign undefined focal mechanisms as reverse faulting - shouldn't matter
    undefined = areal_df['dip'] == -1
    areal_df.loc[undefined, 'rake'] = 90
    areal_df.loc[undefined, 'dip'] = 45
    areal_df.loc[undefined, 'strike'] = 0

    # augment areal zone description tables
    areal_df = areal_df.join(layers_df, on='layerid')
    areal_df['rake'] = wrap(areal_df['rake'])
    areal_df['mechanism'] = focal_mech(areal_df['dip'], areal_df['rake'])
    areal_df['new style'] = faulting_style(areal_df['strike'], areal_df['dip'],
                                           areal_df['rake'])
    areal_df['strike2'], areal_df['dip2'], areal_df['rake2'] = zip(*[
        aux_plane(strike, dip, rake) for strike, dip, rake in zip(
            areal_df['strike'], areal_df['dip'], areal_df['rake'])
    ])
    areal_df['mechanism2'] = focal_mech(areal_df['dip2'], areal_df['rake2'])

    areal_df['mmin'] = MIN_MAGS[0]

    areal_df['strike2'] = areal_df['strike2'].round(1)
    areal_df['dip2'] = areal_df['dip2'].round(1)
    areal_df['rake2'] = areal_df['rake2'].apply(wrap)
    areal_df['rake2'] = areal_df['rake2'].round(1)
    areal_df['rake2'] = areal_df['rake2'].apply(wrap)

    swap = areal_df['focal plane'] == 'secondary'
    print('Treating %d focal planes as secondary: %s' %
          (sum(swap), ', '.join(str(item) for item in areal_df.index[swap])))
    for column in ['strike', 'dip', 'rake', 'mechanism']:
        areal_df.loc[swap, [column, column + '2']] = \
            areal_df.loc[swap, [column + '2', column]].values

    # grab mmax and bvalue from zone above if mmax zero for this zone
    check_keys = ['mmax', 'b']
    none_found = True
    for i, area_series in areal_df[(areal_df[check_keys] == 0).any(
            axis=1)].iterrows():
        alternate_zone = int(area_series.name / 10)
        for key in check_keys:
            if area_series['a'] != 0 and area_series[key] == 0:
                print('For zone %d taking %s from zone %d' %
                      (area_series.name, key, alternate_zone))
                areal_df.at[i, key] = areal_df.at[alternate_zone, key]
                none_found = False
    if none_found:
        print('SUCCESS: All zones already have mmax & b defined.')

    # write areal CSV
    areal_source_model_base = AREAL_MODEL_FORMAT % (prefix, int(version))
    areal2csv(areal_df, areal_source_model_base)

    # write areal NRML
    mark = time()
    df2nrml(areal_df, areal_source_model_base)
    print('Finished writing areal model to NRML: %s\n' %
          pd.to_timedelta(time() - mark, unit='s'))

    # read logic tree description table
    source_tree_tsv = SOURCE_TREE_FORMAT % int(version)
    print('Logic tree before collapse:')
    source_tree_symbolic_df = read_tree_tsv(source_tree_tsv)
    print(source_tree_symbolic_df)

    # compute collapsed rates
    areal_collapsed_df, collapsed_tree_df, _, _ = \
        collapse_sources(areal_df, source_tree_symbolic_df)

    print('Logic tree after collapse:')
    print(collapsed_tree_df)

    # write areal sources to NRML
    mark = time()
    areal_collapsed_model_base = areal_source_model_base + ' collapsed'
    df2nrml(areal_collapsed_df, areal_collapsed_model_base)
    print('Finished writing collapsed areal model to NRML: %s\n' %
          pd.to_timedelta(time() - mark, unit='s'))

    # completeness tables
    print('Reading completeness tables.')
    completeness_df = pd.read_csv(
        '../Data/thingbaijam2011seismogenic/Table1.csv',
        header=[0, 1],
        index_col=[0, 1])
    completeness_df.columns = [
        ' '.join(col).strip() for col in completeness_df.columns.values
    ]

    # electronic supplement for smoothed-gridded model
    print('Reading smoothed seismicity data ...')
    smoothed_data_format = os.path.join(smoothed_data_path, SMOOTHED_FORMAT)

    mark = time()
    smoothed_df_list = []
    for i, min_mag in enumerate(MIN_MAGS):
        layer_smoothed_df_list = []
        for layer_id, layer in layers_df.join(completeness_df,
                                              on=['zmin', 'zmax']).iterrows():

            layer_smoothed_df = pd.read_csv(smoothed_data_format %
                                            (layer_id, min_mag))
            nu_mag = 'nu%s' % str(min_mag).replace('.', '_')

            rename_cols = {nu_mag: 'nu', 'lat': 'latitude', 'lon': 'longitude'}
            layer_smoothed_df.rename(columns=rename_cols, inplace=True)

            layer_smoothed_df['layerid'] = layer_id
            layer_smoothed_df['mmin model'] = min_mag
            layer_smoothed_df['mmin'] = min_mag
            layer_smoothed_df['duration'] = (layer[str(min_mag) + ' end'] -
                                             layer[str(min_mag) + ' start'] +
                                             1)
            if use_recomputed:
                layer_smoothed_df['lambda'] = layer_smoothed_df['nu']
                layer_smoothed_df['nu'] = (layer_smoothed_df['lambda'] *
                                           layer_smoothed_df['duration'])
            else:
                layer_smoothed_df['lambda'] = (layer_smoothed_df['nu'] /
                                               layer_smoothed_df['duration'])

            layer_smoothed_df_list.append(layer_smoothed_df)

        layer_smoothed_df = pd.concat(layer_smoothed_df_list,
                                      ignore_index=True)
        smoothed_df_list.append(layer_smoothed_df)

    smoothed_df = pd.concat(smoothed_df_list, ignore_index=True)
    len_smoothed = smoothed_df.shape[0]
    smoothed_df = smoothed_df.sort_values(
        ['layerid', 'mmin model', 'longitude', 'latitude'])
    smoothed_df['geometry'] = [
        Point(longitude, latitude) for longitude, latitude in zip(
            smoothed_df['longitude'], smoothed_df['latitude'])
    ]
    smoothed_df = gpd.GeoDataFrame(smoothed_df, crs='WGS84')

    print('Read %d point sources from %d files: %s\n' %
          (len(smoothed_df), len(MIN_MAGS) * len(layers_df),
           pd.to_timedelta(time() - mark, unit='s')))

    # associate smoothed-gridded points with zones
    # we are only interested in active zones
    active_areal_df = areal_df[areal_df['a'] != 0].reset_index()

    print('Associate point sources in areal zones with those zones ...')
    # quick, requires no transformations
    mark = time()

    smoothed_df['distance'] = np.inf
    smoothed_dfs = []
    for layer_id in layers_df.index:
        smoothed_layer_df = smoothed_df[smoothed_df['layerid'] == layer_id]
        areal_layer_df = gpd.GeoDataFrame(
            active_areal_df[active_areal_df['layerid'] == layer_id],
            crs='WGS84')
        smoothed_layer_df = gpd.sjoin(
            smoothed_layer_df,
            areal_layer_df[['zoneid', 'a', 'geometry']],
            how='left',
            op='within')
        smoothed_dfs.append(smoothed_layer_df)
    smoothed_df = pd.concat(smoothed_dfs)
    smoothed_df.drop(columns='index_right', inplace=True)

    smoothed_df['in zoneid'] = smoothed_df['zoneid'].copy()

    assigned = (~np.isnan(smoothed_df['in zoneid'])) & (smoothed_df['a'] != 0)
    smoothed_df.loc[assigned, 'distance'] = 0
    print('Spatial join accounted for %.2f%% of sources: %s\n' %
          (100 * len(smoothed_df[assigned]) / len(smoothed_df),
           pd.to_timedelta(time() - mark, unit='s')))

    unassigned_zones = (
        set(active_areal_df.zoneid.unique()) -
        set(smoothed_df[pd.notnull(smoothed_df.zoneid)].zoneid.unique()))
    if list(unassigned_zones):
        raise RuntimeError('Zones not assigned to any point: ' +
                           str(sorted(list(unassigned_zones))))
    else:
        print('SUCCESS: All active areal zones assigned to at least one point')

    # no point should be associated with multiple zones
    id_columns = ['latitude', 'longitude', 'layerid', 'mmin']
    duplicated_df = smoothed_df[smoothed_df.duplicated(
        subset=id_columns, keep=False)].sort_values(id_columns + ['zoneid'])
    if duplicated_df.empty:
        print('SUCCESS: No grid point fell in multiple areal zones')
    else:
        duplicated_df.to_csv('smoothed_duplicated.csv')

        point_a = duplicated_df.iloc[0]
        point_b = duplicated_df.iloc[1]
        zone_a = active_areal_df.at[int(point_a.zoneid), 'geometry']
        zone_b = active_areal_df.at[int(point_b.zoneid), 'geometry']

        _, ax = plt.subplots()
        ax.add_patch(PolygonPatch(zone_a, alpha=0.5))
        ax.add_patch(PolygonPatch(zone_b, alpha=0.5))
        ax.scatter(duplicated_df['longitude'], duplicated_df['latitude'])
        ax.set_xlim((point_a.longitude - 5, point_a.longitude + 5))
        ax.set_ylim((point_a.latitude - 5, point_a.latitude + 5))
        ax.set_aspect(1)

        print(int(point_a.zoneid), point_a.layerid,
              dumps(zone_a, rounding_precision=2))
        print(int(point_b.zoneid), point_b.layerid,
              dumps(zone_b, rounding_precision=2))
        print(point_a.longitude, point_a.latitude)
        raise RuntimeError('Points assigned to multiple zones.')

    # associate points nearest to zones
    print('Find nearest areal zones for remaining points ...')
    mark = time()
    active_areal_df['polygon'] = [
        MyPolygon([
            geo.point.Point(lat, lon)
            for lat, lon in zip(*zone.geometry.exterior.coords.xy)
        ]) for _, zone in active_areal_df.iterrows()
    ]

    unassigned_df = smoothed_df.loc[~assigned].copy()
    distances = np.full((len(unassigned_df), len(active_areal_df)), np.inf)
    for i, area_series in active_areal_df.iterrows():
        in_layer = (unassigned_df['layerid'] == area_series['layerid']).values
        mesh = geo.mesh.Mesh(unassigned_df.loc[in_layer, 'longitude'].values,
                             unassigned_df.loc[in_layer, 'latitude'].values)
        distances[in_layer, i] = area_series['polygon'].distances(mesh)

    unassigned_df.loc[:, 'zoneid'] = active_areal_df.loc[
        np.argmin(distances, axis=1), 'zoneid'].values
    unassigned_df.loc[:, 'distance'] = np.amin(distances, axis=1)

    print('Nearest zone required for %.0f%% of sources: %s\n' %
          (100 * len(unassigned_df) / len(smoothed_df),
           pd.to_timedelta(time() - mark, unit='s')))

    smoothed_df = pd.concat((smoothed_df[assigned], unassigned_df))

    # copy parameters of nearest areal zone
    print('For each point source, copy parameters of nearest areal zone')
    columns_to_copy = [
        'zoneid', 'zmax', 'zmin', 'hypo_depth', 'tectonic subregion', 'a', 'b',
        'stdb', 'mmax', 'stdmmax', 'rake', 'dip', 'strike', 'aspect ratio',
        'msr'
    ]
    smoothed_df.drop(columns=['a'], inplace=True)
    smoothed_df = smoothed_df.merge(active_areal_df[columns_to_copy],
                                    on='zoneid')
    smoothed_df['a'] = (np.log10(smoothed_df['lambda']) +
                        smoothed_df['b'] * smoothed_df['mmin model'])
    assert len_smoothed == smoothed_df.shape[0]

    # check for unassigned parameters
    display_drop = [
        'zmax', 'zmin', 'aspect ratio', 'msr', 'rake', 'dip', 'strike', 'stdb',
        'stdmmax'
    ]
    no_zoneid_df = smoothed_df[smoothed_df['zoneid'].isnull()]
    no_mmax_df = smoothed_df[smoothed_df['mmax'] == 0]
    no_b_df = smoothed_df[smoothed_df['b'] == 0]
    if not no_zoneid_df.empty:
        print(no_zoneid_df.drop(display_drop, axis=1).head())
        RuntimeError("Leftover points with no assigned zone id")
    if not no_mmax_df.empty:
        print(no_mmax_df.drop(display_drop, axis=1).head())
        RuntimeError("Leftover points with no assigned mmax")
    if not no_b_df.empty:
        print(no_b_df.drop(display_drop, axis=1).head())
        RuntimeError("Leftover points with no assigned b")

    if (no_mmax_df.empty and no_b_df.empty and no_zoneid_df.empty):
        print("SUCCESS: No points with unassigned MFD or zone")
    else:
        raise RuntimeError('Unassigned parameters remain.')

    # Thinning of models allows quick testing and git archiving of a sample
    res_deg = 1
    thinned_df = smoothed_df.loc[
        np.isclose(np.remainder(smoothed_df['latitude'], res_deg), 0)
        & np.isclose(np.remainder(smoothed_df['longitude'], res_deg), 0)].copy(
        )
    print('Thinning to %g° spacing reduces number of points from %d to %d.\n' %
          (res_deg, len(smoothed_df), len(thinned_df)))

    # write thinned models
    mark = time()
    thinned_base = (SMOOTHED_MODEL_FORMAT.replace('v%d', '') + 'thinned ' +
                    'v%d') % (smoothed_prefix, int(version))
    points2csv(thinned_df, thinned_base)
    points2nrml(thinned_df, thinned_base)
    print('Wrote %d thinned smoothed-gridded sources to CSV & NRML: %s\n' %
          (len(thinned_df), pd.to_timedelta(time() - mark, unit='s')))

    thinned_collapsed_df, collapsed_tree_df, _, _ = \
        collapse_sources(thinned_df, source_tree_symbolic_df)

    points2nrml(thinned_collapsed_df, thinned_base + ' collapsed')
    print(
        'Wrote %d collapsed thinned sources to CSV & NRML: %s\n' %
        (len(thinned_collapsed_df), pd.to_timedelta(time() - mark, unit='s')))

    # write full smoothed-gridded models (~10 minutes)
    if full:

        mark = time()
        smoothed_model_base = SMOOTHED_MODEL_FORMAT % (smoothed_prefix,
                                                       int(version))
        points2csv(smoothed_df, smoothed_model_base)
        points2nrml(smoothed_df, smoothed_model_base, by='mmin model')
        print('Wrote %d full smoothed-gridded sources to CSV & NRML: %s\n' %
              (len(smoothed_df), pd.to_timedelta(time() - mark, unit='s')))

        # write collapsed smoothed-gridded sources to NRML (~10 minutes)

        mark = time()
        smoothed_collapsed_df, collapsed_tree_df, _, _ = \
            collapse_sources(smoothed_df, source_tree_symbolic_df)

        points2nrml(smoothed_collapsed_df, smoothed_model_base + ' collapsed')
        print(
            'Wrote %d collapsed smoothed-gridded sources to CSV & NRML: %s\n' %
            (len(smoothed_collapsed_df),
             pd.to_timedelta(time() - mark, unit='s')))
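
A compressed sketch of the two-stage assignment used above (spatial join for points falling inside a zone, then nearest zone for the leftovers), on hypothetical toy data; the real code measures distances with openquake meshes rather than shapely:

import geopandas as gpd
from shapely.geometry import Point, Polygon

zones = gpd.GeoDataFrame(
    {'zoneid': [1, 2]},
    geometry=[Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
              Polygon([(2, 0), (3, 0), (3, 1), (2, 1)])])
points = gpd.GeoDataFrame(
    geometry=[Point(0.5, 0.5), Point(1.5, 0.5)])  # second point lies in a gap

# stage 1: points strictly inside a zone
joined = gpd.sjoin(points, zones, how='left', op='within')

# stage 2: nearest zone for points that stage 1 left unassigned
unassigned = joined['zoneid'].isnull()
for idx in joined[unassigned].index:
    dists = zones.distance(joined.loc[idx, 'geometry'])
    joined.loc[idx, 'zoneid'] = zones.loc[dists.idxmin(), 'zoneid']

print(joined[['zoneid']])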
Ejemplo n.º 59
0
 def time_sjoin(self, op):
     sjoin(self.df1, self.df2, op=op)
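
For context, a sketch of what a setup for such an ASV-style benchmark might look like; the class name, data sizes, and parameter list are assumptions, not the project's actual harness:

import numpy as np
import geopandas as gpd
from shapely.geometry import Point
from geopandas import sjoin


class BenchSjoin:
    # hypothetical parameterisation over the spatial predicate
    params = ['intersects', 'contains', 'within']
    param_names = ['op']

    def setup(self, op):
        rng = np.random.default_rng(0)
        xy = rng.uniform(0, 100, size=(1000, 2))
        pts = [Point(x, y) for x, y in xy]
        self.df1 = gpd.GeoDataFrame(geometry=pts)
        # buffered copies so every predicate has something to match
        self.df2 = gpd.GeoDataFrame(geometry=gpd.GeoSeries(pts).buffer(1.0))

    def time_sjoin(self, op):
        sjoin(self.df1, self.df2, op=op)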
Ejemplo n.º 60
0
corpus = args.corpus
data = []

with open(corpus) as f:
    for i, l in enumerate(f):
        t = ujson.loads(l)
        point = shapely.geometry.Point(t['geo']['longitude'], t['geo']['latitude'])
        if "hashtags" in t:
            data.append({'geometry': point, 'melt': t['melt'], 'hashtags': t['hashtags']})
        else:
            data.append({'geometry': point, 'melt': t['melt']})

allTweets = gpd.GeoDataFrame(data)
allTweets = allTweets.where(pd.notnull(allTweets), None)
allTweets.crs = departements.crs

sys.stdout.write("1/6 - Jointure entre les tweets et les départements\n")
allTweets_with_departments = gpd.sjoin(allTweets,departements, how="inner", op='intersects')

sys.stdout.write("2/6 - Sauvegarde de la jointure dans le .csv\n")
allTweets_with_departments.to_csv('tweets_with_departments.csv')


## freq by departement

def freqByDep(annotationType):
    freqParDepartement = defaultdict(lambda: defaultdict(int))
    for i in range(allTweets_with_departments.shape[0]):
        percentage = round(i * 100 / allTweets_with_departments.shape[0])
        sys.stdout.write("\r3/6 - Calcul des fréquences - " + str(annotationType) + " : " + str(percentage) + "%")
        if annotationType == "hashtags":
            if allTweets_with_departments.iloc[i][annotationType] is not None:
                for h in allTweets_with_departments.iloc[i][annotationType]: