from geopandas import GeoDataFrame, GeoSeries, read_file
from numpy import append, in1d
from pandas import concat


def agg_catch(catch_del_shp, catch_sites_csv, catch_sites_col=['GRIDCODE', 'SITE'], catch_col='GRIDCODE'):
    """
    Function to take the output of the ArcGIS catchment delineation polygon
    shapefile and catchment sites csv and return a shapefile with
    appropriately delineated polygons.
    """
    ## Catchment areas shp
    catch = read_file(catch_del_shp)[[catch_col, 'geometry']]

    ## Dissolve the polygons
    catch3 = catch.dissolve(catch_col)

    ## Determine upstream catchments (catch_net is defined elsewhere in this module)
    catch_df, singles_df = catch_net(catch_sites_csv, catch_sites_col)

    base1 = catch3[in1d(catch3.index, singles_df)].geometry
    for i in catch_df.index:
        t1 = append(catch_df.loc[i, :].dropna().values, i)
        t2 = GeoSeries(catch3[in1d(catch3.index, t1)].unary_union, index=[i])
        base1 = GeoSeries(concat([base1, t2]))

    ## Convert to GeoDataFrame (so that all functions can be applied to it)
    base2 = GeoDataFrame(base1.index, geometry=base1.geometry.values, crs=catch.crs)
    base2.columns = ['site', 'geometry']
    return base2
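# A minimal usage sketch for agg_catch (not part of the source): the shapefile
# and csv paths are hypothetical, and catch_net must be importable alongside it.
if __name__ == '__main__':
    site_catch = agg_catch('catch_del.shp', 'catch_sites.csv')  # hypothetical paths
    site_catch.to_file('site_catchments.shp')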
def _standardize(cls, geo_df: gpd.GeoDataFrame, source: Union[str, Path, gpd.GeoDataFrame],
                 feature_type_str: str):
    """
    Standardize the format of a given catchment/nexus geodataframe.

    The method first makes all column names lower case.  Then, unless the
    initial index already has a name of ``id`` (or some case-insensitive
    equivalent), the ``id`` column is set as the index.  An error is raised
    if this cannot be done because there is no ``id`` column.  In the case
    of the index name already being a case-insensitive equivalent of ``id``,
    but not ``id`` precisely (e.g., ``ID``), the index's name is also
    standardized to ``id``.

    Parameters
    ----------
    geo_df : gpd.GeoDataFrame
        The geodataframe in question.
    source : Union[str, Path, gpd.GeoDataFrame]
        Either the source file for the data or a passed "base" geodataframe
        (when a file, included in error messages).
    feature_type_str : str
        A string describing the type of feature for this data.
    """
    geo_df.columns = geo_df.columns.astype(str).str.lower()
    # Standardize capitalization if it looks like this is already set properly
    if geo_df.index.name != 'id' and str(geo_df.index.name).lower() == 'id':
        geo_df.index.name = 'id'
    # Otherwise, set the index as the 'id' column
    elif geo_df.index.name != 'id':
        # This requires an 'id' column to be present, of course
        if 'id' not in geo_df.columns:
            # Adjust the error message depending on whether the source was an
            # existing dataframe or a data file
            if not isinstance(source, gpd.GeoDataFrame):
                msg = "Bad format of {} file {}: no 'id' or 'ID' column".format(feature_type_str, source)
            else:
                msg = "Bad format of {} dataframe: no 'id' or 'ID' column".format(feature_type_str)
            # msg is already fully formatted at this point
            raise RuntimeError(msg)
        geo_df.set_index('id', inplace=True)
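# A minimal usage sketch (assumptions: _standardize is a classmethod on a
# hypothetical Hydrofabric class, and the hypothetical file has an 'ID' column).
import geopandas as gpd

catchments = gpd.read_file('catchments.geojson')  # hypothetical file
Hydrofabric._standardize(catchments, 'catchments.geojson', 'catchment')
assert catchments.index.name == 'id'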
import geoplot as gplt
import matplotlib.pyplot as plt
import numpy as np
from geopandas import GeoDataFrame

# PROJECTION is assumed to be a module-level geoplot/cartopy projection defined elsewhere


def pretty_plot(gg: GeoDataFrame, islands: GeoDataFrame, poly_viewsheds: GeoDataFrame,
                save_figure_to: str, proj=PROJECTION):
    x = gg[gg.apply(lambda x: not x.is_empty and x.area > 1e-9)]
    xa = GeoDataFrame(x.centroid, geometry=0, crs=islands.crs)
    xa.columns = ['geometry']
    xa_tmp = xa.reset_index()
    xa_tmp['idx'] = xa_tmp.apply(lambda y: (y.idx_a, y.idx_b), axis=1)
    xa_tmp['idx_other'] = xa_tmp.apply(lambda y: (y.idx_b, y.idx_a), axis=1)
    xa_tmp = xa_tmp.set_index('idx')
    paths = xa_tmp.join(xa_tmp, on='idx_other', lsuffix='_ab', rsuffix='_ba')
    paths = paths[paths.apply(
        lambda y: y.geometry_ab is not np.nan and y.geometry_ba is not np.nan, axis=1)]

    ax = gplt.polyplot(islands, projection=proj, figsize=(20, 20), color='darkgray')
    gplt.polyplot(poly_viewsheds, projection=proj, ax=ax,
                  linewidth=0, facecolor='lightgray', alpha=0.3)
    gplt.polyplot(x, projection=proj, ax=ax, linewidth=0, facecolor='red', alpha=0.3)
    gplt.sankey(paths, start='geometry_ab', end='geometry_ba', ax=ax,
                projection=proj, alpha=0.05, rasterized=False)
    plt.savefig(save_figure_to)
from geopandas import GeoDataFrame
from pandas import DataFrame
from shapely.geometry import Polygon


def data_frame_2_geo_data_frame(data_frame: DataFrame) -> GeoDataFrame:
    """
    Creates a ``GeoDataFrame`` from the given ``DataFrame``.

    Args:
        data_frame (DataFrame): ``DataFrame`` that is used to create a
            ``GeoDataFrame`` with geometric information.

    Returns:
        GeoDataFrame: Created ``GeoDataFrame``.
    """
    # Infer the tile size from the two smallest x values; use positional access
    # because sort_values keeps the original index labels
    data_frame_sorted = data_frame["x"].sort_values()
    tile_size = data_frame_sorted.iloc[1] - data_frame_sorted.iloc[0]
    geo_data_frame = GeoDataFrame(
        [[x, y, height, Polygon(((x, y), (x + tile_size, y),
                                 (x + tile_size, y + tile_size),
                                 (x, y + tile_size), (x, y)))]
         for x, y, height in data_frame.values],
        # Coordinate system source:
        # https://www.stadtentwicklung.berlin.de/geoinformation/landesvermessung/atkis/de/dgm.shtml
        crs={"init": "epsg:25833"})
    geo_data_frame.columns = ["x", "y", "height", "geometry"]
    return geo_data_frame
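# A minimal usage sketch with synthetic tile data (two 1 m tiles); the
# (x, y, height) column layout matches what the function iterates over.
import pandas as pd

tiles = pd.DataFrame({"x": [0.0, 1.0], "y": [0.0, 0.0], "height": [34.2, 35.1]})
gdf = data_frame_2_geo_data_frame(tiles)
print(gdf.geometry.iloc[0].bounds)  # (0.0, 0.0, 1.0, 1.0)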
def convert_data_cols_to_datetime(
        self, gdf: geopandas.GeoDataFrame) -> geopandas.GeoDataFrame:
    """
    Convert all data columns to datetime with `pd.to_datetime`

    Args:
        gdf (geopandas.GeoDataFrame): Input GeoDataFrame

    Returns:
        geopandas.GeoDataFrame: GeoDataFrame with data columns converted
            to `Timestamp`
    """
    converted_column_names = []
    for col in gdf.columns:
        # Leave the geometry column name untouched; convert everything else
        if col != "geometry":
            col = pd.to_datetime(col)
        converted_column_names.append(col)
    gdf.columns = converted_column_names
    return gdf
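# A minimal usage sketch (assumption: `converter` stands in for an instance of
# the class this method belongs to, and data columns are date-like strings).
import geopandas
from shapely.geometry import Point

gdf = geopandas.GeoDataFrame({"2020-01-01": [1.0], "2020-02-01": [2.0]},
                             geometry=[Point(0, 0)])
gdf = converter.convert_data_cols_to_datetime(gdf)
print(gdf.columns.tolist())  # [Timestamp('2020-01-01'), Timestamp('2020-02-01'), 'geometry']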
import geopandas as gpd
from shapely.wkb import dumps, loads  # assumed source of loads/dumps (supports output_dimension)


def format_gdf(gdf: gpd.GeoDataFrame, *, index_col: str = None) -> gpd.GeoDataFrame:
    """A function to prepare a GeoDataFrame for usage.

    Args:
        gdf (GeoDataFrame): The GeoDataFrame to format.
        index_col (str, optional): Defaults to ``None``. The name of the
            column in `gdf` to use as the index.

    Returns:
        gpd.GeoDataFrame
    """
    gdf = gdf.to_crs(epsg=26917)
    gdf.columns = gdf.columns.str.lower()
    if index_col is not None:
        gdf = gdf.set_index(index_col.lower()).sort_index()
    gdf['geometry'] = gdf['geometry'].apply(
        lambda x: loads(dumps(x, output_dimension=2)))  # flatten 3d to 2d
    return gdf
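# A minimal usage sketch (hypothetical file and column): reprojects to
# EPSG:26917, lower-cases column names, and indexes on 'SEGMENT_ID'.
roads = gpd.read_file('roads.shp')  # hypothetical file with a SEGMENT_ID column
roads = format_gdf(roads, index_col='SEGMENT_ID')
print(roads.crs, roads.index.name)  # EPSG:26917 segment_id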
import sqlalchemy
from geoalchemy2 import Geometry, WKTElement
from geopandas import GeoDataFrame

# DEFAULT_DB_URI is assumed to be a module-level connection string defined elsewhere


def _import_gdf(
    gdf: GeoDataFrame, sql_tablename: str, geom_type: str, uri: str = DEFAULT_DB_URI
) -> None:
    """Import a geopandas GeoDataFrame to SQL"""
    gdf.columns = [x.lower() for x in gdf.columns]

    epsg_code = int(str(gdf.crs).split(":")[1])

    gdf["geom"] = gdf["geometry"].apply(lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop(columns="geometry", inplace=True)

    engine = sqlalchemy.create_engine(uri)
    gdf.to_sql(
        sql_tablename,
        engine,
        dtype={"geom": Geometry(geom_type.upper(), srid=epsg_code)},
        if_exists="replace",
    )
    engine.dispose()
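# A minimal usage sketch (hypothetical file and table name): writes a point
# layer using the module's DEFAULT_DB_URI.
import geopandas as gpd

stations = gpd.read_file('stations.geojson')  # hypothetical file
_import_gdf(stations, 'bike_stations', 'point')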
# conn_HAIG, engine, great_circle_track_node and wkb_hexer are defined elsewhere in this script
def func(arg):
    last_idterm_idx, idterm = arg
    # for last_track_idx, idterm in enumerate(idterms_cars):
    print(idterm)
    idterm = str(idterm)
    # print('VIASAT GPS track:', track_ID)
    viasat_data = pd.read_sql_query('''
        SELECT * FROM public.routecheck_2019
        WHERE idterm = '%s' ''' % idterm, conn_HAIG)
    if len(viasat_data) > 0:
        viasat_data = viasat_data.sort_values('timedate')
        ## add a field with the "NEXT timedate" in seconds
        viasat_data['next_totalseconds'] = viasat_data.totalseconds.shift(-1)
        viasat_data['next_timedate'] = viasat_data.timedate.shift(-1)
        viasat_data['next_totalseconds'] = viasat_data['next_totalseconds'].astype('Int64')
        viasat_data['next_totalseconds'] = viasat_data['next_totalseconds'].fillna(0)
        viasat_data['next_lon'] = viasat_data.longitude.shift(-1)  # longitude of the next trip
        viasat_data['next_lat'] = viasat_data.latitude.shift(-1)   # latitude of the next trip
        all_trips = list(viasat_data.idtrajectory.unique())
        ### initialize an empty dataframe
        # route_CATANIA = pd.DataFrame([])
        for idx, idtrajectory in enumerate(all_trips):
            # idtrajectory = 122344050
            # print(idtrajectory)
            ## filter data by idterm and by idtrajectory (trip)
            data = viasat_data[viasat_data.idtrajectory == idtrajectory]
            ## group by TRIP_ID; if there is more than one, keep only the one with the larger number of rows
            counts_TRIP_ID = data.groupby(data[['TRIP_ID']].columns.tolist(),
                                          sort=False).size().reset_index().rename(columns={0: 'counts'})
            data = data[data.TRIP_ID == counts_TRIP_ID[
                counts_TRIP_ID.counts == max(counts_TRIP_ID.counts)].TRIP_ID[0]]
            ### zip the coordinates into a point object and convert to a GeoDataFrame
            if len(data) > 3:
                geometry = [Point(xy) for xy in zip(data.longitude, data.latitude)]
                df = GeoDataFrame(data, geometry=geometry)
                # Aggregate these points with the GroupBy
                df = df.groupby(['idtrajectory'])['geometry'].apply(lambda x: LineString(x.tolist()))
                df = GeoDataFrame(df, geometry='geometry')
                # df.plot()
                df.columns = ['geometry']
                idtrace_o = data[data.segment == min(data.segment)][['id']].iloc[0][0]
                idtrace_d = data[data.segment == max(data.segment)][['id']].iloc[0][0]
                # latitude_o = data[data.segment == min(data.segment)][['latitude']].iloc[0][0]    ## at the ORIGIN
                # longitude_o = data[data.segment == min(data.segment)][['longitude']].iloc[0][0]  ## at the ORIGIN
                # latitude_d = data[data.segment == max(data.segment)][['latitude']].iloc[0][0]    ## at the DESTINATION
                # longitude_d = data[data.segment == max(data.segment)][['longitude']].iloc[0][0]  ## at the DESTINATION
                timedate = str(data[data.segment == min(data.segment)][['timedate']].iloc[0][0])  ## at the ORIGIN
                ## trip distance in meters (sum of the increments of the "progressive")
                ## add a field with the "previous progressive"
                data['last_progressive'] = data.progressive.shift()  # <-------
                data['last_progressive'] = data['last_progressive'].astype('Int64')
                data['last_progressive'] = data['last_progressive'].fillna(0)
                ## compute increments of the distance (in meters)
                data['increment'] = data.progressive - data.last_progressive
                ## sum all the increments
                tripdistance_m = sum(data['increment'][1:len(data['increment'])][data.increment > 0])
                ## trip time in seconds (duration)
                time_o = data[data.segment == min(data.segment)][['path_time']].iloc[0][0]
                time_d = data[data.segment == max(data.segment)][['path_time']].iloc[0][0]
                triptime_s = time_d - time_o
                # time_o = data[data.segment == min(data.segment)][['totalseconds']].iloc[0][0]
                # time_d = data[data.segment == max(data.segment)][['totalseconds']].iloc[0][0]
                # triptime_s = time_d - time_o
                checkcode = data[data.segment == min(data.segment)][['anomaly']].iloc[0][0]  ## at the ORIGIN
                ## time interval between the starts of two consecutive trips
                breaktime_s = data[data.segment == max(data.segment)][['next_timedate']].iloc[0][0] - \
                              data[data.segment == max(data.segment)][['timedate']].iloc[0][0]
                breaktime_s = breaktime_s.total_seconds()
                if breaktime_s < 0:
                    breaktime_s = None
                ### get the distance between the positions of two consecutive TRIPS
                ### (from the END of a TRIP to the START of a NEW TRIP)
                lon_end = data[data.segment == max(data.segment)][['longitude']].iloc[0][0]   # longitude at the END of a TRIP
                lat_end = data[data.segment == max(data.segment)][['latitude']].iloc[0][0]
                lon_start = data[data.segment == max(data.segment)][['next_lon']].iloc[0][0]  # longitude at the START of a NEW TRIP
                lat_start = data[data.segment == max(data.segment)][['next_lat']].iloc[0][0]
                ### find the distance between the coordinates of two consecutive TRIPS in METERS!!!
                ### end = (37.571518, 14.895852)
                ### start = (37.570873, 14.896243)
                deviation_pos = great_circle_track_node(lon_end, lat_end, lon_start, lat_start)
                ### build the final dataframe ("route" table)
                if tripdistance_m > 0:
                    df_ROUTE = pd.DataFrame({
                        'idtrajectory': [idtrajectory],
                        'idterm': [idterm],
                        'idtrace_o': [idtrace_o],
                        'idtrace_d': [idtrace_d],
                        # 'latitude_o': [latitude_o],
                        # 'longitude_o': [longitude_o],
                        # 'latitude_d': [latitude_d],
                        # 'longitude_d': [longitude_d],
                        'timedate_o': [timedate],
                        'tripdistance_m': [tripdistance_m],
                        'triptime_s': [triptime_s],
                        'checkcode': [checkcode],
                        'breaktime_s': [breaktime_s]
                    })
                    geom = df['geometry'].apply(wkb_hexer)
                    df_ROUTE['geom'] = geom.iloc[0]
                    df_ROUTE['deviation_pos_m'] = deviation_pos
                    # route_CATANIA = route_CATANIA.append(df_ROUTE)
                    connection = engine.connect()
                    df_ROUTE.to_sql("PROVA_route_2019", con=connection,
                                    schema="public", if_exists='append')
                    connection.close()
# Making 2000 random locations
# a, b, c, d (bounding box limits), df and date are defined earlier in the script
lat = []
long = []
for x, y in zip(range(2000), range(2000)):
    lat.append(random.uniform(a, b))
    long.append(random.uniform(c, d))

bar = progressbar.ProgressBar()
for x, y, i in zip(long, lat, bar(range(len(long)))):
    response = requests.get('https://mobile.o.bike/api/v1/bike/list?longitude=' +
                            str(x) + '&latitude=' + str(y))
    Lil_data = response.json()
    df_new = pd.DataFrame(Lil_data['data']['list'])
    frames = [df, df_new]
    df = pd.concat(frames)

df = df.drop_duplicates(keep='first')
df = df.reset_index()
geometry = [Point(xy) for xy in zip(df['longitude'], df['latitude'])]
df = df.drop(['longitude', 'latitude'], axis=1)
crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(df, crs=crs, geometry=geometry)
# A flat list keeps single-level column labels (the original nested list
# would have created a MultiIndex)
geo_df.columns = ['index', 'countyId', 'helmet', 'id', 'imei', date]
geo_df.to_csv(date + '2000obikesZH.csv')
import geopandas as gpd
import pandas as pd

# _check_requirements, _remove_tinynetworks, _add_tocomid and ZeroMatched are
# defined elsewhere in this module


def prepare_nhdplus(
    flw: gpd.GeoDataFrame,
    min_network_size: float,
    min_path_length: float,
    min_path_size: float = 0,
    purge_non_dendritic: bool = False,
    verbose: bool = False,
) -> gpd.GeoDataFrame:
    """Clean up and fix common issues of the NHDPlus flowline database.

    Ported from `nhdplusTools <https://github.com/USGS-R/nhdplusTools>`__

    Parameters
    ----------
    flw : geopandas.GeoDataFrame
        NHDPlus flowlines with at least the following columns:
        COMID, LENGTHKM, FTYPE, TerminalFl, FromNode, ToNode, TotDASqKM,
        StartFlag, StreamOrde, StreamCalc, TerminalPa, Pathlength,
        Divergence, Hydroseq, LevelPathI
    min_network_size : float
        Minimum size of drainage network in sqkm.
    min_path_length : float
        Minimum length of terminal level path of a network in km.
    min_path_size : float, optional
        Minimum size of outlet level path of a drainage basin in km.
        Drainage basins with an outlet drainage area smaller than this
        value will be removed. Defaults to 0.
    purge_non_dendritic : bool, optional
        Whether to remove non-dendritic paths, defaults to False.
    verbose : bool, optional
        Whether to show a message about the removed features, defaults to False.

    Returns
    -------
    geopandas.GeoDataFrame
        Cleaned up flowlines. Note that all column names are converted to lower case.
    """
    flw.columns = flw.columns.str.lower()
    nrows = flw.shape[0]

    req_cols = [
        "comid",
        "terminalfl",
        "terminalpa",
        "hydroseq",
        "streamorde",
        "streamcalc",
        "divergence",
        "fromnode",
        "ftype",
    ]

    _check_requirements(req_cols, flw)
    flw[req_cols[:-1]] = flw[req_cols[:-1]].astype("Int64")

    if not any(flw.terminalfl == 1):
        if all(flw.terminalpa == flw.terminalpa.iloc[0]):
            flw.loc[flw.hydroseq == flw.hydroseq.min(), "terminalfl"] = 1
        else:
            raise ZeroMatched("No terminal flags were found in the dataframe.")

    # Keep only non-coastline features (FTYPE "Coastline" or FCODE 566);
    # combining the two tests with & avoids an always-true condition
    not_coastline = (flw.ftype != "Coastline") & (flw.ftype != 566)
    if purge_non_dendritic:
        flw = flw[not_coastline & (flw.streamorde == flw.streamcalc)]
    else:
        flw = flw[not_coastline]
    flw.loc[flw.divergence == 2, "fromnode"] = pd.NA

    flw = _remove_tinynetworks(flw, min_path_size, min_path_length, min_network_size)

    if verbose:
        print(f"Removed {nrows - flw.shape[0]} paths from the flowlines.")

    if flw.shape[0] > 0:
        flw = _add_tocomid(flw)

    return flw
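# A minimal usage sketch (hypothetical file): clean an NHDPlus extract that has
# the required columns, with a 10 sqkm network floor and a 1 km path floor.
flowlines = gpd.read_file('nhdplus_flowlines.gpkg')  # hypothetical NHDPlus extract
flowlines = prepare_nhdplus(flowlines, min_network_size=10,
                            min_path_length=1, purge_non_dendritic=True)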
import time

import geopandas as gpd
import sqlalchemy
from geoalchemy2 import Geometry, WKTElement

# log_activity, project_spatial_table and prep_spatial_table are defined
# elsewhere in this module


def geodataframe_to_postgis(
        geodataframe: gpd.GeoDataFrame,
        output_table_name: str,
        uri: str,
        src_epsg: Union[bool, int] = None,
        output_epsg: Union[bool, int] = None,
        debug: bool = False
):
    """
    Write a ``geopandas.GeoDataFrame`` to a PostGIS table in a SQL database.

    Assumes that the geometry column has already been named 'geometry'.

    :param geodataframe: geopandas.GeoDataFrame
    :param output_table_name: 'name_of_the_output_table'
    :param uri: connection string
    :param src_epsg: if not None, the geodataframe will be assigned this EPSG code
    :param output_epsg: if not None, data will be reprojected from the input EPSG
                        to the specified EPSG
    :param debug: if True, print progress messages
    :return: None
    """
    start_time = time.time()

    # Get the geometry type.
    # It's possible there are both MULTIPOLYGONS and POLYGONS; this grabs the MULTI variant
    geom_types = list(geodataframe.geometry.geom_type.unique())
    geom_typ = max(geom_types, key=len).upper()

    if debug:
        print(f'## PROCESSING {geom_typ} geodataframe to {output_table_name} in SQL')

    # Manually set the EPSG if the user passes one
    if src_epsg:
        geodataframe.crs = f"epsg:{src_epsg}"
        epsg_code = src_epsg
    # Otherwise, try to get the EPSG value directly from the geodataframe
    else:
        try:
            # Older geodataframes have the CRS stored as a dict: {'init': 'epsg:4326'}
            if type(geodataframe.crs) == dict:
                epsg_code = int(geodataframe.crs['init'].split(" ")[0].split(':')[1])
            # Newer geopandas has a different approach:
            else:
                epsg_code = int(str(geodataframe.crs).split(':')[1])
        except Exception:
            print('This geodataframe does not have a valid EPSG. Aborting.')
            print(geodataframe.crs)
            return

    # Sanitize the columns before writing to the database

    # Make all column names lower case
    geodataframe.columns = [x.lower() for x in geodataframe.columns]

    # Replace the 'geom' column with 'geometry'
    if 'geom' in geodataframe.columns:
        geodataframe['geometry'] = geodataframe['geom']
        geodataframe.drop(columns='geom', inplace=True)

    # Drop the 'gid' column
    if 'gid' in geodataframe.columns:
        geodataframe.drop(columns='gid', inplace=True)

    # Rename 'uid' to 'old_uid'
    if 'uid' in geodataframe.columns:
        geodataframe['old_uid'] = geodataframe['uid']
        geodataframe.drop(columns='uid', inplace=True)

    # Build a 'geom' column using geoalchemy2 and drop the source 'geometry' column
    geodataframe['geom'] = geodataframe['geometry'].apply(
        lambda x: WKTElement(x.wkt, srid=epsg_code))
    geodataframe.drop(columns='geometry', inplace=True)

    # Write the geodataframe to the SQL database
    if debug:
        print(f'## -> WRITING TO {uri}')
    engine = sqlalchemy.create_engine(uri)
    geodataframe.to_sql(output_table_name, engine,
                        if_exists='replace', index=True, index_label='gid',
                        dtype={'geom': Geometry(geom_typ, srid=epsg_code)})
    engine.dispose()

    if debug:
        runtime = round((time.time() - start_time), 2)
        print(f'\t FINISHED IN {runtime} seconds')

    log_activity("pGIS.geodataframe_to_postgis",
                 uri=uri,
                 query_text=f"Wrote geopandas.GeoDataFrame to {output_table_name}",
                 debug=debug)

    # If provided an EPSG, alter whatever the native projection was to the output_epsg
    if output_epsg:
        project_spatial_table(output_table_name, geom_typ, epsg_code, output_epsg,
                              uri=uri, debug=debug)

    # Add a unique_id column and build a spatial index
    prep_spatial_table(output_table_name, uri=uri, debug=debug)
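# A minimal usage sketch (hypothetical file, URI, and EPSG codes): write parcels
# to PostGIS, declaring the source projection and reprojecting the stored table.
parcels = gpd.read_file('parcels.shp')  # hypothetical file
geodataframe_to_postgis(parcels, 'parcels',
                        uri='postgresql://user:pass@localhost:5432/gis',  # hypothetical
                        src_epsg=2272, output_epsg=4326, debug=True)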
#################################################
#### Plotting

### Extract x and y data for plotting
print('Creating the plot')

zones1 = multipoly_to_poly(view_zones)
zones1['x'] = zones1.apply(getPolyCoords, coord_type='x', axis=1)
zones1['y'] = zones1.apply(getPolyCoords, coord_type='y', axis=1)
zones2 = zones1.drop('geometry', axis=1)

cant1 = GeoDataFrame(['Canterbury'], geometry=[zones1.unary_union])
cant1.columns = ['site', 'geometry']
cant1['x'] = cant1.apply(getPolyCoords, coord_type='x', axis=1)
cant1['y'] = cant1.apply(getPolyCoords, coord_type='y', axis=1)
cant2 = cant1.drop('geometry', axis=1)

## Catchments
catch1 = multipoly_to_poly(site_catch2)
catch1['x'] = catch1.apply(getPolyCoords, coord_type='x', axis=1)
catch1['y'] = catch1.apply(getPolyCoords, coord_type='y', axis=1)
catch2 = catch1.drop('geometry', axis=1)

### Combine with time series data
data1 = merge(cat1.unstack('time').reset_index(), zones2, on=['zone'])

time_index = hy_summ2.time.unique().tolist()
data1['cat'] = data1[time_index[-1]]
def import_geodataframe(
    self,
    gdf: gpd.GeoDataFrame,
    table_name: str,
    src_epsg: Union[int, bool] = False,
    if_exists: str = "replace",
    schema: str = None,
    uid_col: str = "uid",
):
    """
    Import an in-memory ``geopandas.GeoDataFrame`` to the SQL database.

    :param gdf: geodataframe with data you want to save
    :type gdf: gpd.GeoDataFrame
    :param table_name: name of the table that will get created
    :type table_name: str
    :param src_epsg: The source EPSG code can be passed as an integer.
        By default this function will try to read the EPSG code directly,
        but some spatial data is funky and requires that you explicitly
        declare its projection. Defaults to False
    :type src_epsg: Union[int, bool], optional
    :param if_exists: pandas argument to handle overwriting data,
        defaults to "replace"
    :type if_exists: str, optional
    """
    if not schema:
        schema = self.ACTIVE_SCHEMA

    # Read the geometry type. It's possible there are
    # both MULTIPOLYGONS and POLYGONS. This grabs the MULTI variant
    geom_types = list(gdf.geometry.geom_type.unique())
    geom_typ = max(geom_types, key=len).upper()

    print(f"\t -> SQL tablename: {schema}.{table_name}")
    print(f"\t -> Geometry type: {geom_typ}")
    print("\t -> Beginning DB import...")

    start_time = datetime.now()

    # Manually set the EPSG if the user passes one
    if src_epsg:
        gdf.crs = f"epsg:{src_epsg}"
        epsg_code = src_epsg
    # Otherwise, try to get the EPSG value directly from the geodataframe
    else:
        # Older gdfs have the CRS stored as a dict: {'init': 'epsg:4326'}
        if type(gdf.crs) == dict:
            epsg_code = int(gdf.crs["init"].split(" ")[0].split(":")[1])
        # Newer geopandas has a different approach
        else:
            epsg_code = int(str(gdf.crs).split(":")[1])

    # Sanitize the columns before writing to the database

    # Make all column names lower case
    gdf.columns = [x.lower() for x in gdf.columns]

    # Replace the 'geom' column with 'geometry'
    if "geom" in gdf.columns:
        gdf["geometry"] = gdf["geom"]
        gdf.drop(columns="geom", inplace=True)

    # Drop the 'gid' column
    if "gid" in gdf.columns:
        gdf.drop(columns="gid", inplace=True)

    # Rename 'uid' to 'old_uid'
    if uid_col in gdf.columns:
        gdf[f"old_{uid_col}"] = gdf[uid_col]
        gdf.drop(columns=uid_col, inplace=True)

    # Build a 'geom' column using geoalchemy2
    # and drop the source 'geometry' column
    gdf["geom"] = gdf["geometry"].apply(
        lambda x: WKTElement(x.wkt, srid=epsg_code))
    gdf.drop(columns="geometry", inplace=True)

    # Write the geodataframe to the SQL database
    self.add_schema(schema)
    engine = sqlalchemy.create_engine(self.uri())
    gdf.to_sql(
        table_name,
        engine,
        if_exists=if_exists,
        # index=True,
        # index_label=uid_col,
        schema=schema,
        dtype={"geom": Geometry(geom_typ, srid=epsg_code)},
    )
    engine.dispose()

    end_time = datetime.now()
    runtime = end_time - start_time
    print(f"\t -> ... import completed in {runtime}")

    self.table_add_uid_column(table_name, schema=schema, uid_col=uid_col)
    self.table_add_spatial_index(table_name, schema=schema)
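# A minimal usage sketch (assumption: `db` stands in for an instance of the
# database class this method belongs to; file and schema names are hypothetical).
import geopandas as gpd

trails = gpd.read_file('trails.geojson')  # hypothetical file
db.import_geodataframe(trails, 'trails', schema='public')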
def makeGrid(ipoints, experiment, gridsize):
    # Projections
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum': 'WGS84', 'no_defs': True, 'proj': 'longlat'}

    # import grid script
    sys.path.insert(0, os.getcwd() + '/mapping/libs/')
    import grid as g

    opath = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-grid'
    if os.path.isdir(opath):
        print("already a folder!")
    else:
        os.mkdir(opath)

    # gridsize = 200
    ogridname = "grid_" + str(gridsize) + "m.shp"
    ofile = opath + "/" + ogridname
    print("making grid")
    g.main(ofile, ipoints.total_bounds[0], ipoints.total_bounds[2],
           ipoints.total_bounds[1], ipoints.total_bounds[3], gridsize, gridsize)
    print("grid complete!")

    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj

    # create grid id to groupby
    grid['id'] = [i for i in range(len(grid))]

    # Read in transect to spatially subset grids in transect
    transect = GeoDataFrame.from_file(os.getcwd() + '/diysco2-db/_main_/study-area/' +
                                      'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid
    # transectgrid = grid[grid.geometry.intersects(transect.geometry)]; print transectgrid
    sagrid = []
    for i in range(len(grid)):
        if np.array(transect.intersects(grid.geometry[i]))[0] != False:
            sagrid.append(grid.geometry[i])

    transectgrid = GeoDataFrame(sagrid)
    transectgrid.columns = ['geometry']
    transectgrid['id'] = [i for i in range(len(transectgrid))]
    transectgrid.crs = gridproj

    transectgrid.to_file(ofile[:-4] + "_transect.shp")
    # transectgrid.to_file(ofile[:-4]+"_transect.geojson", driver="GeoJSON")

    ## !!! Some weird things with reading in data make the sjoin work !!! :(
    transectgrid = GeoDataFrame.from_file(ofile[:-4] + "_transect.shp")
    transectgrid.crs = gridproj
    print(transectgrid.head())

    ipoints = GeoDataFrame.from_file(os.getcwd() + '/diysco2-db/campaigns/' + experiment +
                                     '/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print(ipoints.head())
    # ipoints['id'] = [i for i in range(len(ipoints))]

    # Spatial join points to grid
    oname = "gridjoin_" + str(gridsize) + "m.shp"
    # join_inner_df = sjoin(transectgrid, ipoints, how="inner")
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    # join_inner_df.to_file(opath + "/" + oname)

    return join_inner_df
def heatmap(self, a, poly_list):
    "provide density of agents positions as a heatmap"
    "!! add poly list not working yet"
    # sample_agents = [self.base_model.agents[j] for j in self.index]
    # swap if restricting observed agents
    filter_class = self.filter_class
    bin_size = filter_class.filter_params["bin_size"]
    width = filter_class.model_params["width"]
    height = filter_class.model_params["height"]
    os.mkdir(self.save_dir + "output_heatmap")

    "cmap set up. defining bottom value (0) to be black"
    cmap = cm.cividis
    cmaplist = [cmap(i) for i in range(cmap.N)]
    cmaplist[0] = (0.0, 0.0, 0.0, 1.0)
    cmap = col.LinearSegmentedColormap("custom_cmap", cmaplist, N=cmap.N)
    cmap = cmap.from_list("custom", cmaplist)

    "split norm for better vis"
    n = self.filter_class.model_params["pop_total"]
    # DivergingNorm here appears to be a custom six-argument norm defined
    # elsewhere in this project, not matplotlib.colors.DivergingNorm
    norm = DivergingNorm(1, n / 3, 0.1, 0.9, 1e-8, n)

    for i in range(a.shape[0]):
        locs = a[i, :]
        counts = self.filter_class.poly_count(poly_list, locs)
        if np.nansum(counts) != 0:
            densities = np.array(counts) / np.nansum(counts)  # density
        else:
            densities = np.array(counts)  # counts
        # counts[np.where(counts==0)] = np.nan
        frame = GeoDataFrame([densities, counts, poly_list]).T
        frame.columns = ["densities", "counts", "geometry"]
        # norm = col.DivergingNorm(0.2)

        f = plt.figure(figsize=(12, 8))
        ax = f.add_subplot(111)
        divider = make_axes_locatable(ax)
        cax = divider.append_axes("right", size="5%", pad=0.05)

        "plot density histogram and locations scatter plot assuming at least one agent available"
        if np.nansum(counts) != 0:
            # ax.scatter(locs[0::2], locs[1::2], color="cyan", label="True Positions")
            ax.set_ylim(0, height)
            ax.set_xlim(0, width)
            column = frame["counts"].astype(float)
            im = frame.plot(column=column, ax=ax, cax=cax, cmap=cmap,
                            norm=norm, vmin=0, vmax=n)
            for k, count in enumerate(counts):
                if count > 0:
                    ax.annotate(s=count, xy=poly_list[k].centroid.coords[0],
                                ha='center', va="center", color="w")
        else:
            """
            dummy frame if no locations present e.g. at the start.
            prevents divide by zero error in hist2d
            """
            ax.set_ylim(0, height)
            ax.set_xlim(0, width)
            column = frame["densities"].astype(float)
            im = frame.plot(column=column, ax=ax, cax=cax, cmap=cmap,
                            norm=norm, vmin=0, vmax=1)

        "set up cbar. colouration proportional to number of agents"
        ax.text(0, 101, s="Total Agents: " + str(np.sum(counts)), color="k")
        sm = cm.ScalarMappable(norm=norm, cmap=cmap)
        cbar = plt.colorbar(sm, cax=cax, spacing="proportional")
        cbar.set_label("Agent Counts")
        cbar.set_alpha(1)
        # cbar.draw_all()

        "set legend to bottom centre outside of plot"
        box = ax.get_position()
        ax.set_position([box.x0, box.y0 + box.height * 0.1,
                         box.width, box.height * 0.9])

        "labels"
        ax.set_xlabel("Corridor width")
        ax.set_ylabel("Corridor height")
        # ax.set_title("Agent Densities vs True Positions")
        cbar.set_label(f"Agent Counts (out of {n})")

        """
        frame number and saving. padded zeroes to keep frames in order.
        padded to nearest upper order of 10 of number of iterations.
        """
        number = str(i).zfill(ceil(log10(a.shape[0])))
        file = self.save_dir + f"output_heatmap/{number}"
        f.savefig(file)
        plt.close()

    animations.animate(self, self.save_dir + "output_heatmap",
                       self.save_dir + f"heatmap_{filter_class.pop_total}_", 12)