def __convert_gdf_to_bokeh_data(
    input_gdf: gpd.GeoDataFrame, get_gdf_structure: bool = False
) -> ColumnDataSource:
    """Build a bokeh ``ColumnDataSource`` from a GeoDataFrame.

    The "x" and "y" entries are produced by running every geometry through
    ``geometry_2_bokeh_format`` (once with "x", once with "y"); every other
    column is copied over as a plain list under its own name.

    Args:
        input_gdf: the GeoDataFrame to convert; must expose a "geometry" column.
        get_gdf_structure: when true, only the first row is converted, which
            yields a source with the same columns but a single record.

    Returns:
        The populated ``ColumnDataSource``.

    Raises:
        AssertionError: if ``input_gdf`` is not a GeoDataFrame.
    """
    assert isinstance(
        input_gdf, gpd.GeoDataFrame
    ), f"use a GeoDataframe please => found {type(input_gdf)}"

    if get_gdf_structure:
        input_gdf = input_gdf.head(1)

    geometries = input_gdf["geometry"]
    source_columns = {
        "x": geometries.apply(
            lambda geom: geometry_2_bokeh_format(geom, "x")
        ).tolist(),
        "y": geometries.apply(
            lambda geom: geometry_2_bokeh_format(geom, "y")
        ).tolist(),
    }
    # Attribute columns are added after x/y, so a column literally named
    # "x" or "y" replaces the geometry-derived values.
    for column in input_gdf.columns:
        if column != "geometry":
            source_columns[column] = input_gdf[column].to_list()

    return ColumnDataSource(source_columns)
frames = [guiddf, pubdatedf, isodf, countrydf, etypedf, epidf, evidf, bboxdf,coordf, aleveldf, ascoredf] df = pd.concat(frames, axis=1, ignore_index = True, names=[frames]) return df[df[2].isin(wfpiso3)] #!IMPORTANT!# #return df data = get_data() data #f = 'C:/Users/Michael/Desktop/Notebooks/test.csv' #data.to_csv(f, encoding='utf-8') geometry = [Point(xy) for xy in zip(data[8])] gdacsdf = data.drop([8], axis=1) crs = {'init': 'epsg:4326'} gdf = GeoDataFrame(gdacsdf, crs=crs, geometry=geometry) gdf.head() #Same as line 75/56 #geodata = gpd.GeoDataFrame(gdf) #geodata.head(3) jsondata = geodata.to_json() m = folium.Map(tiles='stamentoner') #cartodbpositron #stamentoner #cartodbdark_matter folium.GeoJson(jsondata).add_to(m) m.fit_bounds(m.get_bounds()) #m.save(os.path.join('results', 'geopandas_2.html')) m
def log_data_frame(gdf: GeoDataFrame) -> None:
    """Emit two debug log records describing ``gdf``.

    The first record is the frame's ``head()``; the second is the text that
    ``gdf.info()`` writes, captured in an in-memory buffer so it goes to the
    logger instead of stdout.
    """
    logger.debug(gdf.head())

    info_sink = StringIO()
    gdf.info(buf=info_sink)
    info_text = info_sink.getvalue()
    logger.debug(info_text)
api = tweepy.API(auth)

keywords = ['earthquake', 'quake', 'magnitude', 'epicenter', 'magnitude', 'aftershock']

# Collect up to 10 tweets matching any of the keywords:
search_results = api.search(q=' OR '.join(keywords), count=10)
df = pd.DataFrame([
    {
        'id': result.id,
        'created_at': result.created_at,
        # The user name is prefixed with a fixed mask string.
        'user': '******'+result.user.name,
        'text': result.text,
    }
    for result in search_results
])[['id', 'created_at', 'user', 'text']]
# DataFrame has no `display` method (the original `df.display(...)` raised
# AttributeError); use the notebook's display() as the second query does.
display(df.head())

# What is the weather 500 km around Lyon?:
keywords2 = ['weather' , 'forcast', 'sun', 'rain', 'clouds', 'storm']
# Only in english please !
lang = 'en'
# Get tweets around Lyon (latitude,longitude,radius):
geocode = '45.76,4.84,500km'
# Collect tweets using the keywords:
# NOTE(review): the search endpoint may cap how many results one request
# returns; confirm count=1500 is honoured rather than silently reduced.
search_results2 = api.search(q=' OR '.join(keywords2), geocode=geocode, lang=lang, count=1500)

# Convert to GeoPandas:
df2 = pd.DataFrame([
    {
        'id': result.id,
        'created_at': result.created_at,
        'user': '******'+result.user.name,
        'text': result.text,
        'geometry': result.coordinates,
    }
    for result in search_results2
])[['id', 'created_at', 'user', 'text', 'geometry']]
df2['geometry'] = df2['geometry'].apply(
    lambda coords: np.nan if coords is None else Point(coords['coordinates']))
# Remove documents without geometry point (the twitter API may obtain location
# using user details rather than the tweet location.). Restricting dropna to
# the geometry column keeps rows that merely lack another field.
df2 = df2.dropna(subset=['geometry'])
# Tweet coordinates are GeoJSON longitude/latitude pairs, i.e. WGS84
# (EPSG:4326). The previous EPSG:2263 is a projected New York state-plane
# system and mislabels these points.
df2 = GeoDataFrame(df2, crs={'init': 'epsg:4326'})
display(df2.head())
import pandas as pd

os.chdir("path to working directory")

# ### Create GeoDataFrames
from geopandas import GeoDataFrame
from shapely.geometry import Point, LineString

# Each table gets a point geometry built from its Long (x) / Lat (y) columns.
shipping_gdf = GeoDataFrame(
    shipping,
    geometry=[Point(xy) for xy in zip(shipping.Long, shipping.Lat)])
noShipping_gdf = GeoDataFrame(
    noShipping,
    geometry=[Point(xy) for xy in zip(noShipping.Long, noShipping.Lat)])
hq_gdf = GeoDataFrame(hq, geometry=[Point(xy) for xy in zip(hq.Long, hq.Lat)])
hq_gdf.head()


# ### Get adjusted lat/long coordinates
# https://stackoverflow.com/questions/30740046/calculate-distance-to-nearest-feature-with-geopandas
def nearest_poly(point, polygons):
    """Return the first-column value of the row in *polygons* nearest to *point*.

    Args:
        point: geometry to measure from.
        polygons: GeoDataFrame whose geometries are compared against *point*.

    Returns:
        The value in column 0 of the closest row; on ties, the first such row.
    """
    # Measure once; the original evaluated the same distance series three times.
    distances = polygons.distance(point)
    # `.iat` is positional, so look up the row *position* of the minimum. The
    # original passed the index *label*, which only matches the position when
    # the frame has a default 0..n-1 index.
    nearest_position = distances.reset_index(drop=True).idxmin()
    return polygons.iat[nearest_position, 0]


def getXY(pt):
    """Return the (x, y) coordinate pair of a point geometry."""
    return (pt.x, pt.y)
# #convert to geodataframe

# In[6]:

# Point takes (x, y), and with the EPSG:4326 CRS declared below geopandas
# treats x as longitude and y as latitude. The original zipped
# (LATITUDE, LONGITUDE), which swapped the two axes.
# NOTE(review): any other layer joined against these stations must use the
# same longitude-first order - verify.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_stations.LONGITUDE, df_stations.LATITUDE)
]
# The raw coordinate columns are dropped once they live in the geometry.
df_stations = df_stations.drop(['LATITUDE', 'LONGITUDE'], axis=1)
crs = {'init': 'epsg:4326'}
geodf_stations = GeoDataFrame(df_stations, crs=crs, geometry=geometry)

# In[7]:

geodf_stations.info()
geodf_stations.head()

# In[8]:

#add a new geometry to geodf_stations of a circle of X miles around each station
#new design uses polygons that will be loaded from a shape file so drawing buffer circles around the stations will not be required
#X = 0.01
#geodf_stations['CIRCLE'] = geodf_stations.geometry.buffer(X)
#geodf_stations.geometry.name
#geodf_stations = geodf_stations.rename(columns={'geometry':'POINT'}).set_geometry('CIRCLE')
#geodf_stations.geometry.name
#geodf_stations.info()
#geodf_stations.head()

# # LOAD STATIONS FROM TRANSIT DATA
df_stations.columns = [
    'STATION_ID', 'STOP_ID', 'STOP_NAME', 'BOROUGH', 'LATITUDE', 'LONGITUDE'
]

# convert to geodataframe
# Point takes (x, y), and with the EPSG:4326 CRS declared below geopandas
# treats x as longitude and y as latitude. The original zipped
# (LATITUDE, LONGITUDE), which swapped the two axes.
# NOTE(review): any other layer joined against these stations must use the
# same longitude-first order - verify.
geometry = [
    Point(lon, lat)
    for lon, lat in zip(df_stations.LONGITUDE, df_stations.LATITUDE)
]
# The raw coordinate columns are dropped once they live in the geometry.
df_stations = df_stations.drop(['LATITUDE', 'LONGITUDE'], axis=1)
crs = {'init': 'epsg:4326'}
geodf_stations = GeoDataFrame(df_stations, crs=crs, geometry=geometry)
geodf_stations.info()
geodf_stations.head()

# In[8]:

#add a new geometry to geodf_stations of a circle of X miles around each station
#new design uses polygons that will be loaded from a shape file so drawing buffer circles around the stations will not be required
#X = 0.01
#geodf_stations['CIRCLE'] = geodf_stations.geometry.buffer(X)
#geodf_stations.geometry.name
#geodf_stations = geodf_stations.rename(columns={'geometry':'POINT'}).set_geometry('CIRCLE')
#geodf_stations.geometry.name
#geodf_stations.info()
#geodf_stations.head()

# # LOAD STATIONS FROM TRANSIT DATA
# #convert to geodataframe

# In[5]:

# One LineString per traffic link: each LINK_POINTS entry is turned into
# coordinate tuples by build_coord_tuples and then into a line geometry.
geometry = [
    LineString(coords)
    for coords in map(build_coord_tuples, df_traffic_links['LINK_POINTS'])
]
crs = {'init': 'epsg:4326'}
# The raw LINK_POINTS column is left out of the geo frame; the geometry
# column replaces it.
geodf_traffic_links = GeoDataFrame(
    df_traffic_links.drop('LINK_POINTS', axis=1),
    geometry=geometry,
    crs=crs,
)
geodf_traffic_links.info()
geodf_traffic_links.head()

# In[6]:

# Quick visual check of the link geometries, drawn in red.
geodf_traffic_links.plot(color='r')
plt.show()

# # JOIN TRANSIT STATIONS WITH TRAFFIC LINKS

# In[7]:

#LOAD STATION GEO DF (ALREADY PROCESSED IN STATIONS NOTEBOOK)
#file = root + 'transit/Stations_geomerged.geojson'
#geodf_stations = GeoDataFrame.from_file(file)[['STATION','geometry']]
#geodf_stations.head()
def makeGrid(ipoints, experiment, gridsize):
    """Build a square grid over the points' extent and join the points to it.

    Steps:
      1. Call the local ``grid`` script to write a grid shapefile covering
         ``ipoints.total_bounds`` with ``gridsize`` x ``gridsize`` cells.
      2. Keep only the cells that intersect the first feature of the transect
         shapefile and write them to ``<grid>_transect.shp``.
      3. Re-read those cells and the campaign's filtered points from disk and
         spatially join them (left join, 'intersects').

    Args:
        ipoints: GeoDataFrame used only for its ``total_bounds``; the points
            actually joined are re-read from the campaign's
            ``diysco2-filtered-points/all_20150528.shp``.
        experiment: campaign folder name under ``diysco2-db/campaigns``.
        gridsize: cell width and height passed to the grid script.

    Returns:
        The joined GeoDataFrame (one row per cell/point match; cells without
        points are kept because the join is a left join).
    """
    # Projection assigned to every layer handled here.
    gridproj = {'init': 'epsg:3740', 'no_defs': True}

    # import grid script; only extend sys.path once so repeated calls do not
    # keep growing it.
    libdir = os.getcwd() + '/mapping/libs/'
    if libdir not in sys.path:
        sys.path.insert(0, libdir)
    import grid as g

    opath = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-grid'
    if os.path.isdir(opath):
        print("already a folder!")
    else:
        os.mkdir(opath)

    # gridsize = 200
    ogridname = "grid_" + str(gridsize) + "m.shp"
    ofile = opath + "/" + ogridname

    print("making grid")
    # total_bounds is (minx, miny, maxx, maxy); the grid script is called with
    # the x range first, then the y range.
    xmin, ymin, xmax, ymax = ipoints.total_bounds
    g.main(ofile, xmin, xmax, ymin, ymax, gridsize, gridsize)
    print("grid complete! ")

    # read in the grid that was just made
    grid = GeoDataFrame.from_file(ofile)
    grid.crs = gridproj
    # create grid id to groupby
    grid['id'] = list(range(len(grid)))

    # Read in transect to spatial subset grids in transect
    transect = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/_main_/study-area/'
        + 'transect_epicc2sp_woss.shp')
    transect.crs = gridproj

    # subset grid: keep the cells intersecting the FIRST transect feature
    # (only element [0] of the per-feature result is tested).
    sagrid = [
        cell for cell in grid.geometry
        if np.array(transect.intersects(cell))[0]
    ]

    # Building from a dict names the column directly, so an empty selection
    # no longer fails on a column rename.
    transectgrid = GeoDataFrame({'geometry': sagrid})
    transectgrid['id'] = list(range(len(transectgrid)))
    transectgrid.crs = gridproj
    transect_file = ofile[:-4] + "_transect.shp"
    transectgrid.to_file(transect_file)
    # transectgrid.to_file(ofile[:-4]+"_transect.geojson",driver="GeoJSON")

    ## !!!Some weird things with reading in data makes the sjoin work !!! :(
    # Hence the round trip through the shapefile that was just written.
    transectgrid = GeoDataFrame.from_file(transect_file)
    transectgrid.crs = gridproj
    print(transectgrid.head())

    ipoints = GeoDataFrame.from_file(
        os.getcwd() + '/diysco2-db/campaigns/' + experiment
        + '/diysco2-filtered-points/all_20150528.shp')
    ipoints.crs = gridproj
    print(ipoints.head())
    # ipoints['id'] = [i for i in range(len(ipoints))]

    # Spatial join points to grid
    oname = "gridjoin_" + str(gridsize) + "m.shp"
    # join_inner_df = sjoin(transectgrid, ipoints, how="inner")
    join_inner_df = sjoin(transectgrid, ipoints, how="left", op='intersects')
    # join_inner_df.to_file(opath+ "/"+oname)
    return join_inner_df
def makePoints(experiment):
    """Turn a campaign's filtered CSV files into one projected point layer.

    Every ``.csv`` in ``diysco2-db/campaigns/<experiment>/diysco2-filtered/``
    is read with ``readdata``, given a point geometry from its ``lon``/``lat``
    columns, reprojected from WGS84 to EPSG:3740 and merged. The result is
    written as GeoJSON and as an ESRI Shapefile to the campaign's
    ``diysco2-filtered-points`` folder.

    Args:
        experiment: campaign folder name under ``diysco2-db/campaigns``.

    Returns:
        The merged GeoDataFrame, without rows whose ``lat`` is null.
    """
    path = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-filtered/'
    ipaths = [os.path.join(path, name) for name in os.listdir(path)
              if name.endswith('.csv')]
    # experiment data
    data = [readdata(csv_path) for csv_path in ipaths]
    # copy data to ldata
    ldata = [frame.copy() for frame in data]
    # --- print number of measurements --- #
    # featcount(ldata)

    # ------------- Spatial Operations -------------
    for frame in ldata:
        frame['lon'] = frame['lon'].astype('float')
        frame['lat'] = frame['lat'].astype('float')
        # need to keep datetime field: the index is saved as a column before
        # it is replaced by a 0..n-1 counter.
        frame['datetime'] = frame.index
        frame.index = list(range(len(frame)))
        # create geopoints
        frame['geometry'] = GeoSeries(
            [Point(x, y) for x, y in zip(frame.lon, frame.lat)])
        # convert datetime to an iso format string
        frame['datetime'] = frame.datetime.map(
            lambda x: datetime.strftime(x, '%Y-%m-%dT%H:%M:%SZ'))
    print(ldata[0].head())

    # Projections
    gridproj = {'init': 'epsg:3740', 'no_defs': True}
    wgs84 = {'datum': 'WGS84', 'no_defs': True, 'proj': 'longlat'}

    # create geodataframe from data
    ldata = [GeoDataFrame(frame) for frame in ldata]
    for frame in ldata:
        # set projection as wgs84
        frame.crs = wgs84
        # reproject to utm zone 10N
        frame.geometry = frame.geometry.to_crs(epsg=3740)
        # frame.geometry = frame.geometry.to_crs(epsg=4326)

    # Drop rows without a geometry. The original loop only rebound its loop
    # variable, so nothing was ever filtered.
    ldata = [frame[pd.notnull(frame.geometry)] for frame in ldata]

    # --- Merge geodata together --- #
    # Concatenate every frame instead of exactly the first five, so the
    # function works for any number of CSV files.
    mergedgeo = GeoDataFrame(pd.concat(ldata))
    mergedgeo.crs = gridproj
    print(len(mergedgeo))
    mergedgeo = mergedgeo[pd.notnull(mergedgeo.lat)]
    print(len(mergedgeo))
    # mergedgeo['date'] = mergedgeo['date'].str.replace('/', '-').astype(str)
    # mergedgeo['datetime'] = mergedgeo['datetime'].astype(str)
    print(mergedgeo.head())
    # mergedgeo.to_crs(wgs84)

    opath = os.getcwd() + '/diysco2-db/campaigns/' + experiment + '/diysco2-filtered-points/'
    print(opath)
    if os.path.isdir(opath):
        print("already a folder!")
    else:
        os.mkdir(opath)

    # Any existing GeoJSON is removed before the new one is written.
    geojson_path = opath + 'all_20150528.geojson'
    if os.path.isfile(geojson_path):
        os.remove(geojson_path)
    mergedgeo.to_file(geojson_path, driver="GeoJSON")
    # with open(opath + 'all_20150528.geojson', 'w') as f:
    #     f.write(mergedgeo.to_json())
    mergedgeo.to_file(opath + 'all_20150528.shp', driver='ESRI Shapefile')

    # (The original's `del mergedgeo` after this return was unreachable.)
    return mergedgeo
# ### Convert to GeoDataFrame

# In[160]:

from geopandas import GeoDataFrame
from shapely.geometry import Point

# In[161]:

# The station geometries are declared as WGS 1984 (EPSG:4326)...
crs = {'init': 'epsg:4326'}

# ...and the frame is then reprojected to the Mercator system (EPSG:3857).
stations_gdf = GeoDataFrame(stations, geometry='geometry', crs=crs)
stations_gdf = stations_gdf.to_crs(epsg=3857)
stations_gdf.head()

# ### Join Trip Counts to Station Data

# In[162]:

# Number of trips per start station, as a two-column frame
# (start_station, counts).
by_startStation = (
    indego.groupby('start_station')
    .size()
    .reset_index(name='counts')
)
by_startStation.head()

# In[163]:

# Attach the counts to the stations by matching kioskId to start_station.
stations_gdf = stations_gdf.merge(
    by_startStation,
    left_on='kioskId',
    right_on='start_station',
)
# reproject the country shapefile, in place, to the target projected CRS
all_countries.to_crs(crs=target_crs, inplace=True)

# In[6]:

# give the point table a geometry column (x = lon, y = lat) for geopandas
rs['geometry'] = rs.apply(
    lambda record: Point(record['lon'], record['lat']),
    axis=1,
)

# wrap the point table in a GeoDataFrame
points = GeoDataFrame(rs)

# the source CRS has to be declared before the frame can be reprojected
points.crs = original_crs
points.head()

# In[7]:

# bring the points into the same projected CRS as the shapefile
points.to_crs(crs=target_crs, inplace=True)

# expose the projected coordinates as plain x / y columns so they can be
# handed straight to a matplotlib scatter plot
points['x'] = [geom.x for geom in points['geometry']]
points['y'] = [geom.y for geom in points['geometry']]
points.head()

# In[8]:
all_rps.head()

#Create geometries for sps
geometry = [Point(xy) for xy in zip(all_sps.Easting, all_sps.Northing)]
crs = {'init': 'epsg:23031'}

#Create points for sps
all_sps_points = GeoDataFrame(all_sps, crs=crs, geometry=geometry)

#Create lines for sps: the points of each 'Line Number' group are chained,
#in row order, into one LineString
all_sps_lines = all_sps_points.groupby(
    ['Line Number'])['geometry'].apply(lambda x: LineString(x.tolist()))
# Pass the same CRS as the points: the groupby result carries none, so the
# lines shapefile was previously written without projection information.
all_sps_lines = GeoDataFrame(all_sps_lines, crs=crs, geometry='geometry')

#Check
all_sps_points.head()
all_sps_lines.head()

#Create shapefiles for sps
all_sps_points.to_file(driver='ESRI Shapefile',
                       filename="result_SPS_points.shp")
all_sps_lines.to_file(driver='ESRI Shapefile',
                      filename="result_SPS_lines.shp")

#Create geometries for rps
geometry = [Point(xy) for xy in zip(all_rps.Easting, all_rps.Northing)]
crs = {'init': 'epsg:23031'}

#Create points for rps
all_rps_points = GeoDataFrame(all_rps, crs=crs, geometry=geometry)

#Create lines for rps