import streamlit as st
import pandas as pd
import geopandas as gp
import folium as fo

st.title('Streamlit with Folium')
""" ## An easy way to create a website using Python """

# Load the sensor list and filter it by the sub-district typed by the user
df = pd.read_csv(
    'https://raw.githubusercontent.com/Maplub/MonthlyAirQuality/master/sensorlist.csv'
)
tambol = st.text_input(label='ตำบล')  # 'ตำบล' = sub-district
st.write(df[df['tambol'] == tambol])

# Build a point GeoDataFrame from the sensor coordinates (WGS84)
crs = "EPSG:4326"
geometry = gp.points_from_xy(df.lon, df.lat)
geo_df = gp.GeoDataFrame(df, crs=crs, geometry=geometry)

# Keep only the sensors that fall inside the Nan province boundary
nan_boundary = gp.read_file(
    'https://github.com/Maplub/AirQualityData/blob/master/nan_shp_wgs84.zip?raw=true'
)
nanall = nan_boundary.unary_union
nan_sta = geo_df.loc[geo_df.geometry.within(nanall)]

# Folium map centred on Nan province
longitude = 100.819200
latitude = 19.331900
station_map = fo.Map(location=[latitude, longitude], zoom_start=10)
latitudes = list(nan_sta.lat)
longitudes = list(nan_sta.lon)
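
# --- Hedged sketch (addition, not from the original snippet) ---
# The excerpt stops after collecting the filtered station coordinates; one
# plausible next step (an assumption about the intended finish) is a marker
# per station. Rendering the folium map inside Streamlit needs the third-party
# streamlit-folium package; saving to HTML works without it.
for m_lat, m_lon in zip(latitudes, longitudes):
    fo.Marker(location=[m_lat, m_lon]).add_to(station_map)
station_map.save('station_map.html')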
polygon = cerrado_shp.iloc[0]["geometry"]
polygon

# + {"active": ""}
# First we need to shift the shapefile coordinates: the GRACE grid uses
# longitudes from 0 to 360 (Brazil sits around longitude 300), while this
# shapefile uses -180 to 180 (Brazil sits around longitude -50).
#
# To convert the shapefile longitudes, just shift them by 360 (the code below
# computes 360 + long).

# +
x, y = polygon.exterior.xy
x = 360 + np.array(x)
polygon_geom = Polygon(zip(x, y))
new_shp = gpd.GeoDataFrame(index=[0], crs=cerrado_shp.crs, geometry=[polygon_geom])
new_shp.plot()
# -

# Here we can see that the coordinates are now compatible

# +
fig, ax = plt.subplots(1, 1)
ax.pcolormesh(lon, lat, lwe, cmap="terrain")
new_shp.plot(ax=ax)
# -

polygon_corr = new_shp.iloc[0]["geometry"]
polygon_corr
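
# --- Hedged sketch (addition, not from the original notebook) ---
# One possible next step, assuming lon/lat are the 1-D coordinate arrays of the
# GRACE grid and lwe is the matching 2-D field (skip the meshgrid if lon/lat
# are already 2-D), is to mask the grid to the corrected Cerrado polygon:
lon2d, lat2d = np.meshgrid(lon, lat)
cells = gpd.GeoSeries(gpd.points_from_xy(lon2d.ravel(), lat2d.ravel()))
inside = cells.within(polygon_corr).values.reshape(lon2d.shape)
lwe_cerrado = np.where(inside, lwe, np.nan)  # keep only values inside the polygon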
], ordered=False)
crime_main['MONTH'] = pd.Categorical(crime_main['MONTH'],
                                     categories=[
                                         "Jan", "Feb", "Mar", "Apr", "May", "Jun",
                                         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
                                     ],
                                     ordered=False)
crime_main['GEOID10'] = crime_main['GEOID10'].fillna(0.0).apply(
    np.int64).astype('category')

# Build point geometries from the X/Y columns and wrap the crime table
# in a GeoDataFrame (WGS84 lon/lat)
geometry = [Point(xy) for xy in zip(crime_main['X'], crime_main['Y'])]
crime_main_geo = gpd.GeoDataFrame(crime_main,
                                  crs={'init': 'epsg:4326'},
                                  geometry=geometry)

# Social vulnerability polygons for Boston
social = gpd.read_file(
    "Data/3aeae140-8174-4d77-8c0e-de3ef0ce4b672020330-1-1rr22uq.veze.shp")
boston_polygon = social[['FID', 'GEOID10', 'Name', 'geometry']]

with open('Data/Boston_Social_Vulnerability.geojson') as f:
    boston_geojson = json.load(f)

boston_geo = []
for i in boston_geojson['features']:
    neighbourhood_name = i['properties']['Name']
    geo_id = i['properties']['GEOID10']
    geometry = i['geometry']
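
# --- Hedged sketch (addition) ---
# A typical way to attach each crime point to its neighbourhood polygon,
# assuming both layers are in WGS84 (otherwise reproject with to_crs first),
# is a spatial join; the keyword style follows the rest of this file.
crime_with_neighbourhood = gpd.sjoin(crime_main_geo, boston_polygon,
                                     how='left', op='intersects')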
#print(florence_1.head())

# Add "-" in front of the longitude to correctly plot the data (western hemisphere)
florence_1['Long'] = 0 - florence_1['Long']
#print(florence_1.head())

# Combining Latitude and Longitude to create hurricane coordinates
florence_1['coordinates'] = florence_1[['Long', 'Lat']].values.tolist()
#print(florence_1.head())

# Change the coordinates to a GeoPoint
florence_1['coordinates'] = florence_1['coordinates'].apply(Point)
#print(florence_1.head())

# Converting the data into geospatial data
florence_1 = geopandas.GeoDataFrame(florence_1, geometry='coordinates')
print(florence_1.head())

# Visualization / plotting to see the hurricane overlay the US map
fig, ax = plt.subplots(1, figsize=(30, 20))
base = country[country['NAME'].isin(['Alaska', 'Hawaii']) == False].plot(
    figsize=(30, 20), color='#3B3C6E')

# Plotting the hurricane positions on top, coloured by wind speed
florence_1.plot(ax=base,
                column='Wind',
                marker='<',
                markersize=10,
                cmap='cool',
                label="Wind Speed(mph)")
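
# --- Hedged sketch (addition) ---
# To draw the storm track as a continuous line rather than individual points,
# one option (assuming the rows are already in chronological order) is to
# string the point geometries together:
from shapely.geometry import LineString

track = LineString([(pt.x, pt.y) for pt in florence_1['coordinates']])
geopandas.GeoSeries([track]).plot(ax=base, color='red', linewidth=1)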
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

co2measdir = 'data/co2meas.csv'
co2sitesdir = 'output/gis/vector/co2sites.shp'

co2meas = pd.read_csv(co2measdir)

# One row per site: take the first Lat/Long recorded for each siteNo
co2sites = co2meas[['Lat', 'Long', 'siteNo']].groupby(['siteNo'],
                                                      as_index=False).first()

gpd.GeoDataFrame(co2sites,
                 geometry=[Point(xy) for xy in zip(co2sites.Long, co2sites.Lat)])

[Point(xy) for xy in zip(co2sites.Long, co2sites.Lat)]
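
# --- Hedged sketch (addition) ---
# The GeoDataFrame above is built but never kept or written. Assuming the
# Lat/Long columns are WGS84, one way to persist it to the shapefile path
# defined above would be:
co2sites_gdf = gpd.GeoDataFrame(
    co2sites,
    geometry=[Point(xy) for xy in zip(co2sites.Long, co2sites.Lat)],
    crs='EPSG:4326')
os.makedirs(os.path.dirname(co2sitesdir), exist_ok=True)
co2sites_gdf.to_file(co2sitesdir)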
import pytest
import os
import geopandas as gp
import cartopy.feature as cf

ne = []
for cr in ['l', 'i', 'h']:
    coast = cf.NaturalEarthFeature(category='physical',
                                   name='land',
                                   scale='{}m'.format({
                                       'l': 110,
                                       'i': 50,
                                       'h': 10
                                   }[cr]))
    gdf = gp.GeoDataFrame(geometry=[x for x in coast.geometries()])
    w = gdf.explode().reset_index(drop=True)
    ne.append(w)

coast = cf.GSHHSFeature(scale='auto', levels=[1])
GSHHS = gp.GeoDataFrame(geometry=[x for x in coast.geometries()])


@pytest.mark.slow
@pytest.mark.parametrize('coast', ne[0:1])
def test_answer(tmpdir, coast):
    df = pg.grid(type='tri2d', geometry='global',
def plot_stops(stops, ax=None, color='red', label=None):
    stops_gdf = gpd.GeoDataFrame(stops)
    stops_gdf["geometry"] = stops_gdf["xy"]
    ax = stops_gdf.plot(ax=ax, color=color, label=label)
    ax.set_ylim(0, 2 * cf.MAX_DEV)
    return ax
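
# --- Hedged sketch (addition) ---
# Minimal usage example for plot_stops; the two sample stops below are made up
# for illustration, the "xy" column name matches what the function expects,
# and cf.MAX_DEV is the module-level constant the function already relies on.
import pandas as pd
from shapely.geometry import Point

example_stops = pd.DataFrame({
    "stop_id": [1, 2],
    "xy": [Point(0.1, 0.5), Point(0.4, 1.2)],
})
ax = plot_stops(example_stops, color='blue', label='stops')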
def gen_count_dot_density_map( county, pts_per_person=300, epsg=2163, seed=10, dot_transparency=0.5, figsize=(18, 10), ax=None, legend=True, ): """ Wraps previous functions and generates population dot density maps for a specified county by race """ # read in fips to county name relationship file fips = pd.read_csv( "https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt", header=None, dtype={ 1: np.object, 2: np.object }, ) fips["name"] = fips[3] + ", " + fips[0] fips["fips"] = fips[1] + fips[2] # get name from fips if fips specified if county.isdigit(): lookup = fips.set_index("fips")["name"] county_fips = county name = lookup[county_fips] # get fips from name if name specified else: lookup = fips.set_index("name")["fips"] name = county county_fips = lookup[name] # get geodataframe of block group shapefile bgfile_name = "http://www2.census.gov/geo/tiger/GENZ2018/shp/cb_2018_{}_tract_500k.zip".format( county_fips[:2]) bg_geo = zip_shp_to_gdf(bgfile_name) # subset to those that are in the county and project it to the CRS bg_geo = (bg_geo[bg_geo["GEOID"].str[:5] == county_fips].to_crs( epsg=epsg).set_index("GEOID")["geometry"]) # specify variable list and variable names for the census api function varlist = [ "B03002_003E", "B03002_012E", "B03002_004E", "B03002_006E", "B03002_005E", "B03002_007E", "B03002_008E", "B03002_009E", ] names = [ "White", "Hispanic", "Black", "Asian", "AI/AN", "NH/PI", "Other_", "Two Plus", ] # read in block group level census variables dems = get_census_data( 2018, "acs5", "block group", { "county": county_fips[2:], "state": county_fips[:2] }, varlist, names, ) # Calculate other as sum of those not in the 4 most populated race categories dems["Other"] = dems[["AI/AN", "NH/PI", "Other_", "Two Plus"]].sum(1) # Calculate county boundaries as the union of block groups union = gpd.GeoSeries(bg_geo.unary_union) # if axes object is specified, plot to this axis, otherwise create a new one if ax: union.plot(color="white", figsize=figsize, ax=ax) else: ax = union.plot(color="white", figsize=figsize) # set aspect equal and add title if specified ax.set(aspect="equal", xticks=[], yticks=[]) # set title as county name ax.set_title(name, size=15) # annotate the dot per person ratio ax.annotate( "1 dot = {} {}".format(pts_per_person, "person" if pts_per_person == 1 else "people"), xy=(0.5, 0.97), xycoords="axes fraction", horizontalalignment="center", fontsize=12, ) # loop each race category and generate points for each within each block group list_of_point_categories = [] for field in ["White", "Black", "Asian", "Hispanic", "Other"]: ps = gpd.GeoDataFrame( gen_points_in_gdf_polys( geometry=bg_geo, values=dems[field], points_per_value=pts_per_person, seed=seed, )) ps["field"] = field list_of_point_categories.append(ps) all_categories = pd.concat(list_of_point_categories) all_points = gpd.GeoDataFrame(all_categories) all_points.plot( ax=ax, markersize=0.125, alpha=dot_transparency, column="field", categorical=True, legend=legend, cmap="Accent", marker=",", ) return ax
"soil_grid_flat_no_geom.nc"))) # test output data with xarray.open_dataset( os.path.join(TEST_COMPARE_DATA_DIR, "soil_grid_flat_no_geom.nc"), mask_and_scale=False, ) as xdc: xarray.testing.assert_allclose(out_grid, xdc) tmpdir.remove() @pytest.mark.parametrize( "input_geodata", [ gpd.GeoDataFrame(columns=["test_col", "geometry"]), gpd.GeoDataFrame(), gpd.read_file( os.path.join(TEST_INPUT_DATA_DIR, "soil_data_flat.geojson")).drop(columns="geometry"), ], ) def test_make_geocube__invalid_gdf(input_geodata): with pytest.raises(VectorDataError): make_geocube(vector_data=input_geodata, resolution=(-0.001, 0.001)) def test_make_geocube__no_resolution_error(): with pytest.raises(RuntimeError): make_geocube( vector_data=os.path.join(TEST_INPUT_DATA_DIR,
import xarray as xr
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import LineString
#import fiona
from fiona.crs import from_epsg

ds = xr.open_dataset('/Volumes/MNF-Archive/underway/in2019_v04uwy.nc')

# Build the ship track as a LineString from every 100th underway position
newdata = gpd.GeoDataFrame()
newdata['geometry'] = None
coordinates = zip(ds.longitude.data[::100][1:], ds.latitude.data[::100][1:])
poly = LineString(coordinates)
newdata.loc[0, 'geometry'] = poly
newdata.crs = from_epsg(4326)
newdata.to_file('CurrentShiptrack.shp')
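
# --- Hedged sketch (addition) ---
# A quick sanity check on the exported track: reproject to a metric CRS and
# report the track length. EPSG:3577 (Australian Albers) is an assumption that
# fits an Australian voyage; swap in whatever metric CRS suits the survey area.
track_km = newdata.to_crs(epsg=3577).length / 1000.0
print('Ship track length: {:.1f} km'.format(track_km.iloc[0]))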
def calc_areafrac_shp2rst_region(shp_path, outdir, outfilename, resolution, coord): """ This is the main function to be called in a script """ import numpy as np # define sections and resolution of section at which processed (all in degrees) # function works global by default. # coord = [lon_min,lon_max,lat_min,lat_max] lon_min = coord[0] lon_max = coord[2] lat_min = coord[1] lat_max = coord[3] res_processed = 1 # degrees # check whether pct_grid shapefile is already existing if os.path.isfile(outdir + outfilename + ".shp"): print(' ') #print(outfilename+'.shp already exists') else: # read shapefile shp_data = gpd.read_file(shp_path) # define lon lat bounds. # lon_max, lat_max both +1 to account also for last defined boundary (inherent to python) # both lats: +resolution (to really start at 0, artefact of grid making method) lon_bounds = np.arange(lon_min, lon_max + 1, res_processed) lat_bounds = np.arange(lat_min + resolution, lat_max + resolution + 1, res_processed) # initialise counter count = 0 # create empty geodataframe to store results grid_pct = gpd.GeoDataFrame() # loop over different sections for indx, xmin in enumerate(lon_bounds[:-1]): for indy, ymin in enumerate(lat_bounds[:-1]): # counter count = count + 1 # print('Processing gridcell '+ str(count) +' of '+ str(lon_bounds[:-1].size*lat_bounds[:-1].size)) # define xmax, ymax xmax = lon_bounds[indx + 1] ymax = lat_bounds[indy + 1] # create grid grid = make_grid(xmin, xmax, ymin, ymax, resolution) # clip lakes for grid area clip_area = grid.geometry.unary_union shp_clipped = shp_data[shp_data.geometry.intersects(clip_area)] # calculate percent area of clipped zone grid_pct_clipped = calc_pctarea(shp_clipped, grid, 'PCT_area') # concatenate the different shapefiles grid_pct = pd.concat([grid_pct, grid_pct_clipped], sort=False) # save to shape file grid_pct.to_file(outdir + outfilename + ".shp") # rasterize rasterize('PCT_area', lon_min, lon_max, lat_min, lat_max, resolution, outdir, outfilename) out_pct_raster = read_raster(outdir + outfilename + '.tiff') return out_pct_raster
def __init__(self, tessellation, edges, buildings, id_name, unique_id): self.tessellation = tessellation self.edges = edges self.buildings = buildings self.id_name = id_name self.unique_id = unique_id if id_name in buildings.columns: raise ValueError( "'{}' column cannot be in the buildings GeoDataFrame".format(id_name) ) cells_copy = tessellation[[unique_id, "geometry"]].copy() print("Buffering streets...") street_buff = edges.copy() street_buff["geometry"] = street_buff.buffer(0.1) print("Generating spatial index...") streets_index = street_buff.sindex print("Difference...") new_geom = [] for ix, cell in tqdm( cells_copy.geometry.iteritems(), total=cells_copy.shape[0] ): possible_matches_index = list(streets_index.intersection(cell.bounds)) possible_matches = street_buff.iloc[possible_matches_index] new_geom.append(cell.difference(possible_matches.geometry.unary_union)) print("Defining adjacency...") blocks_gdf = gpd.GeoDataFrame(geometry=gpd.GeoSeries(new_geom)) blocks_gdf = blocks_gdf.explode().reset_index(drop=True) spatial_weights = libpysal.weights.Queen.from_dataframe( blocks_gdf, silence_warnings=True ) patches = {} jID = 1 for idx in tqdm(blocks_gdf.index, total=blocks_gdf.shape[0]): # if the id is already present in courtyards, continue (avoid repetition) if idx in patches: continue else: to_join = [idx] # list of indices which should be joined together neighbours = [] # list of neighbours weights = spatial_weights.neighbors[ idx ] # neighbours from spatial weights for w in weights: neighbours.append(w) # make a list from weigths for n in neighbours: while ( n not in to_join ): # until there is some neighbour which is not in to_join to_join.append(n) weights = spatial_weights.neighbors[n] for w in weights: neighbours.append( w ) # extend neighbours by neighbours of neighbours :) for b in to_join: patches[b] = jID # fill dict with values jID = jID + 1 blocks_gdf["patch"] = blocks_gdf.index.map(patches) print("Defining street-based blocks...") blocks_single = blocks_gdf.dissolve(by="patch") blocks_single.crs = buildings.crs blocks_single["geometry"] = blocks_single.buffer(0.1) print("Defining block ID...") # street based blocks_single[id_name] = range(len(blocks_single)) print("Generating centroids...") buildings_c = buildings.copy() buildings_c["geometry"] = buildings_c.representative_point() # make points print("Spatial join...") centroids_tempID = gpd.sjoin( buildings_c, blocks_single, how="left", op="intersects" ) tempID_to_uID = centroids_tempID[[unique_id, id_name]] print("Attribute join (tesselation)...") cells_copy = cells_copy.merge(tempID_to_uID, on=unique_id, how="left") print("Generating blocks...") blocks = cells_copy.dissolve(by=id_name) print("Multipart to singlepart...") blocks = blocks.explode() blocks.reset_index(inplace=True, drop=True) blocks["geometry"] = blocks.exterior blocks[id_name] = range(len(blocks)) blocks["geometry"] = blocks.apply(lambda row: Polygon(row.geometry), axis=1) # if polygon is within another one, delete it sindex = blocks.sindex for idx, geom in tqdm(blocks.geometry.iteritems(), total=blocks.shape[0]): possible_matches = list(sindex.intersection(geom.bounds)) possible_matches.remove(idx) possible = blocks.iloc[possible_matches] for geom2 in possible.geometry: if geom.within(geom2): blocks.loc[idx, "delete"] = 1 if "delete" in blocks.columns: blocks = blocks.drop(list(blocks.loc[blocks["delete"] == 1].index)) self.blocks = blocks[[id_name, "geometry"]] centroids_w_bl_ID2 = gpd.sjoin( buildings_c, self.blocks, how="left", op="intersects" ) 
        bl_ID_to_uID = centroids_w_bl_ID2[[unique_id, id_name]]

        print("Attribute join (buildings)...")
        buildings_m = buildings[[unique_id]].merge(
            bl_ID_to_uID, on=unique_id, how="left"
        )
        self.buildings_id = buildings_m[id_name]

        print("Attribute join (tesselation)...")
        cells_m = tessellation[[unique_id]].merge(
            bl_ID_to_uID, on=unique_id, how="left"
        )
        self.tessellation_id = cells_m[id_name]
import matplotlib.pyplot as plt
import scipy.stats as st
import numpy as np
from plotnine import *

df_map = gpd.GeoDataFrame.from_file('Virtual_Map1.shp')
df_huouse = pd.read_csv("Virtual_huouse.csv")

# Regular lon/lat grid covering the map extent
long_mar = np.arange(105, 135, 0.2)
lat_mar = np.arange(30, 60, 0.2)
xx, yy = np.meshgrid(long_mar, lat_mar)
df_grid = pd.DataFrame(dict(long=xx.ravel(), lat=yy.ravel()))

geom = gpd.GeoSeries(
    [Point(x, y) for x, y in zip(df_grid.long.values, df_grid.lat.values)])
df_geogrid = gpd.GeoDataFrame(df_grid, geometry=geom)

# Keep only the grid points that fall inside the map polygons
inter_point = df_map['geometry'].intersection(
    df_geogrid['geometry'].unary_union).tolist()

point_x = []
point_y = []
for i in range(len(inter_point)):
    if (str(type(inter_point[i])) != "<class 'shapely.geometry.point.Point'>"):
        point_x = point_x + [item.x for item in inter_point[i]]
        point_y = point_y + [item.y for item in inter_point[i]]
    else:
        point_x = point_x + [inter_point[i].x]
        point_y = point_y + [inter_point[i].y]

df_pointmap = pd.DataFrame(dict(long=point_x, lat=point_y))
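
# --- Hedged sketch (addition) ---
# Quick visual check of the clipped grid with plotnine (already imported via
# `from plotnine import *` above); the styling choices are illustrative only.
p = (ggplot(df_pointmap, aes(x='long', y='lat'))
     + geom_point(size=0.5, color='steelblue')
     + coord_fixed()
     + theme_void())
print(p)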
def create_shape_file_tile( country, city, save_dir, file_return=True, ): full_path = save_dir + country.title() + "_geojson.json" PATH_IN_TILE = f"Facebook/{country}/movement_tile/" PATH_IN_TILE = os.path.join(PATH_IN_TILE, "*.csv") all_files = glob.glob(PATH_IN_TILE) tile_data = pd.concat((pd.read_csv(f) for f in all_files)) tile_data["start_quadkey"] = tile_data.start_quadkey.astype("int") tile_data["end_quadkey"] = tile_data.end_quadkey.astype("int") # TODO: not all tiles are guaranteed, ´end_polygon_name´ should be included. # 1. iteration nairobi_df = tile_data[(tile_data.start_polygon_name == city) & (tile_data.end_polygon_name == city)] # 2. iteration if city == "Lagos": max_lat = 6.942785785094588 min_lat = 6.211551441519991 max_lon = 4.3560791015625 min_lon = 2.70538330078125 elif city == "Abuja": max_lat = 9.492408153765544 min_lat = 8.619041018922134 max_lon = 7.959594726562499 min_lon = 6.981811523437499 elif city == "Nairobi": max_lat = -0.7909904981540058 min_lat = -1.7740084780891991 max_lon = 37.3590087890625 min_lon = 36.2713623046875 #nairobi_df = tile_data[ # ( # (tile_data.end_lat <= max_lat) # & (tile_data.end_lat >= min_lat) # & (tile_data.end_lon <= max_lon) # & (tile_data.end_lon >= min_lon) # ) #] # TODO: avoid using ´.first()´. Check if one quadkey by mistake has multiple different ´end_lat´and ´end_lon´. unique_tiles = (nairobi_df.groupby(["end_quadkey" ])[["end_lat", "end_lon"]].first().reset_index()) print(len(unique_tiles)) shape_file = [] for _, row in unique_tiles.iterrows(): lat = row.end_lat lng = row.end_lon size = 0.022 # kenya 0.022 Nigeria 0.045 # create geodataframe with some variables gf = gpd.GeoDataFrame( { "lat": lat, "lon": lng, "width": size, "height": size }, index=[1], crs="epsg:4326", ) # create center as a shapely geometry point type and set geometry of dataframe to this gf["center"] = gf.apply( lambda x: shapely.geometry.Point(x["lon"], x["lat"]), axis=1) gf = gf.set_geometry("center") # create polygon using width and height gf["center"] = shapely.geometry.box( *gf["center"].buffer(1).total_bounds) gf["polygon"] = gf.apply( lambda x: shapely.affinity.scale(x["center"], x["width"], x[ "height"]), axis=1, ) gf = gf.set_geometry("polygon") geopoly = gf["polygon"].to_json() g1 = geojson.loads(geopoly) gh = g1[0].geometry g2 = shape(gh) # Create GeoJSON wow3 = geojson.dumps(g2) wow4 = json.loads(wow3) gd_feat = dict(kommune="A" + str(int(row.end_quadkey)), polygons=wow4["coordinates"]) shape_file.append(gd_feat) if save_dir != False: full_path = save_dir + country.title() + '_' + city.title( ) + "tile_geojson.json" #full_path = save_dir + "Nigeria_Abujatile_geojson.json" with open(full_path, "w") as f: json.dump(shape_file, f) if file_return: return shape_file
def __init__(self, hgrid: "Hgrid", boundaries: Union[dict, None]): ocean_boundaries = [] land_boundaries = [] interior_boundaries = [] if boundaries is not None: for ibtype, bnds in boundaries.items(): if ibtype is None: for id, data in bnds.items(): indexes = list( map(hgrid.nodes.get_index_by_id, data['indexes'])) ocean_boundaries.append({ 'id': id, "index_id": data['indexes'], "indexes": indexes, 'geometry': LineString(hgrid.vertices[indexes]) }) elif str(ibtype).endswith('1'): for id, data in bnds.items(): indexes = list( map(hgrid.nodes.get_index_by_id, data['indexes'])) interior_boundaries.append({ 'id': id, 'ibtype': ibtype, "index_id": data['indexes'], "indexes": indexes, 'geometry': LineString(hgrid.vertices[indexes]) }) else: for id, data in bnds.items(): _indexes = np.array(data['indexes']) if _indexes.ndim > 1: # ndim > 1 implies we're dealing with an ADCIRC # mesh that includes boundary pairs, such as weir new_indexes = [] for i, line in enumerate(_indexes.T): if i % 2 != 0: new_indexes.extend(np.flip(line)) else: new_indexes.extend(line) _indexes = np.array(new_indexes).flatten() else: _indexes = _indexes.flatten() indexes = list( map(hgrid.nodes.get_index_by_id, _indexes)) land_boundaries.append({ 'id': id, 'ibtype': ibtype, "index_id": data['indexes'], "indexes": indexes, 'geometry': LineString(hgrid.vertices[indexes]) }) self._ocean = gpd.GeoDataFrame(ocean_boundaries) self._land = gpd.GeoDataFrame(land_boundaries) self._interior = gpd.GeoDataFrame(interior_boundaries) self._hgrid = hgrid self._data = boundaries
def get_output_csv(lat_failures, lon_failures, distance_between_points, anomalous_failures_bool, rundir): """ get_output_csv creates a csv file with a timeseries of factor of safety for each test point, as well as the distance from the chosen calibrated point, the day of failure and whether the failure is anomalous or not. :param lat_failures: array of latitudes of failure points (out of the given test points in the area of interest) :param lon_failures: array of longitudes of failure points (out of the given test points in the area of interest) :param distance_between_points: dataframe with the distance (m) between then test points and the calibrated points :param anomalous_failures_bool: Boolean list. Elements are True if failure is anomalous, False otherwise. :param rundir: directory where the output files will be created """ test_points = pd.read_csv(bool_lat_lon) test_points['geometry'] = test_points['geometry'].apply(wkt.loads) test_points_gdf = gpd.GeoDataFrame(test_points, crs='epsg:4326') # test some of the graphs and output variables from the validation for i in range(len(lat_failures)): lat_failure = lat_failures[i] lon_failure = lon_failures[i] FoS = np.load(f'{rundir}FoS_{lat_failure}_{lon_failure}.npy') FoS_temp = np.load(f'{rundir}FoS_temp_{lat_failure}_{lon_failure}.npy') min_depth = np.load( f'{rundir}min_depth_{lat_failure}_{lon_failure}.npy') FoS_df = pd.DataFrame(FoS_temp[0, :]) FoS_df.columns = ['FoS'] # what is the earliest time where we see the FoS go below zero? day_of_failure = (FoS_df['FoS'] < 1.0).idxmax() print( f'The first failure is predicted on day {day_of_failure} after the start of the rainfall timeseries.' ) x_values = np.arange(0, np.shape(FoS_temp)[1]) y_values = np.arange(0, np.shape(FoS_temp)[0]) FoS_df['is_it_failure'] = np.where((FoS_df['FoS'] >= 1), 0, 1) #seaborn.scatterplot(data=FoS_df['FoS'], x=x_values, y=FoS_df['FoS'], hue=FoS_df['is_it_failure'], s=1) FoS_df_to_save = pd.DataFrame(FoS_df['FoS']) FoS_df_to_save['days'] = x_values.tolist() FoS_df_to_save['distance_m_from_calib_to_test_point'] = pd.Series( distance_between_points['distance_m_from_calib_to_test_point']. loc[i], index=FoS_df_to_save.index[[0]]) FoS_df_to_save['distance_m_from_calib_to_test_point'] = FoS_df_to_save[ 'distance_m_from_calib_to_test_point'].fillna('') FoS_df_to_save['day_of_failure'] = pd.Series( day_of_failure, index=FoS_df_to_save.index[[0]]) FoS_df_to_save['day_of_failure'] = FoS_df_to_save[ 'day_of_failure'].fillna('') FoS_df_to_save['anomalous_failure'] = pd.Series( anomalous_failures_bool[i], index=FoS_df_to_save.index[[0]]) FoS_df_to_save['anomalous_failure'] = FoS_df_to_save[ 'anomalous_failure'].fillna('') full_point = test_points_gdf['geometry'][i] full_point_x = full_point.x full_point_y = full_point.y FoS_df_to_save.to_csv( f'{rundir}fos_timeseries_{full_point_y}_{full_point_x}.csv', index=False)
def process_herbage(herbage, scen, scenarios, grange): """ Primary function for processing grange """ # subset for the correct scenario herbage = herbage[herbage['scenario'] == scen] herbage['geometry'] = herbage.apply( lambda x: Point(x.longitude, x.latitude), axis=1) # obtain scenario parameters params = scenarios[scenarios['scenario'] == scen].iloc[0].to_dict() params = format_params(params) run_id, model_config, run_obj = gen_run(model_name, params) # generate temp CSV and push it to S3 herbage.to_csv("tmp_g.csv", index=False) time.sleep(1) try: s3_bucket.upload_file("tmp_g.csv", run_obj['key'], ExtraArgs={'ACL': 'public-read'}) except Exception as e: print(e) print("Retrying file upload...") try: s3_bucket.upload_file("tmp_g.csv", run_obj['key'], ExtraArgs={'ACL': 'public-read'}) except: pass # Add metadata object to DB meta = Metadata( run_id=run_id, model=model_name, raw_output_link= f"https://model-service.worldmodelers.com/results/{model_name}_results/{run_id}.csv", run_label=herbage.description.iloc[0], point_resolution_meters=25000) db_session.add(meta) db_session.commit() # Add parameters to DB for param in grange['parameters']: # ensure that no null parameters are stored if not pd.isna(params[param['name']]): if param['metadata']['type'] == 'ChoiceParameter': p_type = 'string' elif param['name'] == 'fertilizer' or param[ 'name'] == 'sowing_window_shift': p_type = 'int' else: p_type = 'float' p_value = params[param['name']] param = Parameters(run_id=run_id, model=model_name, parameter_name=param['name'], parameter_value=p_value, parameter_type=p_type) db_session.add(param) db_session.commit() gdf = gpd.GeoDataFrame(herbage) gdf = gpd.sjoin(gdf, admin2, how="left", op='intersects') gdf['run_id'] = run_id gdf['model'] = model_name if 'geometry' in gdf: del (gdf['geometry']) del (gdf['index_right']) return gdf, run_id
# X COORDINATES
drzx = cp.loc[:, 'Long'].values[0]
# Y COORDINATES
drzy = cp.loc[:, 'Lat'].values[0]

# CRS SETUP - 4326 WGS
crs_ = {'init': 'epsg:4326'}

# FORMAT THE GEOMETRY COLUMN - SET OF POINTS
geometry_ = [
    Point(x, y) for x, y in zip(df_potvrdeni['Long'], df_potvrdeni['Lat'])
]

# FORMAT THE GEODATAFRAME (DATA FRAME, CRS, GEOMETRY COLUMN)
geo_df = gpd.GeoDataFrame(df_potvrdeni, crs=crs_, geometry=geometry_)

# LOAD THE SHP FILE
gpdf = gpd.read_file(
    'C:/Users/Desktop/my_projects/Covid_Cro/ne_10m_admin_0_countries.shp')

# PLOT
fig, ax = plt.subplots(figsize=(20, 18))
gpdf.plot(ax=ax)
geo_df.plot(ax=ax, color='red', markersize=10)
ax.annotate(t,
            xy=(drzx, drzy),
            xytext=(-45, 40),
            arrowprops={
                'width': 1,
                'color': 'black',
def bbox2gdf(self):
    gdf = gpd.GeoDataFrame(geometry=[self.raster_bbox()], crs=self.prj.wkt)
    return gdf
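
# --- Hedged usage note (addition) ---
# bbox2gdf wraps the raster bounding box in a one-row GeoDataFrame carrying the
# raster's projection, which makes footprint export or overlap tests one-liners.
# `raster` and `other_gdf` below are hypothetical stand-ins for an instance of
# the surrounding class and another layer:
# footprint = raster.bbox2gdf()
# footprint.to_file('raster_footprint.shp')
# overlaps = footprint.intersects(other_gdf.unary_union).iloc[0]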
# Total 2015 WorldPop population per TA (sum of all grid cells in that TA)
SUM_2015_Pop_dist_TA = Malawi_2015_dist.groupby(['NAME_0', 'NAME_1', 'NAME_2'],
                                                as_index=False)[['sum']].sum()

Pop_adj_Data = read_data_xlsx(Pop_adj_path, sheetname)
Pop_adj_Data = Pop_adj_Data[['District', 'TA', '2018_pop_adj', '2019_pop_adj',
                             '2020_pop_adj', '2021_pop_adj', '2022_pop_adj',
                             '2023_pop_adj']]
Pop_adj_Data['District'] = Pop_adj_Data['District'].str.title()

SUM_2015_Pop_dist_TA = SUM_2015_Pop_dist_TA.merge(Pop_adj_Data,
                                                  left_on=['NAME_1', 'NAME_2'],
                                                  right_on=['District', 'TA'],
                                                  how='left')
SUM_2015_Pop_dist_TA.rename(columns={'sum': 'MAX_SUM'}, inplace=True)

# Share of each grid cell in its TA total, then allocate the adjusted
# 2018-2023 TA populations to the grid cells in proportion to that share
Forecast_Malawi_Pred = Malawi_2015_dist.merge(SUM_2015_Pop_dist_TA,
                                              on=['NAME_0', 'NAME_1', 'NAME_2'])
Forecast_Malawi_Pred['Percent_Grid_TA'] = (Forecast_Malawi_Pred['sum'] /
                                           Forecast_Malawi_Pred['MAX_SUM']) * 100
Forecast_Malawi_Pred.fillna({'sum': 0, 'Percent_Grid_TA': 0}, inplace=True)

for column in Forecast_Malawi_Pred.columns:
    if column.endswith('adj'):
        Forecast_Malawi_Pred[column + 'sumpp'] = (
            Forecast_Malawi_Pred['Percent_Grid_TA'] *
            Forecast_Malawi_Pred[column]) / 100

Forecast_selected_cols = Forecast_Malawi_Pred[[
    'NAME_0', 'NAME_1', 'NAME_2', 'TYPE_2', 'Type', 'ENGTYPE_2', 'Grid_index',
    'geometry', 'sum', 'MAX_SUM', 'Percent_Grid_TA', '2018_pop_adj',
    '2019_pop_adj', '2020_pop_adj', '2021_pop_adj', '2022_pop_adj',
    '2023_pop_adj', '2018_pop_adjsumpp', '2019_pop_adjsumpp',
    '2020_pop_adjsumpp', '2021_pop_adjsumpp', '2022_pop_adjsumpp',
    '2023_pop_adjsumpp',
]].copy()
Forecast_selected_cols.rename(columns={
    'NAME_0': 'Country',
    'NAME_1': 'District',
    'NAME_2': 'TA_NAMES',
    'sum': '2015_wordpop_adjsumpp',
    'MAX_SUM': '2015_wordpop_adj',
    'Percent_Grid_TA': 'Pop_Percent_Grid'
}, inplace=True)

#write_data_xlsx(Malawi_Distribution_CSV_Path, Forecast_selected_cols)
Forecast_selected_cols.to_csv(Malawi_Distribution_CSV_Path)

Forecast_geo_frame = gpd.GeoDataFrame(Forecast_selected_cols, geometry='geometry')
Forecast_geo_frame.crs = {'init': 'epsg:4326'}
write_shape_data_file(Malawi_Distribution_Path, Forecast_geo_frame)
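
# --- Hedged sketch (addition) ---
# Sanity check on the proportional allocation above: summing the grid-level
# allocated populations back to TA level should reproduce the TA totals
# (within floating point error). Column names follow the renamed frame.
check = (Forecast_selected_cols
         .groupby(['District', 'TA_NAMES'], as_index=False)['2018_pop_adjsumpp']
         .sum()
         .rename(columns={'2018_pop_adjsumpp': 'allocated_2018'}))
print(check.head())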
def vis_compare(data_zip, userinput, filepath, grid_shp, sea=None, roads=None, train=None, metro=None, compare_mod=[], create_shapefiles=True, visualisation=True, map_type='interactive', destination_style='grid', destination_color='yellow', roads_color='grey', metro_color='red', train_color='blue', classification='pysal_class', class_type="Quantiles", n_classes=8, multiples=[-2, -1, 1, 2], pct=0.1, hinge=1.5, truncate=True, pct_classes=[1, 10, 50, 90, 99, 100], class_lower_limit="", class_upper_limit="", class_step="", label_lower_limit="", label_upper_limit="", label_step=""): if create_shapefiles == False and visualisation == True and not compare_mod: raise AccessVizError( "When visualising, you have to specify the two travel modes to compare. Check the 'userinput' and include, two travel modes" ) if not userinput: raise AccessVizError( "You have not specified any travel time matrix to be merged with the grid. \n Check the parameter -'userinput'- and include a valid travel time matrix" ) if create_shapefiles == False and visualisation == False: raise AccessVizError( "You have not specified any action to create shapefiles or visualise. \n Check the parameters!. Either 'create_shapefiles' or 'visualisation' has to be True." ) grid_shp = grid_shp.to_crs(from_epsg(3067)) if roads is None: print('You have not included the roads route') else: roads = roads.to_crs(from_epsg(3067)) rdfsource = GeoJSONDataSource(geojson=roads.to_json()) # #Calculate the x and y coordinates of the roads (these contain MultiLineStrings). # roads['x'] = roads.apply(get_geom.getCoords, geom_col="geometry", coord_type="x", axis=1) # # roads['y'] = roads.apply(get_geom.getCoords, geom_col="geometry", coord_type="y", axis=1) # # # Include only coordinates from roads (exclude 'geometry' column) # rdf = roads[['x', 'y']] # #this two rows had nan values which prevented me from saving the plot. I got the error: # #ValueError: Out of range float values are not JSON compliant. # #therefore, I had to remove the two rows # rdf.drop(39, inplace=True) # rdf.drop(158, inplace=True) if train is None: print('You have not included the train route') else: train = train.to_crs(from_epsg(3067)) tdfsource = GeoJSONDataSource(geojson=train.to_json()) # train['x'] = train.apply(get_geom.getCoords, geom_col="geometry", coord_type="x", axis=1) # # train['y'] = train.apply(get_geom.getCoords, geom_col="geometry", coord_type="y", axis=1) # # # tdf = train[['x','y']] # tdfsource = ColumnDataSource(data=tdf) if metro is None: print('You have not included the metro route') else: metro = metro.to_crs(from_epsg(3067)) mdfsource = GeoJSONDataSource(geojson=metro.to_json()) # #Calculate the x and y coordinates of metro. 
# metro['x'] = metro.apply(get_geom.getCoords, geom_col="geometry", coord_type="x", axis=1) # # metro['y'] = metro.apply(get_geom.getCoords, geom_col="geometry", coord_type="y", axis=1) # # # Include only coordinates from metro (exclude 'geometry' column) # mdf = metro[['x','y']] # mdfsource = ColumnDataSource(data=mdf) if sea is None: print('You have not included the metro route') else: sea = sea.to_crs(from_epsg(3067)) sea_source = GeoJSONDataSource(geojson=sea.to_json()) namelist = data_zip.namelist() m_list = [] #iterate over the userinput, to get all its element/values for element in userinput: #concatenate the input with the standard names of the file element_file = ("HelsinkiRegion_TravelTimeMatrix2015/" + str(element)[0:4] + "xxx/travel_times_to_ " + str(element) + ".txt") #now, check if the file is in not namelist of all the files in the ziped folder. #if it is not, give the warning if element_file not in namelist: print("WARNING: The specified matrix {0} is not available". format(element)) print("\n") else: print("Matrix {0} is available".format(element)) m_list.append(element) #check for the progress print( "Processing file travel_times_to_{0}.txt.. Progress: {1}/{2}" .format(element, len([i for i in range(len(m_list))]), len(userinput))) #The above can also simply be done as below #slice the string. This is used for the following step, just #to know which of the matrix is presently being extracted. #f_slice=filename[44:] #print("processing file {0}.. Progress: {1}/{2}".format(f_slice,len([i for i in range(len(m_list))]), len(m_list))) bytes = data_zip.read(element_file) #print the file size print('has', len(bytes), 'bytes') print("\n") tt_matrices = pd.read_csv(element_file, sep=";") column_list = [i for i in tt_matrices.columns] absent_col = [i for i in compare_mod if i not in column_list] #find if any of the items of the listed transport modes is/are not column(s) in the matrix dataframe if any(x not in column_list for x in compare_mod): if len(absent_col) == 1: raise AccessVizError( "The specified travel mode", str(absent_col).strip('[]'), "is not available. Accepted travel modes include:", str([i for i in tt_matrices.columns ][2:]).strip('[]')) # break elif len(absent_col) > 1: raise AccessVizError( "The specified travel modes:", str(absent_col).strip('[]'), ", are not available. Accepted travel modes include:", str([i for i in tt_matrices.columns ][2:]).strip('[]')) # break else: if len(compare_mod) > 2: #userinput= [int(x) for x in input("list the ID-numbers you want to read and separate each by a comma(,): ").split(',')] raise AccessVizError( "WARNING: More than two travel modes are not allowed. Specify only two similar travel modes(i.e either distance or time but not both at thesame time)" ) # break elif len(compare_mod) == 2: if compare_mod[0] == compare_mod[1]: raise AccessVizError( "WARNING: You are comparing the same travel mode\n" ) # break elif compare_mod[0][-1] != compare_mod[1][-1]: raise AccessVizError( "WARNING!:You cannot compare Travel Distance with Travel Time!!!\n" ) # break elif len(compare_mod) == 1: raise AccessVizError( "WARNING: You have specified just one travel mode. \n One travel mode is not allowed. \n Specify two travel modes in the list" ) # break #This is done to handle matrices with nodata at all. 
e.g: matrix"6016696" if tt_matrices['to_id'].max() == -1: print('The MATRIX- {0} is empty and has nodata'.format( element)) print('\n') else: merged_metro = pd.merge(grid_shp, tt_matrices, left_on="YKR_ID", right_on="from_id") #check if list is empty. if not compare_mod and create_shapefiles == True: print( 'NOTE: You did not specify any travel mode. Therefore, only the travel time matrix', element, 'and the grid shapefile will be produced ') merged_metro.to_file(driver='ESRI Shapefile', filename=filepath + "/travel_times_to_" + str(element) + ".shp") else: mode1 = compare_mod[0] mode2 = compare_mod[1] tt_col = mode1 + '_vs_' + mode2 #Next I will calculate the difference but be mindful of the empty grids. #when either or both of the modes is/are empty, the resultant difference #should be nodata(i.e -1) #create an empty column to imput the mode difference merged_metro[tt_col] = "" mode1_vs_mode2 = [] for idx, rows in merged_metro.iterrows(): if rows[mode1] == -1 or rows[mode2] == -1: difference = -1 mode1_vs_mode2.append(difference) else: difference = rows[mode1] - rows[mode2] mode1_vs_mode2.append(difference) merged_metro[tt_col] = mode1_vs_mode2 # ============================================================================= # alternative # mode1_vs_mode2=[] # for i in range(len(data)): # print(i) # if data.loc[i, "pt_r_tt"]!=-1 or data.loc[i,"car_r_t"]!=-1: # dat= data["pt_r_tt"] - data["car_r_t"] # mode1_vs_mode2.append(dat) # elif data.loc[i, "pt_r_tt"]==-1 or data.loc[i,"car_r_t"]==-1: # dat=-1 # mode1_vs_mode2.append(dat) # data["pt_diff_car_tt"] = mode1_vs_mode2 # ============================================================================= if create_shapefiles == True: #now, export the result merged_metro.to_file(driver='ESRI Shapefile', filename=filepath + "/travel_times_to_" + tt_col + "_" + str(element) + ".shp") if visualisation == True: #However, for the visualisation, there is need to exclude the nodata grids with -1 merged_metro = merged_metro.loc[ merged_metro[tt_col] != -1] #Calculate the x and y coordinates of the grid. merged_metro['x'] = merged_metro.apply( get_geom.getCoords, geom_col="geometry", coord_type="x", axis=1) merged_metro['y'] = merged_metro.apply( get_geom.getCoords, geom_col="geometry", coord_type="y", axis=1) if classification == 'pysal_class': if class_type == "Natural_Breaks": classifier = ps.Natural_Breaks.make( k=n_classes) elif class_type == "Equal_Interval": classifier = ps.Equal_Interval.make( k=n_classes) elif class_type == "Box_Plot": classifier = ps.Box_Plot.make(hinge) elif class_type == "Fisher_Jenks": classifier = ps.Fisher_Jenks.make( k=n_classes) # elif class_type == "Fisher_Jenks_Sampled": # classifier = ps.Fisher_Jenks_Sampled.make(k=n_classes, pct=0.1) elif class_type == "HeadTail_Breaks": classifier = ps.HeadTail_Breaks.make( k=n_classes) elif class_type == "Jenks_Caspall": classifier = ps.Jenks_Caspall.make( k=n_classes) elif class_type == "Jenks_Caspall_Forced": classifier = ps.Jenks_Caspall_Forced.make( k=n_classes) elif class_type == "Quantiles": classifier = ps.Quantiles.make(k=n_classes) elif class_type == "Percentiles": classifier = ps.Percentiles.make( pct_classes) elif class_type == "Std_Mean": classifier = ps.Std_Mean.make(multiples) mode_classif = merged_metro[[ tt_col ]].apply(classifier) #Rename the columns of our classified columns. mode_classif.columns = [tt_col + "_ud"] #Join the classes back to the main data. 
merged_metro = merged_metro.join(mode_classif) merged_metro['label_' + tt_col] = mode_classif elif classification == "User_Defined": #Next, we want to classify the travel times with 5 minute intervals until 200 minutes. #Let’s create a list of values where minumum value is 5, maximum value is 200 and step is 5. breaks = [ x for x in range(class_lower_limit, class_upper_limit, class_step) ] #Now we can create a pysal User_Defined classifier and classify our travel time values. classifier = ps.User_Defined.make(bins=breaks) #walk_classif = data[['walk_t']].apply(classifier) mode_classif = merged_metro[[ tt_col ]].apply(classifier) #Rename the columns of our classified columns. mode_classif.columns = [tt_col + "_ud"] #walk_classif.columns = ['walk_t_ud'] #Join the classes back to the main data. merged_metro = merged_metro.join(mode_classif) #data = data.join(walk_classif) #Create names for the legend (until 60 minutes). The following will produce: ["0-5", "5-10", "10-15", ... , "60 <"]. # names = [ "%s-%s" % (x - label_step, x) for x in range(label_lower_limit, label_upper_limit, label_step) ] # ["{0}kk{1}".format(x-5,x) for x in range(5, 200, 5)] #alternative #Add legend label for over 60. names.append("%s<" % label_upper_limit) #Assign legend names for the classes. #data['label_wt'] = None merged_metro['label_' + tt_col] = None #Update rows with the class-names. for i in range(len(names)): merged_metro.loc[merged_metro[tt_col + "_ud"] == i, 'label_' + tt_col] = names[i] #Update all cells that didn’t get any value with "60 <" #data['label_wt'] = data['label_wt'].fillna("%s <" % upper_limit) merged_metro['label_' + tt_col] = merged_metro[ 'label_' + tt_col].fillna( "%s<" % label_upper_limit) #Finally, we can visualize our layers with Bokeh, add a legend for travel times #and add HoverTools for Destination Point and the grid values (travel times). 
# Select only necessary columns for our plotting to keep the amount of data minumum #df = data[['x', 'y', 'walk_t','walk_t_ud', 'car_r_t','car_r_t_ud', 'from_id', 'label_wt', "label_car"]] df = merged_metro[[ 'x', 'y', "YKR_ID", mode1, mode2, tt_col, tt_col + "_ud", "from_id", 'label_' + tt_col ]] dfsource = ColumnDataSource(data=df) # dfsource = GeoJSONDataSource(geojson=merged_metro.to_json()) df_dest_id = merged_metro.loc[ merged_metro['YKR_ID'] == element] dfsource_dest_id = ColumnDataSource( data=df_dest_id) # dfsource_dest_id = GeoJSONDataSource(geojson=df_dest_id.to_json()) # Specify the tools that we want to use TOOLS = "pan,wheel_zoom,box_zoom,reset,save" # Flip the colors in color palette palette2.reverse() color_mapper = LogColorMapper(palette=palette2) #color_mapper = ContinuousColorMapper(palette=palette4) #This part is for automating the title list_of_titles = [ "walk_t: Travel time in minutes from origin to destination by walking", "walk_d: Distance in meters of the walking route", "pt_r_tt: Travel time in minutes from origin to destination by public transportation in rush hour traffic(including waiting time at home)", "pt_r_t: Travel time in minutes from origin to destination by public transportation in rush hour traffic(excluding waiting time at home)", "pt_r_d: Distance in meters of the public transportation route in rush hour traffic", "pt_m_tt: Travel time in minutes to destination by public transportation in midday traffic(including waiting time at home)", "pt_m_t: Travel time in minutes from origin to destination by public transportation in midday traffic(excluding waiting time at home)", "pt_m_d: Distance in meters of the public transportation route in midday traffic", "car_r_t: Travel time in minutes from origin to destination by private car in rush hour traffic", "car_r_d: Distance in meters of the private car route in rush hour traffic", "car_m_t: Travel time in minutes from origin to destination by private car in midday traffic", "car_m_d: Distance in meters of the private car route in midday traffic" ] title_mod1 = list_of_titles[ tt_matrices.columns.get_loc(mode1) - 2] title_mod2 = list_of_titles[ tt_matrices.columns.get_loc(mode2) - 2] index = title_mod1.find('destination') title_mat = title_mod1[:index + len( 'destination')] + ' ' + str( element) + title_mod1[index + len('destination'):] index_mode1 = title_mat.find(mode1 + str(':')) if 'destination' in title_mod1: if mode2[:2] == 'pt': title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('public'):] ).title() elif mode2[:4] == 'walk': title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('walking'):] ).title() elif mode2[:3] == "car": title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('private'):] ).title() #here, for the title. i got the location of the specified travel mode(tt_col), then, with its # with its index, i got the corresponsding location in the list which was arranged according to the # to the columns of the dataframe(tt_matrices) too. 2 is subracted(i.e -2) because, the list_of_titles # is shorter by 2, as it does not include from_id or to_id which are not variables of interest here but the travel modes only. 
elif 'Distance' in title_mod1: title_mat = title_mod1 + ' to ' + str(element) if mode2[:2] == 'pt': title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('public'):] ).title() elif mode2[:4] == 'walk': title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('walking'):] ).title() elif mode2[:3] == "car": title_matrix = title_mat[:index_mode1 + len( mode1 )] + ' vs ' + mode2 + ( ': Difference between' + title_mat[index_mode1 + len(mode1) + 1:] + ' vs ' + title_mod2[title_mod2.find('private'):] ).title() if map_type == 'interactive': p = figure(title=tt_col, tools=TOOLS, plot_width=850, plot_height=650, active_scroll="wheel_zoom") #p.title.text=title_matrix p.title.text_color = "blue" p.title.text_font = "times" p.title.text_font_style = "italic" p.title.text_font_size = '20px' p.title.offset = -5.0 p.add_layout( Title(text=title_matrix[len(tt_col) + 1:][211:], text_font_size="11pt", text_font_style="bold"), 'above') #sub p.add_layout( Title(text=title_matrix[len(tt_col) + 1:][102:211], text_font_size="11pt", text_font_style="bold"), 'above') #sub p.add_layout( Title(text=title_matrix[len(tt_col) + 1:][:102], text_font_size="11pt", text_font_style="bold"), 'above') #main # This can be used if you want a more generalised title # differentiating just travel times and distances and not the meanas. # if tt_col[-1]== 't': # p = figure(title="Travel times to The Grid", tools=TOOLS, # plot_width=800, plot_height=650, active_scroll = "wheel_zoom" ) # elif tt_col[-1]== 'd': # p = figure(title="Travel distances to The Grid", tools=TOOLS, # plot_width=800, plot_height=650, active_scroll = "wheel_zoom" ) # # Do not add grid line p.grid.grid_line_color = None if sea is not None: #add water s = p.patches('xs', 'ys', source=sea_source, color='#6baed6', legend='Sea') # Add polygon grid and a legend for it grid = p.patches('x', 'y', source=dfsource, name="grid", fill_color={ 'field': tt_col + "_ud", 'transform': color_mapper }, fill_alpha=1.0, line_color="black", line_width=0.03, legend='label_' + tt_col) if roads is not None: # Add roads #for GeoJSONDataSource xs and ys are used instead of x and y if I had used the normal way in bokeh r = p.multi_line('xs', 'ys', source=rdfsource, color=roads_color, legend="roads") if metro is not None: # Add metro m = p.multi_line('xs', 'ys', source=mdfsource, color=metro_color, line_dash='solid', legend="metro") #other line dash option: 'solid' ,'dashed','dotted','dotdash','dashdot' if train is not None: # Add train tr = p.multi_line('xs', 'ys', source=tdfsource, line_cap='butt', line_width=2, line_dash='dashdot', color=train_color, legend="train") # Modify legend location p.legend.location = "top_right" p.legend.orientation = "vertical" ghover = HoverTool(renderers=[grid]) ghover.tooltips = [("YKR-ID", "@from_id"), (mode1, "@" + mode1), (mode2, "@" + mode2), (mode1 + " minus " + mode2, "@" + tt_col)] p.add_tools(ghover) # Insert a circle on top of the location(coords in EurefFIN-TM35FIN) #print(element) #because, it is a grid, the location of each cell has about s x and #y coordinates, hence, after finding the x for each grid, select #one of the x and y coordinates(the third, which is the centre of each grid) from the list. 
#dest_grid_x = (df.loc[df["YKR_ID"]==element, 'x'].values[0])[2] #dest_grid_y = (df.loc[df["YKR_ID"]==element, 'y'].values[0])[2] #Alternative to getting the centre of a grid: grid_centroid = merged_metro.loc[ merged_metro['YKR_ID'] == element, 'geometry'].values[0].centroid dest_grid_x = grid_centroid.x dest_grid_y = grid_centroid.y if destination_style == 'circle': # Add two separate hover tools for the data circle = p.circle(x=[dest_grid_x], y=[dest_grid_y], name="point", size=7, color=destination_color, legend='Destination') phover = HoverTool(renderers=[circle]) phover.tooltips = [("Destination Grid:", str(element))] p.add_tools(phover) elif destination_style == 'grid': grid_dest_id = p.patches( 'x', 'y', source=dfsource_dest_id, name='grid', color=destination_color) ghover_dest_id = HoverTool( renderers=[grid_dest_id]) ghover_dest_id.tooltips = [ ("DESTINATION GRID", str(element)) ] p.add_tools(ghover_dest_id) # Output filepath to HTML # Save the map save( p, filepath + "/" + mode1 + "_vs_" + mode2 + "_" + str(element) + ".html") elif map_type == 'static': my_map = merged_metro.plot(column=tt_col, linewidth=0.02, legend=True, cmap="RdYlGn", scheme=class_type, k=n_classes, alpha=0.9) if roads is not None: # Add roads on top of the grid # (use ax parameter to define the map on top of which the second items are plotted) roads.plot(ax=my_map, color=roads_color, legend=True, linewidth=1.2) if metro is not None: # Add metro on top of the previous map metro.plot(ax=my_map, color=metro_color, legend=True, linewidth=2.0) if train is not None: # Add metro on top of the previous map train.plot(ax=my_map, color=train_color, legend=True, linewidth=2.0) ## Insert a circle on top of the Central Railway Station (coords in EurefFIN-TM35FIN) dest_grid_x = (df.loc[df["YKR_ID"] == element, 'x'].values[0])[2] dest_grid_y = (df.loc[df["YKR_ID"] == element, 'y'].values[0])[2] dest_grid = gpd.GeoDataFrame() dest_grid_loc = Point(dest_grid_x, dest_grid_y) dest_grid["geometry"] = "" dest_grid.loc[1, "geometry"] = dest_grid_loc #r_s["geometry"]=r_s["geometry"].to_crs(crs=gridCRS) dest_grid.plot(ax=my_map, color="blue", legend=True, linewidth=1.5) #plt.legend(["roads", "metro line","Rautatientori"]) #title_map=list_of_titles[tt_matrices.columns.get_loc(tt_col) - 2] plt.title(textwrap.fill(title_matrix, 65), fontsize=8) #north arrow in the southeastern corner my_map.text(x=df['x'].max()[2], y=df['y'].min()[2], s='N\n^', ha='center', fontsize=23, family='Courier new', rotation=0) #move legend to avoid overlapping the map lege = my_map.get_legend() lege.set_bbox_to_anchor((1.60, 0.9)) #resize the map to fit in thr legend. 
mapBox = my_map.get_position() my_map.set_position([ mapBox.x0, mapBox.y0, mapBox.width * 0.6, mapBox.height * 0.9 ]) my_map.legend(loc=2, prop={'size': 3}) # plt.show() # Save the figure as png file with resolution of 300 dpi outfp = filepath + "/" + "static_map_" + mode1 + "_vs_" + mode2 + "_" + str( element) + ".png" plt.savefig(outfp, dpi=300) #put into an object the inputs are not in the matrix list(i.e which of the specified is not in the zipped matrices) absentinput = [i for i in userinput if i not in m_list] #check if all of the imputed values does not exist if len(absentinput) == len(userinput): print("all the inputs do not exist") #check for those that are not included in the matrices elif any(absentinput) not in m_list: #warn that they do not exist print("WARNING: ", (str(absentinput)).strip('[]'), "are not available in the matrices") #check how many of them are not in the matrices print(len(absentinput), "of the inputs are not included in the matrices") print("\n") merged_files = [i for i in userinput if i in m_list] if not compare_mod: if len(userinput) == 1: print( "NOTE: You have not specified the travel modes to compare, hence, the merged shapefile", str(merged_files).strip("[]"), "alone was produced") elif len(userinput) > 1: print( "NOTE: You have not specified the travel modes to compare, hence, the merged shapefiles- {0} -alone were produced" .format(str(merged_files).strip("[]")))
def overpass_pois(bounds, facilities=None, custom_query=None): ''' Download POIs using Overpass API. Parameters ---------- bounds: array_like Input bounds for query. Follows [minx,miny,maxx,maxy] pattern. facilities: str. One of {'food', 'health', 'education', 'finance'} Type of facilities to download according to HOTOSM types. Based on this a different type of query is constructed. custom_query: str (Optional). Default None. String with custom Overpass QL query (See https://wiki.openstreetmap.org/wiki/Overpass_API/Language_Guide). If this parameter is diferent than None, bounds and facilities values are ignored. Returns ------- gdf: GeoDataFrame POIs from the selected type of facility. If 'custom_query' is given response is returned instead of gdf. response: request.Response Returned only if 'custom_query' is given. Contains the server's response to the HTTP request from the Overpass API Server. Examples -------- >>> lima = nominatim_osm('Lima, Peru', 2) >>> urbanpy.download.overpass_pois(lima.total_bounds, 'health') type | id | lat | lon | tags | geometry | poi_type node | 367826732 | -0.944005 | -80.733941 | {'amenity': 'pharmacy', 'name': 'Fybeca'} | POINT (-80.73394 -0.94401) | pharmacy node | 367830051 | -0.954086 | -80.742420 | {'amenity': 'hospital', 'emergency': 'yes', 'n... | POINT (-80.74242 -0.95409) | hospital node | 367830065 | -0.954012 | -80.741554 | {'amenity': 'hospital', 'name': 'Clínica del S... | POINT (-80.74155 -0.95401) | hospital node | 367830072 | -0.953488 | -80.740739 | {'amenity': 'hospital', 'name': 'Clínica Cente... | POINT (-80.74074 -0.95349) | hospital node | 3206491590| -1.040708 | -80.665107 | {'amenity': 'hospital', 'name': 'Clínica Monte... | POINT (-80.66511 -1.04071) | hospital ''' minx, miny, maxx, maxy = bounds bbox_string = f'{minx},{miny},{maxx},{maxy}' overpass_url = "http://overpass-api.de/api/interpreter" facilities_opt = { 'food': 'node["amenity"="marketplace"];\nnode["shop"~"supermarket|kiosk|mall|convenience|butcher|greengrocer"];', 'health': 'node["amenity"~"doctors|dentist|clinic|hospital|pharmacy"];', 'education': 'node["amenity"~"kindergarten|school|college|university"];', 'finance': 'node["amenity"~"mobile_money_agent|bureau_de_change|bank|microfinance|atm|sacco|money_transfer|post_office"];', } if custom_query is None: overpass_query = f""" [timeout:120][out:json][bbox]; ( {facilities_opt[facilities]} ); out body geom; """ # Request data response = requests.get(overpass_url, params={ 'data': overpass_query, 'bbox': bbox_string }) data = response.json() df = pd.DataFrame.from_dict(data['elements']) df_geom = gpd.points_from_xy(df['lon'], df['lat']) gdf = gpd.GeoDataFrame(df, geometry=df_geom) gdf['poi_type'] = gdf['tags'].apply( lambda tag: tag['amenity'] if 'amenity' in tag.keys() else np.NaN) if facilities == 'food': # Food facilities also have its POI type wthin the shop tag (See query) also_poi_type = gdf['tags'].apply( lambda tag: tag['shop'] if 'shop' in tag.keys() else np.NaN) gdf['poi_type'] = gdf['poi_type'].fillna(also_poi_type) return gdf else: response = requests.get(overpass_url, params={ 'data': custom_query, 'bbox': bbox_string }) return response
def decomposeSite(deims_site, admin_zones, zone_id, zone_name, debug=False): """Decompose a site by administrative zones. deims_site: site to decompose (gpd.GeoDataFrame) admin_zones: admin zones/regions to break deims_site into (gpd.GeoDataFrame) zone_id: column name of the zone ID in admin_zones (str) zone_name: column name of the zone name in admin_zones (str) debug: whether or not to plot the results for visual checking (bool) Returns the resulting GDF. """ # convert CRS of dataset to match admin zones # this will be the CRS of the output GDF deims_site = deims_site.to_crs(admin_zones.crs) # check which zones intersect the LTSER site ltser_zones = gpd.overlay(deims_site,admin_zones,how='intersection') # add original zones for area comparison, setting correct geometry # this is necessary to align the comparison geometry correctly: # comparing straight away with admin_zones.geometry.area doesn't # align the rows correctly ltser_zones = pd.merge(ltser_zones,admin_zones,on=zone_id) ltser_zones = ltser_zones.set_geometry('geometry_x') # add intersection area/zone area as new column # full_areas definition necessary because pd.merge only allows for one geoseries full_areas = gpd.GeoSeries(ltser_zones['geometry_y']) ltser_zones['intersection_ratio'] = ltser_zones.geometry.area/full_areas.area # construct GDF of cropped zones + ratio of area intersection gdf_out = gpd.GeoDataFrame( { 'zone_id': ltser_zones[zone_id].astype('string'), 'zone_name': ltser_zones[zone_name+'_x'].astype('string'), 'geometry': ltser_zones['geometry_x'], 'area_ratio': ltser_zones['intersection_ratio'] }, crs = ltser_zones.crs ) # optional visual check of intersection adds full geometry of zones if debug: # add and set full geometry gdf_out['debug_geometry'] = ltser_zones['geometry_y'] gdf_out = gdf_out.set_geometry('debug_geometry') # plot overlap - no need to return object since plots directly to (presumably) stdout fig, ax = plt.subplots(figsize = (10,10)) ax.set_axis_off() ax.set_title('Zones (blue) intersecting LTSER site (red)') gdf_out.plot(ax=ax) deims_site.boundary.plot(color='r',ax=ax) # drop debug_geometry column so output is identical to non-debug gdf_out.drop(columns='debug_geometry',inplace=True) gdf_out = gdf_out.set_geometry('geometry') return gdf_out else: return gdf_out
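
# --- Hedged sketch (addition) ---
# Tiny synthetic example of decomposeSite: a rectangular "site" split by two
# rectangular "zones". All names, geometries and the CRS below are made up
# purely for illustration.
from shapely.geometry import box

site = gpd.GeoDataFrame({'name': ['demo site']},
                        geometry=[box(0, 0, 2, 1)], crs='EPSG:3857')
zones = gpd.GeoDataFrame({'ZONE_ID': ['A', 'B'],
                          'ZONE_NAME': ['west', 'east']},
                         geometry=[box(0, 0, 1, 1), box(1, 0, 2, 1)],
                         crs='EPSG:3857')
demo = decomposeSite(site, zones, 'ZONE_ID', 'ZONE_NAME')
print(demo[['zone_id', 'zone_name', 'area_ratio']])  # both ratios should be 1.0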
data[year] = msoa_2011.set_index('MSOA11CD')[[
    'geometry', 'RGN11NM'
]].join(data[year].set_index('MSOA11CD'), how='left')
data[year] = data[year].loc[(data[year]['RGN11NM'] != 'Scotland')
                            & (data[year]['RGN11NM'] != 'Northern Ireland')
                            & (data[year]['RGN11NM'] != 'Wales')]

# try different product categories
new_cat = {}
cat_dict = pd.read_excel(
    eval("r'" + data_directory +
         "/data/processed/LCFS/Meta/lcfs_desc_anne&john.xlsx'"))
cats = cat_dict[['category_2']].drop_duplicates()['category_2']
cat_dict['ccp_code'] = [x.split(' ')[0] for x in cat_dict['ccp']]
cat_dict = dict(zip(cat_dict['ccp_code'], cat_dict['category_2']))

for year in range(2007, 2018):
    new_cat[year] = data[year].rename(columns=cat_dict).sum(axis=1, level=0)
    new_cat[year] = gpd.GeoDataFrame(new_cat[year], geometry='geometry')

new_cat_all = pd.DataFrame(columns=new_cat[2017].columns)
for year in range(2007, 2018):
    temp = cp.copy(new_cat[year])
    temp['year'] = year
    new_cat_all = new_cat_all.append(temp)
new_cat_all = gpd.GeoDataFrame(new_cat_all, geometry='geometry')

new_cat_all.to_file(eval("r'" + data_directory +
                         "/data/processed/new_cat_for_gwr.shp'"),
                    driver='ESRI Shapefile')
def complete_unique_geoms(): # output unique geometries and sum of all # project locations associated with that geometry unique_geo_df = gpd.GeoDataFrame() if active_data.size > 0: unique_active_data = active_data.loc[ active_data.geom_val != "None"].copy(deep=True) if active_data.size > 0 and unique_active_data.size > 0: # creating geodataframe geo_df = gpd.GeoDataFrame() # location id geo_df["project_location_id"] = unique_active_data["project_location_id"] geo_df["project_location_id"].fillna(unique_active_data["project_id"], inplace=True) geo_df["project_location_id"] = geo_df["project_location_id"].astype(str) # assuming even split of total project dollars is "max" dollars # that project location could receive geo_df["dollars"] = unique_active_data["adjusted_val"] # geometry for each project location geo_df["geometry"] = gpd.GeoSeries(unique_active_data["geom_val"]) # # write full to geojson # full_geo_json = geo_df.to_json() # full_geo_file = open(dir_working + "/full.geojson", "w") # json.dump(json.loads(full_geo_json), full_geo_file, indent=4) # full_geo_file.close() # string version of geometry used to determine duplicates geo_df["str_geo_hash"] = geo_df["geometry"].astype(str).apply( lambda z: str_sha1_hash(z)) # create and set unique index geo_df['index'] = range(0, len(geo_df)) geo_df = geo_df.set_index('index') # group project locations by geometry using str_geo_hash field # and for each unique geometry get the sum of dollars for # all project locations with that geometry sum_unique = geo_df.groupby(by='str_geo_hash')['dollars'].sum() # get count of locations for each unique geom geo_df['ones'] = 1 #(pd.Series(np.ones(len(geo_df)))).values sum_count = geo_df.groupby(by='str_geo_hash')['ones'].sum() # create list of project location ids for unique geoms cat_plids = geo_df.groupby(by='str_geo_hash')['project_location_id'].apply( lambda z: '|'.join(list(z))) # temporary dataframe with # unique geometry # location_count # dollar sums # which can be used to merge with original geo_df dataframe tmp_geo_df = gpd.GeoDataFrame() tmp_geo_df['unique_dollars'] = sum_unique tmp_geo_df['location_count'] = sum_count tmp_geo_df['project_location_ids'] = cat_plids tmp_geo_df['str_geo_hash'] = tmp_geo_df.index # merge geo_df with tmp_geo_df new_geo_df = geo_df.merge(tmp_geo_df, how='inner', on="str_geo_hash") # drops duplicate rows new_geo_df.drop_duplicates(subset="str_geo_hash", inplace=True) # gets rid of str_geo_hash column new_geo_df.drop('str_geo_hash', axis=1, inplace=True) # create final output geodataframe with index, unique_dollars # and unique geometry # unique_geo_df = gpd.GeoDataFrame() unique_geo_df["geometry"] = gpd.GeoSeries(new_geo_df["geometry"]) unique_geo_df["unique_dollars"] = new_geo_df["unique_dollars"] unique_geo_df["location_count"] = new_geo_df["location_count"] unique_geo_df["project_location_ids"] = new_geo_df["project_location_ids"] # unique_geo_df['index'] = range(len(unique_geo_df)) # write unique to geojson unique_geo_json = unique_geo_df.to_json() unique_geo_file = open(dir_working + "/unique.geojson", "w") json.dump(json.loads(unique_geo_json), unique_geo_file, indent=4) unique_geo_file.close()
def exportShapeFile(geo_data, frame, frame1):
    # Creates a GeoDataFrame from the DataFrame. The column names are specific
    # to the SW New Mexico BHT geothermal data; need to find a way to automate
    # selecting columns. Have the user select a cross-reference lookup table:
    # not all datasets will have headers, so having set variables will be more
    # secure.
    frame1.destroy()
    # frame.geometry("600x400")
    frame2 = Frame(frame)
    # frame2.pack_propagate(False)
    frame2.pack()
    frame3 = Frame(frame, width=800)
    # frame3.pack_propagate(0)
    frame3.pack()
    scroll = Scrollbar(frame2, orient="horizontal")
    scroll.pack(side=TOP, pady=20, fill=X)
    hold = []
    lab = ttk.Treeview(frame2)
    lab.pack(side=TOP, pady=20)
    for i in range(len(geo_data.columns)):
        hold.append(i + 1)
    lab["columns"] = (hold)
    for i in range(len(geo_data.columns)):
        lab.column(str(i + 1), minwidth=100, anchor='c')
        lab.heading(str(hold[i]), text=str(hold[i]))
    # preview the first ten rows of the dataset in the Treeview
    for i in range(0, 10):
        lab.insert("", tk.END, values=list(geo_data.iloc[i, :]))
    scroll.config(command=lab.xview)
    lab.config(xscrollcommand=scroll.set)

    entlab1 = Label(frame3, text="please enter the column # for latitude")
    entlab1.grid(row=2, column=0)
    ent1 = Entry(frame3)
    ent1.grid(row=3, column=0)
    entlab2 = Label(frame3, text="please enter the column # for longitude")
    entlab2.grid(row=4, column=0)
    ent2 = Entry(frame3)
    ent2.grid(row=5, column=0)
    bttn = tk.Button(frame3, text='Enter',
                     command=lambda: get_columns_from(ent1, ent2, X_COLUMNY_COLUMN, frame))
    bttn.grid(row=6, column=0)
    frame.mainloop()

    # convert the user-entered 1-based column numbers to 0-based positions
    latHold = int(X_COLUMNY_COLUMN[1]) - 1
    longHold = int(X_COLUMNY_COLUMN[0]) - 1
    # Note: gpd.points_from_xy expects x (longitude) first and y (latitude)
    # second, so verify that the selected column order matches.
    geo_gdf = gpd.GeoDataFrame(
        geo_data,
        geometry=gpd.points_from_xy(geo_data.iloc[:, int(latHold)],
                                    geo_data.iloc[:, int(longHold)]))
    # scale = Scale(frame, label="dataset brief", from_=0, to=100, command=getValue, orient="horizontal")
    # scale.pack(fill="x")
    geo_gdf.plot()
    ESRI_WKT = 'PROJCS["NAD83_HARN_New_Mexico_West",GEOGCS["GCS_NAD83(HARN)",DATUM["D_North_American_1983_HARN",SPHEROID["GRS_1980",6378137,298.257222101]],PRIMEM["Greenwich",0],UNIT["Degree",0.017453292519943295]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",31],PARAMETER["central_meridian",-107.8333333333333],PARAMETER["scale_factor",0.999916667],PARAMETER["false_easting",830000],PARAMETER["false_northing",0],UNIT["Meter",1]]'
    # Needs to incorporate a projection: an EPSG code or a WKT (well-known
    # text) definition, e.g. from epsg.io.
    file_save = filedialog.asksaveasfilename(initialdir='/')
    geo_gdf.to_file(filename=file_save, driver='ESRI Shapefile', crs_wkt=ESRI_WKT)
    print("Made it here!!")
    frame2.destroy()
    frame3.destroy()
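# A minimal sketch, separate from the Tkinter flow above and with made-up file
# and column names, of building points from a lat/long table and writing a
# shapefile with the CRS attached to the GeoDataFrame itself rather than
# passed as crs_wkt at write time:
import pandas as pd
import geopandas as gpd

df = pd.read_csv("bht_points.csv")  # hypothetical input with "Long"/"Lat" columns
gdf = gpd.GeoDataFrame(
    df,
    geometry=gpd.points_from_xy(df["Long"], df["Lat"]),  # x=longitude, y=latitude
    crs="EPSG:4326")
# reproject to a projected CRS before export if needed, e.g. WGS 84 / UTM zone 13N
gdf.to_crs("EPSG:32613").to_file("bht_points.shp", driver="ESRI Shapefile")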
import json
import yaml
import configparser
import os
import io
import sys

import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

LIMITIPATH = sys.argv[4]

# ISTAT administrative boundaries (regions and provinces); the CRS read from
# the .prj files is deliberately overridden to EPSG:23032 before reprojecting
# to WGS84 lat/lon
regioni = gpd.read_file(
    LIMITIPATH + "/Limiti01012019_g/Reg01012019_g/Reg01012019_g_WGS84.shp")
regioni = regioni.set_crs('epsg:23032', allow_override=True)
regioni = regioni.to_crs('epsg:4326')

province = gpd.read_file(
    LIMITIPATH + "/Limiti01012019_g/ProvCM01012019_g/ProvCM01012019_g_WGS84.shp")
province = province.set_crs('epsg:23032', allow_override=True)
province = province.to_crs('epsg:4326')

FILTER_LABELS = ("Accettato", )

try:
    config = configparser.RawConfigParser()
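# A hedged, standalone sketch (separate from the config-loading code above) of
# a typical next step with these layers: assigning points to the region that
# contains them via a spatial join. The two test coordinates are invented; on
# geopandas older than 0.10, use op="within" instead of predicate="within".
pts = gpd.GeoDataFrame(
    {"name": ["near Rome", "near Milan"]},
    geometry=[Point(12.5, 41.9), Point(9.19, 45.47)],
    crs="epsg:4326")
pts_with_region = gpd.sjoin(pts, regioni, how="left", predicate="within")
print(pts_with_region.head())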
parametros = {
    "1.1": 3,
    #"1.52": 9,
    #"1.8": 15
}
canchasList = []
caminitos = []
sensProp = 1.1   # proportion threshold passed to detCanchasAncho
lengthComp = 3   # comparison length passed to detCanchasAncho

data = gpd.read_file(fp)
# keep only forest-road features ("FCAM") and work with their geometries
caminos = data[data.TIPOUSO == "FCAM"]
caminos = caminos["geometry"]
caminos = gpd.GeoSeries(caminos)

# points skipped during detection; note that creating an empty GeoDataFrame
# with a named geometry column like this behaves differently across
# geopandas versions
saltados = gpd.GeoDataFrame(columns=['puntoMarcado'], geometry='puntoMarcado')

print("length comp: ", lengthComp)
start = time.time()

separateTol = 25
saltadosCount = 0
caminoCount = 1
anchosCanchas = []
canchas = []

# detect canchas (wide sections) along the roads, then merge nearby points
canchas, _ = detCanchasAncho(caminos, lengthComp, sensProp)
sepCanchas = postJuntarPuntos(canchas)
centroides = gpd.GeoDataFrame(columns=["centro"], geometry="centro")
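# A hedged alternative sketch (dummy coordinates): instead of starting from an
# empty GeoDataFrame, collect the detected points in a plain list and build
# the GeoDataFrame once at the end, which behaves the same across geopandas
# versions.
from shapely.geometry import Point

detectados = [Point(300000.0, 6000000.0), Point(300050.0, 6000020.0)]  # dummy points
centroides_sketch = gpd.GeoDataFrame(
    {"centro": detectados}, geometry="centro", crs=caminos.crs)
print(centroides_sketch)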
'crop_concentrations.csv')
subprocess.run(["gdal2xyz.py", '-csv', fpath, outCSVName])

'''Load points and convert to geodataframe with coordinates'''
load_points = pd.read_csv(outCSVName, header=None, names=['x', 'y', 'tons'],
                          index_col=None)
load_points = load_points[load_points['tons'] > 0]

geometry = [Point(xy) for xy in zip(load_points.x, load_points.y)]
load_points = load_points.drop(['x', 'y'], axis=1)
crs = 'epsg:4326'  # plain EPSG string; the {'init': ...} dict form is deprecated
crop_points = gpd.GeoDataFrame(load_points, crs=crs, geometry=geometry)
del load_points

# clip all points to the province
prov_crop = gdf_geom_clip(crop_points, province_geom)

if len(prov_crop.index) > 0:
    prov_crop_sindex = prov_crop.sindex
    prov_crop['NEAREST_G_NODE'] = prov_crop.geometry.apply(
        lambda x: get_nearest_node(x, sindex_nodes, nodes, 'NODE_ID'))
    sindex_commune_center = prov_commune_center.sindex
    prov_crop['NEAREST_C_CENTER'] = prov_crop.geometry.apply(
        lambda x: get_nearest_node(x, sindex_commune_center,
import pyposeidon.grid as pg import numpy as np import pytest import os import geopandas as gp import cartopy.feature as cf cr = "i" coast = cf.NaturalEarthFeature(category="physical", name="land", scale="{}m".format({"l": 110, "i": 50, "h": 10}[cr])) natural_earth = gp.GeoDataFrame(geometry=[x for x in coast.geometries()]) coast = cf.GSHHSFeature(scale="auto", levels=[1]) GSHHS = gp.GeoDataFrame(geometry=[x for x in coast.geometries()]) # define the lat/lon window and time frame of interest window0 = {"lon_min": -30, "lon_max": -10.0, "lat_min": 60.0, "lat_max": 70.0} window1 = {"lon_min": 175.0, "lon_max": 184.0, "lat_min": -21.5, "lat_max": -14.5} # lat/lon window window2 = {"lon_min": -20.0, "lon_max": -10.0, "lat_min": 63.0, "lat_max": 67.0} window3 = {"lon_min": 175.0 - 360.0, "lon_max": 184.0 - 360.0, "lat_min": -21.5, "lat_max": -14.5} # lat/lon window window4 = {"lon_min": -25.0, "lon_max": -10.0, "lat_min": 60.0, "lat_max": 68.0}
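# A small illustrative use of the windows above: geopandas' .cx indexer can
# crop the GSHHS coastline GeoDataFrame to one of the lat/lon boxes (window0
# is an arbitrary choice here).
cropped = GSHHS.cx[window0["lon_min"]:window0["lon_max"],
                   window0["lat_min"]:window0["lat_max"]]
print(len(cropped), "coastline geometries intersect window0")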