def clean(city):
    inpath = os.path.join(BASE, city, 'standardized')
    outpath = os.path.join(BASE, city, 'clean')
    if not os.path.exists(outpath):
        os.mkdir(outpath)

    streets = gpd.read_file(os.path.join(inpath, 'streets.shp'))
    sidewalks = gpd.read_file(os.path.join(inpath, 'sidewalks.shp'))

    click.echo('Assigning sidewalk side to streets...')
    sidewalks = sidewalk_clean.sw_tag_streets(sidewalks, streets)

    click.echo('Drawing sidewalks...')
    sidewalks = sidewalk_clean.redraw_sidewalks(sidewalks, streets)

    click.echo('Cleaning with street buffers...')
    sidewalks, buffers = sidewalk_clean.buffer_clean(sidewalks, streets)

    click.echo('Sanitizing sidewalks...')
    sidewalks = sidewalk_clean.sanitize(sidewalks)

    click.echo('Snapping sidewalk ends...')
    sidewalks = sidewalk_clean.snap(sidewalks, streets)

    click.echo('Writing to file...')
    streets.to_file(os.path.join(outpath, 'streets.shp'))
    sidewalks.to_file(os.path.join(outpath, 'sidewalks.shp'))

    # FIXME: curbramps should go through its own standardization/cleanup
    # workflow
    for path in os.listdir(inpath):
        filename = os.path.basename(path)
        if path.split(os.extsep, 1)[0] == 'curbramps':
            shutil.copy2(os.path.join(inpath, path),
                         os.path.join(BASE, city, 'clean', filename))
def test_overlay_nybb(how):
    polydf = read_file(geopandas.datasets.get_path('nybb'))

    # construct circles dataframe
    N = 10
    b = [int(x) for x in polydf.total_bounds]
    polydf2 = GeoDataFrame(
        [{'geometry': Point(x, y).buffer(10000), 'value1': x + y,
          'value2': x - y}
         for x, y in zip(range(b[0], b[2], int((b[2] - b[0]) / N)),
                         range(b[1], b[3], int((b[3] - b[1]) / N)))],
        crs=polydf.crs)

    result = overlay(polydf, polydf2, how=how)

    cols = ['BoroCode', 'BoroName', 'Shape_Leng', 'Shape_Area',
            'value1', 'value2']
    if how == 'difference':
        cols = cols[:-2]

    # expected result
    if how == 'identity':
        # read union one, further down below we take the appropriate subset
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-union.shp'))
    else:
        expected = read_file(os.path.join(
            DATA, 'nybb_qgis', 'qgis-{0}.shp'.format(how)))

    # The result of QGIS for 'union' contains incorrect geometries:
    # 24 is a full original circle overlapping with unioned geometries, and
    # 27 is a completely duplicated row
    if how == 'union':
        expected = expected.drop([24, 27])
        expected.reset_index(inplace=True, drop=True)
    # Eliminate observations without geometries (issue from QGIS)
    expected = expected[expected.is_valid]
    expected.reset_index(inplace=True, drop=True)

    if how == 'identity':
        expected = expected[expected.BoroCode.notnull()].copy()

    # Order GeoDataFrames
    expected = expected.sort_values(cols).reset_index(drop=True)

    # TODO needed adaptations to result
    result = result.sort_values(cols).reset_index(drop=True)

    if how in ('union', 'identity'):
        # concat < 0.23 sorts, so changes the order of the columns
        # but at least we ensure 'geometry' is the last column
        assert result.columns[-1] == 'geometry'
        assert len(result.columns) == len(expected.columns)
        result = result.reindex(columns=expected.columns)

    assert_geodataframe_equal(result, expected, check_crs=False,
                              check_column_type=False)
def setUp(self):
    path_to_shp = examples.get_path('streets.shp')
    gdf = geopandas.read_file(path_to_shp)
    self.ntw = network.Network(in_data=gdf)
    for obs in ['schools', 'crimes']:
        path_to_shp = examples.get_path('{}.shp'.format(obs))
        in_data = geopandas.read_file(path_to_shp)
        self.ntw.snapobservations(in_data, obs, attribute=True)
        setattr(self, obs, self.ntw.pointpatterns[obs])
def test_read_paths(self):
    gdf = read_file(get_path('naturalearth_lowres'))
    assert isinstance(gdf, GeoDataFrame)
    gdf = read_file(get_path('naturalearth_cities'))
    assert isinstance(gdf, GeoDataFrame)
    gdf = read_file(get_path('nybb'))
    assert isinstance(gdf, GeoDataFrame)
def setUp(self):
    path_to_shp = examples.get_path('streets.shp')
    gdf = geopandas.read_file(path_to_shp)
    self.ntw = spgh.Network(in_data=gdf)
    pt_str = 'crimes'
    path_to_shp = examples.get_path('{}.shp'.format(pt_str))
    in_data = geopandas.read_file(path_to_shp)
    self.ntw.snapobservations(in_data, pt_str, attribute=True)
    npts = self.ntw.pointpatterns['crimes'].npoints
    self.ntw.simulate_observations(npts)
def exportBaltimore():
    # Separates out just the Baltimore data from the data
    regionsdf = gpd.read_file('dc-baltimore_maryland_admin.geojson')
    # baltimore shape in index 18
    baltimoreBD = regionsdf.values[18, :]
    baltimoreBD[1]
    df = gpd.read_file('dc-baltimore_maryland_roads.geojson')
    df = df[df.geometry.within(baltimoreBD[1])]
    with open('baltimore.geojson', 'w') as f:
        f.write(df.to_json())
def setup(self, *args):
    world = read_file(datasets.get_path('naturalearth_lowres'))
    capitals = read_file(datasets.get_path('naturalearth_cities'))
    countries = world[['geometry', 'name']]
    countries = countries.to_crs('+init=epsg:3395')[
        countries.name != "Antarctica"]
    capitals = capitals.to_crs('+init=epsg:3395')
    capitals['geometry'] = capitals.buffer(500000)

    self.countries = countries
    self.capitals = capitals
def setup_method(self):
    N = 10
    nybb_filename = geopandas.datasets.get_path('nybb')

    self.df = read_file(nybb_filename)
    self.tempdir = tempfile.mkdtemp()
    self.crs = {'init': 'epsg:4326'}
    self.df2 = GeoDataFrame([
        {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
        for x, y in zip(range(N), range(N))], crs=self.crs)
    self.df3 = read_file(
        os.path.join(PACKAGE_DIR, 'examples', 'null_geom.geojson'))
def setUp(self):
    N = 10

    nybb_filename = download_nybb()

    self.df = read_file('/nybb_14a_av/nybb.shp',
                        vfs='zip://' + nybb_filename)
    self.tempdir = tempfile.mkdtemp()
    self.boros = self.df['BoroName']
    self.crs = {'init': 'epsg:4326'}
    self.df2 = GeoDataFrame([
        {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
        for x, y in zip(range(N), range(N))], crs=self.crs)
    self.df3 = read_file('examples/null_geom.geojson')
    self.line_paths = self.df3['Name']
def test_overlay_overlap(how):
    """
    Overlay test with overlapping geometries in both dataframes.

    Test files are created with::

        import geopandas
        from geopandas import GeoSeries, GeoDataFrame
        from shapely.geometry import Point, Polygon, LineString

        s1 = GeoSeries([Point(0, 0), Point(1.5, 0)]).buffer(1, resolution=2)
        s2 = GeoSeries([Point(1, 1), Point(2, 2)]).buffer(1, resolution=2)

        df1 = GeoDataFrame({'geometry': s1, 'col1': [1, 2]})
        df2 = GeoDataFrame({'geometry': s2, 'col2': [1, 2]})

        ax = df1.plot(alpha=0.5)
        df2.plot(alpha=0.5, ax=ax, color='C1')

        df1.to_file('geopandas/geopandas/tests/data/df1_overlap.geojson',
                    driver='GeoJSON')
        df2.to_file('geopandas/geopandas/tests/data/df2_overlap.geojson',
                    driver='GeoJSON')

    and then overlay results are obtained from using QGIS 2.16
    (Vector -> Geoprocessing Tools -> Intersection / Union / ...),
    saved to GeoJSON.
    """
    df1 = read_file(os.path.join(DATA, 'overlap', 'df1_overlap.geojson'))
    df2 = read_file(os.path.join(DATA, 'overlap', 'df2_overlap.geojson'))

    result = overlay(df1, df2, how=how)

    if how == 'identity':
        raise pytest.skip()

    expected = read_file(os.path.join(
        DATA, 'overlap', 'df1_df2_overlap-{0}.geojson'.format(how)))

    if how == 'union':
        # the QGIS result has the last row duplicated, so removing this
        expected = expected.iloc[:-1]

    # TODO needed adaptations to result
    result = result.reset_index(drop=True)
    if how == 'union':
        result = result.sort_values(['col1', 'col2']).reset_index(drop=True)

    assert_geodataframe_equal(result, expected, check_column_type=False,
                              check_less_precise=True)
def combineResults(input_file_list, outputFileName):
    # Read files in
    for file in input_file_list:
        if "2017" in file:
            data17 = gpd.read_file(file)
        elif "2013" in file:
            data13 = gpd.read_file(file)
        elif "2009" in file:
            data09 = gpd.read_file(file)

    # Drop duplicate geometries
    data13 = data13.drop(labels='geometry', axis=1)
    data17 = data17.drop(labels='geometry', axis=1)

    # Join datasets
    join09_13 = data09.merge(data13, how='inner', left_on='YKR_ID',
                             right_on='from_id')
    join09_13.drop(labels=['from_id', 'to_id'], inplace=True, axis=1)
    full_join = join09_13.merge(data17, how='inner', left_on='YKR_ID',
                                right_on='from_id')

    # Rename columns
    full_join.columns = [u'Asuk09', u'YKR_ID', u'geometry', u'maxT09',
                         u'meanT09', u'minT09', u'Asuk13', u'Car_D13',
                         u'Car_T13', u'PT_D13', u'PT_T13', u'PT_ToT13',
                         u'PT_D17', u'PT_T17', u'PT_ToT17', u'from_id',
                         u'Asuk17', u'to_id']

    # Choose and reorder
    join = full_join[[u'from_id', u'to_id', u'maxT09', u'meanT09', u'minT09',
                      u'Asuk09', u'PT_T13', u'PT_ToT13', u'PT_D13',
                      u'Car_T13', u'Car_D13', u'Asuk13', u'PT_T17',
                      u'PT_ToT17', u'PT_D17', u'Asuk17', u'geometry']]

    # Set -1 values to NaN
    join = join.replace(to_replace={'PT_T13': {-1: np.nan}})

    # Drop NaNs
    join = join.dropna()

    # Calculate accessibility differences
    join['Dif09_13'] = None
    join['Dif09_17'] = None
    join['Dif13_17'] = None

    join['Dif09_13'] = join['meanT09'] - join['PT_T13']
    join['Dif09_17'] = join['meanT09'] - join['PT_T17']
    join['Dif13_17'] = join['PT_T13'] - join['PT_T17']

    # Save output
    folder = os.path.dirname(input_file_list[0])
    outfile = os.path.join(folder, outputFileName)
    join.to_file(outfile, driver="ESRI Shapefile")
    print outfile
def readShpFile(self, input_dir, fname):
    # need to make sure that cols are lower
    shp = gpd.read_file(input_dir + fname)
    cols = shp.columns.values.tolist()
    colsLower = [col.lower() for col in cols]
    shp.rename(columns=dict(zip(cols, colsLower)), inplace=True)
    return shp
def calculateCarCO2emissions(self, src_file, time, car_co2_emission=171):
    # Read data into GeoDataFrame
    print("Reading: %s" % os.path.basename(src_file))
    data = gpd.read_file(src_file)

    # Calculate the driven distance (in meters) and total CO2 emissions
    # (in grams, using the emission factor in grams per kilometer) from car usage
    print("Calculating the driven distance and CO2 emissions")
    data['distDriven'] = data['Pituus_Ajo'] + data['Pituus_P_E']
    data['co2FromCar'] = (data['distDriven'] / 1000.0) * car_co2_emission

    # Select columns
    slct_cols = ['from_id', 'to_id', 'Pituus_TOT', 'distDriven', 'co2FromCar']
    outdata = data[slct_cols]

    # Create output file
    outname = "%s_CO2_emissions.txt" % os.path.basename(src_file).split('.')[0]
    if time in ['08', '8', 'r']:
        outfp = os.path.join(self.car_r_co2_dir, outname)
    else:
        outfp = os.path.join(self.car_m_co2_dir, outname)

    # Save CO2 data to disk
    print("Saving the CO2 emissions to: %s" % outfp)
    outdata.to_csv(outfp, sep=';', index=False)

    # Return the output path
    return outfp
def _rasterize_subdomains(self):
    '''
    rasterize a subdomains shapefile to the extent and resolution of
    a template raster file. The two must be in the same reference system
    or there will be potential issues.

    returns:
        numpy.ndarray with the shape of the input raster and the shapefile
        polygons burned in with the values of the id_field of the shapefile

    gotchas:
        currently the only supported data type is uint8 and all float values
        will be coerced to integer for this purpose. Another issue is that if
        there is a value greater than 255, there could be some error-type
        issues. This is something that the user needs to know for the
        time-being and will be fixed in subsequent versions of rasterio.
        Then I can add the needed changes here.
    '''
    import geopandas as gpd
    import numpy as np

    gdf = gpd.read_file(self.subdomains_fn)
    id_groups = gdf.groupby(self.id_field)  # iterator of tuples (id, gdf slice)

    out_shape = self.rasterio_raster.height, self.rasterio_raster.width
    out_transform = self.rasterio_raster.affine

    arr_list = [self._rasterize_id(df, value, out_shape, out_transform,
                                   background_value=self.background_value)
                for value, df in id_groups]
    self.sub_domains = arr_list
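# The _rasterize_id helper called above is not shown in this snippet. Below is
# a minimal sketch of what such a helper might look like, assuming it burns one
# id-group's polygons into a uint8 array with rasterio.features.rasterize; the
# standalone signature and fill handling are illustrative assumptions, not the
# original implementation.
from rasterio import features


def _rasterize_id(df, value, out_shape, out_transform, background_value=0):
    # pair each geometry in this group with the (integer-coerced) id to burn in
    shapes = ((geom, int(value)) for geom in df.geometry)
    # cells not covered by any polygon keep the background value
    return features.rasterize(shapes,
                              out_shape=out_shape,
                              transform=out_transform,
                              fill=background_value,
                              dtype='uint8')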
def clean_boundary_shapefile(shapefile_path):
    """
    Cleans the boundary shapefile so that there is only one main polygon.

    :param shapefile_path:
    :return:
    """
    wfg = gpd.read_file(shapefile_path)
    first_shape = wfg.iloc[0].geometry
    if hasattr(first_shape, 'geoms'):
        log.warning("MultiPolygon found in boundary. "
                    "Picking largest area ...")
        # pick largest shape to be the watershed boundary
        # and assume the other ones are islands to be removed
        max_area = -9999.0
        main_geom = None
        for geom in first_shape.geoms:
            if geom.area > max_area:
                main_geom = geom
                max_area = geom.area

        # remove self intersections
        if not main_geom.is_valid:
            log.warning("Invalid geometry found in boundary. "
                        "Attempting to self clean ...")
            main_geom = main_geom.buffer(0)

        wfg.loc[0, 'geometry'] = main_geom
        out_cleaned_boundary_shapefile = \
            os.path.splitext(shapefile_path)[0] + \
            str(uuid.uuid4()) + \
            '.shp'
        wfg.to_file(out_cleaned_boundary_shapefile)
        log.info("Cleaned boundary shapefile written to: "
                 "{}".format(out_cleaned_boundary_shapefile))
        return out_cleaned_boundary_shapefile
    return shapefile_path
def read(self, format=None, epsg=None):
    """
    Read vector data from a file (JSON, Shapefile, etc)

    :param format: Format to return data in (default is GeoDataFrame)
    :param epsg: EPSG code to reproject data to
    :return: Data in requested format (GeoDataFrame, GeoJSON)
    """
    if not format:
        format = self.default_output
    if self.ext not in formats.VECTOR:
        raise UnsupportedFormatException(
            "Only the following vector formats are supported: {}".format(
                ','.join(formats.VECTOR)
            )
        )
    if self.data is None:
        self.data = geopandas.read_file(self.uri)
        if self.filters:
            self.filter_data()
    out_data = self.data
    if epsg and self.get_epsg() != epsg:
        out_data = geopandas.GeoDataFrame.copy(out_data)
        out_data[out_data.geometry.name] = \
            self.data.geometry.to_crs(epsg=epsg)
        out_data.crs = fiona.crs.from_epsg(epsg)
    if format == formats.JSON:
        return out_data.to_json()
    else:
        return out_data
def setUp(self):
    nybb_filename = tests.util.download_nybb()
    path = '/nybb_13a/nybb.shp'
    vfs = 'zip://' + nybb_filename
    self.df = read_file(path, vfs=vfs)
    with fiona.open(path, vfs=vfs) as f:
        self.crs = f.crs
def setUp(self):
    answers = gpd.read_file(DIRPATH + '/answers.geojson')
    tests = pd.read_json(DIRPATH + '/tests.json')
    hard_tests = pd.read_json(DIRPATH + '/degenerate.json')
    self.all = answers.merge(tests, on='names').merge(hard_tests, on='names')
    self.conn = Connection('DECENNIALSF12010')
    self.conn.set_mapservice('tigerWMS_Census2010')
def setup_method(self):
    nybb_filename = geopandas.datasets.get_path('nybb')
    self.polydf = read_file(nybb_filename)
    self.polydf = self.polydf[['geometry', 'BoroName', 'BoroCode']]

    self.polydf = self.polydf.rename(columns={'geometry': 'myshapes'})
    self.polydf = self.polydf.set_geometry('myshapes')

    self.polydf['manhattan_bronx'] = 5
    self.polydf.loc[3:4, 'manhattan_bronx'] = 6

    # Merged geometry
    manhattan_bronx = self.polydf.loc[3:4, ]
    others = self.polydf.loc[0:2, ]

    collapsed = [others.geometry.unary_union,
                 manhattan_bronx.geometry.unary_union]
    merged_shapes = GeoDataFrame(
        {'myshapes': collapsed}, geometry='myshapes',
        index=pd.Index([5, 6], name='manhattan_bronx'))

    # Different expected results
    self.first = merged_shapes.copy()
    self.first['BoroName'] = ['Staten Island', 'Manhattan']
    self.first['BoroCode'] = [5, 1]

    self.mean = merged_shapes.copy()
    self.mean['BoroCode'] = [4, 1.5]
def setUp(self):
    N = 10

    nybb_filename, nybb_zip_path = download_nybb()

    self.df = read_file(nybb_zip_path, vfs='zip://' + nybb_filename)
    with fiona.open(nybb_zip_path, vfs='zip://' + nybb_filename) as f:
        self.schema = f.schema
    self.tempdir = tempfile.mkdtemp()
    self.boros = self.df['BoroName']
    self.crs = {'init': 'epsg:4326'}
    self.df2 = GeoDataFrame([
        {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
        for x, y in zip(range(N), range(N))], crs=self.crs)
    self.df3 = read_file(os.path.join(PACKAGE_DIR, 'examples',
                                      'null_geom.geojson'))
    self.line_paths = self.df3['Name']
def setUp(self):
    nybb_filename, nybb_zip_path = download_nybb()
    vfs = 'zip://' + nybb_filename
    self.df = read_file(nybb_zip_path, vfs=vfs)
    with fiona.open(nybb_zip_path, vfs=vfs) as f:
        self.crs = f.crs
        self.columns = list(f.meta["schema"]["properties"].keys())
def setUp(self):
    nybb_filename = tests.util.download_nybb()
    path = "/nybb_14a_av/nybb.shp"
    vfs = "zip://" + nybb_filename
    self.df = read_file(path, vfs=vfs)
    with fiona.open(path, vfs=vfs) as f:
        self.crs = f.crs
def assign_taxi_zones(df, lon_var, lat_var, locid_var): """Joins DataFrame with Taxi Zones shapefile. This function takes longitude values provided by `lon_var`, and latitude values provided by `lat_var` in DataFrame `df`, and performs a spatial join with the NYC taxi_zones shapefile. The shapefile is hard coded in, as this function makes a hard assumption of latitude and longitude coordinates. It also assumes latitude=0 and longitude=0 is not a datapoint that can exist in your dataset. Which is reasonable for a dataset of New York, but bad for a global dataset. Only rows where `df.lon_var`, `df.lat_var` are reasonably near New York, and `df.locid_var` is set to np.nan are updated. Parameters ---------- df : pandas.DataFrame or dask.DataFrame DataFrame containing latitudes, longitudes, and location_id columns. lon_var : string Name of column in `df` containing longitude values. Invalid values should be np.nan. lat_var : string Name of column in `df` containing latitude values. Invalid values should be np.nan locid_var : string Name of column in `df` containing taxi_zone location ids. Rows with valid, nonzero values are not overwritten. """ import geopandas from shapely.geometry import Point localdf = df[[lon_var, lat_var, locid_var]].copy() # localdf = localdf.reset_index() localdf[lon_var] = localdf[lon_var].fillna(value=0.) localdf[lat_var] = localdf[lat_var].fillna(value=0.) localdf['replace_locid'] = (localdf[locid_var].isnull() & (localdf[lon_var] != 0.) & (localdf[lat_var] != 0.)) if (np.any(localdf['replace_locid'])): shape_df = geopandas.read_file('../shapefiles/taxi_zones.shp') shape_df.drop(['OBJECTID', "Shape_Area", "Shape_Leng", "borough", "zone"], axis=1, inplace=True) shape_df = shape_df.to_crs({'init': 'epsg:4326'}) try: local_gdf = geopandas.GeoDataFrame( localdf, crs={'init': 'epsg:4326'}, geometry=[Point(xy) for xy in zip(localdf[lon_var], localdf[lat_var])]) local_gdf = geopandas.sjoin( local_gdf, shape_df, how='left', op='within') return local_gdf.LocationID.rename(locid_var) except ValueError as ve: print(ve) print(ve.stacktrace()) return df[locid_var] else: return df[locid_var]
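# A minimal usage sketch for assign_taxi_zones above. The trip-column names and
# coordinates here are illustrative assumptions; only the function signature and
# its "(0, 0) coordinates are skipped" behaviour come from the code above.
import numpy as np
import pandas as pd

trips = pd.DataFrame({
    'pickup_longitude': [-73.985, 0.0],
    'pickup_latitude': [40.748, 0.0],
    'pickup_locid': [np.nan, np.nan],
})
# the first row gets a taxi-zone id from the spatial join; the second row has
# (0, 0) coordinates, falls outside every zone, and so comes back as NaN
trips['pickup_locid'] = assign_taxi_zones(
    trips, 'pickup_longitude', 'pickup_latitude', 'pickup_locid')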
def test_timedynamic_geo_json(): """ tests folium.plugins.TimeSliderChoropleth """ import geopandas as gpd assert 'naturalearth_lowres' in gpd.datasets.available datapath = gpd.datasets.get_path('naturalearth_lowres') gdf = gpd.read_file(datapath) n_periods = 3 dt_index = pd.date_range('2016-1-1', periods=n_periods, freq='M').strftime('%s') styledata = {} for country in gdf.index: pdf = pd.DataFrame( {'color': np.random.normal(size=n_periods), 'opacity': np.random.normal(size=n_periods)}, index=dt_index) styledata[country] = pdf.cumsum() max_color, min_color = 0, 0 for country, data in styledata.items(): max_color = max(max_color, data['color'].max()) min_color = min(max_color, data['color'].min()) cmap = linear.PuRd_09.scale(min_color, max_color) # Define function to normalize column into range [0,1] def norm(col): return (col - col.min())/(col.max()-col.min()) for country, data in styledata.items(): data['color'] = data['color'].apply(cmap) data['opacity'] = norm(data['opacity']) styledict = {str(country): data.to_dict(orient='index') for country, data in styledata.items()} m = folium.Map((0, 0), tiles='Stamen Watercolor', zoom_start=2) time_slider_choropleth = TimeSliderChoropleth( gdf.to_json(), styledict ) time_slider_choropleth.add_to(m) rendered = time_slider_choropleth._template.module.script(time_slider_choropleth) m._repr_html_() out = m._parent.render() assert '<script src="https://d3js.org/d3.v4.min.js"></script>' in out # We verify that data has been inserted correctly expected_timestamps = """var timestamps = ["1454198400", "1456704000", "1459382400"];""" # noqa assert expected_timestamps.split(';')[0].strip() == rendered.split(';')[0].strip() expected_styledict = json.dumps(styledict, sort_keys=True, indent=2) assert expected_styledict in rendered
def setup_method(self):
    N = 10
    nybb_filename = geopandas.datasets.get_path('nybb')

    self.polydf = read_file(nybb_filename)
    self.crs = {'init': 'epsg:4326'}
    b = [int(x) for x in self.polydf.total_bounds]
    self.polydf2 = GeoDataFrame(
        [{'geometry': Point(x, y).buffer(10000), 'value1': x + y,
          'value2': x - y}
         for x, y in zip(range(b[0], b[2], int((b[2] - b[0]) / N)),
                         range(b[1], b[3], int((b[3] - b[1]) / N)))],
        crs=self.crs)
    self.pointdf = GeoDataFrame(
        [{'geometry': Point(x, y), 'value1': x + y, 'value2': x - y}
         for x, y in zip(range(b[0], b[2], int((b[2] - b[0]) / N)),
                         range(b[1], b[3], int((b[3] - b[1]) / N)))],
        crs=self.crs)

    # TODO this appears to be necessary;
    # why is the sindex not generated automatically?
    self.polydf2._generate_sindex()

    self.union_shape = (180, 7)
def __init__(self, img_rds, depth_rds, sand_shp, gdf_query=None,
             depth_range=None, surface_refraction=False,
             surface_reflectance=False):
    self.surf_reflectance = surface_reflectance
    self.surf_refraction = surface_refraction
    self.depth_range = depth_range
    if type(img_rds).__name__ == 'RasterDS':
        self.img_rds = img_rds
    else:
        self.img_rds = RasterDS(img_rds)
    if type(depth_rds).__name__ == 'RasterDS':
        self.depth_rds = depth_rds
    else:
        self.depth_rds = RasterDS(depth_rds)
    if type(sand_shp).__name__ == 'GeoDataFrame':
        self.gdf = sand_shp
    else:
        self.gdf = gpd.read_file(sand_shp)
    self.gdf_query = gdf_query
    # self.full_image_array = self.img_rds.band_array
    self._set_arrays()
def test_mapclassify_bin():
    # data
    link_to_data = examples.get_path('columbus.shp')
    gdf = gpd.read_file(link_to_data)
    x = gdf['HOVAL'].values
    # quantiles
    mapclassify_bin(x, 'quantiles')
    mapclassify_bin(x, 'quantiles', k=3)
    # box_plot
    mapclassify_bin(x, 'box_plot')
    mapclassify_bin(x, 'box_plot', hinge=2)
    # headtail_breaks
    mapclassify_bin(x, 'headtail_breaks')
    # percentiles
    mapclassify_bin(x, 'percentiles')
    mapclassify_bin(x, 'percentiles', pct=[25, 50, 75, 100])
    # std_mean
    mapclassify_bin(x, 'std_mean')
    mapclassify_bin(x, 'std_mean', multiples=[-1, -0.5, 0.5, 1])
    # maximum_breaks
    mapclassify_bin(x, 'maximum_breaks')
    mapclassify_bin(x, 'maximum_breaks', k=3, mindiff=0.1)
    # natural_breaks, max_p_classifier
    mapclassify_bin(x, 'natural_breaks')
    mapclassify_bin(x, 'max_p_classifier', k=3, initial=50)
    # user_defined
    mapclassify_bin(x, 'user_defined', bins=[20, max(x)])
def prepare_edge(edge_shapefile, building_shapefile):
    """Create edge graph with grouped building demands.
    """
    # load buildings and sum by type and nearest edge ID
    # 1. read shapefile to DataFrame (with special geometry column)
    # 2. group DataFrame by columns 'nearest' (ID of nearest edge) and 'type'
    #    (residential, commercial, industrial, other)
    # 3. sum by group and unstack, i.e. convert secondary index 'type' to columns
    buildings = geopandas.read_file(building_shapefile + '.shp')
    buildings = buildings.convert_objects(convert_numeric=True)
    building_type_mapping = {
        'basin': 'other',
        'chapel': 'other',
        'church': 'other',
        'farm_auxiliary': 'other',
        'greenhouse': 'other',
        'school': 'public',
        'office': 'commercial',
        'restaurant': 'commercial',
        'yes': 'residential',
        'house': 'residential'}
    buildings.replace(to_replace={'type': building_type_mapping},
                      inplace=True)
    buildings = buildings.to_crs(epsg=32632)
    buildings['AREA'] = buildings.area
    buildings_grouped = buildings.groupby(['nearest', 'type'])
    total_area = buildings_grouped.sum()['AREA'].unstack()

    # load edges (streets) and join with summed areas
    # 1. read shapefile to DataFrame (with geometry column)
    # 2. join DataFrame total_area on index (=ID)
    # 3. fill missing values with 0
    edge = pdshp.read_shp(edge_shapefile)
    edge = edge.set_index('Edge')
    edge = edge.join(total_area)
    edge = edge.fillna(0)
    return edge
def union_and_filter(input_dir, output_dir, countries):
    # make dir to hold unioned and dissolved shapefiles
    rm_and_mkdir(output_dir)

    for country in countries:
        print country

        # specify io paths
        input_filename = country + '.shp'
        input_path = os.path.join(input_dir, input_filename)
        output_path = os.path.join(output_dir, input_filename)

        # load country shapefile
        gdf_country = gpd.read_file(input_path)
        gdf_country.rename(columns={'country': 'val', 'val': 'country'},
                           inplace=True)

        # filter out low pixel values
        thresh = 25
        gdf_country = gdf_country[gdf_country['val'] >= thresh]

        # union resulting geometries, assign crs, write to temp file
        polys = gdf_country.geometry
        poly = polys.unary_union
        poly_country = [country, poly]
        gdf_poly_country = gpd.GeoDataFrame(poly_country).T.rename(
            columns={0: 'country', 1: 'geometry'})
        gdf_poly_country.crs = {'init': 'epsg:4326', 'no_defs': True}
        try:
            gdf_poly_country.to_file(output_path)
        except:
            print 'No polygon values greater than thresh'
def test_vba_choropleth():
    # data
    link_to_data = examples.get_path('columbus.shp')
    gdf = gpd.read_file(link_to_data)
    x = gdf['HOVAL'].values
    y = gdf['CRIME'].values

    # plot
    fig, _ = vba_choropleth(x, y, gdf)
    plt.close(fig)

    # plot with divergent and reverted alpha
    fig, _ = vba_choropleth(x, y, gdf, cmap='RdBu',
                            divergent=True, revert_alpha=True)
    plt.close(fig)

    # plot with classified alpha and rgb
    fig, _ = vba_choropleth(x, y, gdf, cmap='RdBu',
                            alpha_mapclassify=dict(classifier='quantiles'),
                            rgb_mapclassify=dict(classifier='quantiles'))
    plt.close(fig)

    # plot classified with legend
    fig, _ = vba_choropleth(x, y, gdf,
                            alpha_mapclassify=dict(classifier='std_mean'),
                            rgb_mapclassify=dict(classifier='std_mean'),
                            legend=True)
    plt.close(fig)
def spectral_unmixing_main(args, img_src, endmember_array, unmixing_method): # Find the number of regional clusters for the area of interest n_regional_clusters = return_nclusters(args) img_meta = img_src.meta # Read in polygons file polygons_file = os.path.join( args.base_dir, 'saved_rainfall_regions', 'clean_regions', '{}_rainfall_regions_nclusters_{}_clean.shp'.format( args.unmixing_region, n_regional_clusters)) region_polygons = gpd.read_file(polygons_file).to_crs(img_src.meta['crs']) # Reorder maps and endmembers endmember_array = np.transpose(np.array(endmember_array)) print('Cropping image and spectral unmixing, starting timer') t = time.time() for region in range(n_regional_clusters): # Crop image to the regional clusters cropped_img, cropped_transform = mask( img_src, [region_polygons['geometry'].iloc[region]], crop=True) evi_img = np.moveaxis(cropped_img, 0, -1) abundance_map = np.zeros( (evi_img.shape[0], evi_img.shape[1], 4)) * np.nan nonzero_indices = np.mean(evi_img, axis=-1) != 0 evi_img_nonzero = evi_img[nonzero_indices] # Set up an unmixing modeling instance if unmixing_method == 'ucls': amap = UCLS elif unmixing_method == 'fcls': amap = FCLS elif unmixing_method == 'nnls': amap = NNLS # Select and normalize endmembers regional_endmembers = endmember_array[region * 3:(region + 1) * 3] for i in range(2): regional_endmembers[i] = normalize(regional_endmembers[i]) print('Unmixing for region {}'.format(region)) abundance_map[nonzero_indices] = flattened_image_unmixing( amap, evi_img_nonzero, regional_endmembers) abundance_map = np.moveaxis(abundance_map, -1, 0).astype(np.float32) out_file_path = os.path.join( args.base_dir, 'abundance_maps', args.unmixing_region, 'regional_maps', '{}_abundancemap_modis_250m_{}_unmixingmethod_automatic_tEMs_' 'outphasetype_{}_region_{}.tif'.format( args.unmixing_region, args.unmixing_method, args.outphase_endmember_type, region)) if os.path.exists(out_file_path): os.remove(out_file_path) # Update metadata img_meta['count'] = 4 img_meta['dtype'] = 'float32' img_meta['nodata'] = 'nan' img_meta['transform'] = cropped_transform img_meta['height'] = abundance_map.shape[1] img_meta['width'] = abundance_map.shape[2] # Write out regional abundance map with rasterio.open(out_file_path, 'w+', **img_meta) as dest: dest.write(abundance_map) elapsed = (time.time() - t) print('Elapsed time for abundance map creation: {}s'.format(elapsed)) # Merge the abundance maps into a mosaic merge_regional_abundance_maps(args)
def add_columns_from_shapefile(self, shapefile_path, columns=None,
                               id_column=None):
    df = geopandas.read_file(shapefile_path)
    self.add_data_from_df(df, columns, id_column)
import geopandas
import pandas as pd
#import shapely
#from shapely.geometry import Point, LineString, MultiPolygon, Polygon
import pyproj
#import matplotlib
import matplotlib.pyplot as plt
import numpy as np
#import os
#os.chdir('Code/mapproj')
import mapproj

geod = pyproj.Geod(a=6371, f=0)
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))

a = np.arctan(1 / 2) / np.pi * 180
actrlpts3 = np.array([[15 + 0, 15 + 36, 15 - 36],
                      [-a, a, a]])
ctrlpoly3 = mapproj.geodesics(actrlpts3[0], actrlpts3[1], geod,
                              includepts=True)

a = 180 / np.pi * np.arctan(1 / np.sqrt(2))
actrlpts4 = np.array([[0, 90, 90, 0],
                      [-a, -a, a, a]])
ctrlpoly4 = mapproj.geodesics(actrlpts4[0], actrlpts4[1], geod,
                              includepts=True)

antipodepoly3 = mapproj.transeach(mapproj.transform_antipode, ctrlpoly3)
antipodepoly4 = mapproj.transeach(mapproj.transform_antipode, ctrlpoly4)
import geopandas
import pandas as pd

if __name__ == '__main__':
    file = '/home/elvis/map/analize/analizeCross/countXEnt_new.shp'
    od_group_file = '/home/elvis/map/analize/analizeCross/count_od_group.shp'

    geo_df = geopandas.read_file(file)
    pd_df = pd.DataFrame(geo_df)

    geo_df = geopandas.read_file(od_group_file)
    od_group_df = pd.DataFrame(geo_df)

    od_group_df.score = od_group_df.score * pd_df.ENTROPY
    od_group_df = od_group_df[od_group_df.score > 0]
    od_group_df = od_group_df.sort_values(by=['score'], ascending=False)
    od_group_df = od_group_df.reset_index(drop=True)
    od_group_df['rank'] = od_group_df.index

    path = '/home/elvis/map/analize/analizeCross/od_group*entropy_v4.shp'
    cross_scores = geopandas.GeoDataFrame(od_group_df, geometry='geometry')
    cross_scores.to_file(path)
username = "******" #please update with a correct user raw_data_location = f"Users/{username}/geospatial/workshop/data/raw/" dbutils.fs.mkdirs(f"dbfs:/{raw_data_location}/") dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.prj", f"dbfs:/{raw_data_location}/neighbourhoods.prj") dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.shx", f"dbfs:/{raw_data_location}/neighbourhoods.shx") dbutils.fs.cp("file:/tmp/neighbourhoods/nynta2020_21d/nynta2020.shp", f"dbfs:/{raw_data_location}/neighbourhoods.shp") # COMMAND ---------- # DBTITLE 1,Load Data to geopandas import geopandas as gpd neighborhoods_pd = gpd.read_file( f"/dbfs/{raw_data_location}/neighbourhoods.shp") display(neighborhoods_pd.head()) # COMMAND ---------- # MAGIC %md # MAGIC In order to prepare our polygon data we will be using two set representation via h3 decompositon. </br> # MAGIC The only difference to what we discussed in the notebook 01 is that we will be using WKB representation for the chips. </br> # MAGIC Binary representation of polygons will result in more optimal runtime handling of chips. </br> # MAGIC In addition we will be returning a single array of structs instead of separate collections for core and border chips. </br> # MAGIC This will simplify our join logic. # COMMAND ---------- # DBTITLE 1,H3 Two Set Representation import shapely
import geopandas as gpd
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

points = gpd.read_file(
    'hotosm_gha_points_of_interest_points_shp/hotosm_gha_points_of_interest_points.shp'
)


def ckdnearest(gdA, gdB, k):
    """Quickly find the k nearest points close to another

    :param gdA: geopandas dataframe A
    :param gdB: geopandas dataframe B
    :param k: number of nearest neighbours to find
    :returns: joined geopandas dataframe with distance computed in the
              last column
    """
    nA = np.array(list(zip(gdA.geometry.x, gdA.geometry.y)))
    nB = np.array(list(zip(gdB.geometry.x, gdB.geometry.y)))
    btree = cKDTree(nB)
    dist, idx = btree.query(nA, k)
    dd = []
    # when idx is a 1d array
    try:
def get_data(config_file): ########## Inputs ########## # read in config file config = configparser.ConfigParser() config.read(config_file) # shapefile and unique ID info shapefile = config['shapefile']['fname'] geoid = config['shapefile']['geoid'] # get column names from votes and districtings to read cols = [] columns = config['columns'] for key in columns: cols.append(columns[key]) # optional demographic data if 'demographics' in config: for key in config['demographics']: cols.append(config['demographics'][key]) # make dual graph dual_graph = construct_graph_from_file(shapefile, geoid, cols) num_nodes = dual_graph.number_of_nodes() #list of vectors data_vectors = [] #list of vector indexed by nodes data_vectors_att = [] # streamline the process for key in columns: vector = np.zeros((num_nodes,1)) vector_att = nx.get_node_attributes(dual_graph, columns[key]) data_vectors.append(vector) data_vectors_att.append(vector_att) # get position data for drawing nodes at centroids df_vtd = gpd.read_file(shapefile) vtd_centroids = df_vtd.centroid vtd_x = vtd_centroids.x vtd_y = vtd_centroids.y inverse = {} sf = shp.Reader(shapefile) for i in range(len(sf.fields)): if sf.fields[i][0] == geoid: idx = i-1 break records = sf.records() for i in range(len(records)): inverse[records[i][idx]] = i # assign attributes in order of nodes to match adjacency matrix for i in range(len(data_vectors)): count = 0 for node in dual_graph.nodes(): data_vectors[i][count] = data_vectors_att[i][node] count += 1 pos = {} for node in dual_graph.nodes(): pos[node] = (vtd_x[inverse[node]], vtd_y[inverse[node]]) node_size = [(data_vectors[1][i] + data_vectors[2][i])/500 for i in range(dual_graph.number_of_nodes())] return dual_graph, data_vectors, pos, node_size
def add_columns_from_shapefile(self, shapefile, columns=None,
                               id_column=None):
    df = geopandas.read_file(shapefile)
    return self.add_columns_from_df(df, columns, id_column)
def main(): config = load_config() hazard_cols = ['hazard_type', 'climate_scenario', 'year'] duration = 10 hazard_set = [{ 'hazard': 'fluvial flooding', 'name': 'Fluvial flooding' }, { 'hazard': 'pluvial flooding', 'name': 'Pluvial flooding' }] change_colors = [ '#1a9850', '#66bd63', '#a6d96a', '#d9ef8b', '#fee08b', '#fdae61', '#f46d43', '#d73027', '#969696' ] change_labels = [ '< -100', '-100 to -50', '-50 to -10', '-10 to 0', '0 to 10', '10 to 50', '50 to 100', ' > 100', 'No change/value' ] change_ranges = [(-1e10, -100), (-100, -50), (-50, -10), (-10, 0), (0.001, 10), (10, 50), (50, 100), (100, 1e10)] eael_set = [{ 'column': 'min_eael', 'title': 'Min EAEL', 'legend_label': "EAEL (million USD)", 'divisor': 1000000, 'significance': 0 }, { 'column': 'max_eael', 'title': 'Max EAEL', 'legend_label': "EAEL (million USD)", 'divisor': 1000000, 'significance': 0 }] data_path = config['paths']['data'] region_file_path = os.path.join(config['paths']['data'], 'network', 'rail_edges.shp') flow_file_path = os.path.join(config['paths']['output'], 'flow_mapping_combined', 'weighted_flows_rail_100_percent.csv') region_file = gpd.read_file(region_file_path, encoding='utf-8') flow_file = pd.read_csv(flow_file_path) region_file = pd.merge(region_file, flow_file, how='left', on=['edge_id']).fillna(0) region_file = region_file[region_file['max_total_tons'] > 0] del flow_file flow_file_path = os.path.join( config['paths']['output'], 'failure_results', 'minmax_combined_scenarios', 'single_edge_failures_minmax_rail_100_percent_disrupt.csv') flow_file = pd.read_csv(flow_file_path) flow_file_path = os.path.join( config['paths']['output'], 'network_stats', 'rail_hazard_intersections_risk_weights.csv') fail_sc = pd.read_csv(flow_file_path) fail_scenarios = pd.merge(fail_sc, flow_file, how='left', on=['edge_id']).fillna(0) del flow_file, fail_sc fail_scenarios['min_eael'] = duration * fail_scenarios[ 'risk_wt'] * fail_scenarios['min_econ_impact'] fail_scenarios['max_eael'] = duration * fail_scenarios[ 'risk_wt'] * fail_scenarios['max_econ_impact'] all_edge_fail_scenarios = fail_scenarios[ hazard_cols + ['edge_id', 'min_eael', 'max_eael']] all_edge_fail_scenarios = all_edge_fail_scenarios.groupby( hazard_cols + ['edge_id'])['min_eael', 'max_eael'].max().reset_index() # Climate change effects all_edge_fail_scenarios = all_edge_fail_scenarios.set_index( ['hazard_type', 'edge_id']) scenarios = list(set(all_edge_fail_scenarios.index.values.tolist())) change_tup = [] for sc in scenarios: eael = all_edge_fail_scenarios.loc[[sc], 'max_eael'].values.tolist() yrs = all_edge_fail_scenarios.loc[[sc], 'year'].values.tolist() cl = all_edge_fail_scenarios.loc[[sc], 'climate_scenario'].values.tolist() if 2016 not in yrs: for e in range(len(eael)): if eael[e] > 0: # change_tup += list(zip([sc[0]]*len(cl),[sc[1]]*len(cl),cl,yrs,[0]*len(cl),eael,[1e9]*len(cl))) change_tup += [(sc[0], sc[1], cl[e], yrs[e], 0, eael[e], 1e9)] elif len(yrs) > 1: vals = list(zip(cl, eael, yrs)) vals = sorted(vals, key=lambda pair: pair[-1]) change = 100.0 * (np.array([p for (c, p, y) in vals[1:]]) - vals[0][1]) / vals[0][1] cl = [c for (c, p, y) in vals[1:]] yrs = [y for (c, p, y) in vals[1:]] fut = [p for (c, p, y) in vals[1:]] change_tup += list( zip([sc[0]] * len(cl), [sc[1]] * len(cl), cl, yrs, [vals[0][1]] * len(cl), fut, change)) change_df = pd.DataFrame(change_tup, columns=[ 'hazard_type', 'edge_id', 'climate_scenario', 'year', 'current', 'future', 'change' ]).fillna('inf') change_df = change_df[change_df['change'] != 'inf'] 
change_df.to_csv(os.path.join(config['paths']['output'], 'network_stats', 'national_rail_eael_climate_change.csv'), index=False) # Change effects change_df = change_df.set_index(hazard_cols) scenarios = list(set(change_df.index.values.tolist())) for sc in scenarios: hazard_type = sc[0] climate_scenario = sc[1] year = sc[2] percentage = change_df.loc[[sc], 'change'].values.tolist() edges = change_df.loc[[sc], 'edge_id'].values.tolist() edges_df = pd.DataFrame(list(zip(edges, percentage)), columns=['edge_id', 'change']) edges_vals = pd.merge(region_file, edges_df, how='left', on=['edge_id']).fillna(0) del percentage, edges, edges_df proj_lat_lon = ccrs.PlateCarree() ax = get_axes() plot_basemap(ax, data_path) scale_bar(ax, location=(0.8, 0.05)) plot_basemap_labels(ax, data_path, include_regions=True) name = [c['name'] for c in hazard_set if c['hazard'] == hazard_type][0] for record in edges_vals.itertuples(): geom = record.geometry region_val = record.change if region_val: cl = [ c for c in range(len((change_ranges))) if region_val >= change_ranges[c][0] and region_val < change_ranges[c][1] ] if cl: c = cl[0] ax.add_geometries([geom], crs=proj_lat_lon, linewidth=2.0, edgecolor=change_colors[c], facecolor='none', zorder=8) # ax.add_geometries([geom.buffer(0.1)],crs=proj_lat_lon,linewidth=0,facecolor=change_colors[c],edgecolor='none',zorder=8) else: ax.add_geometries([geom], crs=proj_lat_lon, linewidth=1.5, edgecolor=change_colors[-1], facecolor='none', zorder=7) # ax.add_geometries([geom.buffer(0.1)], crs=proj_lat_lon, linewidth=0,facecolor=change_colors[-1],edgecolor='none',zorder=7) # Legend legend_handles = [] for c in range(len(change_colors)): legend_handles.append( mpatches.Patch(color=change_colors[c], label=change_labels[c])) ax.legend(handles=legend_handles, title='Percentage change in EAEL', loc=(0.55, 0.2), fancybox=True, framealpha=1.0) if climate_scenario == 'none': climate_scenario = 'current' else: climate_scenario = climate_scenario.upper() title = 'Percentage change in EAEL for {} {} {}'.format( name, climate_scenario.replace('_', ' ').title(), year) print(" * Plotting {}".format(title)) plt.title(title, fontsize=10) output_file = os.path.join( config['paths']['figures'], 'national-rail-{}-{}-{}-risks-change-percentage.png'.format( name, climate_scenario.replace('-', ' ').title(), year)) save_fig(output_file) plt.close() # Absolute effects all_edge_fail_scenarios = all_edge_fail_scenarios.reset_index() all_edge_fail_scenarios = all_edge_fail_scenarios.set_index(hazard_cols) scenarios = list(set(all_edge_fail_scenarios.index.values.tolist())) for sc in scenarios: hazard_type = sc[0] climate_scenario = sc[1] if climate_scenario == 'none': climate_scenario = 'current' else: climate_scenario = climate_scenario.upper() year = sc[2] min_eael = all_edge_fail_scenarios.loc[[sc], 'min_eael'].values.tolist() max_eael = all_edge_fail_scenarios.loc[[sc], 'max_eael'].values.tolist() edges = all_edge_fail_scenarios.loc[[sc], 'edge_id'].values.tolist() edges_df = pd.DataFrame(list(zip(edges, min_eael, max_eael)), columns=['edge_id', 'min_eael', 'max_eael']) edges_vals = pd.merge(region_file, edges_df, how='left', on=['edge_id']).fillna(0) del edges_df for c in range(len(eael_set)): proj_lat_lon = ccrs.PlateCarree() ax = get_axes() plot_basemap(ax, data_path) scale_bar(ax, location=(0.8, 0.05)) plot_basemap_labels(ax, data_path, include_regions=True) # generate weight bins column = eael_set[c]['column'] weights = [ record[column] for iter_, record in edges_vals.iterrows() ] max_weight = 
max(weights) width_by_range = generate_weight_bins(weights, width_step=0.04, n_steps=5) rail_geoms_by_category = {'1': [], '2': []} for iter_, record in edges_vals.iterrows(): geom = record.geometry val = record[column] if val == 0: cat = '2' else: cat = '1' buffered_geom = None for (nmin, nmax), width in width_by_range.items(): if nmin <= val and val < nmax: buffered_geom = geom.buffer(width) if buffered_geom is not None: rail_geoms_by_category[cat].append(buffered_geom) else: print("Feature was outside range to plot", iter_) styles = OrderedDict([ ('1', Style(color='#006d2c', zindex=9, label='Hazard failure effect')), # green ('2', Style(color='#969696', zindex=7, label='No hazard exposure/effect')) ]) for cat, geoms in rail_geoms_by_category.items(): cat_style = styles[cat] ax.add_geometries(geoms, crs=proj_lat_lon, linewidth=0, facecolor=cat_style.color, edgecolor='none', zorder=cat_style.zindex) name = [ h['name'] for h in hazard_set if h['hazard'] == hazard_type ][0] x_l = -62.4 x_r = x_l + 0.4 base_y = -42.1 y_step = 0.8 y_text_nudge = 0.2 x_text_nudge = 0.2 ax.text(x_l, base_y + y_step - y_text_nudge, eael_set[c]['legend_label'], horizontalalignment='left', transform=proj_lat_lon, size=10) divisor = eael_set[c]['divisor'] significance_ndigits = eael_set[c]['significance'] max_sig = [] for (i, ((nmin, nmax), line_style)) in enumerate(width_by_range.items()): if round(nmin / divisor, significance_ndigits) < round( nmax / divisor, significance_ndigits): max_sig.append(significance_ndigits) elif round(nmin / divisor, significance_ndigits + 1) < round( nmax / divisor, significance_ndigits + 1): max_sig.append(significance_ndigits + 1) elif round(nmin / divisor, significance_ndigits + 2) < round( nmax / divisor, significance_ndigits + 2): max_sig.append(significance_ndigits + 2) else: max_sig.append(significance_ndigits + 3) significance_ndigits = max(max_sig) for (i, ((nmin, nmax), width)) in enumerate(width_by_range.items()): y = base_y - (i * y_step) line = LineString([(x_l, y), (x_r, y)]).buffer(width) ax.add_geometries([line], crs=proj_lat_lon, linewidth=0, edgecolor='#000000', facecolor='#000000', zorder=2) if nmin == max_weight: value_template = '>{:.' + str(significance_ndigits) + 'f}' label = value_template.format( round(max_weight / divisor, significance_ndigits)) else: value_template = '{:.' + str(significance_ndigits) + \ 'f}-{:.' + str(significance_ndigits) + 'f}' label = value_template.format( round(nmin / divisor, significance_ndigits), round(nmax / divisor, significance_ndigits)) ax.text(x_r + x_text_nudge, y - y_text_nudge, label, horizontalalignment='left', transform=proj_lat_lon, size=10) if climate_scenario == 'none': climate_scenario = 'Current' climate_scenario = climate_scenario.replace('_', ' ') title = 'Railways ({}) {} {} {}'.format(eael_set[c]['title'], name, climate_scenario.title(), year) print('* Plotting ', title) plt.title(title, fontsize=12) legend_from_style_spec(ax, styles, loc='lower left') # output output_file = os.path.join( config['paths']['figures'], 'national-rail-{}-{}-{}-{}.png'.format( name.replace(' ', ''), climate_scenario.replace('.', ''), year, eael_set[c]['column'])) save_fig(output_file) plt.close()
st.header("Select Workflow Parameters") # Define the aoi and input parameters of the workflow. col1_params, col2_params = st.beta_columns(2) with col1_params: aoi_location = st.selectbox('Which area of interest?', ["Berlin", "Washington"]) aoi = up42.get_example_aoi(location=aoi_location, as_dataframe=True) # expander_aoi = st.beta_expander("Show aoi feature") # expander_aoi.json(aoi) with col1_params: uploaded_file = st.file_uploader("Or upload a geojson file:", type=["geojson"]) if uploaded_file is not None: aoi = gpd.read_file(uploaded_file, driver="GeoJSON") st.success("Using uploaded geojson as aoi!") with col1_params: st.text("") start_date = st.date_input("Start date", parse("2019-01-01")) end_date = st.date_input("End date", parse("2020-01-01")) with col1_params: limit = st.number_input(label='limit', min_value=1, max_value=10, value=1, step=1) with col1_params:
def test_Absolute_Centralization(self):
    s_map = gpd.read_file(libpysal.examples.get_path("sacramentot2.shp"))
    df = s_map[['geometry', 'HISP_', 'TOT_POP']]
    index = Absolute_Centralization(df, 'HISP_', 'TOT_POP')
    np.testing.assert_almost_equal(index.statistic, 0.6891422368736286)
def shapefile_to_gpd_df(shp_fp, bbox=None):
    """ read a shapefile filepath string to a geodataframe """
    gpd_df = gpd.read_file(shp_fp, bbox=bbox)
    return gpd_df
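# Usage sketch for shapefile_to_gpd_df; the path and bounding box are made-up
# illustrative values, not data from the original project.
roads = shapefile_to_gpd_df('data/roads.shp')
# passing bbox=(minx, miny, maxx, maxy) makes geopandas/fiona read only the
# features that intersect that window instead of the whole file
roads_clip = shapefile_to_gpd_df('data/roads.shp',
                                 bbox=(-77.1, 38.8, -76.9, 39.0))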
def main(): config = load_config() data_path = config['paths']['data'] mode_file_path = os.path.join(config['paths']['data'], 'network', 'road_edges.shp') flow_file_path = os.path.join(config['paths']['output'], 'flow_mapping_combined', 'weighted_flows_road_100_percent.csv') mode_file = gpd.read_file(mode_file_path, encoding='utf-8') flow_file = pd.read_csv(flow_file_path) mode_file = pd.merge(mode_file, flow_file, how='left', on=['edge_id']).fillna(0) mode_file = mode_file[(mode_file['road_type'] == 'national') | (mode_file['road_type'] == 'province') | (mode_file['road_type'] == 'rural')] plot_sets = [ { 'file_tag': 'tmda', 'legend_label': "AADT ('000 vehicles/day)", 'divisor': 1000, 'columns': ['tmda_count'], 'title_cols': ['Vehicle Count'], 'significance': 0 }, { 'file_tag': 'commodities', 'legend_label': "AADF ('000 tons/day)", 'divisor': 1000, 'columns': [ 'max_{}'.format(x) for x in [ 'total_tons', 'AGRICULTURA, GANADERÍA, CAZA Y SILVICULTURA', 'Carnes', 'Combustibles', 'EXPLOTACIÓN DE MINAS Y CANTERAS', 'Granos', 'INDUSTRIA MANUFACTURERA', 'Industrializados', 'Mineria', 'PESCA', 'Regionales', 'Semiterminados' ] ], 'title_cols': [ 'Total tonnage', 'AGRICULTURA, GANADERÍA, CAZA Y SILVICULTURA', 'Carnes', 'Combustibles', 'EXPLOTACIÓN DE MINAS Y CANTERAS', 'Granos', 'INDUSTRIA MANUFACTURERA', 'Industrializados', 'Mineria', 'PESCA', 'Regionales', 'Semiterminados' ], 'significance': 0 }, ] for plot_set in plot_sets: for c in range(len(plot_set['columns'])): # basemap proj_lat_lon = ccrs.PlateCarree() ax = get_axes() plot_basemap(ax, data_path) scale_bar(ax, location=(0.8, 0.05)) plot_basemap_labels(ax, data_path, include_regions=False) # generate weight bins if plot_set['columns'][c] == 'tmda': column = plot_set['columns'][c] weights = [ int(str(record[column])) for iter_, record in mode_file.iterrows() if str(record[column]).isdigit() is True and int(str(record[column])) > 0 ] max_weight = max(weights) width_by_range = generate_weight_bins(weights, n_steps=7, width_step=0.02) # width_by_range = generate_weight_bins(weights, n_steps=9, width_step=0.01, interpolation='log') else: column = 'max_total_tons' weights = [ record[column] for iter_, record in mode_file.iterrows() ] max_weight = max(weights) width_by_range = generate_weight_bins(weights, n_steps=7, width_step=0.02) road_geoms_by_category = { 'national': [], 'province': [], 'rural': [], } column = plot_set['columns'][c] for iter_, record in mode_file.iterrows(): if column == 'tmda': if str(record[column]).isdigit() is False: val = 0 else: val = int(str(record[column])) else: val = record[column] if val > 0: cat = str(record['road_type']).lower().strip() if cat not in road_geoms_by_category: raise Exception geom = record.geometry buffered_geom = None for (nmin, nmax), width in width_by_range.items(): if nmin <= val and val < nmax: buffered_geom = geom.buffer(width) if buffered_geom is not None: road_geoms_by_category[cat].append(buffered_geom) else: print("Feature was outside range to plot", iter_) styles = OrderedDict([ ('national', Style(color='#e41a1c', zindex=9, label='National')), # red ('province', Style(color='#377eb8', zindex=8, label='Provincial')), # orange ('rural', Style(color='#4daf4a', zindex=7, label='Rural')), # blue ]) for cat, geoms in road_geoms_by_category.items(): cat_style = styles[cat] ax.add_geometries(geoms, crs=proj_lat_lon, linewidth=0, facecolor=cat_style.color, edgecolor='none', zorder=cat_style.zindex) x_l = -62.4 x_r = x_l + 0.4 base_y = -42.1 y_step = 0.8 y_text_nudge = 0.2 x_text_nudge = 0.2 
ax.text(x_l, base_y + y_step - y_text_nudge, plot_set['legend_label'], horizontalalignment='left', transform=proj_lat_lon, size=10) divisor = plot_set['divisor'] significance_ndigits = plot_set['significance'] max_sig = [] for (i, ((nmin, nmax), line_style)) in enumerate(width_by_range.items()): if round(nmin / divisor, significance_ndigits) < round( nmax / divisor, significance_ndigits): max_sig.append(significance_ndigits) elif round(nmin / divisor, significance_ndigits + 1) < round( nmax / divisor, significance_ndigits + 1): max_sig.append(significance_ndigits + 1) elif round(nmin / divisor, significance_ndigits + 2) < round( nmax / divisor, significance_ndigits + 2): max_sig.append(significance_ndigits + 2) else: max_sig.append(significance_ndigits + 3) significance_ndigits = max(max_sig) for (i, ((nmin, nmax), width)) in enumerate(width_by_range.items()): y = base_y - (i * y_step) line = LineString([(x_l, y), (x_r, y)]).buffer(width) ax.add_geometries([line], crs=proj_lat_lon, linewidth=0, edgecolor='#000000', facecolor='#000000', zorder=2) if nmin == max_weight: value_template = '>{:.' + str(significance_ndigits) + 'f}' label = value_template.format( round(max_weight / divisor, significance_ndigits)) else: value_template = '{:.' + str(significance_ndigits) + \ 'f}-{:.' + str(significance_ndigits) + 'f}' label = value_template.format( round(nmin / divisor, significance_ndigits), round(nmax / divisor, significance_ndigits)) ax.text(x_r + x_text_nudge, y - y_text_nudge, label, horizontalalignment='left', transform=proj_lat_lon, size=10) plt.title('Max AADF - {}'.format(plot_set['title_cols'][c]), fontsize=10) legend_from_style_spec(ax, styles) output_file = os.path.join( config['paths']['figures'], 'road_flow-map-{}-{}-max-scale.png'.format( plot_set['file_tag'], column)) save_fig(output_file) plt.close()
def return_endmembers(args, src): # This is the top-level function for calling all the helper functions to return the endmembers save_file_endmembers = os.path.join( args.base_dir, 'saved_endmembers', args.unmixing_region, 'extracted_endmembers_{}_outphasetype_{}_nclusters_{}_nsamples_{}.csv'. format(args.unmixing_region, args.outphase_endmember_type, args.num_clusters, args.num_samples)) if args.calculate_new_endmembers: # Calculate new endmembers print('Calculating new endmembers') n_regional_clusters = return_nclusters(args) rainfall_ts_file = os.path.join( args.base_dir, 'saved_rainfall_regions', 'cluster_center_rainfall_ts_csvs', '{}_rainfall_regions_nclusters_{}_normalized_monthly_ts.csv'. format(args.unmixing_region, n_regional_clusters)) monthly_rainfall_ts = np.array( pd.read_csv(rainfall_ts_file, index_col=0)) print('Interpolate rainfall timeseries') interpolated_rainfall_ts = interpolate_rainfall( args, monthly_rainfall_ts) print('Read regional polygons') polygons_file = os.path.join( args.base_dir, 'saved_rainfall_regions', 'clean_regions', '{}_rainfall_regions_nclusters_{}_clean.shp'.format( args.unmixing_region, n_regional_clusters)) region_polygons = gpd.read_file(polygons_file).to_crs(src.meta['crs']) all_endmembers_df = pd.DataFrame( index=range(len(interpolated_rainfall_ts[0]))) for region_index in range(n_regional_clusters): print('Calculating endmembers for {}, Region {}'.format( args.unmixing_region, region_index)) masked_evi_img, img_transform = mask( src, [region_polygons['geometry'].iloc[region_index]], nodata=0) print('PCA Transform') principalComponents, pca, evi_img_flattened = pca_transform( args, masked_evi_img) print('Clustering') cluster_centers, cluster_predicts = clustering( args, principalComponents) print('Finding Cluster Timeseries') cluster_timeseries = calculate_cluster_timeseries( cluster_predicts, evi_img_flattened) print('Extract Endmembers') endmember_df = find_endmembers( args, cluster_timeseries, interpolated_rainfall_ts[region_index], region_index) all_endmembers_df = pd.concat([all_endmembers_df, endmember_df], axis=1) # Save extracted endmembers all_endmembers_df.to_csv(save_file_endmembers) endmember_array = np.array( pd.read_csv(save_file_endmembers, index_col=0, header=0)) # Return endmember array return endmember_array
Roads
-----

The :meth:`pygmt.Figure.plot` method allows us to plot geographical data such
as lines which are stored in a :class:`geopandas.GeoDataFrame` object. Use
:func:`geopandas.read_file` to load data from any supported OGR format such as
a shapefile (.shp), GeoJSON (.geojson), geopackage (.gpkg), etc. Then, pass the
:class:`geopandas.GeoDataFrame` as an argument to the ``data`` parameter in
:meth:`pygmt.Figure.plot`, and style the geometry using the ``pen`` parameter.
"""

import geopandas as gpd
import pygmt

# Read shapefile data using geopandas
gdf = gpd.read_file(
    "http://www2.census.gov/geo/tiger/TIGER2015/PRISECROADS/tl_2015_15_prisecroads.zip"
)

# The dataset contains different road types listed in the RTTYP column,
# here we select the following ones to plot:
roads_common = gdf[gdf.RTTYP == "M"]  # Common name roads
roads_state = gdf[gdf.RTTYP == "S"]  # State recognized roads
roads_interstate = gdf[gdf.RTTYP == "I"]  # Interstate roads

fig = pygmt.Figure()

# Define target region around O'ahu (Hawai'i)
region = [-158.3, -157.6, 21.2, 21.75]  # xmin, xmax, ymin, ymax

title = r"Main roads of O\047ahu (Hawai\047i)"  # \047 is octal code for '
fig.basemap(region=region, projection="M12c", frame=["af", f'WSne+t"{title}"'])
fig.coast(land="gray", water="dodgerblue4", shorelines="1p,black")
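# The snippet above stops after drawing the basemap and coastline. A plausible
# continuation, sketched here under the assumption that each road type is drawn
# with pygmt.Figure.plot and a distinct pen (the pen colors and the legend
# placement are illustrative choices, not necessarily those of the original
# gallery example):
fig.plot(data=roads_common, pen="2p,sienna", label="Common name")
fig.plot(data=roads_state, pen="2p,gold", label="State recognized")
fig.plot(data=roads_interstate, pen="2p,red", label="Interstate")

# place a legend in the bottom-right corner of the map
fig.legend(position="JBR+jBR+o0.2c", box="+gwhite+p1p")

fig.show()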
from __future__ import division

import geopandas as gpd
from shapely.geometry import Polygon

from utility import printHeader

###########################
# Loading the data and print out the CRS and features
###########################
printHeader("Loading the data and print out the CRS and features")

# Load and join the spatial data
gdf = gpd.read_file("spatialref/taxi_zones.shp")

# Print out the coordinate reference system
print "Coordinate system: %s\n" % gdf.crs["init"]

# Print out the features of the spatial data
print "Features:"
for feature in gdf.columns:
    print "* %s" % feature

###########################
# Check whether we should use the OBJECT ID or the LocationID as our index
###########################
printHeader(
    "Check whether we should use the OBJECT ID or the LocationID as our index")

print "The following entries exist in the dataframe where LocationID is not the same as OBJECTID"
print gdf[(gdf["OBJECTID"] != gdf["LocationID"])]

# Index by OBJECTID
df = pd.read_csv(
    "https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_time_series/time_series_covid19_confirmed_global.csv"
)

listaPaises = [
    'Chile', 'Argentina', 'Brazil', 'Uruguay', 'Bolivia', 'Paraguay', 'Peru',
    'Ecuador', 'Colombia', 'Venezuela', 'Guyana', 'Suriname'
]

df = df[df['Country/Region'].isin(listaPaises)]
df.drop(['Province/State', 'Country/Region', 'Lat', 'Long'],
        axis=1, inplace=True)

world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))
mapa = world[world['name'].isin(listaPaises)]
mapa = mapa.sort_values(["name"], ascending=(True))
mapa.drop(['continent', 'iso_a3', 'gdp_md_est'], axis=1, inplace=True)

pa = mapa['name'].tolist()
pob = mapa['pop_est'].tolist()

lista = df.columns
cols = lista.size - 1
i = 4
l = []
import pandas as pnd
import numpy as np
import sys
import traceback
import geopandas
from shapely.geometry import Point

fichier_dist = "C:\\DEV\\GIS\\rwanda\\excel_dist.txt"
fichier_provinces = "C:\\DEV\\GIS\\rwanda\\rwa_provinces_osm.gpkg"
output_file = "C:\\DEV\\GIS\\rwanda\\excel_dist_provinces.txt"

prov_shp = geopandas.read_file(fichier_provinces)
prov_shp.set_crs(epsg=4326, inplace=True)

df_dist = pnd.read_csv(fichier_dist, sep='\t', encoding='ISO-8859-1')
df_dist = df_dist.fillna('')
print(df_dist)


def add_prov():
    global df_dist
    global prov_shp

    df_dist["province"] = ""
    df_dist['Latitude'] = df_dist['Latitude'].astype(str)
    df_dist['Longitude'] = df_dist['Longitude'].astype(str)

    for index, row_dist in df_dist.iterrows():
        if len(row_dist["Latitude"]) > 0 and len(row_dist["Longitude"]) > 0:
            nom_jointure = row_dist["NOM_JOINTURE"].lower()
            collector = row_dist["COLLECTOR"].replace("&", "\&")
            coll_num = row_dist["COLL_NUM"].replace("&", "\&")
            lat = row_dist["Latitude"].replace(",", ".")
            long = row_dist["Longitude"].replace(",", ".")
# use ftp client to download all zip files from https://www2.census.gov/geo/tiger/TIGER2014/TRACT/
# unzip all to "*/" in tracts_path

import os
import geopandas as gpd
import time

start_time = time.time()
tracts_path = '2014-tracts-by-state'
gdf = gpd.GeoDataFrame()

for folder in os.listdir(tracts_path):
    print(folder)
    tmp = gpd.read_file('{}/{}'.format(tracts_path, folder))
    gdf = gdf.append(tmp)  # note: DataFrame.append is removed in recent pandas

gdf = gdf.head()  # keeps only the first 5 rows; looks like a leftover testing shortcut
original_crs = tmp.crs
gdf.crs = original_crs
gdf = gdf.to_crs({'init': 'epsg:4326'})
gdf.to_file('us_tracts_2014')

print('created shapefile with {} rows'.format(len(gdf)))
print('finished in {:.1f} seconds'.format(time.time() - start_time))
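# An alternative sketch for current pandas/geopandas, where DataFrame.append
# has been removed: collect the per-state frames and concatenate once.
# Not from the original script; the directory layout assumptions are the same.
import pandas as pd

frames = [
    gpd.read_file(os.path.join(tracts_path, folder))
    for folder in os.listdir(tracts_path)
]
gdf = gpd.GeoDataFrame(pd.concat(frames, ignore_index=True))
gdf = gdf.set_crs(frames[0].crs, allow_override=True).to_crs(epsg=4326)
gdf.to_file('us_tracts_2014')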
def plot_density(
    data,
    *,
    year,
    group,
    kernel_function,
    cell_size,
    crs=None,
    bandwidth,
    show_title,
):
    if crs is None:
        crs = {'init': 'epsg:3067'}

    pop = get_xy(data)

    # Pad the plotting extent by twice the bandwidth
    pad = bandwidth * 2
    minx, miny, maxx, maxy = pop['geometry'].total_bounds
    minx -= pad
    miny -= pad
    maxx += pad
    maxy += pad
    w, h = maxx - minx, maxy - miny

    fig = figure(
        title=f"Density of {group.capitalize()} population in Vyborg in {year}",
        x_range=(minx, maxx),
        y_range=(miny, maxy),
    )
    fig.title.visible = show_title
    fig.xaxis.major_tick_line_color = None
    fig.xaxis.minor_tick_line_color = None
    fig.yaxis.major_tick_line_color = None
    fig.yaxis.minor_tick_line_color = None
    fig.xaxis.major_label_text_font_size = '0pt'
    fig.yaxis.major_label_text_font_size = '0pt'
    fig.xgrid.visible = False
    fig.ygrid.visible = False

    water = gpd.read_file('water_clip.shp')
    water.crs = {'init': 'epsg:4326'}
    water.geometry = water.geometry.to_crs(crs)
    water = get_xy(water)
    water_src = GeoJSONDataSource(geojson=water.to_json())

    density = kernel_density_surface(
        data,
        group=group,
        bandwidth=bandwidth,
        cell_size=cell_size,
        kernel_function=kernel_function,
    )

    fig.image(
        [density],
        minx,
        miny,
        w,
        h,
        palette=grey(10)[::-1],
    )
    fig.patches(
        xs='x',
        ys='y',
        source=water_src,
        fill_color='#59d0ff',
        fill_alpha=0.8,
        line_color=None,
        line_width=0,
    )
    return fig
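# Hypothetical usage sketch (argument values are assumptions drawn from the
# surrounding script, not a definitive call): ``data`` would be the merged
# point/population GeoDataFrame prepared elsewhere in this project.
from bokeh.io import show

fig = plot_density(
    data,
    year=1920,
    group='orthodox',
    kernel_function=biweight,
    cell_size=10,
    bandwidth=100,
    show_title=True,
)
show(fig)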
def run_cmip():
    """ """
    # Initialize OGGM and set up the default run parameters
    vascaling.initialize(logging_level='DEBUG')
    rgi_version = '62'
    cfg.PARAMS['border'] = 80

    # CLUSTER paths
    wdir = os.environ.get('WORKDIR', '')
    utils.mkdir(wdir)
    cfg.PATHS['working_dir'] = wdir
    outdir = os.environ.get('OUTDIR', '')
    utils.mkdir(outdir)

    # define the baseline climate CRU or HISTALP
    cfg.PARAMS['baseline_climate'] = 'CRU'
    # set the mb hyper parameters accordingly
    cfg.PARAMS['prcp_scaling_factor'] = 3
    cfg.PARAMS['temp_melt'] = 0
    cfg.PARAMS['temp_all_solid'] = 4
    cfg.PARAMS['prcp_default_gradient'] = 4e-4
    cfg.PARAMS['run_mb_calibration'] = False
    # set minimum ice thickness to include in glacier length computation
    # this reduces weird spikes in length records
    cfg.PARAMS['min_ice_thick_for_length'] = 0.1

    # the bias is defined to be zero during the calibration process,
    # which is why we don't use it here to reproduce the results
    cfg.PARAMS['use_bias_for_run'] = True

    # read RGI entry for the glaciers as DataFrame
    # containing the outline area as shapefile
    # RGI glaciers
    rgi_reg = os.environ.get('RGI_REG', '')
    if rgi_reg not in ['{:02d}'.format(r) for r in range(1, 20)]:
        raise RuntimeError('Need an RGI Region')
    rgi_ids = gpd.read_file(
        utils.get_rgi_region_file(rgi_reg, version=rgi_version))

    # For greenland we omit connectivity level 2
    if rgi_reg == '05':
        rgi_ids = rgi_ids.loc[rgi_ids['Connect'] != 2]

    # get and set path to intersect shapefile
    intersects_db = utils.get_rgi_intersects_region_file(region=rgi_reg)
    cfg.set_intersects_db(intersects_db)

    # operational run, all glaciers should run
    cfg.PARAMS['continue_on_error'] = True

    # Module logger
    log = logging.getLogger(__name__)
    log.workflow('Starting run for RGI reg {}'.format(rgi_reg))

    # Go - get the pre-processed glacier directories
    base_url = 'https://cluster.klima.uni-bremen.de/' \
               '~moberrauch/prepro_vas_paper/'
    gdirs = workflow.init_glacier_directories(rgi_ids, from_prepro_level=3,
                                              prepro_base_url=base_url,
                                              prepro_rgi_version=rgi_version)

    # read gcm list
    gcms = pd.read_csv('/home/www/oggm/cmip6/all_gcm_list.csv', index_col=0)

    # iterate over all specified GCMs
    for gcm in sys.argv[1:]:
        # iterate over all SSPs (Shared Socioeconomic Pathways)
        df1 = gcms.loc[gcms.gcm == gcm]
        for ssp in df1.ssp.unique():
            df2 = df1.loc[df1.ssp == ssp]
            assert len(df2) == 2
            # get temperature projections
            ft = df2.loc[df2['var'] == 'tas'].iloc[0]
            # get precipitation projections
            fp = df2.loc[df2['var'] == 'pr'].iloc[0].path
            rid = ft.fname.replace('_r1i1p1f1_tas.nc', '')
            ft = ft.path

            log.workflow('Starting run for {}'.format(rid))

            workflow.execute_entity_task(gcm_climate.process_cmip_data, gdirs,
                                         # recognize the climate file for later
                                         filesuffix='_' + rid,
                                         # temperature projections
                                         fpath_temp=ft,
                                         # precip projections
                                         fpath_precip=fp,
                                         year_range=('1981', '2020'))
            workflow.execute_entity_task(vascaling.run_from_climate_data, gdirs,
                                         # use gcm_data, not climate_historical
                                         climate_filename='gcm_data',
                                         # use a different scenario
                                         climate_input_filesuffix='_' + rid,
                                         # this is important! Start from 2019
                                         init_model_filesuffix='_historical',
                                         # recognize the run for later
                                         output_filesuffix=rid,
                                         return_value=False)
            gcm_dir = os.path.join(outdir, 'RGI' + rgi_reg, gcm)
            utils.mkdir(gcm_dir)
            utils.compile_run_output(gdirs, input_filesuffix=rid,
                                     path=os.path.join(gcm_dir, rid + '.nc'))

    log.workflow('OGGM Done')
import geopandas as gp
import pandas as pd
import contextily as cx
import matplotlib.pyplot as plt
from shapely import geometry

data = gp.read_file("dataset/milano_municipi/Municipi.shx").to_crs(epsg=3857)
incidenti = gp.read_file(
    "dataset/incidenti/inc_strad_milano_2016.geojson").to_crs(epsg=3857)

# Count accident points falling inside each municipality.
# Iterating a GeoSeries already yields shapely geometries, so no re-wrapping
# with geometry.Polygon/geometry.Point is needed.
incidenti_per_municipio = {}
for m in data['MUNICIPIO']:
    incidenti_per_municipio[m] = 0

for m, poly in zip(data['MUNICIPIO'], data['geometry']):
    for point in incidenti['geometry']:
        if poly.contains(point):
            incidenti_per_municipio[m] += 1

data.index = data['MUNICIPIO']
# the counts are plain integers, so a regular pandas Series is appropriate here
inc = pd.Series(incidenti_per_municipio).sort_index()
data['Incidenti'] = inc

layer_m = data.plot(column='Incidenti', cmap='OrRd', alpha=0.5, figsize=(9, 7))
cx.add_basemap(ax=layer_m)
plt.axis('off')
plt.show()
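# Alternative sketch using a spatial join instead of the nested Python loops
# (not from the original script): count accident points per municipality in
# one vectorised pass. ``predicate=`` is the geopandas >= 0.10 spelling; older
# versions use ``op=`` instead.
joined = gp.sjoin(incidenti, data[['MUNICIPIO', 'geometry']], predicate='within')
counts = joined.groupby('MUNICIPIO').size()
data['Incidenti'] = counts.reindex(data['MUNICIPIO']).fillna(0).values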
    for val in count_dict.values():
        if val < args.samples_per_label:
            return False
    return True


# Local testing constraint
# shp_files = shp_files[:2]

for shpfile in shp_files:
    tries = 0
    shp_id = shpfile.split("_")[-1][:-4].upper()
    shape_data = gpd.read_file(SHAPEFILE_DIR + shpfile)

    # reset the sample count dict per image
    sample_count = {
        "L": 0,
        "W": 0,
        "I": 0,
    }

    # read in the associated GeoTIFF file; only index the match list
    # after confirming it is non-empty to avoid an IndexError
    tiff_file = [g for g in tiff_files if shp_id in g]
    if len(tiff_file):
        print(tiff_file[0])
        src = rasterio.open(TIFF_DIR + tiff_file[0])
def read_lines_dams(gdrive):
    # Read in data
    t0 = datetime.datetime.now()

    ## NABD
    # Note: geopandas.read_file generally has no ``usecols`` keyword, so the
    # column selection may need to happen after reading.
    nabd_dams = gp.read_file(gdrive + "nabd/nabd_fish_barriers_2012.shp",
                             usecols=[
                                 'COMID', 'NIDID', 'Norm_stor', 'Max_stor',
                                 'Year_compl', 'Purposes', 'geometry'
                             ])  # read in NABD from Drive
    nabd_dams = nabd_dams.drop_duplicates(
        subset='NIDID', keep="first")  # drop everything after first duplicate
    nabd_dams["DamID"] = range(len(nabd_dams.COMID))  # add DamID
    nabd_dams = pd.DataFrame(nabd_dams)
    nabd_dams['Grand_flag'] = np.zeros(len(nabd_dams))  # add flag column

    ## GRanD
    grand = pd.read_csv(gdrive + "other_dam_datasets/Reservoir_Attributes.csv",
                        usecols=['GRAND_ID', 'NABD_ID'])  # read in GRanD attributes
    # Filter out dams without NABD IDs
    grand['NABD_ID'] = grand['NABD_ID'].fillna(0)
    grand = grand[grand['NABD_ID'] != 0]

    # Merge NABD and GRanD
    nabd = pd.merge(nabd_dams, grand, left_on='NIDID', right_on='NABD_ID',
                    how='left')
    nabd['GRAND_ID'] = nabd['GRAND_ID'].fillna(0)
    nabd.loc[nabd.GRAND_ID != 0, 'Grand_flag'] = 1  # if a GRanD ID exists, make flag = 1

    ## NHD
    flowlines = pd.read_csv(gdrive + "nhd/NHDFlowlines.csv",
                            usecols=[
                                'Hydroseq', 'UpHydroseq', 'DnHydroseq',
                                'REACHCODE', 'LENGTHKM', 'StartFlag', 'FTYPE',
                                'COMID', 'WKT', 'QE_MA', 'QC_MA', 'StreamOrde'
                            ])  # all NHD Flowlines

    # Filter the flowlines to select by HUC 2
    flowlines['HUC2'] = flowlines['REACHCODE'] / (10**12)  # convert Reachcode to HUC 2 format
    flowlines['HUC4'] = flowlines['REACHCODE'] / (10**10)  # convert Reachcode to HUC 4 format
    flowlines['HUC8'] = flowlines['REACHCODE'] / (10**6)  # convert Reachcode to HUC 8 format
    flowlines[['HUC2', 'HUC4', 'HUC8']] = flowlines[
        ['HUC2', 'HUC4', 'HUC8']].apply(np.floor)  # round down to integer

    # round the hydroseq values because of bug
    flowlines[['UpHydroseq', 'DnHydroseq', 'Hydroseq']] = flowlines[
        ['UpHydroseq', 'DnHydroseq', 'Hydroseq']].round(decimals=0)

    read_flag = 1
    t1 = datetime.datetime.now()
    print("Time to read in flowlines and dams:", (t1 - t0))

    return flowlines, nabd
def echantillonnage_pix(path_depot, path_mnt, path_metriques, output, nbPoints, minDistance):
    '''
    :param path_depot: Path to the deposit layer (str)
    :param path_mnt: Path to the DEM to sample (str)
    :param path_metriques: Path to the directory of metrics to sample (str)
    :param output: Path to the output file (str)
    :param nbPoints: Desired number of points (int)
    :param minDistance: Minimum distance to keep between points (int)
    :return: Layer of points randomly sampled on the DEM, with the metric values as attributes (shp)
    '''
    print('***PIXEL-BASED SAMPLING***')

    # Create the DEM frame
    print('Creating the frame...')
    cadre, epsg, nodata = creation_cadre(path_mnt)

    # Read the deposit layer and reproject it if needed
    print('Reading the deposit layer...')
    depot = gpd.read_file(path_depot)
    if str(depot.crs) != epsg:
        print('Reprojecting...')
        # to_crs actually reprojects the geometries; assigning to .crs would
        # only relabel the coordinate reference system
        depot = depot.to_crs(epsg)

    # Dissolve the deposit layer
    print('Dissolving the deposit layer...')
    depot_reg = dissolve(depot)

    # Buffer the deposit layer by the minimum distance
    print('Creating the buffer...')
    buff = creation_buffer(depot_reg, minDistance, epsg, 1, 1)

    # Clip the buffer to the frame extent
    print('Clipping the buffer...')
    buff_clip = gpd.clip(buff, cadre)

    # Create the outer zone: difference between the frame and the clipped buffer
    print('Creating the outer zone...')
    zone_ext = difference(cadre, buff_clip, epsg)

    # Compare the areas of the deposits and the outer zone to cap the number of points
    print('Comparing...')
    plus_petite_zone = None
    plus_grande_zone = None
    zone = None
    if comparaison_area(depot_reg, zone_ext):
        plus_petite_zone = depot_reg
        plus_grande_zone = zone_ext
        zone = 1
        print('Smaller zone: deposit layer')
    else:
        plus_petite_zone = zone_ext
        plus_grande_zone = depot_reg
        zone = 0
        print('Smaller zone: outer zone')

    # Sample the smaller zone
    print('Sampling the smaller zone...')
    ech_petite_zone = echantillon_pixel(plus_petite_zone, minDistance, nbPoints, epsg, zone)
    #ech_petite_zone.to_file(r'C:\Users\home\Documents\Documents\APP3\ech_petite_zone.shp')

    # Sample the larger zone with the number of points obtained in the smaller one
    if zone == 1:
        zone = 0
    elif zone == 0:
        zone = 1
    print('Sampling the larger zone...')
    nbPoints_petite = len(ech_petite_zone)
    ech_grande_zone = echantillon_pixel(plus_grande_zone, minDistance, nbPoints_petite, epsg, zone)
    #ech_grande_zone.to_file(r'C:\Users\home\Documents\Documents\APP3\ech_grande_zone.shp')
    print('Done')

    # Combine the two samples
    print('Combining the samples...')
    ech_total = gpd.GeoDataFrame(pd.concat([ech_petite_zone, ech_grande_zone],
                                           ignore_index=True), crs=epsg)
    if not os.path.exists(os.path.dirname(output)):
        os.makedirs(os.path.dirname(output))
    ech_total.to_file(output)

    # Extract the metric values
    print('Extracting the metric values')
    extract_value_metrique(output, path_metriques)
    print('Done')
        bandwidth=100,
        cell_size=10,
        kernel_function=biweight,
    )
    density = pd.DataFrame({
        'orthodox': density_orthodox.flatten(),
        'total': density_total.flatten(),
    })
    s = MinMax(density, 'orthodox', 'total')
    return s


if __name__ == '__main__':
    os.chdir('../data')
    points = gpd.read_file('points1878.geojson')
    year = 1920
    points['geometry'].crs = {'init': 'epsg:3067'}
    points = prepare_point_data(points, 'NUMBER', 'NUMBER2')
    pop_data = prepare_pop_data(pd.read_csv(f'{year}.csv'))
    pop_data = aggregate_sum(pop_data, ['plot_number'], [
        'other_christian',
        'orthodox',
        'other_religion',
        'lutheran',
    ])
    data = merge_dataframes(
        location_data=points,
        other_data=pop_data,
        on_location='NUMBER',
        on_other='plot_number',
    def get_data(self):
        # Build the WCS GetCoverage request, either from the bounds of a mask
        # geometry or from the explicit lat/long window of the instance.
        if self.masking == True:
            df = gpd.read_file(self.geometry)
            geom = df['geometry'][0]
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(
                    self.endpoint, self.time_t, self.collection,
                    geom.bounds[1] - 1, geom.bounds[3] + 1,
                    geom.bounds[0] - 1, geom.bounds[2] + 1,
                    self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(
                    self.endpoint, self.time_t, self.collection,
                    geom.bounds[1], geom.bounds[3],
                    geom.bounds[0], geom.bounds[2],
                    self.token, self.scale)
        else:
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(
                    self.endpoint, self.time_t, self.collection,
                    self.min_lat, self.max_lat, self.min_long, self.max_long,
                    self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(
                    self.endpoint, self.time_t, self.collection,
                    self.min_lat, self.max_lat, self.min_long, self.max_long,
                    self.token, self.scale)

        # Download the coverage and write it to a local GeoTIFF
        result = requests.get(url)
        with open(self.fname, 'wb') as f:
            f.write(result.content)

        # if the image is Sentinel-2 tiled, transform it to EPSG:4326
        dst_crs = 'EPSG:4326'
        with rasterio.open(self.fname) as src:
            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds)
            kwargs = src.meta.copy()
            kwargs.update({
                'crs': dst_crs,
                'transform': transform,
                'width': width,
                'height': height,
                'dtype': 'float32',
                'bbox': src.bounds
            })
            new_fname = '4326_{}'.format(self.fname)
            with rasterio.open(new_fname, 'w', **kwargs) as dst:
                for i in range(1, src.count + 1):
                    reproject(
                        source=rasterio.band(src, i),
                        destination=rasterio.band(dst, i),
                        src_transform=src.transform,
                        src_crs=src.crs,
                        dst_transform=transform,
                        dst_crs=dst_crs,
                        resampling=Resampling.nearest)

        src = rasterio.open(new_fname)
        out_image = src.read(1)
        out_image = out_image.astype(np.float32)
        out_image[out_image == src.nodata] = np.nan  # mask nodata pixels
        out_meta = src.meta.copy()
        #out_meta.update({"offset": src.offsets[0],
        #                 "scale": src.scales[0]})

        if self.masking == True:
            # Clip the raster to the mask geometry
            with fiona.open(self.geometry, "r") as shapefile:
                features = [feature["geometry"] for feature in shapefile]
            out_image, out_transform = mask(src, features, crop=True)
            out_image = out_image.astype(np.float32)
            out_image[out_image == src.nodata] = np.nan
            out_image = out_image[0, :, :]
            out_meta = src.meta.copy()
            out_meta.update({"driver": "GTiff",
                             "height": out_image.shape[0],
                             "width": out_image.shape[1],
                             "transform": out_transform,
                             "bbox": src.bounds})
            with rasterio.open(self.fname, 'w', **out_meta) as dst:
                dst.write_band(1, out_image)

        return out_image, out_meta
    def get_data(self):
        # Build the WCS GetCoverage request, either from the bounds of a mask
        # geometry or from the explicit lat/long window of the instance.
        if self.masking == True:
            df = gpd.read_file(self.geometry)
            geom = df['geometry'][0]
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(
                    self.endpoint, self.time_t, self.collection,
                    geom.bounds[1] - 1, geom.bounds[3] + 1,
                    geom.bounds[0] - 1, geom.bounds[2] + 1,
                    self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(
                    self.endpoint, self.time_t, self.collection,
                    geom.bounds[1], geom.bounds[3],
                    geom.bounds[0], geom.bounds[2],
                    self.token, self.scale)
        else:
            if self.mgrs_tile != 'None':
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}&mgrs_tile={}".format(
                    self.endpoint, self.time_t, self.collection,
                    self.min_lat, self.max_lat, self.min_long, self.max_long,
                    self.token, self.scale, self.mgrs_tile)
            else:
                url = "https://{}/wcs?service=WCS&Request=GetCoverage&version=2.0.0&subset=unix({})&format=image/tiff&filter=false&CoverageId={}&subset=Lat({},{})&subset=Long({},{})&token={}&scale={}".format(
                    self.endpoint, self.time_t, self.collection,
                    self.min_lat, self.max_lat, self.min_long, self.max_long,
                    self.token, self.scale)

        # Download the coverage and write it to a local GeoTIFF
        result = requests.get(url)
        with open(self.fname, 'wb') as f:
            f.write(result.content)

        src = rasterio.open(self.fname)
        out_image = src.read(1)
        out_image = out_image.astype(np.float32)
        out_image[out_image == src.nodata] = np.nan  # mask nodata pixels
        out_meta = src.meta.copy()
        out_meta.update({"bbox": src.bounds})
        if 'CAMS' in self.collection:
            out_meta.update({"offset": src.offsets[0],
                             "scale": src.scales[0]})

        if self.masking == True:
            # Clip the raster to the mask geometry
            with fiona.open(self.geometry, "r") as shapefile:
                features = [feature["geometry"] for feature in shapefile]
            out_image, out_transform = mask(src, features, crop=True)
            out_image = out_image.astype(np.float32)
            out_image[out_image == src.nodata] = np.nan
            out_image = out_image[0, :, :]
            out_meta = src.meta.copy()
            if 'CAMS' in self.collection:
                out_meta.update({"driver": "GTiff",
                                 "height": out_image.shape[0],
                                 "width": out_image.shape[1],
                                 "transform": out_transform,
                                 "offset": src.offsets[0],
                                 "scale": src.scales[0]})
            else:
                out_meta.update({"driver": "GTiff",
                                 "height": out_image.shape[0],
                                 "width": out_image.shape[1],
                                 "transform": out_transform})

        return out_image, out_meta
from fiona.crs import from_epsg
import geopandas as gpd
import pandas as pd
import os

# Script location
basepath = os.path.dirname(os.path.realpath(__file__))

# Projections: http://scitools.org.uk/cartopy/docs/v0.15/crs/projections.html

# Weather stations:
# Filepaths
country_fp = os.path.join(basepath, 'data', 'Finland_boundaries.shp')
station_fp = os.path.join(basepath, 'data', "Weather_stations_all_Finland.txt")
outfp = os.path.join(basepath, 'img', "FMI_stations_70_years_older.png")

# Read data
data = gpd.read_file(country_fp)
stations = pd.read_csv(station_fp, sep='\t', encoding='latin1', header=None,
                       names=[
                           'Name', 'FMISID', 'LPNN', 'WMO', 'Lat', 'Lon',
                           'Elevation', 'Groups', 'Started'
                       ])

# Prepare stations into GeoDataFrame
# ----------------------------------

# Create Point geometries from coordinates
stations['geometry'] = None
stations['geometry'] = stations.apply(
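# A minimal sketch of how the truncated apply call above could be completed
# (hypothetical, not from the original): build shapely Points from Lon/Lat and
# wrap the result as a GeoDataFrame in WGS84 using the from_epsg helper the
# snippet already imports.
from shapely.geometry import Point

stations['geometry'] = stations.apply(
    lambda row: Point(row['Lon'], row['Lat']), axis=1)
stations = gpd.GeoDataFrame(stations, geometry='geometry', crs=from_epsg(4326))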
def plot():
    # San Francisco
    map_path = file_path + "/resources/sf_block_groups/sf_block_groups_nowater.geojson"
    coc_path = file_path + "/resources/sf_block_groups/coc"
    plot_path = file_path + "/resources/sf_data/sf_overspace_plot_data.json"
    fig_path = file_path + "/results/sf_change_overspace.pdf"

    # Read data.
    with open(plot_path, "r") as plot_file:
        data = json.loads(plot_file.read().strip("\n"))

    coc = gpd.read_file(coc_path)
    coc = coc[coc["GEOID"].astype("int") - coc["GEOID"].astype("int") % 1000000 == 6075000000]
    coc = coc[coc["GEOID"].astype("int") != 6075017902]
    coc = coc[coc["COCFLAG__1"] == 1]
    coc = coc.to_crs({"init": "epsg:4326"})

    map = gpd.read_file(map_path)
    map["geoid"] = map["stfid"].astype("int")
    map = map[["geoid", "geometry"]]
    map["bg_lng"] = map.centroid.apply(lambda p: p.x)
    map["bg_lat"] = map.centroid.apply(lambda p: p.y)
    map = map[map["geoid"] != 60750179021]

    # Get supply curve data
    sup = pd.DataFrame.from_dict(data["sup"])
    sup["geoid"] = data["index"]
    sup = sup[sup["geoid"] != 60750601001]
    sup = sup[sup["geoid"] != 60750604001]
    sup = sup[sup["geoid"] != 60750332011]
    sup = sup[sup["geoid"] != 60750610002]
    sup = sup[sup["geoid"] != 60750264022]
    sup = sup[sup["geoid"] != 60750258002]
    sup[sup["geoid"] == 60750610001] = 1
    sup = map.merge(sup, on="geoid", how="left")

    # Get price curve data
    pri = pd.DataFrame.from_dict(data["pri"])
    pri["geoid"] = data["index"]
    pri = map.merge(pri, on="geoid", how="left")

    # Plot parameters and settings.
    font = FontProperties()
    font.set_weight("bold")
    font.set_size(10)
    matplotlib.rcParams.update({"font.size": 6})
    alpha = 0.5
    alpha2 = 0.3
    k = 2
    bar_cons = 0.66
    bar_mv = 0.27

    for i in [0, 1, 2, 3, 4]:
        ax[i].set_xlim([-122.513, -122.355])
        ax[i].set_ylim([37.707, 37.833])
        ax[i].set_axis_off()
        ax[i].xaxis.set_major_locator(plt.NullLocator())
        ax[i].yaxis.set_major_locator(plt.NullLocator())
        coc.plot(ax=ax[i], linewidth=0.5, alpha=0)

    app_list = ["uber", "lyft", "taxi"]
    cmap = "RdYlGn"
    f = 0
    for i in [0, 1, 2]:
        sup["plot"] = sup[app_list[i]]  # / sup["area"] * 581
        knn = neighbors.KNeighborsRegressor(k, "distance")
        # Fill empty area.
        train_x = sup[["plot", "bg_lat", "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = sup["plot"].dropna().values
        predict_x = sup[["bg_lat", "bg_lng"]].values
        sup["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = sup["plot"].min()
        vmax = sup["plot"].quantile(0.95)
        # plot
        sup.plot(ax=ax[i], linewidth=0, column="plot", cmap=cmap, alpha=alpha,
                 k=10, vmin=vmin, vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i]) + " Supply", fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.128 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm, cax=cax, alpha=alpha2, extend="both", orientation="horizontal")

    cmap = "RdYlGn_r"
    f = 2
    for i in [3, 4]:
        pri["plot"] = (pri[app_list[i - 3]] - 1) * 100
        knn = neighbors.KNeighborsRegressor(k, "distance")
        # Fill empty area.
        train_x = pri[["plot", "bg_lat", "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = pri["plot"].dropna().values
        predict_x = pri[["bg_lat", "bg_lng"]].values
        pri["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = 0
        vmax = 12
        print(pri["plot"].max() - pri["plot"].min())
        print(pri["plot"].std())
        # plot
        pri.plot(ax=ax[i], linewidth=0, column="plot", cmap=cmap, alpha=alpha,
                 k=10, vmin=vmin, vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 3]) + " Price", fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.128 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm, cax=cax, alpha=alpha2, extend="both", orientation="horizontal")

    # New York City
    map_path = file_path + "/resources/nyc_block_groups/nyc_bg_with_data_acs15.geojson"
    plot_path = file_path + "/resources/nyc_data/nyc_overspace_plot_data.json"
    fig_path = file_path + "/results/nyc_change_overspace.pdf"

    # Read data.
    with open(plot_path, "r") as plot_file:
        data = json.loads(plot_file.read().strip("\n"))

    map = gpd.read_file(map_path)
    coc = map.sort_values("income")[:80]
    map = map[map["population"].astype("float") > 10.0]
    map["geoid"] = map["geo_id"].astype("int")
    map = map[["geoid", "geometry"]]
    map["bg_lng"] = map.centroid.apply(lambda p: p.x)
    map["bg_lat"] = map.centroid.apply(lambda p: p.y)

    # Get supply curve data
    sup = pd.DataFrame.from_dict(data["sup"])
    sup["geoid"] = data["index"]
    sup = map.merge(sup, on="geoid", how="left")

    # Get price curve data
    pri = pd.DataFrame.from_dict(data["pri"])
    pri["geoid"] = data["index"]
    pri = pri[pri["uber"] > 1.0]
    pri = pri[pri["lyft"] > 1.0]
    pri = map.merge(pri, on="geoid", how="left")

    # Plot parameters and settings.
    bar_cons = 0.66
    bar_mv = 0.27

    for i in [5, 6, 7, 8]:
        ax[i].set_xlim([-74.055, -73.88])
        ax[i].set_ylim([40.64, 40.90])
        ax[i].set_axis_off()
        ax[i].xaxis.set_major_locator(plt.NullLocator())
        ax[i].yaxis.set_major_locator(plt.NullLocator())
        coc.plot(ax=ax[i], linewidth=0.5, alpha=0)

    app_list = ["uber", "lyft"]
    cmap = "RdYlGn"
    f = 0
    for i in [5, 6]:
        sup["plot"] = sup[app_list[i - 5]]
        vmin = sup["plot"].min()
        if i == 5:
            vmax = 7  # sup["plot"].quantile(0.9)
        else:
            vmax = 5
        # plot
        sup.plot(ax=ax[i], linewidth=0, column="plot", cmap=cmap, alpha=alpha,
                 k=10, vmin=vmin, vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 5]) + " Supply", fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.132 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm, cax=cax, alpha=alpha2, extend="both", orientation="horizontal")

    cmap = "RdYlGn_r"
    f = 2
    for i in [7, 8]:
        pri["plot"] = (pri[app_list[i - 3 - 4]] - 1) * 100
        knn = neighbors.KNeighborsRegressor(k, "distance")
        # Fill empty area.
        train_x = pri[["plot", "bg_lat", "bg_lng"]].dropna()[["bg_lat", "bg_lng"]].values
        train_y = pri["plot"].dropna().values
        predict_x = pri[["bg_lat", "bg_lng"]].values
        pri["plot"] = knn.fit(train_x, train_y).predict(predict_x)
        vmin = 0
        if i == 7:
            vmax = 2.5  # sup["plot"].quantile(0.9)
        else:
            vmax = 7
        print(pri["plot"].max() - pri["plot"].min())
        print(pri["plot"].std())
        # plot
        pri.plot(ax=ax[i], linewidth=0, column="plot", cmap=cmap, alpha=alpha,
                 k=10, vmin=vmin, vmax=vmax)
        ax[i].set_title(upperfirst(app_list[i - 3 - 4]) + " Price", fontproperties=font)
        fig = ax[i].get_figure()
        cax = fig.add_axes([0.132 + 0.087 * i, 0.07, 0.07, 0.02])
        sm = plt.cm.ScalarMappable(cmap=cmap, norm=plt.Normalize(vmin=vmin, vmax=vmax))
        sm._A = []
        fig.colorbar(sm, cax=cax, alpha=alpha2, extend="both", orientation="horizontal")