class TestLineStringPlotting(unittest.TestCase):

    def setUp(self):
        self.N = 10
        values = np.arange(self.N)
        self.lines = GeoSeries([LineString([(0, i), (9, i)])
                                for i in range(self.N)])
        self.df = GeoDataFrame({'geometry': self.lines, 'values': values})

    def test_single_color(self):
        ax = self.lines.plot(color='green')
        _check_colors(ax.get_lines(), ['green'] * self.N)

        ax = self.df.plot(color='green')
        _check_colors(ax.get_lines(), ['green'] * self.N)

        ax = self.df.plot(column='values', color='green')
        _check_colors(ax.get_lines(), ['green'] * self.N)

    def test_style_kwargs(self):
        # linestyle
        ax = self.lines.plot(linestyle='dashed')
        ls = [l.get_linestyle() for l in ax.get_lines()]
        assert ls == ['--'] * self.N

        ax = self.df.plot(linestyle='dashed')
        ls = [l.get_linestyle() for l in ax.get_lines()]
        assert ls == ['--'] * self.N

        ax = self.df.plot(column='values', linestyle='dashed')
        ls = [l.get_linestyle() for l in ax.get_lines()]
        assert ls == ['--'] * self.N
def main():
    # Read in data
    grid = GeoDataFrame.from_file(igrid)
    points = pd.read_csv(ipoints)

    # Create geopoints
    geopoints = points2geo(points, lat, lon)

    # Match projection info:
    # Points - should already be in WGS84
    geopoints.crs = wgs84
    geopoints['geometry'] = geopoints['geometry'].to_crs(epsg=4326)
    # Grid - project from meters to WGS84
    grid.crs = gridproj
    grid['geometry'] = grid['geometry'].to_crs(epsg=4326)

    # Create uid to group by
    grid['id'] = [i for i in range(len(grid))]

    # Spatial join points to grid
    join_inner_df = sjoin(grid, geopoints, how="inner")

    # Group by the uid and geometry - return mean
    join_inner_df = join_inner_df.groupby(['id', 'geometry'])['Decibel'].mean()
    # join_inner_df = join_inner_df.groupby(['id', 'geometry'])['Decibel'].max()

    # Create geodataframe & reset the index
    output = GeoDataFrame(join_inner_df)
    output = output.reset_index()

    # Write to file
    output.to_file(ofile)
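# A minimal, self-contained sketch of the sjoin + groupby pattern used above
# (the grid cells, point values, and 'Decibel' column are illustrative, not
# taken from the original inputs):
import pandas as pd
from shapely.geometry import Point, Polygon
from geopandas import GeoDataFrame
from geopandas.tools import sjoin

grid = GeoDataFrame({'geometry': [Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
                                  Polygon([(1, 0), (2, 0), (2, 1), (1, 1)])]})
grid['id'] = range(len(grid))
pts = GeoDataFrame({'geometry': [Point(0.5, 0.5), Point(0.6, 0.4),
                                 Point(1.5, 0.5)],
                    'Decibel': [60.0, 70.0, 80.0]})

# Each grid cell picks up the mean value of the points falling inside it.
joined = sjoin(grid, pts, how='inner')
print(joined.groupby('id')['Decibel'].mean())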
def to_file(self, filename, driver="ESRI Shapefile", **kwargs):
    from geopandas import GeoDataFrame
    data = GeoDataFrame({"geometry": self, "id": self.index.values},
                        index=self.index)
    data.crs = self.crs
    data.to_file(filename, driver, **kwargs)
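# A minimal usage sketch for the method above: the GeoSeries is wrapped in a
# GeoDataFrame with an 'id' column taken from the index, then written out.
# The output path 'points.shp' is illustrative.
from shapely.geometry import Point
from geopandas import GeoSeries

s = GeoSeries([Point(0, 0), Point(1, 1)])
s.crs = {'init': 'epsg:4326'}
s.to_file('points.shp')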
def test_overlay(dfs, how, use_sindex, expected_features):
    """
    Basic overlay test with small dummy example dataframes (from docs).
    Results obtained using QGIS 2.16 (Vector -> Geoprocessing Tools ->
    Intersection / Union / ...), saved to GeoJSON and pasted here.
    """
    df1, df2 = dfs
    result = overlay(df1, df2, how=how, use_sindex=use_sindex)

    # construction of result
    if how == 'identity':
        expected = pd.concat([
            GeoDataFrame.from_features(expected_features['intersection']),
            GeoDataFrame.from_features(expected_features['difference'])
        ], ignore_index=True)
    else:
        expected = GeoDataFrame.from_features(expected_features[how])

    # TODO needed adaptations to result
    # if how == 'union':
    #     result = result.drop(['idx1', 'idx2'], axis=1).sort_values(['col1', 'col2']).reset_index(drop=True)
    # elif how in ('intersection', 'identity'):
    #     result = result.drop(['idx1', 'idx2'], axis=1)

    assert_geodataframe_equal(result, expected)

    # for difference, also check the reversed operands
    if how == 'difference':
        result = overlay(df2, df1, how=how, use_sindex=use_sindex)
        expected = GeoDataFrame.from_features(
            expected_features['difference_inverse'])
        assert_geodataframe_equal(result, expected)
def setUp(self):
    N = 10
    # Data from http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip
    # saved as geopandas/examples/nybb_13a.zip.
    if not os.path.exists(os.path.join('examples', 'nybb_13a.zip')):
        with open(os.path.join('examples', 'nybb_13a.zip'), 'wb') as f:
            response = urllib2.urlopen(
                'http://www.nyc.gov/html/dcp/download/bytes/nybb_13a.zip')
            f.write(response.read())
    self.df = GeoDataFrame.from_file(
        '/nybb_13a/nybb.shp', vfs='zip://examples/nybb_13a.zip')
    self.tempdir = tempfile.mkdtemp()
    self.boros = np.array(['Staten Island', 'Queens', 'Brooklyn',
                           'Manhattan', 'Bronx'])
    self.crs = {'init': 'epsg:4326'}
    self.df2 = GeoDataFrame([
        {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y}
        for x, y in zip(range(N), range(N))], crs=self.crs)

    # Try to create the database; skip the db tests if something goes wrong.
    # If you'd like these tests to run, create a database called
    # 'test_geopandas' and enable postgis in it:
    # > createdb test_geopandas
    # > psql -c "CREATE EXTENSION postgis" -d test_geopandas
    try:
        self._create_db()
        self.run_db_test = True
    except (NameError, OperationalError):
        # NameError is thrown if psycopg2 fails to import at top of file
        # OperationalError is thrown if we can't connect to the database
        self.run_db_test = False
def read_file(filename, **kwargs):
    """
    Returns a GeoDataFrame from a file.

    *filename* is either the absolute or relative path to the file to be
    opened and *kwargs* are keyword args to be passed to the method when
    opening the file.

    Note: This method does not attempt to align rows. Properties that are
    not present in all features of the source file will not be properly
    aligned. This should be fixed.
    """
    geoms = []
    columns = defaultdict(lambda: [])
    bbox = kwargs.pop('bbox', None)
    with fiona.open(filename, **kwargs) as f:
        crs = f.crs
        if bbox is not None:
            assert len(bbox) == 4
            f_filt = f.filter(bbox=bbox)
        else:
            f_filt = f
        for rec in f_filt:
            geoms.append(shape(rec['geometry']))
            for key, value in rec['properties'].items():
                columns[key].append(value)
    geom = GeoSeries(geoms)
    df = GeoDataFrame(columns)
    df['geometry'] = geom
    df.crs = crs
    return df
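# A short usage sketch for the reader above; the file path and the bounding
# box are illustrative. Remaining kwargs are passed straight to fiona.open().
df = read_file('roads.shp', bbox=(-74.05, 40.6, -73.85, 40.9))
print(df.crs, len(df))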
class TestPolygonPlotting(unittest.TestCase):

    def setUp(self):
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(1, 0), (2, 0), (2, 1)])
        self.polys = GeoSeries([t1, t2])
        self.df = GeoDataFrame({'geometry': self.polys, 'values': [0, 1]})

    def test_single_color(self):
        ax = self.polys.plot(color='green')
        _check_colors(ax.patches, ['green'] * 2, alpha=0.5)

        ax = self.df.plot(color='green')
        _check_colors(ax.patches, ['green'] * 2, alpha=0.5)

        ax = self.df.plot(column='values', color='green')
        _check_colors(ax.patches, ['green'] * 2, alpha=0.5)

    def test_vmin_vmax(self):
        # when vmin == vmax, all polygons should be the same color
        ax = self.df.plot(column='values', categorical=True, vmin=0, vmax=0)
        cmap = get_cmap('Set1', 2)
        self.assertEqual(ax.patches[0].get_facecolor(),
                         ax.patches[1].get_facecolor())
def setup_method(self):
    nybb_filename = geopandas.datasets.get_path('nybb')
    self.polydf = read_file(nybb_filename)
    self.polydf = self.polydf[['geometry', 'BoroName', 'BoroCode']]

    self.polydf = self.polydf.rename(columns={'geometry': 'myshapes'})
    self.polydf = self.polydf.set_geometry('myshapes')

    self.polydf['manhattan_bronx'] = 5
    self.polydf.loc[3:4, 'manhattan_bronx'] = 6

    # Merged geometry
    manhattan_bronx = self.polydf.loc[3:4, ]
    others = self.polydf.loc[0:2, ]

    collapsed = [others.geometry.unary_union,
                 manhattan_bronx.geometry.unary_union]
    merged_shapes = GeoDataFrame(
        {'myshapes': collapsed}, geometry='myshapes',
        index=pd.Index([5, 6], name='manhattan_bronx'))

    # Different expected results
    self.first = merged_shapes.copy()
    self.first['BoroName'] = ['Staten Island', 'Manhattan']
    self.first['BoroCode'] = [5, 1]

    self.mean = merged_shapes.copy()
    self.mean['BoroCode'] = [4, 1.5]
def test_plot_GeoDataFrame_with_kwargs(self):
    """
    Test plotting a simple GeoDataFrame consisting of a series of polygons
    with increasing values using various extra kwargs.
    """
    clf()
    filename = 'poly_plot_with_kwargs.png'
    ts = np.linspace(0, 2 * pi, 10, endpoint=False)

    # Build GeoDataFrame from a series of triangles wrapping around in a
    # ring and a second column containing a list of increasing values.
    r1 = 1.0  # radius of inner ring boundary
    r2 = 1.5  # radius of outer ring boundary

    def make_triangle(t0, t1):
        return Polygon([(r1 * cos(t0), r1 * sin(t0)),
                        (r2 * cos(t0), r2 * sin(t0)),
                        (r1 * cos(t1), r1 * sin(t1))])

    polys = GeoSeries([make_triangle(t0, t1)
                       for t0, t1 in zip(ts, ts[1:])])
    values = np.arange(len(polys))
    df = GeoDataFrame({'geometry': polys, 'values': values})

    # Plot the GeoDataFrame using various keyword arguments to see if
    # they are honoured.
    ax = df.plot(column='values', cmap=cm.RdBu, vmin=+2, vmax=None,
                 figsize=(8, 4))
    self._compare_images(ax=ax, filename=filename)
def test_mixed_types_to_file(self):
    """ Test that mixed geometry types raise an error when writing to file """
    tempfilename = os.path.join(self.tempdir, 'test.shp')
    s = GeoDataFrame({'geometry': [Point(0, 0),
                                   Polygon([(0, 0), (1, 0), (1, 1)])]})
    with self.assertRaises(ValueError):
        s.to_file(tempfilename)
class TestFrameSindex:

    def setup_method(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        self.df = GeoDataFrame(data, geometry='location')

    def test_sindex(self):
        self.df.crs = {'init': 'epsg:4326'}
        assert self.df.sindex.size == 5
        hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4),
                                                objects=True))
        assert len(hits) == 2
        assert hits[0].object == 3

    def test_lazy_build(self):
        assert self.df._sindex is None
        assert self.df.sindex.size == 5
        assert self.df._sindex is not None

    def test_sindex_rebuild_on_set_geometry(self):
        # First build the sindex
        assert self.df.sindex is not None
        self.df.set_geometry(
            [Point(x, y) for x, y in zip(range(5, 10), range(5, 10))],
            inplace=True)
        assert self.df._sindex_generated is False
class TestFrameSindex(unittest.TestCase):

    def setUp(self):
        data = {"A": range(5), "B": range(-5, 0),
                "location": [Point(x, y)
                             for x, y in zip(range(5), range(5))]}
        self.df = GeoDataFrame(data, geometry='location')

    def test_sindex(self):
        self.df.crs = {'init': 'epsg:4326'}
        self.assertEqual(self.df.sindex.size, 5)
        hits = list(self.df.sindex.intersection((2.5, 2.5, 4, 4),
                                                objects=True))
        self.assertEqual(len(hits), 2)
        self.assertEqual(hits[0].object, 3)

    def test_lazy_build(self):
        self.assert_(self.df._sindex is None)
        self.assertEqual(self.df.sindex.size, 5)
        self.assert_(self.df._sindex is not None)

    def test_sindex_rebuild_on_set_geometry(self):
        # First build the sindex
        self.assert_(self.df.sindex is not None)
        self.df.set_geometry(
            [Point(x, y) for x, y in zip(range(5, 10), range(5, 10))],
            inplace=True)
        self.assert_(self.df._sindex_valid == False)
def test_empty_plot(self):
    s = GeoSeries([])
    with pytest.warns(UserWarning):
        ax = s.plot()
    assert len(ax.collections) == 0
    df = GeoDataFrame([])
    with pytest.warns(UserWarning):
        ax = df.plot()
    assert len(ax.collections) == 0
def as_dataframe(self):
    # Mimic BaseIO.as_dataframe() but with GeoDataFrame
    # (also, key_field is always set)
    from geopandas import GeoDataFrame
    key = self.get_key_field()
    data = [self.item_dict(row) for row in self.values()]
    df = GeoDataFrame(data)
    df.set_index(key, inplace=True)
    return df
def try_open_geojson(file_path):
    replaced = False
    try:
        gdf = GeoDataFrame.from_file(file_path)
    except fiona_err.CPLE_AppDefinedError:
        replace_geojson_id_field(file_path)
        replaced = True
        gdf = GeoDataFrame.from_file(file_path)
    return gdf, replaced
def main():
    # read in data and fix headers
    data = pd.read_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan.csv')
    data.columns = fixheaders(data)

    # electricity (mostly renewable), heating fuel (fossil fuels), &
    # transportation (fossil fuels) by LocalGovtName
    groupedData = data.groupby(['LocalGovtName', 'LocalGovtType',
                                'MeasurementDesc', 'Sector'])['EnergyGJ'].sum()
    groupedData.to_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan_grouped.csv')

    # read in grouped data
    newdata = pd.read_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan_grouped.csv', header=False)
    newdata.columns = ['city', 'ctype', 'sector', 'desc', 'consump']

    # Add in Metro Vancouver names:
    newdata['metroname'] = pd.Series()
    for i in np.arange(0, len(newdata), 1):
        if newdata.ctype[i] in ('City', 'Village'):
            newdata.loc[i, 'metroname'] = newdata.ctype[i] + " of " + newdata.city[i]
        elif (newdata.city[i] in ('Delta', 'Langley')
                and newdata.ctype[i] == 'District Municipality'):
            newdata.loc[i, 'metroname'] = "Township of " + newdata.city[i]
        elif (newdata.city[i] in ('Maple Ridge', 'North Vancouver', 'West Vancouver')
                and newdata.ctype[i] == 'District Municipality'):
            newdata.loc[i, 'metroname'] = "District of " + newdata.city[i]
        elif (newdata.city[i] == 'Bowen Island'
                and newdata.ctype[i] == 'Island Municipality'):
            newdata.loc[i, 'metroname'] = "Village of " + newdata.city[i]
        elif (newdata.city[i] == 'Metro-Vancouver'
                and newdata.ctype[i] == 'Regional District Unincorporated Areas'):
            newdata.loc[i, 'metroname'] = 'Electoral Area A'
        elif (newdata.city[i] == 'Metro-Vancouver'
                and newdata.ctype[i] == 'Regional District'):
            newdata.loc[i, 'metroname'] = newdata.city[i] + " " + newdata.ctype[i]

    newdata.to_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan_grouped_metronames.csv')

    # Pivot data
    newdata_pivot = newdata.pivot(index='metroname', columns='sector',
                                  values='consump')
    # fill na with 0
    newdata_pivot = newdata_pivot.fillna(0)
    # fix columns
    newdata_pivot.columns = fixheaders(newdata_pivot)
    newdata_pivot.to_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan_grouped_metronames_formatted.csv', index='metroname')

    ''' --- merge with shapefile --- '''
    metrovan = GeoDataFrame.from_file('/Users/Jozo/Dropbox/UBC/CIRs/EnergyExplorer/data/optimized/metroVan/metroVan.shp')
    joindata = pd.read_csv('/Users/Jozo/Dropbox/UBC/cirs/energyexplorer/data/optimized/energy/consumption/ceei_2010_metrovan_grouped_metronames_formatted.csv', header=False)

    output = GeoDataFrame.merge(metrovan, joindata,
                                left_on="NAMEMUNI", right_on="metroname")
    outputcol = [i for i in joindata.columns]
    outputcol.append('geometry')
    output = output[outputcol]
    output.to_file('/Users/Jozo/Dropbox/UBC/CIRs/EnergyExplorer/data/optimized/energy/consumption/ceei_2010/ceei_2010_metrovan_formatted.shp')
def test_to_file_types(self):
    """ Test various integer type columns (GH#93) """
    tempfilename = os.path.join(self.tempdir, 'int.shp')
    int_types = [np.int, np.int8, np.int16, np.int32, np.int64, np.intp,
                 np.uint8, np.uint16, np.uint32, np.uint64, np.long]
    geometry = self.df2.geometry
    data = dict((str(i), np.arange(len(geometry), dtype=dtype))
                for i, dtype in enumerate(int_types))
    df = GeoDataFrame(data, geometry=geometry)
    df.to_file(tempfilename)
def test_drop_duplicates_frame():
    # currently, dropping duplicates in a geodataframe produces a TypeError
    # better behavior would be dropping the duplicated points
    gdf_len = 3
    dup_gdf = GeoDataFrame({'geometry': [Point(0, 0) for _ in range(gdf_len)],
                            'value1': range(gdf_len)})
    dropped_geometry = dup_gdf.drop_duplicates(subset="geometry")
    assert len(dropped_geometry) == 1
    dropped_all = dup_gdf.drop_duplicates()
    assert len(dropped_all) == gdf_len
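# A possible workaround for the limitation noted above (not from the source):
# serialize each geometry to WKB so ordinary duplicated() can compare rows,
# then keep the first occurrence of each distinct point.
from shapely.geometry import Point
from geopandas import GeoDataFrame

gdf = GeoDataFrame({'geometry': [Point(0, 0), Point(0, 0), Point(1, 1)],
                    'value1': [0, 1, 2]})
deduped = gdf.loc[~gdf.geometry.apply(lambda g: g.wkb).duplicated()]
assert len(deduped) == 2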
def dfs(request):
    s1 = GeoSeries([Polygon([(0, 0), (2, 0), (2, 2), (0, 2)]),
                    Polygon([(2, 2), (4, 2), (4, 4), (2, 4)])])
    s2 = GeoSeries([Polygon([(1, 1), (3, 1), (3, 3), (1, 3)]),
                    Polygon([(3, 3), (5, 3), (5, 5), (3, 5)])])
    df1 = GeoDataFrame({'geometry': s1, 'col1': [1, 2]})
    df2 = GeoDataFrame({'geometry': s2, 'col2': [1, 2]})
    if request.param:
        df1.index = ['row1', 'row2']
    return df1, df2
def test_to_file_with_point_z(self):
    """Test that 3D geometries are retained in writes (GH #612)."""
    tempfilename = os.path.join(self.tempdir, 'test_3Dpoint.shp')
    point3d = Point(0, 0, 500)
    point2d = Point(1, 1)
    df = GeoDataFrame({'a': [1, 2]}, geometry=[point3d, point2d], crs={})
    df.to_file(tempfilename)
    df_read = GeoDataFrame.from_file(tempfilename)
    assert_geoseries_equal(df.geometry, df_read.geometry)
def test_to_file_with_poly_z(self):
    """Test that 3D geometries are retained in writes (GH #612)."""
    tempfilename = os.path.join(self.tempdir, 'test_3Dpoly.shp')
    poly3d = Polygon([[0, 0, 5], [0, 1, 5], [1, 1, 5], [1, 0, 5]])
    poly2d = Polygon([[0, 0], [0, 1], [1, 1], [1, 0]])
    df = GeoDataFrame({'a': [1, 2]}, geometry=[poly3d, poly2d], crs={})
    df.to_file(tempfilename)
    df_read = GeoDataFrame.from_file(tempfilename)
    assert_geoseries_equal(df.geometry, df_read.geometry)
def setup_method(self):
    self.N = 10
    self.points = GeoSeries(Point(i, i) for i in range(self.N))
    values = np.arange(self.N)
    self.df = GeoDataFrame({'geometry': self.points, 'values': values})

    multipoint1 = MultiPoint(self.points)
    multipoint2 = rotate(multipoint1, 90)
    self.df2 = GeoDataFrame({'geometry': [multipoint1, multipoint2],
                             'values': [0, 1]})
def setup_method(self):
    t1 = Polygon([(0, 0), (1, 0), (1, 1)])
    t2 = Polygon([(1, 0), (2, 0), (2, 1)])
    self.polys = GeoSeries([t1, t2], index=list('AB'))
    self.df = GeoDataFrame({'geometry': self.polys, 'values': [0, 1]})

    multipoly1 = MultiPolygon([t1, t2])
    multipoly2 = rotate(multipoly1, 180)
    self.df2 = GeoDataFrame({'geometry': [multipoly1, multipoly2],
                             'values': [0, 1]})
class TestPointZPlotting:

    def setup_method(self):
        self.N = 10
        self.points = GeoSeries(Point(i, i, i) for i in range(self.N))
        values = np.arange(self.N)
        self.df = GeoDataFrame({'geometry': self.points, 'values': values})

    def test_plot(self):
        # basic test that points with z coords don't break plotting
        self.df.plot()
class TestMerging(unittest.TestCase):

    def setUp(self):
        self.gseries = GeoSeries([Point(i, i) for i in range(3)])
        self.series = pd.Series([1, 2, 3])
        self.gdf = GeoDataFrame({'geometry': self.gseries,
                                 'values': range(3)})
        self.df = pd.DataFrame({'col1': [1, 2, 3], 'col2': [0.1, 0.2, 0.3]})

    def _check_metadata(self, gdf, geometry_column_name='geometry',
                        crs=None):
        self.assertEqual(gdf._geometry_column_name, geometry_column_name)
        self.assertEqual(gdf.crs, crs)

    def test_merge(self):
        res = self.gdf.merge(self.df, left_on='values', right_on='col1')

        # check result is a GeoDataFrame
        self.assert_(isinstance(res, GeoDataFrame))
        # check geometry property gives GeoSeries
        self.assert_(isinstance(res.geometry, GeoSeries))
        # check metadata
        self._check_metadata(res)

        # test that crs and other geometry name are preserved
        self.gdf.crs = {'init': 'epsg:4326'}
        self.gdf = (self.gdf.rename(columns={'geometry': 'points'})
                            .set_geometry('points'))
        res = self.gdf.merge(self.df, left_on='values', right_on='col1')
        self.assert_(isinstance(res, GeoDataFrame))
        self.assert_(isinstance(res.geometry, GeoSeries))
        self._check_metadata(res, 'points', self.gdf.crs)

    def test_concat_axis0(self):
        res = pd.concat([self.gdf, self.gdf])
        self.assertEqual(res.shape, (6, 2))
        self.assert_(isinstance(res, GeoDataFrame))
        self.assert_(isinstance(res.geometry, GeoSeries))
        self._check_metadata(res)

    def test_concat_axis1(self):
        res = pd.concat([self.gdf, self.df], axis=1)
        self.assertEqual(res.shape, (3, 4))
        self.assert_(isinstance(res, GeoDataFrame))
        self.assert_(isinstance(res.geometry, GeoSeries))
        self._check_metadata(res)
def _combine_data(
    *,
    location_data: gpd.GeoDataFrame,
    population_data: pd.DataFrame = None,
    location_index='NUMBER',
    **kwargs,
) -> gpd.GeoDataFrame:
    location_data = utils.split_plots(location_data, location_index)

    if isinstance(population_data, pd.DataFrame):
        location_data = location_data.set_index(location_index)
        location_data = location_data.join(population_data, **kwargs)

    return location_data
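# The join step above aligns population rows to locations on the index.
# A minimal standalone sketch of that alignment (the 'NUMBER' values and the
# 'population' column are illustrative, and utils.split_plots is omitted):
import geopandas as gpd
import pandas as pd
from shapely.geometry import Point

locations = gpd.GeoDataFrame({'NUMBER': [101, 102],
                              'geometry': [Point(0, 0), Point(1, 1)]})
population = pd.DataFrame({'population': [12, 34]},
                          index=pd.Index([101, 102], name='NUMBER'))

# Same alignment as in _combine_data: index locations by plot number,
# then left-join the population attributes onto them.
combined = locations.set_index('NUMBER').join(population)
print(combined[['population']])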
class TestLineStringPlotting(unittest.TestCase):

    def setUp(self):
        self.N = 10
        values = np.arange(self.N)
        self.lines = GeoSeries([LineString([(0, i), (4, i + 0.5), (9, i)])
                                for i in range(self.N)],
                               index=list('ABCDEFGHIJ'))
        self.df = GeoDataFrame({'geometry': self.lines, 'values': values})

    def test_single_color(self):
        ax = self.lines.plot(color='green')
        _check_colors(self.N, ax.collections[0], ['green'] * self.N)

        ax = self.df.plot(color='green')
        _check_colors(self.N, ax.collections[0], ['green'] * self.N)

        with warnings.catch_warnings(record=True) as _:  # don't print warning
            # 'color' overrides 'column'
            ax = self.df.plot(column='values', color='green')
            _check_colors(self.N, ax.collections[0], ['green'] * self.N)

    def test_style_kwargs(self):

        def linestyle_tuple_to_string(tup):
            """ Converts a linestyle of the form `(offset, onoffseq)`, as
            documented in `Collections.set_linestyle`, to a string
            representation, namely one of:
            { 'dashed', 'dotted', 'dashdot', 'solid' }.
            """
            from matplotlib.backend_bases import GraphicsContextBase
            reverse_idx = dict((v, k) for k, v in
                               GraphicsContextBase.dashd.items())
            return reverse_idx[tup]

        # linestyle
        ax = self.lines.plot(linestyle='dashed')
        ls = [linestyle_tuple_to_string(l)
              for l in ax.collections[0].get_linestyles()]
        assert ls == ['dashed']

        ax = self.df.plot(linestyle='dashed')
        ls = [linestyle_tuple_to_string(l)
              for l in ax.collections[0].get_linestyles()]
        assert ls == ['dashed']

        ax = self.df.plot(column='values', linestyle='dashed')
        ls = [linestyle_tuple_to_string(l)
              for l in ax.collections[0].get_linestyles()]
        assert ls == ['dashed']
def test_column_values():
    """
    Check that the dataframe plot method returns the same values with an
    input string (column in df), pd.Series, or np.array.
    """
    # Build test data
    t1 = Polygon([(0, 0), (1, 0), (1, 1)])
    t2 = Polygon([(1, 0), (2, 0), (2, 1)])
    polys = GeoSeries([t1, t2], index=list('AB'))
    df = GeoDataFrame({'geometry': polys, 'values': [0, 1]})

    # Test with continuous values
    ax = df.plot(column='values')
    colors = ax.collections[0].get_facecolors()
    ax = df.plot(column=df['values'])
    colors_series = ax.collections[0].get_facecolors()
    np.testing.assert_array_equal(colors, colors_series)
    ax = df.plot(column=df['values'].values)
    colors_array = ax.collections[0].get_facecolors()
    np.testing.assert_array_equal(colors, colors_array)

    # Test with categorical values
    ax = df.plot(column='values', categorical=True)
    colors = ax.collections[0].get_facecolors()
    ax = df.plot(column=df['values'], categorical=True)
    colors_series = ax.collections[0].get_facecolors()
    np.testing.assert_array_equal(colors, colors_series)
    ax = df.plot(column=df['values'].values, categorical=True)
    colors_array = ax.collections[0].get_facecolors()
    np.testing.assert_array_equal(colors, colors_array)

    # Check raised error: is the number of df rows equal to the column length?
    with pytest.raises(ValueError, match="different number of rows"):
        ax = df.plot(column=np.array([1, 2, 3]))
def test_explode_geodataframe(self, index_name):
    s = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)])
    df = GeoDataFrame({'col': [1, 2], 'geometry': s})
    df.index.name = index_name

    test_df = df.explode()

    expected_s = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)])
    expected_df = GeoDataFrame({'col': [1, 1, 2], 'geometry': expected_s})
    expected_index = MultiIndex(
        [[0, 1], [0, 1]],  # levels
        [[0, 0, 1], [0, 1, 0]],  # labels/codes
        names=[index_name, None])
    expected_df = expected_df.set_index(expected_index)
    assert_frame_equal(test_df, expected_df)
class Bench:
    # extensions for different file types to test
    params = [".shp", ".json", ".gpkg"]
    param_names = ["ext"]

    def setup(self, ext):
        self.driver_dict = {".shp": "ESRI Shapefile",
                            ".json": "GeoJSON",
                            ".gpkg": "GPKG"}
        driver = self.driver_dict[ext]

        num_points = 20000
        xs = np.random.rand(num_points)
        ys = np.random.rand(num_points)

        self.points = GeoSeries([Point(x, y) for (x, y) in zip(xs, ys)])
        self.df = GeoDataFrame({"geometry": self.points,
                                "x": xs, "y": ys,
                                "s": np.zeros(num_points, dtype="object")})

        self.tmpdir = tempfile.mkdtemp()
        self.series_filename = os.path.join(self.tmpdir, "series" + ext)
        self.frame_filename = os.path.join(self.tmpdir, "frame" + ext)
        self.points.to_file(self.series_filename, driver=driver)
        self.df.to_file(self.frame_filename, driver=driver)

    def teardown(self, ext):
        shutil.rmtree(self.tmpdir)

    def time_write_frame(self, ext):
        driver = self.driver_dict[ext]
        with tempfile.TemporaryDirectory() as tmpdir:
            out_filename = os.path.join(tmpdir, "frame" + ext)
            self.df.to_file(out_filename, driver=driver)

    def time_write_series(self, ext):
        driver = self.driver_dict[ext]
        with tempfile.TemporaryDirectory() as tmpdir:
            out_filename = os.path.join(tmpdir, "series" + ext)
            self.points.to_file(out_filename, driver=driver)

    def time_read_frame(self, ext):
        frame = GeoDataFrame.from_file(self.frame_filename)

    def time_read_series(self, ext):
        points = GeoSeries.from_file(self.series_filename)

    def time_read_series_from_frame(self, ext):
        points = GeoSeries.from_file(self.frame_filename)
def get_gdf(self):
    # coordinate system parameters
    crs = {'init': 'epsg:4326'}
    return GeoDataFrame(self.get_names(), crs=crs, geometry=self.get_geo())
import geopandas as gpd
def df():
    return GeoDataFrame(
        {'geometry': [Point(x, x) for x in range(3)],
         'value1': np.arange(3, dtype='int64'),
         'value2': np.array([1, 2, 1], dtype='int64')})
# dataset
df = pd.read_csv("data/small.csv")
print(f"read {len(df)}")

df['longitude'] = pd.to_numeric(df['longitude'])
df['latitude'] = pd.to_numeric(df['latitude'])
print("to_numeric")

df = df[(df['longitude'] <= 180) & (df['longitude'] >= -180)]
df = df[(df['latitude'] <= 90) & (df['latitude'] >= -90)]
print(f"filter {len(df)}")

# dataset to map
gdf = GeoDataFrame(
    df.drop(['longitude', 'latitude'], axis=1),
    crs={'init': 'epsg:4326'},
    geometry=[Point(xy) for xy in zip(df.longitude, df.latitude)])
print("geo")

# plot settings
fig, ax = plt.subplots()
ax.set_aspect('equal')

# plot
world = geopandas.read_file(geopandas.datasets.get_path('naturalearth_lowres'))
world.plot(ax=ax, color='white', edgecolor='black', linewidths=0.1)
gdf.plot(ax=ax, markersize=0.0001, alpha=0.3)
print("plot")

# save fig
plt.savefig("img/flow.png", dpi=1200, bbox_inches="tight")
def __geo_interface__(self):
    """Returns a GeoSeries as a python feature collection
    """
    from geopandas import GeoDataFrame
    return GeoDataFrame({'geometry': self}).__geo_interface__
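# A quick illustration of what the property above produces; a minimal sketch
# assuming a plain GeoSeries of points:
from shapely.geometry import Point
from geopandas import GeoSeries

s = GeoSeries([Point(0, 0), Point(1, 1)])
fc = s.__geo_interface__
# A GeoJSON-like FeatureCollection dict, one feature per geometry.
print(fc['type'])                     # 'FeatureCollection'
print(len(fc['features']))            # 2
print(fc['features'][0]['geometry'])  # e.g. {'type': 'Point', 'coordinates': (0.0, 0.0)}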
def plot_shoreline(shoreline: gpd.GeoDataFrame, ax):
    """
    Plot shoreline.
    """
    ax = shoreline.plot(ax=ax, linewidth=0.25, label="Shoreline",
                        color="black")
    return ax
def _split_exposure_highlow(exp_sub, mode, High_Value_Area_gdf):
    """ divide litpop exposure into high-value exposure and low-value
    exposure according to area queried in OSM, re-assign all low values
    to high-value centroids

    Parameters:
        exp_sub (exposure)
        mode (str)
        High_Value_Area_gdf (GeoDataFrame)

    Returns:
        exp_sub_high (exposure)
    """
    exp_sub_high = pd.DataFrame(columns=exp_sub.columns)
    exp_sub_low = pd.DataFrame(columns=exp_sub.columns)
    for i, pt in enumerate(exp_sub.geometry):
        if pt.within(High_Value_Area_gdf.loc[0]['geometry']):
            exp_sub_high = exp_sub_high.append(exp_sub.iloc[i])
        else:
            exp_sub_low = exp_sub_low.append(exp_sub.iloc[i])

    exp_sub_high = GeoDataFrame(exp_sub_high, crs=exp_sub.crs,
                                geometry=exp_sub_high.geometry)
    exp_sub_low = GeoDataFrame(exp_sub_low, crs=exp_sub.crs,
                               geometry=exp_sub_low.geometry)

    if mode == "nearest":
        # assign asset values of low-value points to the nearest point in
        # the high-value dataframe
        pointsToAssign = exp_sub_high.geometry.unary_union
        exp_sub_high["addedValNN"] = 0
        for i in range(0, len(exp_sub_low)):
            nearest = exp_sub_high.geometry == nearest_points(
                exp_sub_low.iloc[i].geometry, pointsToAssign)[1]  # point
            exp_sub_high.addedValNN.loc[nearest] = exp_sub_low.iloc[i].value
        exp_sub_high["combinedValNN"] = \
            exp_sub_high[['addedValNN', 'value']].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old',
                                     'combinedValNN': 'value'},
                            inplace=True)
    elif mode == "even":
        # assign asset values of low-value points evenly to points in the
        # high-value dataframe
        exp_sub_high['addedValeven'] = \
            sum(exp_sub_low.value) / len(exp_sub_high)
        exp_sub_high["combinedValeven"] = \
            exp_sub_high[['addedValeven', 'value']].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old',
                                     'combinedValeven': 'value'},
                            inplace=True)
    elif mode == "proportional":
        # assign asset values of low-value points proportionally to the
        # value of points in the high-value dataframe
        exp_sub_high['addedValprop'] = 0
        for i in range(0, len(exp_sub_high)):
            asset_factor = exp_sub_high.iloc[i].value / sum(exp_sub_high.value)
            exp_sub_high.addedValprop.iloc[i] = \
                asset_factor * sum(exp_sub_low.value)
        exp_sub_high["combinedValprop"] = \
            exp_sub_high[['addedValprop', 'value']].sum(axis=1)
        exp_sub_high.rename(columns={'value': 'value_old',
                                     'combinedValprop': 'value'},
                            inplace=True)
    else:
        print("No proper re-assignment mode set. "
              "Please choose either nearest, even or proportional.")

    return exp_sub_high
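# The 'nearest' branch above hinges on shapely.ops.nearest_points run against
# a unary union of candidate points; a minimal sketch of that lookup in
# isolation (the coordinates are illustrative):
from shapely.geometry import Point, MultiPoint
from shapely.ops import nearest_points

candidates = MultiPoint([Point(0, 0), Point(5, 5), Point(10, 0)])
query = Point(4, 4)

# nearest_points returns a pair (nearest point of first arg, nearest point
# of second arg); index [1] picks the matching candidate, as in the code above.
match = nearest_points(query, candidates)[1]
print(match)  # POINT (5 5)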
class TestPointPlotting:

    def setup_method(self):
        self.N = 10
        self.points = GeoSeries(Point(i, i) for i in range(self.N))
        values = np.arange(self.N)
        self.df = GeoDataFrame({"geometry": self.points, "values": values})
        self.df["exp"] = (values * 10) ** 3

        multipoint1 = MultiPoint(self.points)
        multipoint2 = rotate(multipoint1, 90)
        self.df2 = GeoDataFrame({"geometry": [multipoint1, multipoint2],
                                 "values": [0, 1]})

    def test_figsize(self):
        ax = self.points.plot(figsize=(1, 1))
        np.testing.assert_array_equal(ax.figure.get_size_inches(), (1, 1))

        ax = self.df.plot(figsize=(1, 1))
        np.testing.assert_array_equal(ax.figure.get_size_inches(), (1, 1))

    def test_default_colors(self):
        # without specifying values -> uniform color

        # GeoSeries
        ax = self.points.plot()
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      [MPL_DFT_COLOR] * self.N)

        # GeoDataFrame
        ax = self.df.plot()
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      [MPL_DFT_COLOR] * self.N)

        # with specifying values -> different colors for all 10 values
        ax = self.df.plot(column="values")
        cmap = plt.get_cmap()
        expected_colors = cmap(np.arange(self.N) / (self.N - 1))
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      expected_colors)

    def test_colormap(self):
        # without specifying values but cmap specified -> no uniform color
        # but different colors for all points

        # GeoSeries
        ax = self.points.plot(cmap="RdYlGn")
        cmap = plt.get_cmap("RdYlGn")
        exp_colors = cmap(np.arange(self.N) / (self.N - 1))
        _check_colors(self.N, ax.collections[0].get_facecolors(), exp_colors)

        ax = self.df.plot(cmap="RdYlGn")
        _check_colors(self.N, ax.collections[0].get_facecolors(), exp_colors)

        # with specifying values -> different colors for all 10 values
        ax = self.df.plot(column="values", cmap="RdYlGn")
        cmap = plt.get_cmap("RdYlGn")
        _check_colors(self.N, ax.collections[0].get_facecolors(), exp_colors)

        # when using a cmap with specified lut -> limited number of
        # different colors
        ax = self.points.plot(cmap=plt.get_cmap("Set1", lut=5))
        cmap = plt.get_cmap("Set1", lut=5)
        exp_colors = cmap(list(range(5)) * 3)
        _check_colors(self.N, ax.collections[0].get_facecolors(), exp_colors)

    def test_single_color(self):
        ax = self.points.plot(color="green")
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      ["green"] * self.N)

        ax = self.df.plot(color="green")
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      ["green"] * self.N)

        # check rgba tuple GH1178
        ax = self.df.plot(color=(0.5, 0.5, 0.5))
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5)] * self.N)

        ax = self.df.plot(color=(0.5, 0.5, 0.5, 0.5))
        _check_colors(self.N, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5, 0.5)] * self.N)

        with pytest.raises(TypeError):
            self.df.plot(color="not color")

        with warnings.catch_warnings(record=True) as _:  # don't print warning
            # 'color' overrides 'column'
            ax = self.df.plot(column="values", color="green")
            _check_colors(self.N, ax.collections[0].get_facecolors(),
                          ["green"] * self.N)

    def test_markersize(self):
        ax = self.points.plot(markersize=10)
        assert ax.collections[0].get_sizes() == [10]

        ax = self.df.plot(markersize=10)
        assert ax.collections[0].get_sizes() == [10]

        ax = self.df.plot(column="values", markersize=10)
        assert ax.collections[0].get_sizes() == [10]

        ax = self.df.plot(markersize="values")
        assert (ax.collections[0].get_sizes() == self.df["values"]).all()

        ax = self.df.plot(column="values", markersize="values")
        assert (ax.collections[0].get_sizes() == self.df["values"]).all()

    def test_style_kwargs(self):
        ax = self.points.plot(edgecolors="k")
        assert (ax.collections[0].get_edgecolor() == [0, 0, 0, 1]).all()

    def test_legend(self):
        with warnings.catch_warnings(record=True) as _:  # don't print warning
            # legend ignored if color is given.
            ax = self.df.plot(column="values", color="green", legend=True)
            assert len(ax.get_figure().axes) == 1  # no separate legend axis

            # legend ignored if no column is given.
            ax = self.df.plot(legend=True)
            assert len(ax.get_figure().axes) == 1  # no separate legend axis

        # Continuous legend: the colorbar matches the Point colors
        ax = self.df.plot(column="values", cmap="RdYlGn", legend=True)
        point_colors = ax.collections[0].get_facecolors()
        cbar_colors = ax.get_figure().axes[1].collections[0].get_facecolors()
        # first point == bottom of colorbar
        np.testing.assert_array_equal(point_colors[0], cbar_colors[0])
        # last point == top of colorbar
        np.testing.assert_array_equal(point_colors[-1], cbar_colors[-1])

        # Categorical legend: the colorbar matches the Point colors
        ax = self.df.plot(column="values", categorical=True, legend=True)
        point_colors = ax.collections[0].get_facecolors()
        cbar_colors = ax.get_legend().axes.collections[0].get_facecolors()
        # first point == bottom of colorbar
        np.testing.assert_array_equal(point_colors[0], cbar_colors[0])
        # last point == top of colorbar
        np.testing.assert_array_equal(point_colors[-1], cbar_colors[-1])

        # Normalized legend: the colorbar matches the Point colors
        norm = matplotlib.colors.LogNorm(vmin=self.df[1:].exp.min(),
                                         vmax=self.df[1:].exp.max())
        ax = self.df[1:].plot(column="exp", cmap="RdYlGn", legend=True,
                              norm=norm)
        point_colors = ax.collections[0].get_facecolors()
        cbar_colors = ax.get_figure().axes[1].collections[0].get_facecolors()
        # first point == bottom of colorbar
        np.testing.assert_array_equal(point_colors[0], cbar_colors[0])
        # last point == top of colorbar
        np.testing.assert_array_equal(point_colors[-1], cbar_colors[-1])
        # colorbar generated proper long transition
        assert cbar_colors.shape == (256, 4)

    def test_subplots_norm(self):
        # colors of subplots are the same as for plot (norm is applied)
        cmap = matplotlib.cm.viridis_r
        norm = matplotlib.colors.Normalize(vmin=0, vmax=20)
        ax = self.df.plot(column="values", cmap=cmap, norm=norm)
        actual_colors_orig = ax.collections[0].get_facecolors()
        exp_colors = cmap(np.arange(10) / (20))
        np.testing.assert_array_equal(exp_colors, actual_colors_orig)

        fig, ax = plt.subplots()
        self.df[1:].plot(column="values", ax=ax, norm=norm, cmap=cmap)
        actual_colors_sub = ax.collections[0].get_facecolors()
        np.testing.assert_array_equal(actual_colors_orig[1],
                                      actual_colors_sub[0])

    def test_empty_plot(self):
        s = GeoSeries([])
        with pytest.warns(UserWarning):
            ax = s.plot()
        assert len(ax.collections) == 0
        df = GeoDataFrame([])
        with pytest.warns(UserWarning):
            ax = df.plot()
        assert len(ax.collections) == 0

    def test_multipoints(self):
        # MultiPoints
        ax = self.df2.plot()
        _check_colors(4, ax.collections[0].get_facecolors(),
                      [MPL_DFT_COLOR] * 4)

        ax = self.df2.plot(column="values")
        cmap = plt.get_cmap()
        expected_colors = [cmap(0)] * self.N + [cmap(1)] * self.N
        _check_colors(2, ax.collections[0].get_facecolors(), expected_colors)

        ax = self.df2.plot(color=["r", "b"])
        # colors are repeated for all components within a MultiPolygon
        _check_colors(2, ax.collections[0].get_facecolors(),
                      ["r"] * 10 + ["b"] * 10)
class TestPolygonPlotting:

    def setup_method(self):
        t1 = Polygon([(0, 0), (1, 0), (1, 1)])
        t2 = Polygon([(1, 0), (2, 0), (2, 1)])
        self.polys = GeoSeries([t1, t2], index=list("AB"))
        self.df = GeoDataFrame({"geometry": self.polys, "values": [0, 1]})

        multipoly1 = MultiPolygon([t1, t2])
        multipoly2 = rotate(multipoly1, 180)
        self.df2 = GeoDataFrame({"geometry": [multipoly1, multipoly2],
                                 "values": [0, 1]})

        t3 = Polygon([(2, 0), (3, 0), (3, 1)])
        df_nan = GeoDataFrame({"geometry": t3, "values": [np.nan]})
        self.df3 = self.df.append(df_nan)

    def test_single_color(self):
        ax = self.polys.plot(color="green")
        _check_colors(2, ax.collections[0].get_facecolors(), ["green"] * 2)
        # color only sets facecolor
        _check_colors(2, ax.collections[0].get_edgecolors(), ["k"] * 2)

        ax = self.df.plot(color="green")
        _check_colors(2, ax.collections[0].get_facecolors(), ["green"] * 2)
        _check_colors(2, ax.collections[0].get_edgecolors(), ["k"] * 2)

        # check rgba tuple GH1178
        ax = self.df.plot(color=(0.5, 0.5, 0.5))
        _check_colors(2, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5)] * 2)
        ax = self.df.plot(color=(0.5, 0.5, 0.5, 0.5))
        _check_colors(2, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5, 0.5)] * 2)
        with pytest.raises(TypeError):
            self.df.plot(color="not color")

        with warnings.catch_warnings(record=True) as _:  # don't print warning
            # 'color' overrides 'values'
            ax = self.df.plot(column="values", color="green")
            _check_colors(2, ax.collections[0].get_facecolors(),
                          ["green"] * 2)

    def test_vmin_vmax(self):
        # when vmin == vmax, all polygons should be the same color

        # non-categorical
        ax = self.df.plot(column="values", categorical=False, vmin=0, vmax=0)
        actual_colors = ax.collections[0].get_facecolors()
        np.testing.assert_array_equal(actual_colors[0], actual_colors[1])

        # categorical
        ax = self.df.plot(column="values", categorical=True, vmin=0, vmax=0)
        actual_colors = ax.collections[0].get_facecolors()
        np.testing.assert_array_equal(actual_colors[0], actual_colors[1])

        # vmin and vmax set correctly for array with NaN (GitHub issue 877)
        ax = self.df3.plot(column="values")
        actual_colors = ax.collections[0].get_facecolors()
        assert np.any(np.not_equal(actual_colors[0], actual_colors[1]))

    def test_style_kwargs(self):
        # facecolor overrides default cmap when color is not set
        ax = self.polys.plot(facecolor="k")
        _check_colors(2, ax.collections[0].get_facecolors(), ["k"] * 2)

        # facecolor overrides more general-purpose color when both are set
        ax = self.polys.plot(color="red", facecolor="k")
        # TODO with new implementation, color overrides facecolor
        # _check_colors(2, ax.collections[0], ['k']*2, alpha=0.5)

        # edgecolor
        ax = self.polys.plot(edgecolor="red")
        np.testing.assert_array_equal([(1, 0, 0, 1)],
                                      ax.collections[0].get_edgecolors())

        ax = self.df.plot("values", edgecolor="red")
        np.testing.assert_array_equal([(1, 0, 0, 1)],
                                      ax.collections[0].get_edgecolors())

        # alpha sets both edge and face
        ax = self.polys.plot(facecolor="g", edgecolor="r", alpha=0.4)
        _check_colors(2, ax.collections[0].get_facecolors(), ["g"] * 2,
                      alpha=0.4)
        _check_colors(2, ax.collections[0].get_edgecolors(), ["r"] * 2,
                      alpha=0.4)

        # check rgba tuple GH1178 for face and edge
        ax = self.df.plot(facecolor=(0.5, 0.5, 0.5),
                          edgecolor=(0.4, 0.5, 0.6))
        _check_colors(2, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5)] * 2)
        _check_colors(2, ax.collections[0].get_edgecolors(),
                      [(0.4, 0.5, 0.6)] * 2)
        ax = self.df.plot(facecolor=(0.5, 0.5, 0.5, 0.5),
                          edgecolor=(0.4, 0.5, 0.6, 0.5))
        _check_colors(2, ax.collections[0].get_facecolors(),
                      [(0.5, 0.5, 0.5, 0.5)] * 2)
        _check_colors(2, ax.collections[0].get_edgecolors(),
                      [(0.4, 0.5, 0.6, 0.5)] * 2)

    def test_legend_kwargs(self):
        ax = self.df.plot(column="values", categorical=True, legend=True,
                          legend_kwds={"frameon": False})
        assert ax.get_legend().get_frame_on() is False

    def test_colorbar_kwargs(self):
        # Test if kwargs are passed to colorbar
        label_txt = "colorbar test"

        ax = self.df.plot(column="values", categorical=False, legend=True,
                          legend_kwds={"label": label_txt})
        assert ax.get_figure().axes[1].get_ylabel() == label_txt

        ax = self.df.plot(column="values", categorical=False, legend=True,
                          legend_kwds={"label": label_txt,
                                       "orientation": "horizontal"})
        assert ax.get_figure().axes[1].get_xlabel() == label_txt

    def test_multipolygons(self):
        # MultiPolygons
        ax = self.df2.plot()
        assert len(ax.collections[0].get_paths()) == 4
        _check_colors(4, ax.collections[0].get_facecolors(),
                      [MPL_DFT_COLOR] * 4)

        ax = self.df2.plot("values")
        cmap = plt.get_cmap(lut=2)
        # colors are repeated for all components within a MultiPolygon
        expected_colors = [cmap(0), cmap(0), cmap(1), cmap(1)]
        _check_colors(4, ax.collections[0].get_facecolors(), expected_colors)

        ax = self.df2.plot(color=["r", "b"])
        # colors are repeated for all components within a MultiPolygon
        _check_colors(4, ax.collections[0].get_facecolors(),
                      ["r", "r", "b", "b"])

    def test_subplots_norm(self):
        # colors of subplots are the same as for plot (norm is applied)
        cmap = matplotlib.cm.viridis_r
        norm = matplotlib.colors.Normalize(vmin=0, vmax=10)
        ax = self.df.plot(column="values", cmap=cmap, norm=norm)
        actual_colors_orig = ax.collections[0].get_facecolors()
        exp_colors = cmap(np.arange(2) / (10))
        np.testing.assert_array_equal(exp_colors, actual_colors_orig)

        fig, ax = plt.subplots()
        self.df[1:].plot(column="values", ax=ax, norm=norm, cmap=cmap)
        actual_colors_sub = ax.collections[0].get_facecolors()
        np.testing.assert_array_equal(actual_colors_orig[1],
                                      actual_colors_sub[0])
class TestLineStringPlotting:

    def setup_method(self):
        self.N = 10
        values = np.arange(self.N)
        self.lines = GeoSeries(
            [LineString([(0, i), (4, i + 0.5), (9, i)])
             for i in range(self.N)],
            index=list("ABCDEFGHIJ"))
        self.df = GeoDataFrame({"geometry": self.lines, "values": values})

        multiline1 = MultiLineString(self.lines.loc["A":"B"].values)
        multiline2 = MultiLineString(self.lines.loc["C":"D"].values)
        self.df2 = GeoDataFrame({"geometry": [multiline1, multiline2],
                                 "values": [0, 1]})

    def test_single_color(self):
        ax = self.lines.plot(color="green")
        _check_colors(self.N, ax.collections[0].get_colors(),
                      ["green"] * self.N)

        ax = self.df.plot(color="green")
        _check_colors(self.N, ax.collections[0].get_colors(),
                      ["green"] * self.N)

        # check rgba tuple GH1178
        ax = self.df.plot(color=(0.5, 0.5, 0.5, 0.5))
        _check_colors(self.N, ax.collections[0].get_colors(),
                      [(0.5, 0.5, 0.5, 0.5)] * self.N)

        ax = self.df.plot(color=(0.5, 0.5, 0.5, 0.5))
        _check_colors(self.N, ax.collections[0].get_colors(),
                      [(0.5, 0.5, 0.5, 0.5)] * self.N)

        with pytest.raises(TypeError):
            self.df.plot(color="not color")

        with warnings.catch_warnings(record=True) as _:  # don't print warning
            # 'color' overrides 'column'
            ax = self.df.plot(column="values", color="green")
            _check_colors(self.N, ax.collections[0].get_colors(),
                          ["green"] * self.N)

    def test_style_kwargs(self):
        # linestyle (style patterns depend on linewidth, therefore pin to 1)
        linestyle = "dashed"
        linewidth = 1

        ax = self.lines.plot(linestyle=linestyle, linewidth=linewidth)
        exp_ls = _style_to_linestring_onoffseq(linestyle, linewidth)
        for ls in ax.collections[0].get_linestyles():
            assert ls[0] == exp_ls[0]
            assert ls[1] == exp_ls[1]

        ax = self.df.plot(linestyle=linestyle, linewidth=linewidth)
        for ls in ax.collections[0].get_linestyles():
            assert ls[0] == exp_ls[0]
            assert ls[1] == exp_ls[1]

        ax = self.df.plot(column="values", linestyle=linestyle,
                          linewidth=linewidth)
        for ls in ax.collections[0].get_linestyles():
            assert ls[0] == exp_ls[0]
            assert ls[1] == exp_ls[1]

    def test_subplots_norm(self):
        # colors of subplots are the same as for plot (norm is applied)
        cmap = matplotlib.cm.viridis_r
        norm = matplotlib.colors.Normalize(vmin=0, vmax=20)
        ax = self.df.plot(column="values", cmap=cmap, norm=norm)
        actual_colors_orig = ax.collections[0].get_edgecolors()
        exp_colors = cmap(np.arange(10) / (20))
        np.testing.assert_array_equal(exp_colors, actual_colors_orig)

        fig, ax = plt.subplots()
        self.df[1:].plot(column="values", ax=ax, norm=norm, cmap=cmap)
        actual_colors_sub = ax.collections[0].get_edgecolors()
        np.testing.assert_array_equal(actual_colors_orig[1],
                                      actual_colors_sub[0])

    def test_multilinestrings(self):
        # MultiLineStrings
        ax = self.df2.plot()
        assert len(ax.collections[0].get_paths()) == 4
        _check_colors(4, ax.collections[0].get_facecolors(),
                      [MPL_DFT_COLOR] * 4)

        ax = self.df2.plot("values")
        cmap = plt.get_cmap(lut=2)
        # colors are repeated for all components within a MultiLineString
        expected_colors = [cmap(0), cmap(0), cmap(1), cmap(1)]
        _check_colors(4, ax.collections[0].get_facecolors(), expected_colors)

        ax = self.df2.plot(color=["r", "b"])
        # colors are repeated for all components within a MultiLineString
        _check_colors(4, ax.collections[0].get_facecolors(),
                      ["r", "r", "b", "b"])
def setup_method(self):
    self.N = 10
    self.points = GeoSeries(Point(i, i, i) for i in range(self.N))
    values = np.arange(self.N)
    self.df = GeoDataFrame({"geometry": self.points, "values": values})
def _read_file(filename, bbox=None, mask=None, rows=None, **kwargs):
    """
    Returns a GeoDataFrame from a file or URL.

    .. versionadded:: 0.7.0 mask, rows

    Parameters
    ----------
    filename : str, path object or file-like object
        Either the absolute or relative path to the file or URL to
        be opened, or any object with a read() method (such as an open
        file or StringIO)
    bbox : tuple | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter features by given bounding box, GeoSeries, GeoDataFrame or a
        shapely geometry. CRS mis-matches are resolved if given a GeoSeries
        or GeoDataFrame. Cannot be used with mask.
    mask : dict | GeoDataFrame or GeoSeries | shapely Geometry, default None
        Filter for features that intersect with the given dict-like geojson
        geometry, GeoSeries, GeoDataFrame or shapely geometry.
        CRS mis-matches are resolved if given a GeoSeries or GeoDataFrame.
        Cannot be used with bbox.
    rows : int or slice, default None
        Load in specific rows by passing an integer (first `n` rows) or a
        slice() object.
    **kwargs :
        Keyword args to be passed to the `open` or `BytesCollection` method
        in the fiona library when opening the file. For more information on
        possible keywords, type:
        ``import fiona; help(fiona.open)``

    Examples
    --------
    >>> df = geopandas.read_file("nybb.shp")  # doctest: +SKIP

    Specifying layer of GPKG:

    >>> df = geopandas.read_file("file.gpkg", layer='cities')  # doctest: +SKIP

    Reading only first 10 rows:

    >>> df = geopandas.read_file("nybb.shp", rows=10)  # doctest: +SKIP

    Reading only geometries intersecting ``mask``:

    >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP

    Reading only geometries intersecting ``bbox``:

    >>> df = geopandas.read_file("nybb.shp", bbox=(0, 10, 0, 20))  # doctest: +SKIP

    Returns
    -------
    :obj:`geopandas.GeoDataFrame` or :obj:`pandas.DataFrame` :
        If `ignore_geometry=True` a :obj:`pandas.DataFrame` will be returned.

    Notes
    -----
    The format drivers will attempt to detect the encoding of your data, but
    may fail. In this case, the proper encoding can be specified explicitly
    by using the encoding keyword parameter, e.g. ``encoding='utf-8'``.
    """
    _check_fiona("'read_file' function")

    if _is_url(filename):
        req = _urlopen(filename)
        path_or_bytes = req.read()
        reader = fiona.BytesCollection
    elif pd.api.types.is_file_like(filename):
        data = filename.read()
        path_or_bytes = data.encode("utf-8") if isinstance(data, str) else data
        reader = fiona.BytesCollection
    else:
        # Opening a file via URL or file-like-object above automatically
        # detects a zipped file. In order to match that behavior, attempt
        # to add a zip scheme if missing.
        if _is_zip(str(filename)):
            parsed = fiona.parse_path(str(filename))
            if isinstance(parsed, fiona.path.ParsedPath):
                # If fiona is able to parse the path, we can safely look at
                # the scheme and update it to have a zip scheme if necessary.
                schemes = (parsed.scheme or "").split("+")
                if "zip" not in schemes:
                    parsed.scheme = "+".join(["zip"] + schemes)
                filename = parsed.name
            elif isinstance(parsed, fiona.path.UnparsedPath) and not str(
                    filename).startswith("/vsi"):
                # If fiona is unable to parse the path, it might have a
                # Windows drive scheme. Try adding zip:// to the front. If
                # the path starts with "/vsi" it is a legacy GDAL path type,
                # so let it pass unmodified.
                filename = "zip://" + parsed.name
        path_or_bytes = filename
        reader = fiona.open

    with fiona_env():
        with reader(path_or_bytes, **kwargs) as features:

            # In a future Fiona release the crs attribute of features will
            # no longer be a dict, but will behave like a dict. So this
            # should be forwards compatible
            crs = (
                features.crs["init"]
                if features.crs and "init" in features.crs
                else features.crs_wkt
            )

            # handle loading the bounding box
            if bbox is not None:
                if isinstance(bbox, (GeoDataFrame, GeoSeries)):
                    bbox = tuple(bbox.to_crs(crs).total_bounds)
                elif isinstance(bbox, BaseGeometry):
                    bbox = bbox.bounds
                assert len(bbox) == 4
            # handle loading the mask
            elif isinstance(mask, (GeoDataFrame, GeoSeries)):
                mask = mapping(mask.to_crs(crs).unary_union)
            elif isinstance(mask, BaseGeometry):
                mask = mapping(mask)

            # setup the data loading filter
            if rows is not None:
                if isinstance(rows, int):
                    rows = slice(rows)
                elif not isinstance(rows, slice):
                    raise TypeError("'rows' must be an integer or a slice.")
                f_filt = features.filter(rows.start, rows.stop, rows.step,
                                         bbox=bbox, mask=mask)
            elif any((bbox, mask)):
                f_filt = features.filter(bbox=bbox, mask=mask)
            else:
                f_filt = features

            # get list of columns
            columns = list(features.schema["properties"])
            if kwargs.get("ignore_geometry", False):
                return pd.DataFrame(
                    [record["properties"] for record in f_filt],
                    columns=columns)

            return GeoDataFrame.from_features(
                f_filt, crs=crs, columns=columns + ["geometry"])
class TestDataFrame(unittest.TestCase): def setUp(self): N = 10 nybb_filename = download_nybb() self.df = read_file('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename) with fiona.open('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename) as f: self.schema = f.schema self.tempdir = tempfile.mkdtemp() self.boros = self.df['BoroName'] self.crs = {'init': 'epsg:4326'} self.df2 = GeoDataFrame([ {'geometry': Point(x, y), 'value1': x + y, 'value2': x * y} for x, y in zip(range(N), range(N))], crs=self.crs) self.df3 = read_file('examples/null_geom.geojson') self.line_paths = self.df3['Name'] def tearDown(self): shutil.rmtree(self.tempdir) def test_df_init(self): self.assertTrue(type(self.df2) is GeoDataFrame) self.assertTrue(self.df2.crs == self.crs) def test_different_geo_colname(self): data = {"A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))]} df = GeoDataFrame(data, crs=self.crs, geometry='location') locs = GeoSeries(data['location'], crs=self.crs) assert_geoseries_equal(df.geometry, locs) self.assert_('geometry' not in df) self.assertEqual(df.geometry.name, 'location') # internal implementation detail self.assertEqual(df._geometry_column_name, 'location') geom2 = [Point(x, y) for x, y in zip(range(5, 10), range(5))] df2 = df.set_geometry(geom2, crs='dummy_crs') self.assert_('geometry' in df2) self.assert_('location' in df2) self.assertEqual(df2.crs, 'dummy_crs') self.assertEqual(df2.geometry.crs, 'dummy_crs') # reset so it outputs okay df2.crs = df.crs assert_geoseries_equal(df2.geometry, GeoSeries(geom2, crs=df2.crs)) # for right now, non-geometry comes back as series assert_geoseries_equal(df2['location'], df['location'], check_series_type=False, check_dtype=False) def test_geo_getitem(self): data = {"A": range(5), "B": range(-5, 0), "location": [Point(x, y) for x, y in zip(range(5), range(5))]} df = GeoDataFrame(data, crs=self.crs, geometry='location') self.assert_(isinstance(df.geometry, GeoSeries)) df['geometry'] = df["A"] self.assert_(isinstance(df.geometry, GeoSeries)) self.assertEqual(df.geometry[0], data['location'][0]) # good if this changed in the future self.assert_(not isinstance(df['geometry'], GeoSeries)) self.assert_(isinstance(df['location'], GeoSeries)) data["geometry"] = [Point(x + 1, y - 1) for x, y in zip(range(5), range(5))] df = GeoDataFrame(data, crs=self.crs) self.assert_(isinstance(df.geometry, GeoSeries)) self.assert_(isinstance(df['geometry'], GeoSeries)) # good if this changed in the future self.assert_(not isinstance(df['location'], GeoSeries)) def test_geometry_property(self): assert_geoseries_equal(self.df.geometry, self.df['geometry'], check_dtype=True, check_index_type=True) df = self.df.copy() new_geom = [Point(x, y) for x, y in zip(range(len(self.df)), range(len(self.df)))] df.geometry = new_geom new_geom = GeoSeries(new_geom, index=df.index, crs=df.crs) assert_geoseries_equal(df.geometry, new_geom) assert_geoseries_equal(df['geometry'], new_geom) # new crs gs = GeoSeries(new_geom, crs="epsg:26018") df.geometry = gs self.assertEqual(df.crs, "epsg:26018") def test_geometry_property_errors(self): with self.assertRaises(AttributeError): df = self.df.copy() del df['geometry'] df.geometry # list-like error with self.assertRaises(ValueError): df = self.df2.copy() df.geometry = 'value1' # list-like error with self.assertRaises(ValueError): df = self.df.copy() df.geometry = 'apple' # non-geometry error with self.assertRaises(TypeError): df = self.df.copy() df.geometry = list(range(df.shape[0])) with 
self.assertRaises(KeyError): df = self.df.copy() del df['geometry'] df['geometry'] # ndim error with self.assertRaises(ValueError): df = self.df.copy() df.geometry = df def test_set_geometry(self): geom = GeoSeries([Point(x, y) for x, y in zip(range(5), range(5))]) original_geom = self.df.geometry df2 = self.df.set_geometry(geom) self.assert_(self.df is not df2) assert_geoseries_equal(df2.geometry, geom) assert_geoseries_equal(self.df.geometry, original_geom) assert_geoseries_equal(self.df['geometry'], self.df.geometry) # unknown column with self.assertRaises(ValueError): self.df.set_geometry('nonexistent-column') # ndim error with self.assertRaises(ValueError): self.df.set_geometry(self.df) # new crs - setting should default to GeoSeries' crs gs = GeoSeries(geom, crs="epsg:26018") new_df = self.df.set_geometry(gs) self.assertEqual(new_df.crs, "epsg:26018") # explicit crs overrides self and dataframe new_df = self.df.set_geometry(gs, crs="epsg:27159") self.assertEqual(new_df.crs, "epsg:27159") self.assertEqual(new_df.geometry.crs, "epsg:27159") # Series should use dataframe's new_df = self.df.set_geometry(geom.values) self.assertEqual(new_df.crs, self.df.crs) self.assertEqual(new_df.geometry.crs, self.df.crs) def test_set_geometry_col(self): g = self.df.geometry g_simplified = g.simplify(100) self.df['simplified_geometry'] = g_simplified df2 = self.df.set_geometry('simplified_geometry') # Drop is false by default self.assert_('simplified_geometry' in df2) assert_geoseries_equal(df2.geometry, g_simplified) # If True, drops column and renames to geometry df3 = self.df.set_geometry('simplified_geometry', drop=True) self.assert_('simplified_geometry' not in df3) assert_geoseries_equal(df3.geometry, g_simplified) def test_set_geometry_inplace(self): geom = [Point(x, y) for x, y in zip(range(5), range(5))] ret = self.df.set_geometry(geom, inplace=True) self.assert_(ret is None) geom = GeoSeries(geom, index=self.df.index, crs=self.df.crs) assert_geoseries_equal(self.df.geometry, geom) def test_set_geometry_series(self): # Test when setting geometry with a Series that # alignment will occur # # Reverse the index order # Set the Series to be Point(i,i) where i is the index self.df.index = range(len(self.df)-1, -1, -1) d = {} for i in range(len(self.df)): d[i] = Point(i, i) g = GeoSeries(d) # At this point, the DataFrame index is [4,3,2,1,0] and the # GeoSeries index is [0,1,2,3,4]. 
Make sure set_geometry aligns # them to match indexes df = self.df.set_geometry(g) for i, r in df.iterrows(): self.assertAlmostEqual(i, r['geometry'].x) self.assertAlmostEqual(i, r['geometry'].y) def test_to_json(self): text = self.df.to_json() data = json.loads(text) self.assertTrue(data['type'] == 'FeatureCollection') self.assertTrue(len(data['features']) == 5) def test_to_json_geom_col(self): df = self.df.copy() df['geom'] = df['geometry'] df['geometry'] = np.arange(len(df)) df.set_geometry('geom', inplace=True) text = df.to_json() data = json.loads(text) self.assertTrue(data['type'] == 'FeatureCollection') self.assertTrue(len(data['features']) == 5) def test_to_json_na(self): # Set a value as nan and make sure it's written self.df.loc[self.df['BoroName']=='Queens', 'Shape_Area'] = np.nan text = self.df.to_json() data = json.loads(text) self.assertTrue(len(data['features']) == 5) for f in data['features']: props = f['properties'] self.assertEqual(len(props), 4) if props['BoroName'] == 'Queens': self.assertTrue(props['Shape_Area'] is None) def test_to_json_bad_na(self): # Check that a bad na argument raises error with self.assertRaises(ValueError): text = self.df.to_json(na='garbage') def test_to_json_dropna(self): self.df.loc[self.df['BoroName']=='Queens', 'Shape_Area'] = np.nan self.df.loc[self.df['BoroName']=='Bronx', 'Shape_Leng'] = np.nan text = self.df.to_json(na='drop') data = json.loads(text) self.assertEqual(len(data['features']), 5) for f in data['features']: props = f['properties'] if props['BoroName'] == 'Queens': self.assertEqual(len(props), 3) self.assertTrue('Shape_Area' not in props) # Just make sure setting it to nan in a different row # doesn't affect this one self.assertTrue('Shape_Leng' in props) elif props['BoroName'] == 'Bronx': self.assertEqual(len(props), 3) self.assertTrue('Shape_Leng' not in props) self.assertTrue('Shape_Area' in props) else: self.assertEqual(len(props), 4) def test_to_json_keepna(self): self.df.loc[self.df['BoroName']=='Queens', 'Shape_Area'] = np.nan self.df.loc[self.df['BoroName']=='Bronx', 'Shape_Leng'] = np.nan text = self.df.to_json(na='keep') data = json.loads(text) self.assertEqual(len(data['features']), 5) for f in data['features']: props = f['properties'] self.assertEqual(len(props), 4) if props['BoroName'] == 'Queens': self.assertTrue(np.isnan(props['Shape_Area'])) # Just make sure setting it to nan in a different row # doesn't affect this one self.assertTrue('Shape_Leng' in props) elif props['BoroName'] == 'Bronx': self.assertTrue(np.isnan(props['Shape_Leng'])) self.assertTrue('Shape_Area' in props) def test_copy(self): df2 = self.df.copy() self.assertTrue(type(df2) is GeoDataFrame) self.assertEqual(self.df.crs, df2.crs) def test_to_file(self): """ Test to_file and from_file """ tempfilename = os.path.join(self.tempdir, 'boros.shp') self.df.to_file(tempfilename) # Read layer back in df = GeoDataFrame.from_file(tempfilename) self.assertTrue('geometry' in df) self.assertTrue(len(df) == 5) self.assertTrue(np.alltrue(df['BoroName'].values == self.boros)) # Write layer with null geometry out to file tempfilename = os.path.join(self.tempdir, 'null_geom.shp') self.df3.to_file(tempfilename) # Read layer back in df3 = GeoDataFrame.from_file(tempfilename) self.assertTrue('geometry' in df3) self.assertTrue(len(df3) == 2) self.assertTrue(np.alltrue(df3['Name'].values == self.line_paths)) def test_to_file_types(self): """ Test various integer type columns (GH#93) """ tempfilename = os.path.join(self.tempdir, 'int.shp') int_types = [np.int, 
np.int8, np.int16, np.int32, np.int64, np.intp, np.uint8, np.uint16, np.uint32, np.uint64, np.long] geometry = self.df2.geometry data = dict((str(i), np.arange(len(geometry), dtype=dtype)) for i, dtype in enumerate(int_types)) df = GeoDataFrame(data, geometry=geometry) df.to_file(tempfilename) def test_mixed_types_to_file(self): """ Test that mixed geometry types raise error when writing to file """ tempfilename = os.path.join(self.tempdir, 'test.shp') s = GeoDataFrame({'geometry': [Point(0, 0), Polygon([(0, 0), (1, 0), (1, 1)])]}) with self.assertRaises(ValueError): s.to_file(tempfilename) def test_to_file_schema(self): """ Ensure that the file is written according to the schema if it is specified """ try: from collections import OrderedDict except ImportError: from ordereddict import OrderedDict tempfilename = os.path.join(self.tempdir, 'test.shp') properties = OrderedDict([ ('Shape_Leng', 'float:19.11'), ('BoroName', 'str:40'), ('BoroCode', 'int:10'), ('Shape_Area', 'float:19.11'), ]) schema = {'geometry': 'Polygon', 'properties': properties} # Take the first 2 features to speed things up a bit self.df.iloc[:2].to_file(tempfilename, schema=schema) with fiona.open(tempfilename) as f: result_schema = f.schema self.assertEqual(result_schema, schema) def test_bool_index(self): # Find boros with 'B' in their name df = self.df[self.df['BoroName'].str.contains('B')] self.assertTrue(len(df) == 2) boros = df['BoroName'].values self.assertTrue('Brooklyn' in boros) self.assertTrue('Bronx' in boros) self.assertTrue(type(df) is GeoDataFrame) def test_transform(self): df2 = self.df2.copy() df2.crs = {'init': 'epsg:26918', 'no_defs': True} lonlat = df2.to_crs(epsg=4326) utm = lonlat.to_crs(epsg=26918) self.assertTrue(all(df2['geometry'].geom_almost_equals(utm['geometry'], decimal=2))) def test_from_features(self): nybb_filename = download_nybb() with fiona.open('/nybb_14a_av/nybb.shp', vfs='zip://' + nybb_filename) as f: features = list(f) crs = f.crs df = GeoDataFrame.from_features(features, crs=crs) df.rename(columns=lambda x: x.lower(), inplace=True) validate_boro_df(self, df) self.assert_(df.crs == crs) def test_from_features_unaligned_properties(self): p1 = Point(1, 1) f1 = {'type': 'Feature', 'properties': {'a': 0}, 'geometry': p1.__geo_interface__} p2 = Point(2, 2) f2 = {'type': 'Feature', 'properties': {'b': 1}, 'geometry': p2.__geo_interface__} p3 = Point(3, 3) f3 = {'type': 'Feature', 'properties': {'a': 2}, 'geometry': p3.__geo_interface__} df = GeoDataFrame.from_features([f1, f2, f3]) result = df[['a', 'b']] expected = pd.DataFrame.from_dict([{'a': 0, 'b': np.nan}, {'a': np.nan, 'b': 1}, {'a': 2, 'b': np.nan}]) assert_frame_equal(expected, result) def test_from_postgis_default(self): con = connect('test_geopandas') if con is None or not create_db(self.df): raise unittest.case.SkipTest() try: sql = "SELECT * FROM nybb;" df = GeoDataFrame.from_postgis(sql, con) finally: if PANDAS_NEW_SQL_API: # It's not really a connection, it's an engine con = con.connect() con.close() validate_boro_df(self, df) def test_from_postgis_custom_geom_col(self): con = connect('test_geopandas') if con is None or not create_db(self.df): raise unittest.case.SkipTest() try: sql = """SELECT borocode, boroname, shape_leng, shape_area, geom AS __geometry__ FROM nybb;""" df = GeoDataFrame.from_postgis(sql, con, geom_col='__geometry__') finally: if PANDAS_NEW_SQL_API: # It's not really a connection, it's an engine con = con.connect() con.close() validate_boro_df(self, df) def test_dataframe_to_geodataframe(self): df = 
pd.DataFrame({"A": range(len(self.df)), "location": list(self.df.geometry)}, index=self.df.index) gf = df.set_geometry('location', crs=self.df.crs) self.assertIsInstance(df, pd.DataFrame) self.assertIsInstance(gf, GeoDataFrame) assert_geoseries_equal(gf.geometry, self.df.geometry) self.assertEqual(gf.geometry.name, 'location') self.assert_('geometry' not in gf) gf2 = df.set_geometry('location', crs=self.df.crs, drop=True) self.assertIsInstance(df, pd.DataFrame) self.assertIsInstance(gf2, GeoDataFrame) self.assertEqual(gf2.geometry.name, 'geometry') self.assert_('geometry' in gf2) self.assert_('location' not in gf2) self.assert_('location' in df) # should be a copy df.ix[0, "A"] = 100 self.assertEqual(gf.ix[0, "A"], 0) self.assertEqual(gf2.ix[0, "A"], 0) with self.assertRaises(ValueError): df.set_geometry('location', inplace=True) def test_geodataframe_geointerface(self): self.assertEqual(self.df.__geo_interface__['type'], 'FeatureCollection') self.assertEqual(len(self.df.__geo_interface__['features']), self.df.shape[0]) def test_geodataframe_geojson_no_bbox(self): geo = self.df._to_geo(na="null", show_bbox=False) self.assertFalse('bbox' in geo.keys()) for feature in geo['features']: self.assertFalse('bbox' in feature.keys()) def test_geodataframe_geojson_bbox(self): geo = self.df._to_geo(na="null", show_bbox=True) self.assertTrue('bbox' in geo.keys()) self.assertEqual(len(geo['bbox']), 4) self.assertTrue(isinstance(geo['bbox'], tuple)) for feature in geo['features']: self.assertTrue('bbox' in feature.keys()) def test_pickle(self): filename = os.path.join(self.tempdir, 'df.pkl') self.df.to_pickle(filename) unpickled = pd.read_pickle(filename) assert_frame_equal(self.df, unpickled) self.assertEqual(self.df.crs, unpickled.crs)
def from_shp(cls, shp_path):
    """Construct an instance from a shapefile path."""
    # An explicit check is safer than assert, which is stripped under -O.
    if not shp_path.endswith(".shp"):
        raise ValueError("Not a valid shapefile: {}".format(shp_path))
    try:
        return cls(GeoDataFrame.from_file(shp_path))
    except Exception as err:
        # Report the read failure; returns None in that case.
        print(err)
        return None
def query(self, **kwargs):
    """
    A query function to extract data out of MapServer layers. I've exposed
    every option here.

    Parameters
    ==========
    where : str, required
        SQL query string.
    out_fields : list or str (default: '*')
        fields to pass from the header out
    return_geometry : bool (default: True)
        whether to return geometry or just the dataframe
    geometry_precision : str (default: None)
        a number of significant digits to which the output of the query
        should be truncated
    out_sr : int or str (default: None)
        ESRI WKID spatial reference into which to reproject the geodata
    return_ids_only : bool (default: False)
        whether to only return ObjectIDs
    return_z : bool (default: False)
        whether to return z components of shp-z
    return_m : bool (default: False)
        whether to return m components of shp-m
    pkg : str (default: 'pysal')
        what geometry type to provide in the results of the query. Uses
        PySAL shapes by default. Supports "shapely," which constructs
        shapely shapes instead of pysal shapes, and "geopandas," which
        packs shapely shapes into a GeoPandas dataframe.
    gpize : bool (default: False)
        whether to convert the output to a GeoDataFrame
    strict : bool (default: False)
        whether to throw an error if invalid polygons are provided from
        the API (True) or just warn that at least one polygon is invalid
        (False)

    Returns
    =======
    Dataframe or GeoDataFrame containing entries from the geodatabase

    Notes
    =====
    Most of the time, this should be used leaning on the SQL "where"
    argument: cxn.query(where='GEOID LIKE "06*"')

    In most cases, you'll be querying against layers, not MapServices
    overall.
    """
    # parse args
    pkg = kwargs.pop('pkg', 'pysal')
    gpize = kwargs.pop('gpize', False)
    strict = kwargs.pop('strict', False)
    if pkg.lower() == 'geopandas':
        pkg = 'shapely'
        gpize = True
    kwargs = {''.join(k.split('_')): v for k, v in diter(kwargs)}

    # construct query string
    self._basequery = copy.deepcopy(_basequery)
    for k, v in diter(kwargs):
        try:
            self._basequery[k] = v
        except KeyError:
            raise KeyError(
                "Option '{k}' not recognized, check parameters".format(k=k))
    qstring = '&'.join(
        ['{}={}'.format(k, v) for k, v in diter(self._basequery)])
    self._last_query = self._baseurl + '/query?' + qstring

    # run query
    resp = r.get(self._last_query + '&f=json')
    resp.raise_for_status()
    datadict = resp.json()

    # convert to output format
    try:
        features = datadict['features']
    except KeyError:
        code, msg = datadict['error']['code'], datadict['error']['message']
        details = datadict['error']['details']
        if not details:
            details = 'Mapserver provided no detailed error'
        raise KeyError(('Response from API is malformed. You may have '
                        'submitted too many queries, or experienced '
                        'significant network connectivity issues.\n'
                        '(API ERROR {}:{}({}))'.format(code, msg, details)))
    todf = []
    for i, feature in enumerate(features):
        locfeat = gpsr.__dict__[datadict['geometryType']](feature)
        todf.append(locfeat['properties'])
        todf[i].update({'geometry': locfeat['geometry']})
    df = pd.DataFrame(todf)
    outdf = gpsr.convert_geometries(df, pkg, strict=strict)
    if gpize:
        try:
            from geopandas import GeoDataFrame
            outdf = GeoDataFrame(outdf)
        except Exception:
            print('GeoPandas dataframe conversion failed! Continuing...')
    outdf.crs = datadict.pop('spatialReference', {})
    return outdf
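# --- Hypothetical usage of query() above. The class name, endpoint URL, and
# field name are placeholders (a live MapServer endpoint is required), so the
# example is left as comments:
#
#     cxn = Layer('https://host/arcgis/rest/services/SomeService/MapServer/0')
#     gdf = cxn.query(where="STATEFP = '06'", pkg='geopandas')
#     gdf.head()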
import geopandas
from geopandas import GeoDataFrame, GeoSeries
import matplotlib.pyplot as plt
from matplotlib.colors import Normalize
import matplotlib.cm as cm
import seaborn as sns
from shapely.geometry import Point, Polygon
import numpy as np
import googlemaps
from datetime import datetime
plt.rcParams["figure.figsize"] = [8, 6]
import pickle

# Get the shape-file for NYC
boros = GeoDataFrame.from_file(
    './Borough Boundaries/geo_export_e66b8353-e3f3-49c0-9a5e-4622cdc09c91.shp')
boros = boros.set_index('boro_code')
boros = boros.sort_index()

# Plot and color by borough. Set the axis limits before calling plt.show();
# limits applied after the figure is shown have no effect on the rendered plot.
boros.plot(column='boro_name')
plt.gca().set_xlim([-74.05, -73.7])
plt.gca().set_ylim([40.57, 40.91])
plt.show()

# make a grid of latitude-longitude values
xmin, xmax, ymin, ymax = -74.05, -73.85, 40.65, 40.9
xmin_nyc, xmax_nyc, ymin_nyc, ymax_nyc = -74.05, -73.7, 40.57, 40.91
xx, yy = np.meshgrid(np.linspace(xmin_nyc, xmax_nyc, 140),
                     np.linspace(ymin_nyc, ymax_nyc, 140))
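# --- Possible next step (an assumption, not from the source script): flatten
# the meshgrid into shapely Points and mask the nodes that fall inside the
# boroughs, using the within()/unary_union API from geopandas.
grid_points = GeoSeries(
    [Point(x, y) for x, y in zip(xx.ravel(), yy.ravel())], crs=boros.crs)
inside = grid_points.within(boros.unary_union)  # boolean mask, one per node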
def compute_experiment(n_components, spatial_scale_geometry, temporal_filter,
                       output_name, threshold=1e-02):
    latitude_field_name = 'LATITUD'
    longitude_field_name = 'LONGITUD'
    date_field_name = 'FECHA'
    min_date, max_date = temporal_filter
    figure_name = output_name + '_IC_' + str(n_components)
    df = pd.read_csv(
        '/home/jrudascas/Downloads/Copia de 06. verify_enrich_nuse_11022020.csv')
    boros = GeoDataFrame.from_file(spatial_scale_geometry)
    df['DATE'] = pd.to_datetime(df[date_field_name]).dt.strftime('%Y-%m-%d')
    df = df[(df['DATE'] >= min_date) & (df['DATE'] <= max_date)]
    print(df.shape)
    gdf = GeoDataFrame(
        df.drop([latitude_field_name, longitude_field_name], axis=1),
        geometry=[
            Point(xy) for xy in
            zip(df[longitude_field_name], df[latitude_field_name])
        ])
    #gdf['DATE'] = pd.to_datetime(gdf[date_field_name]).dt.strftime('%Y%m%d')
    gdf = gdf[:1000]
    in_map_by_geometry = np.array(
        [gdf.geometry.within(geom) for geom in boros.geometry])
    # print(in_map_by_geometry.shape)
    raw = []
    date_sequence = [
        d.strftime('%Y-%m-%d')
        for d in pd.date_range(min_date, max_date, freq='D')
    ]
    for pos, val in enumerate(boros.geometry):
        gdf_by_geometry = gdf[in_map_by_geometry[pos]]
        gdf_by_geometry_grouped = pd.DataFrame(
            gdf_by_geometry.groupby(['DATE']).size(),
            columns=["EVENTS"]).sort_index()
        values_fitted = []
        for i, value in enumerate(date_sequence):
            values_fitted.append(
                0 if value not in gdf_by_geometry_grouped.index.values
                else gdf_by_geometry_grouped.loc[value]['EVENTS'])
        raw.append(values_fitted)
    np_raw = np.array(raw)
    np_raw[np.isnan(np_raw)] = 0
    np_raw[np.isinf(np_raw)] = 0
    np_raw[np.isneginf(np_raw)] = 0
    df_additional = pd.read_csv(
        '/home/jrudascas/Downloads/arboles_localidad.csv')
    np_raw = np.delete(np_raw, 8, axis=0)  # drop the Sumapaz locality (row 8)
    tensor = np.zeros((np_raw.shape[0], np_raw.shape[1], 19, 19))
    print(tensor.shape)
    tensor[:, :, 0, 0] = np_raw
    tensor[0, 0, :, 0] = df_additional['ARBOLES']
    tensor[0, 0, 0, :] = df_additional['LUMINARIA']
    W, H = parafac(tensor, rank=n_components, init='random', tol=10e-6)  # i.e. 1e-5
    return W, H
        location=[viasat.latitude.iloc[i], viasat.longitude.iloc[i]],
        popup=viasat.ID.iloc[i],
        radius=6,
        color="black",
        fill=True,
        fill_color="black",
        fill_opacity=1).add_to(my_map)
my_map.save("matched_route.html")

######################################################

# build a geodataframe with VIASAT data
geometry = [Point(xy) for xy in zip(viasat.longitude, viasat.latitude)]
# viasat = viasat.drop(['longitude', 'latitude'], axis=1)
crs = {'init': 'epsg:4326'}
viasat_gdf = GeoDataFrame(viasat, crs=crs, geometry=geometry)
# viasat_gdf.plot()

# Buffer the points. The frame is in EPSG:4326, so the buffer distance is in
# degrees, not metres: 0.00025 degrees is roughly 25-28 m at these latitudes.
buffer = viasat_gdf.buffer(0.00025)  # this is a geoseries
# buffer.plot()
# make a dataframe
buffer_viasat = pd.DataFrame(buffer)
buffer_viasat.columns = ['geometry']
type(buffer_viasat)
# transform a geoseries into a geodataframe
# https://gis.stackexchange.com/questions/266098/how-to-convert-a-geoserie-to-a-geodataframe-with-geopandas

## circumscribe the area of the track (buffer zone)
# union = buffer.unary_union
# envelope = union.envelope
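# --- Alternative sketch: for a true metric buffer, reproject to a metric CRS
# first, buffer in metres, then return to lon/lat. EPSG:32632 is assumed here
# purely as an example UTM zone; pick the zone covering your data.
buffer_50m = (viasat_gdf.to_crs(epsg=32632)
                        .buffer(50)          # 50 metres, exactly
                        .to_crs(epsg=4326))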
    'Station ID', 'GTFS Stop ID', 'Stop Name', 'Borough', 'GTFS Latitude',
    'GTFS Longitude'
])
df_stations.columns = [
    'STATION_ID', 'STOP_ID', 'STOP_NAME', 'BOROUGH', 'LATITUDE', 'LONGITUDE'
]

# convert to geodataframe; Point expects (x, y) == (longitude, latitude)
geometry = [
    Point(xy) for xy in zip(df_stations.LONGITUDE, df_stations.LATITUDE)
]
df_stations = df_stations.drop(['LATITUDE', 'LONGITUDE'], axis=1)
crs = {'init': 'epsg:4326'}
geodf_stations = GeoDataFrame(df_stations, crs=crs, geometry=geometry)
geodf_stations.info()
geodf_stations.head()

# In[8]:

# add a new geometry to geodf_stations of a circle of X miles around each
# station. The new design uses polygons that will be loaded from a shapefile,
# so drawing buffer circles around the stations will not be required.
#X = 0.01
#geodf_stations['CIRCLE'] = geodf_stations.geometry.buffer(X)
#geodf_stations.geometry.name
#geodf_stations = geodf_stations.rename(columns={'geometry':'POINT'}).set_geometry('CIRCLE')
#geodf_stations.geometry.name
#geodf_stations.info()
#geodf_stations.head()
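# --- Equivalent, more compact construction (a sketch; assumes geopandas >= 0.5
# and must run before the LATITUDE/LONGITUDE columns are dropped above):
# import geopandas as gpd
# geodf_stations = gpd.GeoDataFrame(
#     df_stations, crs={'init': 'epsg:4326'},
#     geometry=gpd.points_from_xy(df_stations.LONGITUDE, df_stations.LATITUDE))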
def _map_images( plot_file_format: List[str], result_df: GeoDataFrame, filepaths: List[Union[str, Path]], bands: List[int] = [1, 2, 3], aoi: GeoDataFrame = None, show_images=True, show_features=False, name_column: str = "id", save_html: Path = None, ) -> folium.Map: """ Displays data.json, and if available, one or multiple results geotiffs. Args: plot_file_format: List of accepted image file formats e.g. [".png"] result_df: GeoDataFrame with scene geometries. aoi: GeoDataFrame of aoi. filepaths: Paths to images to plot. Optional, by default picks up the last downloaded results. show_images: Shows images if True (default). show_features: Show features if True. For quicklooks maps is set to False. name_column: Name of the feature property that provides the Feature/Layer name. save_html: The path for saving folium map as html file. With default None, no file is saved. """ if result_df.shape[0] > 100: result_df = result_df.iloc[:100] logger.info( "Only the first 100 results will be displayed to avoid memory " "issues.") centroid = box(*result_df.total_bounds).centroid m = folium_base_map( lat=centroid.y, lon=centroid.x, ) df_bounds = result_df.bounds list_bounds = df_bounds.values.tolist() raster_filepaths = [ path for path in filepaths if Path(path).suffix in plot_file_format ] try: feature_names = result_df[name_column].to_list() except KeyError: feature_names = [""] * len(result_df.index) if aoi is not None: aoi_style = VECTOR_STYLE.copy() aoi_style["color"] = "red" folium.GeoJson( aoi, name="aoi", style_function=lambda x: aoi_style, highlight_function=lambda x: HIGHLIGHT_STYLE, ).add_to(m) if show_features: for idx, row in result_df.iterrows(): # type: ignore try: feature_name = row.loc[name_column] except KeyError: feature_name = "" layer_name = f"Feature {idx + 1} - {feature_name}" f = folium.GeoJson( row["geometry"], name=layer_name, style_function=lambda x: VECTOR_STYLE, highlight_function=lambda x: HIGHLIGHT_STYLE, ) folium.Popup( f"{layer_name}: {row.drop('geometry', axis=0).to_json()}" ).add_to(f) f.add_to(m) if show_images and raster_filepaths: if len(bands) != 3: if len(bands) == 1: bands = bands * 3 # plot as grayband else: raise ValueError( "Parameter bands can only contain one or three bands.") for idx, (raster_fp, feature_name) in enumerate( zip(raster_filepaths, feature_names)): with rasterio.open(raster_fp) as src: if src.meta["crs"] is None: dst_array = src.read(bands) minx, miny, maxx, maxy = list_bounds[idx] else: # Folium requires 4326, streaming blocks are 3857 with WarpedVRT(src, crs="EPSG:4326") as vrt: dst_array = vrt.read(bands) minx, miny, maxx, maxy = vrt.bounds m.add_child( folium.raster_layers.ImageOverlay( np.moveaxis(np.stack(dst_array), 0, 2), bounds=[[miny, minx], [maxy, maxx]], # different order. name=f"Image {idx + 1} - {feature_name}", )) # Collapse layer control with too many features. collapsed = bool(result_df.shape[0] > 4) folium.LayerControl(position="bottomleft", collapsed=collapsed).add_to(m) if save_html: save_html = Path(save_html) if not save_html.exists(): save_html.mkdir(parents=True, exist_ok=True) filepath = save_html / "final_map.html" with filepath.open("w") as f: f.write(m._repr_html_()) return m
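# --- Note on the bounds handling above (values invented for illustration):
# rasterio reports (minx, miny, maxx, maxy) in lon/lat once warped to
# EPSG:4326, while folium's ImageOverlay expects [[south, west], [north, east]],
# hence the reordering.
minx, miny, maxx, maxy = -74.05, 40.57, -73.7, 40.91
folium_bounds = [[miny, minx], [maxy, maxx]]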
def export_geo_data_file(file_path: str, activity_dataframe: pandas.DataFrame):
    """
    Export a GeoJSON-encoded file of geospatial data from all activities.
    The exported file contains the name, ID, type, distance, total
    elevation gain, and a LineString representing the trace of each
    activity.

    Arguments:
    file_path - The path of the file to export the geospatial activity
                data to.
    activity_dataframe - A pandas DataFrame containing the activity data.
    """

    print('Geo: Processing geospatial data')

    # Convert the activity polylines into coordinates
    activity_dataframe.loc[:, 'map_coordinates'] = (
        activity_dataframe.loc[:, 'map'].apply(_decode_polyline))

    # Create a new DataFrame containing only activities with geospatial
    # data
    activity_map_dataframe = (activity_dataframe.loc[
        activity_dataframe.map_coordinates.notnull(), :].copy())

    # Convert the coordinates into Shapely points
    activity_map_dataframe.loc[:, 'map_points'] = (
        activity_map_dataframe.loc[:, 'map_coordinates'].apply(
            _create_shapely_point))

    # Convert the Shapely points into LineStrings
    activity_map_dataframe.loc[:, 'map_linestring'] = (
        activity_map_dataframe.loc[:, 'map_points'].apply(LineString))

    # Convert the activity distances from m to km
    activity_map_dataframe.loc[:, 'distance'] = (
        activity_map_dataframe.loc[:, 'distance'] / 1000)

    # Create a geopandas GeoDataFrame from the activities map DataFrame
    # and format the column names
    activity_map_geodataframe = GeoDataFrame(activity_map_dataframe[[
        'name', 'id', 'type', 'start_date_local_formatted', 'distance',
        'moving_time_formatted', 'total_elevation_gain', 'map_linestring'
    ]], geometry='map_linestring')
    activity_map_geodataframe.rename(columns={
        'name': 'Name',
        'id': 'ID',
        'type': 'Type',
        'start_date_local_formatted': 'Start date',
        'distance': 'Distance (km)',
        'moving_time_formatted': 'Moving time',
        'total_elevation_gain': 'Total elevation gain (m)'
    }, inplace=True)

    # Export the GeoDataFrame to a file in GeoJSON format
    print('Geo: Exporting geospatial data to {}'.format(file_path))
    activity_map_geodataframe.to_file(file_path, driver='GeoJSON',
                                      encoding='utf8')
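# --- Quick sanity check (a sketch; 'activities.geojson' is a placeholder
# path): the exported file should read straight back into geopandas with one
# LineString per activity.
import geopandas as gpd
activities = gpd.read_file('activities.geojson')
assert (activities.geom_type == 'LineString').all()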
def test_astype(self): arr = from_shapely(self.geoms, crs=27700) df = GeoDataFrame({"col1": [0, 1]}, geometry=arr) df2 = df.astype({"col1": str}) assert df2.crs == self.osgb
inv = pd.read_excel(choice + '/islims_inventory.xlsx')
wo = pd.read_excel(choice + '/islims_workorders.xlsx')
wo = wo.rename(columns={'woID': 'WoID'})
NCR = pd.read_excel(choice + '/NCR.xlsx')

isf_wo = pd.merge(isf, wo, how='left', on='WoID')
isf_wo = isf_wo.drop(['srchAssetID', 'gpscoordinateX', 'gpscoordinateY',
                      'initialproblemID', 'resolveddatetime', 'entereddate',
                      'finalresolutionID'], axis=1)
isf_wo_inv = pd.merge(isf_wo, inv, how='left', on='inventoryID')
isf_wo_inv = isf_wo_inv.drop(['gpscoordinateX', 'gpscoordinateY'], axis=1)

#%% geo join
# Setting up data into geopandas
geometry = [Point(xy) for xy in zip(isf_wo_inv['gpsX'], isf_wo_inv['gpsY'])]
gLights = GeoDataFrame(isf_wo_inv, geometry=geometry)
gLights = gLights.drop_duplicates(subset=['WoID'])

geometry = [Point(xy) for xy in zip(NCR['gpsX'], NCR['gpsY'])]
gNCR = GeoDataFrame(NCR, geometry=geometry)

BUFFER = .000625  # 1/4th of a city block in radius of Maryland coordinates.
#BUFFER = .00125  # 1/2 of a city block in radius of Maryland coordinates.
# Overwrite the geometry column of gLights with a buffer polygon centered on
# each light's point of interest.
gLights_Buff = gLights.assign(geometry=lambda x: x.geometry.buffer(BUFFER))

#%% Filtering
Matched_Lights = gpd.sjoin(gLights_Buff, gNCR, 'left')
Matched_Lights['Tdelta'] = [0] * len(Matched_Lights)  # Counter to be used
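# --- Likely follow-on step (an assumption, not from the source): count NCR
# hits per light from the left sjoin above. sjoin adds an 'index_right'
# column that is NaN where a buffer matched nothing, so count() yields 0 for
# unmatched lights.
ncr_hits = Matched_Lights.groupby('WoID')['index_right'].count()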
def test_dataframe(self): arr = from_shapely(self.geoms, crs=27700) df = GeoDataFrame(geometry=arr) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb arr = from_shapely(self.geoms) s = GeoSeries(arr, crs=27700) df = GeoDataFrame(geometry=s) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb # different passed CRS than array CRS is ignored with pytest.warns(FutureWarning, match="CRS mismatch"): df = GeoDataFrame(geometry=s, crs=4326) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb with pytest.warns(FutureWarning, match="CRS mismatch"): GeoDataFrame(geometry=s, crs=4326) with pytest.warns(FutureWarning, match="CRS mismatch"): GeoDataFrame({"data": [1, 2], "geometry": s}, crs=4326) with pytest.warns(FutureWarning, match="CRS mismatch"): GeoDataFrame(df, crs=4326).crs # manually change CRS arr = from_shapely(self.geoms) s = GeoSeries(arr, crs=27700) df = GeoDataFrame(geometry=s) df.crs = 4326 assert df.crs == self.wgs assert df.geometry.crs == self.wgs assert df.geometry.values.crs == self.wgs df = GeoDataFrame(self.geoms, columns=["geom"], crs=27700) assert df.crs == self.osgb df = df.set_geometry("geom") assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb assert df.geom.crs == self.osgb assert df.geom.values.crs == self.osgb df = GeoDataFrame(geometry=self.geoms, crs=27700) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb df = GeoDataFrame(crs=27700) df = df.set_geometry(self.geoms) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb # new geometry with set CRS has priority over GDF CRS df = GeoDataFrame(crs=27700) df = df.set_geometry(self.geoms, crs=4326) assert df.crs == self.wgs assert df.geometry.crs == self.wgs assert df.geometry.values.crs == self.wgs arr = from_shapely(self.geoms) s = GeoSeries(arr, crs=27700) df = GeoDataFrame() df = df.set_geometry(s) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb arr = from_shapely(self.geoms, crs=27700) df = GeoDataFrame() df = df.set_geometry(arr) assert df.crs == self.osgb assert df.geometry.crs == self.osgb assert df.geometry.values.crs == self.osgb arr = from_shapely(self.geoms) df = GeoDataFrame({"col1": [1, 2], "geometry": arr}, crs=4326) assert df.crs == self.wgs assert df.geometry.crs == self.wgs assert df.geometry.values.crs == self.wgs arr = from_shapely(self.geoms, crs=4326) df = GeoDataFrame({"col1": [1, 2], "geometry": arr}) assert df.crs == self.wgs assert df.geometry.crs == self.wgs assert df.geometry.values.crs == self.wgs # geometry column without geometry df = GeoDataFrame({"geometry": [0, 1]}) df.crs = 27700 assert df.crs == self.osgb
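# --- Companion sketch to the CRS assignments above (geopandas >= 0.7 API):
# assigning df.crs / calling set_crs() only relabels the metadata, while
# to_crs() actually transforms the coordinates. Values are illustrative.
import geopandas as gpd
from shapely.geometry import Point

pt = gpd.GeoDataFrame(geometry=[Point(-0.1276, 51.5072)])
pt = pt.set_crs(4326)    # label only; coordinates unchanged
osgb = pt.to_crs(27700)  # reprojected to the OSGB grid (EPSG:27700)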
def main(): world = GeoDataFrame.from_file('ne_10m_admin_0_countries.shp').sort_values( by='NAME').set_index('ISO_A3') listDf = world.index.tolist() processMap(world, listDf, 0.5)
def test_crs_axis_order__always_xy(): df = GeoDataFrame(geometry=[Point(-1683723, 6689139)], crs="epsg:26918") lonlat = df.to_crs("epsg:4326") test_lonlat = GeoDataFrame(geometry=[Point(-110.1399901, 55.1350011)], crs="epsg:4326") assert_geodataframe_equal(lonlat, test_lonlat, check_less_precise=True)
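# --- Background for the assertion above (a sketch): geopandas keeps x/y order
# as lon/lat by building its pyproj Transformer with always_xy=True, so
# EPSG:4326 output stays (lon, lat) despite the EPSG axis definition.
from pyproj import Transformer
t = Transformer.from_crs("epsg:26918", "epsg:4326", always_xy=True)
lon, lat = t.transform(-1683723, 6689139)  # ~(-110.14, 55.135) per the test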
def plotmaps(df_WF, df_SS, df_LS):
    fig, ax = plt.subplots()
    fig.set_size_inches(40, 20)
    world = gdp.read_file(gdp.datasets.get_path('naturalearth_lowres'))
    ax.set_aspect('equal')
    world.plot(ax=ax, color='white', edgecolor='grey')
    # WGS84 lon/lat is EPSG:4326 ('WSG:84' is not a valid CRS identifier).
    crs = {'init': 'epsg:4326'}
    #world = world[['continent', 'geometry', 'pop_est']]
    if len(df_WF) > 0:
        geometry = [
            Point(xy) for xy in zip(df_WF.Longitude.astype(float),
                                    df_WF.Latitude.astype(float))
        ]
        geo_df = GeoDataFrame(df_WF, crs=crs, geometry=geometry)
        geo_df.plot(ax=ax, color='red', marker='X', markersize=5)
    if len(df_SS) > 0:
        geometry_SS = [
            Point(xy) for xy in zip(df_SS.Longitude.astype(float),
                                    df_SS.Latitude.astype(float))
        ]
        geo_ss = GeoDataFrame(df_SS, crs=crs, geometry=geometry_SS)
        geo_ss.plot(ax=ax, column='TITLE', marker='o', markersize=5)
    if len(df_LS) > 0:
        geometry_LS = [
            Point(xy) for xy in zip(df_LS.Longitude.astype(float),
                                    df_LS.Latitude.astype(float))
        ]
        geo_ls = GeoDataFrame(df_LS, crs=crs, geometry=geometry_LS)
        geo_ls.plot(ax=ax, color='black', marker='*', markersize=5)
    plt.axis('off')
    ax.legend()
    plt.title('Earth Observations for the Period ' +
              (datetime.datetime.now() -
               dateutil.relativedelta.relativedelta(months=1)
               ).strftime('%d %B %Y %H:%M') + ' until ' +
              datetime.datetime.now().strftime('%d %B %Y %H:%M'),
              fontsize=20)
    plt.savefig(
        os.path.join(
            os.getcwd(), 'TEMP',
            datetime.datetime.strftime(datetime.datetime.now(), dateformat) +
            '_' + 'chart.png'))
def add_qgeometry( self, kind: str, component_name: str, geometry: dict, subtract: bool = False, helper: bool = False, layer: Union[int, str] = 1, # chip will be here chip: str = 'main', **other_options): """Main interface to add qgeometries. Arguments: kind (str): Must be in get_element_types ('path', 'poly', etc.). component_name (str): Component name. geometry (dict): Dict of shapely geometry. subtract (bool): Subtract - passed through. Defaults to False. helper (bool): Helper - passed through. Defaults to False. layer (Union[int, str]): Layer - passed through. Defaults to 1. chip (str): Chip name - passed through. Defaults to 'main'. **other_options (object): Other_options - passed through. """ # TODO: Add unit test # ensure correct types if not isinstance(subtract, bool): subtract = subtract in TRUE_BOOLS if not isinstance(helper, bool): helper = helper in TRUE_BOOLS if not (kind in self.get_element_types()): self.logger.error( f'Creator user error: Unknown element kind=`{kind}`' f'Kind must be in {self.get_element_types()}. This failed for component' f'name = `{component_name}`.\n' f' The call was with subtract={subtract} and helper={helper}' f' and layer={layer}, and options={other_options}') #Checks if (any) of the geometry are MultiPolygons, and breaks them up into #individual polygons. Rounds the coordinate sequences of those values to avoid #numerical errors. rounding_val = self.design.template_options['PRECISION'] new_dict = Dict() for key, item in geometry.items(): if isinstance(geometry[key], MultiPolygon): temp_multi = geometry[key] shape_count = 0 for shape_temp in temp_multi.geoms: new_dict[key + '_' + str(shape_count)] = round_coordinate_sequence( shape_temp, rounding_val) shape_count += 1 else: new_dict[key] = round_coordinate_sequence(item, rounding_val) geometry = new_dict # Create options TODO: Might want to modify this (component_name -> component_id) # Give warning if length is to be fillet's and not long enough. self.check_lengths(geometry, kind, component_name, layer, chip, **other_options) # Create options options = dict(component=component_name, subtract=subtract, helper=helper, layer=int(layer), chip=chip, **other_options) #replaces line above to generate the options. #for keyC in design.qgeometry.tables[kind].columns: # if keyC != 'geometry': # options[keyC] = ???[keyC] -> alternative manner to pass options to the add_qgeometry function? # instead have the add_qeometry in baseComponent generate the dict? #Could we just append rather than make a new table each time? This seems slow table = self.tables[kind] # assert that all names in options are in table columns! TODO: New approach will not be wanting #to do this (maybe check that all columns are in options?) df = GeoDataFrame.from_dict(geometry, orient='index', columns=['geometry']) df.index.name = 'name' df = df.reset_index() df = df.assign(**options) # Set new table. Unfortunately, this creates a new instance. Can just direct append self.tables[kind] = table.append(df, sort=False, ignore_index=True)
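# --- Minimal standalone illustration (names invented) of the
# GeoDataFrame.from_dict(orient='index') pattern used above: a
# {name: shapely geometry} dict becomes a 'geometry' column indexed by name,
# and reset_index() then turns the names into a regular column.
from geopandas import GeoDataFrame
from shapely.geometry import Point

geometry = {'pad': Point(0, 0), 'stub': Point(1, 0)}
df = GeoDataFrame.from_dict(geometry, orient='index', columns=['geometry'])
df.index.name = 'name'
df = df.reset_index()  # columns: name, geometry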
class TestGeomMethods: def setup_method(self): self.t1 = Polygon([(0, 0), (1, 0), (1, 1)]) self.t2 = Polygon([(0, 0), (1, 1), (0, 1)]) self.t3 = Polygon([(2, 0), (3, 0), (3, 1)]) self.sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]) self.inner_sq = Polygon([(0.25, 0.25), (0.75, 0.25), (0.75, 0.75), (0.25, 0.75)]) self.nested_squares = Polygon(self.sq.boundary, [self.inner_sq.boundary]) self.p0 = Point(5, 5) self.p3d = Point(5, 5, 5) self.g0 = GeoSeries([ self.t1, self.t2, self.sq, self.inner_sq, self.nested_squares, self.p0, None, ]) self.g1 = GeoSeries([self.t1, self.sq]) self.g2 = GeoSeries([self.sq, self.t1]) self.g3 = GeoSeries([self.t1, self.t2]) self.g3.crs = {"init": "epsg:4326", "no_defs": True} self.g4 = GeoSeries([self.t2, self.t1]) self.g4.crs = {"init": "epsg:4326", "no_defs": True} self.g_3d = GeoSeries([self.p0, self.p3d]) self.na = GeoSeries([self.t1, self.t2, Polygon()]) self.na_none = GeoSeries([self.t1, None]) self.a1 = self.g1.copy() self.a1.index = ["A", "B"] self.a2 = self.g2.copy() self.a2.index = ["B", "C"] self.esb = Point(-73.9847, 40.7484) self.sol = Point(-74.0446, 40.6893) self.landmarks = GeoSeries([self.esb, self.sol], crs={ "init": "epsg:4326", "no_defs": True }) self.l1 = LineString([(0, 0), (0, 1), (1, 1)]) self.l2 = LineString([(0, 0), (1, 0), (1, 1), (0, 1)]) self.g5 = GeoSeries([self.l1, self.l2]) self.g6 = GeoSeries([self.p0, self.t3]) self.empty = GeoSeries([]) self.all_none = GeoSeries([None, None]) self.empty_poly = Polygon() # Crossed lines self.l3 = LineString([(0, 0), (1, 1)]) self.l4 = LineString([(0, 1), (1, 0)]) self.crossed_lines = GeoSeries([self.l3, self.l4]) # Placeholder for testing, will just drop in different geometries # when needed self.gdf1 = GeoDataFrame({ "geometry": self.g1, "col0": [1.0, 2.0], "col1": ["geo", "pandas"] }) self.gdf2 = GeoDataFrame({ "geometry": self.g1, "col3": [4, 5], "col4": ["rand", "string"] }) def _test_unary_real(self, op, expected, a): """ Tests for 'area', 'length', 'is_valid', etc. """ fcmp = assert_series_equal self._test_unary(op, expected, a, fcmp) def _test_unary_topological(self, op, expected, a): if isinstance(expected, GeoPandasBase): fcmp = assert_geoseries_equal else: def fcmp(a, b): assert a.equals(b) self._test_unary(op, expected, a, fcmp) def _test_binary_topological(self, op, expected, a, b, *args, **kwargs): """ Tests for 'intersection', 'union', 'symmetric_difference', etc. """ if isinstance(expected, GeoPandasBase): fcmp = assert_geoseries_equal else: def fcmp(a, b): assert geom_equals(a, b) if isinstance(b, GeoPandasBase): right_df = True else: right_df = False self._binary_op_test(op, expected, a, b, fcmp, True, right_df, *args, **kwargs) def _test_binary_real(self, op, expected, a, b, *args, **kwargs): fcmp = assert_series_equal self._binary_op_test(op, expected, a, b, fcmp, True, False, *args, **kwargs) def _test_binary_operator(self, op, expected, a, b): """ The operators only have GeoSeries on the left, but can have GeoSeries or GeoDataFrame on the right. """ if isinstance(expected, GeoPandasBase): fcmp = assert_geoseries_equal else: def fcmp(a, b): assert geom_equals(a, b) if isinstance(b, GeoPandasBase): right_df = True else: right_df = False self._binary_op_test(op, expected, a, b, fcmp, False, right_df) def _binary_op_test(self, op, expected, left, right, fcmp, left_df, right_df, *args, **kwargs): """ This is a helper to call a function on GeoSeries and GeoDataFrame arguments. 
        For example, 'intersection' is a member of both GeoSeries and
        GeoDataFrame and can take either GeoSeries or GeoDataFrame inputs.
        This function has the ability to test all four combinations of
        input types.

        Parameters
        ----------
        op : str
            The operation to be tested, e.g., 'intersection'
        expected : GeoSeries or geometry
            The expected result of the operation
        left : GeoSeries
        right : GeoSeries
        fcmp : function
            Called with the result of the operation and expected. It should
            assert if the result is incorrect
        left_df : bool
            If the left input should also be called with a GeoDataFrame
        right_df : bool
            Indicates whether the right input should be called with a
            GeoDataFrame
        """

        def _make_gdf(s):
            n = len(s)
            col1 = string.ascii_lowercase[:n]
            col2 = range(n)
            return GeoDataFrame(
                {
                    "geometry": s.values,
                    "col1": col1,
                    "col2": col2
                },
                index=s.index,
                crs=s.crs,
            )

        # Test GeoSeries.op(GeoSeries)
        result = getattr(left, op)(right, *args, **kwargs)
        fcmp(result, expected)

        if left_df:
            # Test GeoDataFrame.op(GeoSeries)
            gdf_left = _make_gdf(left)
            result = getattr(gdf_left, op)(right, *args, **kwargs)
            fcmp(result, expected)

        if right_df:
            # Test GeoSeries.op(GeoDataFrame)
            gdf_right = _make_gdf(right)
            result = getattr(left, op)(gdf_right, *args, **kwargs)
            fcmp(result, expected)

            if left_df:
                # Test GeoDataFrame.op(GeoDataFrame)
                result = getattr(gdf_left, op)(gdf_right, *args, **kwargs)
                fcmp(result, expected)

    def _test_unary(self, op, expected, a, fcmp):
        # GeoSeries, (GeoSeries or geometry)
        result = getattr(a, op)
        fcmp(result, expected)

        # GeoDataFrame, (GeoSeries or geometry)
        gdf = self.gdf1.set_geometry(a)
        result = getattr(gdf, op)
        fcmp(result, expected)

    # TODO reenable for all operations once we use pyproj > 2
    # def test_crs_warning(self):
    #     # operations on geometries should warn for different CRS
    #     no_crs_g3 = self.g3.copy()
    #     no_crs_g3.crs = None
    #     with pytest.warns(UserWarning):
    #         self._test_binary_topological('intersection', self.g3,
    #                                       self.g3, no_crs_g3)

    def test_intersection(self):
        self._test_binary_topological("intersection", self.t1, self.g1,
                                      self.g2)
        self._test_binary_topological("intersection", self.all_none,
                                      self.g1, self.empty)

    def test_union_series(self):
        self._test_binary_topological("union", self.sq, self.g1, self.g2)

    def test_union_polygon(self):
        self._test_binary_topological("union", self.sq, self.g1, self.t2)

    def test_symmetric_difference_series(self):
        self._test_binary_topological("symmetric_difference", self.sq,
                                      self.g3, self.g4)

    def test_symmetric_difference_poly(self):
        expected = GeoSeries([GeometryCollection(), self.sq],
                             crs=self.g3.crs)
        self._test_binary_topological("symmetric_difference", expected,
                                      self.g3, self.t1)

    def test_difference_series(self):
        expected = GeoSeries([GeometryCollection(), self.t2])
        self._test_binary_topological("difference", expected, self.g1,
                                      self.g2)

    def test_difference_poly(self):
        expected = GeoSeries([self.t1, self.t1])
        self._test_binary_topological("difference", expected, self.g1,
                                      self.t2)

    def test_geo_op_empty_result(self):
        l1 = LineString([(0, 0), (1, 1)])
        l2 = LineString([(2, 2), (3, 3)])
        expected = GeoSeries([GeometryCollection()])
        # binary geo resulting in empty geometry
        result = GeoSeries([l1]).intersection(l2)
        assert_geoseries_equal(result, expected)
        # binary geo empty result with right GeoSeries
        result = GeoSeries([l1]).intersection(GeoSeries([l2]))
        assert_geoseries_equal(result, expected)
        # unary geo resulting in empty geometry
        result = GeoSeries([GeometryCollection()]).convex_hull
        assert_geoseries_equal(result, expected)

    def test_boundary(self):
        l1 = LineString([(0, 0), (1, 0), (1, 1), (0, 0)])
        l2 = LineString([(0, 0), (1, 0), (1, 1),
(0, 1), (0, 0)]) expected = GeoSeries([l1, l2], index=self.g1.index, crs=self.g1.crs) self._test_unary_topological("boundary", expected, self.g1) def test_area(self): expected = Series(np.array([0.5, 1.0]), index=self.g1.index) self._test_unary_real("area", expected, self.g1) expected = Series(np.array([0.5, np.nan]), index=self.na_none.index) self._test_unary_real("area", expected, self.na_none) def test_bounds(self): # Set columns to get the order right expected = DataFrame( { "minx": [0.0, 0.0], "miny": [0.0, 0.0], "maxx": [1.0, 1.0], "maxy": [1.0, 1.0], }, index=self.g1.index, columns=["minx", "miny", "maxx", "maxy"], ) result = self.g1.bounds assert_frame_equal(expected, result) gdf = self.gdf1.set_geometry(self.g1) result = gdf.bounds assert_frame_equal(expected, result) def test_bounds_empty(self): # test bounds of empty GeoSeries # https://github.com/geopandas/geopandas/issues/1195 s = GeoSeries([]) result = s.bounds expected = DataFrame(columns=["minx", "miny", "maxx", "maxy"], index=s.index, dtype="float64") assert_frame_equal(result, expected) def test_unary_union(self): p1 = self.t1 p2 = Polygon([(2, 0), (3, 0), (3, 1)]) expected = unary_union([p1, p2]) g = GeoSeries([p1, p2]) self._test_unary_topological("unary_union", expected, g) def test_contains(self): expected = [True, False, True, False, False, False, False] assert_array_dtype_equal(expected, self.g0.contains(self.t1)) def test_length(self): expected = Series(np.array([2 + np.sqrt(2), 4]), index=self.g1.index) self._test_unary_real("length", expected, self.g1) expected = Series(np.array([2 + np.sqrt(2), np.nan]), index=self.na_none.index) self._test_unary_real("length", expected, self.na_none) def test_crosses(self): expected = [False, False, False, False, False, False, False] assert_array_dtype_equal(expected, self.g0.crosses(self.t1)) expected = [False, True] assert_array_dtype_equal(expected, self.crossed_lines.crosses(self.l3)) def test_disjoint(self): expected = [False, False, False, False, False, True, False] assert_array_dtype_equal(expected, self.g0.disjoint(self.t1)) def test_relate(self): expected = Series( [ "212101212", "212101212", "212FF1FF2", "2FFF1FFF2", "FF2F112F2", "FF0FFF212", None, ], index=self.g0.index, ) assert_array_dtype_equal(expected, self.g0.relate(self.inner_sq)) expected = Series(["FF0FFF212", None], index=self.g6.index) assert_array_dtype_equal(expected, self.g6.relate(self.na_none)) def test_distance(self): expected = Series(np.array([np.sqrt((5 - 1)**2 + (5 - 1)**2), np.nan]), self.na_none.index) assert_array_dtype_equal(expected, self.na_none.distance(self.p0)) expected = Series(np.array([np.sqrt(4**2 + 4**2), np.nan]), self.g6.index) assert_array_dtype_equal(expected, self.g6.distance(self.na_none)) def test_intersects(self): expected = [True, True, True, True, True, False, False] assert_array_dtype_equal(expected, self.g0.intersects(self.t1)) expected = [True, False] assert_array_dtype_equal(expected, self.na_none.intersects(self.t2)) expected = np.array([], dtype=bool) assert_array_dtype_equal(expected, self.empty.intersects(self.t1)) expected = np.array([], dtype=bool) assert_array_dtype_equal(expected, self.empty.intersects(self.empty_poly)) expected = [False] * 7 assert_array_dtype_equal(expected, self.g0.intersects(self.empty_poly)) def test_overlaps(self): expected = [True, True, False, False, False, False, False] assert_array_dtype_equal(expected, self.g0.overlaps(self.inner_sq)) expected = [False, False] assert_array_dtype_equal(expected, self.g4.overlaps(self.t1)) def 
test_touches(self): expected = [False, True, False, False, False, False, False] assert_array_dtype_equal(expected, self.g0.touches(self.t1)) def test_within(self): expected = [True, False, False, False, False, False, False] assert_array_dtype_equal(expected, self.g0.within(self.t1)) expected = [True, True, True, True, True, False, False] assert_array_dtype_equal(expected, self.g0.within(self.sq)) def test_is_valid(self): expected = Series(np.array([True] * len(self.g1)), self.g1.index) self._test_unary_real("is_valid", expected, self.g1) def test_is_empty(self): expected = Series(np.array([False] * len(self.g1)), self.g1.index) self._test_unary_real("is_empty", expected, self.g1) def test_is_ring(self): expected = Series(np.array([True] * len(self.g1)), self.g1.index) self._test_unary_real("is_ring", expected, self.g1) def test_is_simple(self): expected = Series(np.array([True] * len(self.g1)), self.g1.index) self._test_unary_real("is_simple", expected, self.g1) def test_has_z(self): expected = Series([False, True], self.g_3d.index) self._test_unary_real("has_z", expected, self.g_3d) def test_xy_points(self): expected_x = [-73.9847, -74.0446] expected_y = [40.7484, 40.6893] assert_array_dtype_equal(expected_x, self.landmarks.geometry.x) assert_array_dtype_equal(expected_y, self.landmarks.geometry.y) def test_xy_polygons(self): # accessing x attribute in polygon geoseries should raise an error with pytest.raises(ValueError): _ = self.gdf1.geometry.x # and same for accessing y attribute in polygon geoseries with pytest.raises(ValueError): _ = self.gdf1.geometry.y def test_centroid(self): polygon = Polygon([(-1, -1), (1, -1), (1, 1), (-1, 1)]) point = Point(0, 0) polygons = GeoSeries([polygon for i in range(3)]) points = GeoSeries([point for i in range(3)]) assert_geoseries_equal(polygons.centroid, points) def test_convex_hull(self): # the convex hull of a square should be the same as the square squares = GeoSeries([self.sq for i in range(3)]) assert_geoseries_equal(squares, squares.convex_hull) def test_exterior(self): exp_exterior = GeoSeries([LinearRing(p.boundary) for p in self.g3]) for expected, computed in zip(exp_exterior, self.g3.exterior): assert computed.equals(expected) def test_interiors(self): original = GeoSeries([self.t1, self.nested_squares]) # This is a polygon with no interior. expected = [] assert original.interiors[0] == expected # This is a polygon with an interior. 
expected = LinearRing(self.inner_sq.boundary) assert original.interiors[1][0].equals(expected) def test_interpolate(self): expected = GeoSeries([Point(0.5, 1.0), Point(0.75, 1.0)]) self._test_binary_topological("interpolate", expected, self.g5, 0.75, normalized=True) expected = GeoSeries([Point(0.5, 1.0), Point(1.0, 0.5)]) self._test_binary_topological("interpolate", expected, self.g5, 1.5) def test_interpolate_distance_array(self): expected = GeoSeries([Point(0.0, 0.75), Point(1.0, 0.5)]) self._test_binary_topological("interpolate", expected, self.g5, np.array([0.75, 1.5])) expected = GeoSeries([Point(0.5, 1.0), Point(0.0, 1.0)]) self._test_binary_topological("interpolate", expected, self.g5, np.array([0.75, 1.5]), normalized=True) def test_interpolate_distance_wrong_length(self): distances = np.array([1, 2, 3]) with pytest.raises(ValueError): self.g5.interpolate(distances) def test_interpolate_distance_wrong_index(self): distances = Series([1, 2], index=[99, 98]) with pytest.raises(ValueError): self.g5.interpolate(distances) def test_project(self): expected = Series([2.0, 1.5], index=self.g5.index) p = Point(1.0, 0.5) self._test_binary_real("project", expected, self.g5, p) expected = Series([1.0, 0.5], index=self.g5.index) self._test_binary_real("project", expected, self.g5, p, normalized=True) def test_affine_transform(self): # 45 degree reflection matrix matrix = [0, 1, 1, 0, 0, 0] expected = self.g4 res = self.g3.affine_transform(matrix) assert_geoseries_equal(expected, res) def test_translate_tuple(self): trans = self.sol.x - self.esb.x, self.sol.y - self.esb.y assert self.landmarks.translate(*trans)[0].equals(self.sol) res = self.gdf1.set_geometry(self.landmarks).translate(*trans)[0] assert res.equals(self.sol) def test_rotate(self): angle = 98 expected = self.g4 o = Point(0, 0) res = self.g4.rotate(angle, origin=o).rotate(-angle, origin=o) assert geom_almost_equals(self.g4, res) res = self.gdf1.set_geometry(self.g4).rotate(angle, origin=Point(0, 0)) assert geom_almost_equals(expected, res.rotate(-angle, origin=o)) def test_scale(self): expected = self.g4 scale = 2.0, 1.0 inv = tuple(1.0 / i for i in scale) o = Point(0, 0) res = self.g4.scale(*scale, origin=o).scale(*inv, origin=o) assert geom_almost_equals(expected, res) res = self.gdf1.set_geometry(self.g4).scale(*scale, origin=o) res = res.scale(*inv, origin=o) assert geom_almost_equals(expected, res) def test_skew(self): expected = self.g4 skew = 45.0 o = Point(0, 0) # Test xs res = self.g4.skew(xs=skew, origin=o).skew(xs=-skew, origin=o) assert geom_almost_equals(expected, res) res = self.gdf1.set_geometry(self.g4).skew(xs=skew, origin=o) res = res.skew(xs=-skew, origin=o) assert geom_almost_equals(expected, res) # Test ys res = self.g4.skew(ys=skew, origin=o).skew(ys=-skew, origin=o) assert geom_almost_equals(expected, res) res = self.gdf1.set_geometry(self.g4).skew(ys=skew, origin=o) res = res.skew(ys=-skew, origin=o) assert geom_almost_equals(expected, res) def test_buffer(self): original = GeoSeries([Point(0, 0)]) expected = GeoSeries( [Polygon(((5, 0), (0, -5), (-5, 0), (0, 5), (5, 0)))]) calculated = original.buffer(5, resolution=1) assert geom_almost_equals(expected, calculated) def test_buffer_args(self): args = dict(cap_style=3, join_style=2, mitre_limit=2.5) calculated_series = self.g0.buffer(10, **args) for original, calculated in zip(self.g0, calculated_series): if original is None: assert calculated is None else: expected = original.buffer(10, **args) assert calculated.equals(expected) def 
test_buffer_distance_array(self): original = GeoSeries([self.p0, self.p0]) expected = GeoSeries([ Polygon(((6, 5), (5, 4), (4, 5), (5, 6), (6, 5))), Polygon(((10, 5), (5, 0), (0, 5), (5, 10), (10, 5))), ]) calculated = original.buffer(np.array([1, 5]), resolution=1) assert_geoseries_equal(calculated, expected, check_less_precise=True) def test_buffer_distance_wrong_length(self): original = GeoSeries([self.p0, self.p0]) distances = np.array([1, 2, 3]) with pytest.raises(ValueError): original.buffer(distances) def test_buffer_distance_wrong_index(self): original = GeoSeries([self.p0, self.p0], index=[0, 1]) distances = Series(data=[1, 2], index=[99, 98]) with pytest.raises(ValueError): original.buffer(distances) def test_buffer_empty_none(self): p = Polygon([(0, 0), (0, 1), (1, 1), (1, 0)]) s = GeoSeries([p, GeometryCollection(), None]) result = s.buffer(0) assert_geoseries_equal(result, s) result = s.buffer(np.array([0, 0, 0])) assert_geoseries_equal(result, s) def test_envelope(self): e = self.g3.envelope assert np.all(e.geom_equals(self.sq)) assert isinstance(e, GeoSeries) assert self.g3.crs == e.crs def test_total_bounds(self): bbox = self.sol.x, self.sol.y, self.esb.x, self.esb.y assert isinstance(self.landmarks.total_bounds, np.ndarray) assert tuple(self.landmarks.total_bounds) == bbox df = GeoDataFrame({ "geometry": self.landmarks, "col1": range(len(self.landmarks)) }) assert tuple(df.total_bounds) == bbox def test_explode_geoseries(self): s = GeoSeries([ MultiPoint([(0, 0), (1, 1)]), MultiPoint([(2, 2), (3, 3), (4, 4)]) ]) s.index.name = "test_index_name" expected_index_name = ["test_index_name", None] index = [(0, 0), (0, 1), (1, 0), (1, 1), (1, 2)] expected = GeoSeries( [Point(0, 0), Point(1, 1), Point(2, 2), Point(3, 3), Point(4, 4)], index=MultiIndex.from_tuples(index, names=expected_index_name), ) assert_geoseries_equal(expected, s.explode()) @pytest.mark.parametrize("index_name", [None, "test"]) def test_explode_geodataframe(self, index_name): s = GeoSeries([MultiPoint([Point(1, 2), Point(2, 3)]), Point(5, 5)]) df = GeoDataFrame({"col": [1, 2], "geometry": s}) df.index.name = index_name test_df = df.explode() expected_s = GeoSeries([Point(1, 2), Point(2, 3), Point(5, 5)]) expected_df = GeoDataFrame({"col": [1, 1, 2], "geometry": expected_s}) expected_index = MultiIndex( [[0, 1], [0, 1]], # levels [[0, 0, 1], [0, 1, 0]], # labels/codes names=[index_name, None], ) expected_df = expected_df.set_index(expected_index) assert_frame_equal(test_df, expected_df) # # Test '&', '|', '^', and '-' # The left can only be a GeoSeries. The right hand side can be a # GeoSeries, GeoDataFrame or Shapely geometry # def test_intersection_operator(self): self._test_binary_operator("__and__", self.t1, self.g1, self.g2) def test_union_operator(self): self._test_binary_operator("__or__", self.sq, self.g1, self.g2) def test_union_operator_polygon(self): self._test_binary_operator("__or__", self.sq, self.g1, self.t2) def test_symmetric_difference_operator(self): self._test_binary_operator("__xor__", self.sq, self.g3, self.g4) def test_difference_series2(self): expected = GeoSeries([GeometryCollection(), self.t2]) self._test_binary_operator("__sub__", expected, self.g1, self.g2) def test_difference_poly2(self): expected = GeoSeries([self.t1, self.t1]) self._test_binary_operator("__sub__", expected, self.g1, self.t2)
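# --- Closing sketch of the operator aliases exercised above (available in the
# geopandas version these tests target): on GeoSeries, '&' is intersection,
# '|' union, '^' symmetric_difference, and '-' difference, all aligned
# elementwise on the index. The geometries mirror the fixtures above.
from geopandas import GeoSeries
from shapely.geometry import Polygon

t1 = Polygon([(0, 0), (1, 0), (1, 1)])
sq = Polygon([(0, 0), (1, 0), (1, 1), (0, 1)])
g1 = GeoSeries([t1, sq])
g2 = GeoSeries([sq, t1])
assert (g1 & g2).geom_equals(GeoSeries([t1, t1])).all()  # intersection
assert (g1 | g2).geom_equals(GeoSeries([sq, sq])).all()  # union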